From 9d034ac47400b8a232a940153e1364e9171ae64b Mon Sep 17 00:00:00 2001 From: Adeel Hassan Date: Thu, 4 Jan 2024 16:53:05 -0500 Subject: [PATCH] Remove legacy test, overfit, and predict modes from Learner (#2024) Co-authored-by: Adeel Hassan --- .../rastervision/pytorch_backend/__init__.py | 2 +- .../chip_classification/spacenet_rio.py | 7 +- .../examples/object_detection/cowc_potsdam.py | 11 +- .../examples/object_detection/xview.py | 11 +- .../semantic_segmentation/isprs_potsdam.py | 12 +- .../isprs_potsdam_multi_source.py | 14 +- .../semantic_segmentation/spacenet_vegas.py | 12 +- .../pytorch_chip_classification_config.py | 1 - .../pytorch_learner_backend_config.py | 18 +- .../pytorch_object_detection_config.py | 1 - .../pytorch_semantic_segmentation_config.py | 1 - .../rastervision/pytorch_learner/__init__.py | 2 +- .../rastervision/pytorch_learner/learner.py | 59 ++---- .../pytorch_learner/learner_config.py | 169 +++++------------- .../pytorch_learner/regression_learner.py | 3 - 15 files changed, 91 insertions(+), 232 deletions(-) diff --git a/rastervision_pytorch_backend/rastervision/pytorch_backend/__init__.py b/rastervision_pytorch_backend/rastervision/pytorch_backend/__init__.py index 05cda3d51..bbf25d179 100644 --- a/rastervision_pytorch_backend/rastervision/pytorch_backend/__init__.py +++ b/rastervision_pytorch_backend/rastervision/pytorch_backend/__init__.py @@ -2,7 +2,7 @@ def register_plugin(registry): - registry.set_plugin_version('rastervision.pytorch_backend', 1) + registry.set_plugin_version('rastervision.pytorch_backend', 2) import rastervision.pipeline diff --git a/rastervision_pytorch_backend/rastervision/pytorch_backend/examples/chip_classification/spacenet_rio.py b/rastervision_pytorch_backend/rastervision/pytorch_backend/examples/chip_classification/spacenet_rio.py index b95166d6a..79115558d 100644 --- a/rastervision_pytorch_backend/rastervision/pytorch_backend/examples/chip_classification/spacenet_rio.py +++ 
b/rastervision_pytorch_backend/rastervision/pytorch_backend/examples/chip_classification/spacenet_rio.py @@ -51,8 +51,7 @@ def get_config(runner, test (bool, optional): If True, does the following simplifications: (1) Uses only the first 1 scene (2) Uses only a 600x600 crop of the scenes - (3) Enables test mode in the learner, which makes it use the - test_batch_sz and test_num_epochs, among other things. + (3) Trains for only 4 epochs. Defaults to False. Returns: @@ -173,8 +172,7 @@ def make_scene(scene_info) -> SceneConfig: solver = SolverConfig( lr=1e-4, - num_epochs=20, - test_num_epochs=4, + num_epochs=20 if not test else 4, batch_sz=32, one_cycle=True, external_loss_def=external_loss_def) @@ -183,7 +181,6 @@ def make_scene(scene_info) -> SceneConfig: data=data, model=model, solver=solver, - test_mode=test, log_tensorboard=True, run_tensorboard=False) diff --git a/rastervision_pytorch_backend/rastervision/pytorch_backend/examples/object_detection/cowc_potsdam.py b/rastervision_pytorch_backend/rastervision/pytorch_backend/examples/object_detection/cowc_potsdam.py index 91591283c..f993bb37e 100644 --- a/rastervision_pytorch_backend/rastervision/pytorch_backend/examples/object_detection/cowc_potsdam.py +++ b/rastervision_pytorch_backend/rastervision/pytorch_backend/examples/object_detection/cowc_potsdam.py @@ -47,8 +47,7 @@ def get_config(runner, test (bool, optional): If True, does the following simplifications: (1) Uses only the first 2 scenes (2) Uses only a 2000x2000 crop of the scenes - (3) Enables test mode in the learner, which makes it use the - test_batch_sz and test_num_epochs, among other things. + (3) Trains for only 2 epochs. Defaults to False. 
Returns: @@ -181,13 +180,13 @@ def make_scene(id: str) -> SceneConfig: model=model, solver=SolverConfig( lr=1e-4, - num_epochs=10, - test_num_epochs=2, + num_epochs=10 if not test else 2, batch_sz=16, - one_cycle=True), + one_cycle=True, + ), log_tensorboard=False, run_tensorboard=False, - test_mode=test) + ) predict_options = ObjectDetectionPredictOptions( merge_thresh=0.5, score_thresh=0.9) diff --git a/rastervision_pytorch_backend/rastervision/pytorch_backend/examples/object_detection/xview.py b/rastervision_pytorch_backend/rastervision/pytorch_backend/examples/object_detection/xview.py index e7b86183a..2f2b7d8d6 100644 --- a/rastervision_pytorch_backend/rastervision/pytorch_backend/examples/object_detection/xview.py +++ b/rastervision_pytorch_backend/rastervision/pytorch_backend/examples/object_detection/xview.py @@ -35,8 +35,7 @@ def get_config(runner, test (bool, optional): If True, does the following simplifications: (1) Uses only the first 2 scenes. (2) Uses only a 2000x2000 crop of the scenes. - (3) Enables test mode in the learner, which makes it use the - test_batch_sz and test_num_epochs, among other things. + (3) Trains for only 2 epochs. Defaults to False. 
Returns: @@ -114,13 +113,13 @@ def make_scene(scene_info): model=ObjectDetectionModelConfig(backbone=Backbone.resnet50), solver=SolverConfig( lr=1e-4, - num_epochs=10, - test_num_epochs=2, + num_epochs=10 if not test else 2, batch_sz=16, - one_cycle=True), + one_cycle=True, + ), log_tensorboard=True, run_tensorboard=False, - test_mode=test) + ) return ObjectDetectionConfig( root_uri=root_uri, diff --git a/rastervision_pytorch_backend/rastervision/pytorch_backend/examples/semantic_segmentation/isprs_potsdam.py b/rastervision_pytorch_backend/rastervision/pytorch_backend/examples/semantic_segmentation/isprs_potsdam.py index 7cfded14f..f98074b84 100644 --- a/rastervision_pytorch_backend/rastervision/pytorch_backend/examples/semantic_segmentation/isprs_potsdam.py +++ b/rastervision_pytorch_backend/rastervision/pytorch_backend/examples/semantic_segmentation/isprs_potsdam.py @@ -64,8 +64,7 @@ def get_config(runner, test (bool, optional): If True, does the following simplifications: (1) Uses only the first 2 scenes (2) Uses only a 600x600 crop of the scenes - (3) Enables test mode in the learner, which makes it use the - test_batch_sz and test_num_epochs, among other things. + (3) Trains for only 2 epochs and uses a batch size of 2. Defaults to False. 
Returns: @@ -222,15 +221,10 @@ def make_scene(id) -> SceneConfig: data=data, model=model, solver=SolverConfig( - lr=1e-4, - num_epochs=10, - test_num_epochs=2, - batch_sz=8, - test_batch_sz=2, - one_cycle=True), + lr=1e-4, num_epochs=10 if not test else 2, batch_sz=8 if not test else 2, one_cycle=True), log_tensorboard=True, run_tensorboard=False, - test_mode=test) + ) pipeline = SemanticSegmentationConfig( root_uri=root_uri, diff --git a/rastervision_pytorch_backend/rastervision/pytorch_backend/examples/semantic_segmentation/isprs_potsdam_multi_source.py b/rastervision_pytorch_backend/rastervision/pytorch_backend/examples/semantic_segmentation/isprs_potsdam_multi_source.py index 17e1f32a1..1e4d5ca72 100644 --- a/rastervision_pytorch_backend/rastervision/pytorch_backend/examples/semantic_segmentation/isprs_potsdam_multi_source.py +++ b/rastervision_pytorch_backend/rastervision/pytorch_backend/examples/semantic_segmentation/isprs_potsdam_multi_source.py @@ -96,8 +96,7 @@ def get_config(runner, test (bool, optional): If True, does the following simplifications: (1) Uses only the first 2 scenes (2) Uses only a 600x600 crop of the scenes - (3) Enables test mode in the learner, which makes it use the - test_batch_sz and test_num_epochs, among other things. + (3) Trains for only 2 epochs and uses a batch size of 2. Defaults to False. 
Returns: @@ -153,13 +152,10 @@ def get_config(runner, # -------------------------------------------- model_config = SemanticSegmentationModelConfig(backbone=Backbone.resnet50) + num_epochs = NUM_EPOCHS if not test else TEST_MODE_NUM_EPOCHS + batch_sz = BATCH_SIZE if not test else TEST_MODE_BATCH_SIZE solver_config = SolverConfig( - lr=LR, - num_epochs=NUM_EPOCHS, - batch_sz=BATCH_SIZE, - test_num_epochs=TEST_MODE_NUM_EPOCHS, - test_batch_sz=TEST_MODE_BATCH_SIZE, - one_cycle=ONE_CYCLE) + lr=LR, num_epochs=num_epochs, batch_sz=batch_sz, one_cycle=ONE_CYCLE) backend_config = PyTorchSemanticSegmentationConfig( data=data, @@ -167,7 +163,7 @@ def get_config(runner, solver=solver_config, log_tensorboard=LOG_TENSORBOARD, run_tensorboard=RUN_TENSORBOARD, - test_mode=test) + ) # ----------------------------------------------- # Pass configurations to the pipeline config diff --git a/rastervision_pytorch_backend/rastervision/pytorch_backend/examples/semantic_segmentation/spacenet_vegas.py b/rastervision_pytorch_backend/rastervision/pytorch_backend/examples/semantic_segmentation/spacenet_vegas.py index fb679488d..4f7ea4848 100644 --- a/rastervision_pytorch_backend/rastervision/pytorch_backend/examples/semantic_segmentation/spacenet_vegas.py +++ b/rastervision_pytorch_backend/rastervision/pytorch_backend/examples/semantic_segmentation/spacenet_vegas.py @@ -147,8 +147,7 @@ def get_config(runner, True. test (bool, optional): If True, does the following simplifications: (1) Uses only a small subset of training and validation scenes. - (2) Enables test mode in the learner, which makes it use the - test_batch_sz and test_num_epochs, among other things. + (2) Trains for only 2 epochs. Defaults to False. 
Returns: @@ -214,15 +213,10 @@ def get_config(runner, backend = PyTorchSemanticSegmentationConfig( data=data, model=SemanticSegmentationModelConfig(backbone=Backbone.resnet50), - solver=SolverConfig( - lr=1e-4, - num_epochs=5, - test_num_epochs=2, - batch_sz=8, - one_cycle=True), + solver=SolverConfig(lr=1e-4, num_epochs=5 if not test else 2, batch_sz=8, one_cycle=True), log_tensorboard=True, run_tensorboard=False, - test_mode=test) + ) return SemanticSegmentationConfig( root_uri=root_uri, diff --git a/rastervision_pytorch_backend/rastervision/pytorch_backend/pytorch_chip_classification_config.py b/rastervision_pytorch_backend/rastervision/pytorch_backend/pytorch_chip_classification_config.py index 7871419f6..85f300964 100644 --- a/rastervision_pytorch_backend/rastervision/pytorch_backend/pytorch_chip_classification_config.py +++ b/rastervision_pytorch_backend/rastervision/pytorch_backend/pytorch_chip_classification_config.py @@ -50,7 +50,6 @@ def get_learner_config(self, pipeline): data=self.data, model=self.model, solver=self.solver, - test_mode=self.test_mode, output_uri=pipeline.train_uri, log_tensorboard=self.log_tensorboard, run_tensorboard=self.run_tensorboard, diff --git a/rastervision_pytorch_backend/rastervision/pytorch_backend/pytorch_learner_backend_config.py b/rastervision_pytorch_backend/rastervision/pytorch_backend/pytorch_learner_backend_config.py index 35a8ce517..59465bffe 100644 --- a/rastervision_pytorch_backend/rastervision/pytorch_backend/pytorch_learner_backend_config.py +++ b/rastervision_pytorch_backend/rastervision/pytorch_backend/pytorch_learner_backend_config.py @@ -11,7 +11,17 @@ log = logging.getLogger(__name__) -@register_config('pytorch_learner_backend') +def pytorch_learner_backend_config_upgrader(cfg_dict: dict, + version: int) -> dict: + if version == 1: + # removed in version 2 + cfg_dict.pop('test_mode', None) + return cfg_dict + + +@register_config( + 'pytorch_learner_backend', + upgrader=pytorch_learner_backend_config_upgrader) class 
PyTorchLearnerBackendConfig(BackendConfig): """Configure a :class:`.PyTorchLearnerBackend`.""" @@ -23,12 +33,6 @@ class PyTorchLearnerBackendConfig(BackendConfig): run_tensorboard: bool = Field( False, description='If True, run Tensorboard server pointing at log files.') - test_mode: bool = Field( - False, - description= - ('This field is passed along to the LearnerConfig which is returned by ' - 'get_learner_config(). For more info, see the docs for' - 'pytorch_learner.learner_config.LearnerConfig.test_mode.')) save_all_checkpoints: bool = Field( False, description=( diff --git a/rastervision_pytorch_backend/rastervision/pytorch_backend/pytorch_object_detection_config.py b/rastervision_pytorch_backend/rastervision/pytorch_backend/pytorch_object_detection_config.py index b33bc1c70..67c6ec5ac 100644 --- a/rastervision_pytorch_backend/rastervision/pytorch_backend/pytorch_object_detection_config.py +++ b/rastervision_pytorch_backend/rastervision/pytorch_backend/pytorch_object_detection_config.py @@ -50,7 +50,6 @@ def get_learner_config(self, pipeline): data=self.data, model=self.model, solver=self.solver, - test_mode=self.test_mode, output_uri=pipeline.train_uri, log_tensorboard=self.log_tensorboard, run_tensorboard=self.run_tensorboard, diff --git a/rastervision_pytorch_backend/rastervision/pytorch_backend/pytorch_semantic_segmentation_config.py b/rastervision_pytorch_backend/rastervision/pytorch_backend/pytorch_semantic_segmentation_config.py index 333e402d7..8d0beb201 100644 --- a/rastervision_pytorch_backend/rastervision/pytorch_backend/pytorch_semantic_segmentation_config.py +++ b/rastervision_pytorch_backend/rastervision/pytorch_backend/pytorch_semantic_segmentation_config.py @@ -50,7 +50,6 @@ def get_learner_config(self, pipeline): data=self.data, model=self.model, solver=self.solver, - test_mode=self.test_mode, output_uri=pipeline.train_uri, log_tensorboard=self.log_tensorboard, run_tensorboard=self.run_tensorboard, diff --git 
a/rastervision_pytorch_learner/rastervision/pytorch_learner/__init__.py b/rastervision_pytorch_learner/rastervision/pytorch_learner/__init__.py index 9113b3dfa..77c55967b 100644 --- a/rastervision_pytorch_learner/rastervision/pytorch_learner/__init__.py +++ b/rastervision_pytorch_learner/rastervision/pytorch_learner/__init__.py @@ -2,7 +2,7 @@ def register_plugin(registry): - registry.set_plugin_version('rastervision.pytorch_learner', 4) + registry.set_plugin_version('rastervision.pytorch_learner', 5) import rastervision.pipeline diff --git a/rastervision_pytorch_learner/rastervision/pytorch_learner/learner.py b/rastervision_pytorch_learner/rastervision/pytorch_learner/learner.py index 6ce356a86..955517417 100644 --- a/rastervision_pytorch_learner/rastervision/pytorch_learner/learner.py +++ b/rastervision_pytorch_learner/rastervision/pytorch_learner/learner.py @@ -221,7 +221,7 @@ def __init__(self, else: self.output_dir_local = get_local_path(self.output_dir, tmp_dir) make_dir(self.output_dir_local, force_empty=True) - if self.training and not cfg.overfit_mode: + if self.training: self.sync_from_cloud() log.info(f'Local output dir: {self.output_dir_local}') log.info(f'Remote output dir: {self.output_dir}') @@ -394,25 +394,19 @@ def main(self): resume if interrupted), logs stats, plots predictions, and syncs results to the cloud. 
""" + cfg = self.cfg if not self.avoid_activating_cuda_runtime: log_system_details() - log.info(self.cfg) + log.info(cfg) log.info(f'Using device: {self.device}') self.log_data_stats() self.run_tensorboard() - cfg = self.cfg - if not cfg.predict_mode: - if not self.avoid_activating_cuda_runtime: - self.plot_dataloaders(self.cfg.data.preview_batch_limit) - if cfg.overfit_mode: - self.overfit() - else: - self.train() - if cfg.save_model_bundle: - self.save_model_bundle() - else: - self.load_checkpoint() + if not self.avoid_activating_cuda_runtime: + self.plot_dataloaders(cfg.data.preview_batch_limit) + self.train() + if cfg.save_model_bundle: + self.save_model_bundle() self.stop_tensorboard() if cfg.eval_train: @@ -756,31 +750,6 @@ def on_epoch_end(self, curr_epoch: int, metrics: MetricDict) -> None: if (curr_epoch + 1) % self.cfg.solver.sync_interval == 0: self.sync_to_cloud() - def overfit(self): - """Optimize model using the same batch repeatedly.""" - self.on_overfit_start() - - x, y = next(iter(self.train_dl)) - x = self.to_device(x, self.device) - y = self.to_device(y, self.device) - batch = (x, y) - - num_steps = self.cfg.solver.overfit_num_steps - with tqdm(range(num_steps), desc='Overfitting') as bar: - for step in bar: - loss = self.train_step(batch, step)['train_loss'] - loss.backward() - self.opt.step() - - if (step + 1) % 25 == 0: - log.info('\nstep: %d', step) - log.info('train_loss: %f', loss) - - self.save_weights(self.last_model_weights_path) - - def on_overfit_start(self): - """Hook that is called at start of overfit routine.""" - ######################## # Prediction/inference ######################## @@ -1238,11 +1207,7 @@ def setup_data(self, distributed: Optional[bool] = None): def build_datasets(self) -> Tuple['Dataset', 'Dataset', 'Dataset']: """Build Datasets for train, validation, and test splits.""" log.info(f'Building datasets ...') - cfg = self.cfg - train_ds, val_ds, test_ds = self.cfg.data.build( - tmp_dir=self.tmp_dir, - 
overfit_mode=cfg.overfit_mode, - test_mode=cfg.test_mode) + train_ds, val_ds, test_ds = self.cfg.data.build(tmp_dir=self.tmp_dir) return train_ds, val_ds, test_ds def build_dataset(self, split: Literal['train', 'valid', 'test'] @@ -1250,11 +1215,7 @@ def build_dataset(self, split: Literal['train', 'valid', 'test'] """Build Dataset for split.""" log.info('Building %s dataset ...', split) cfg = self.cfg - ds = cfg.data.build_dataset( - split=split, - tmp_dir=self.tmp_dir, - overfit_mode=cfg.overfit_mode, - test_mode=cfg.test_mode) + ds = cfg.data.build_dataset(split=split, tmp_dir=self.tmp_dir) return ds def build_dataloaders(self, distributed: Optional[bool] = None diff --git a/rastervision_pytorch_learner/rastervision/pytorch_learner/learner_config.py b/rastervision_pytorch_learner/rastervision/pytorch_learner/learner_config.py index 13e543b72..4316b9d3d 100644 --- a/rastervision_pytorch_learner/rastervision/pytorch_learner/learner_config.py +++ b/rastervision_pytorch_learner/rastervision/pytorch_learner/learner_config.py @@ -318,13 +318,17 @@ def build_external_model(self, def solver_config_upgrader(cfg_dict: dict, version: int) -> dict: - if version < 4: + if version == 3: # 'ignore_last_class' replaced by 'ignore_class_index' in version 4 ignore_last_class = cfg_dict.get('ignore_last_class') if ignore_last_class is not None: if ignore_last_class is not False: cfg_dict['ignore_class_index'] = -1 del cfg_dict['ignore_last_class'] + if version == 4: + # removed in version 5 + cfg_dict.pop('test_batch_sz', None) + cfg_dict.pop('test_num_epochs', None) return cfg_dict @@ -336,12 +340,6 @@ class SolverConfig(Config): 10, description= 'Number of epochs (ie. 
sweeps through the whole training set).') - test_num_epochs: PosInt = Field( - 2, description='Number of epochs to use in test mode.') - test_batch_sz: PosInt = Field( - 4, description='Batch size to use in test mode.') - overfit_num_steps: PosInt = Field( - 1, description='Number of optimizer steps to use in overfit mode.') sync_interval: PosInt = Field( 1, description='The interval in epochs for each sync to the cloud.') batch_sz: PosInt = Field(32, description='Batch size.') @@ -785,18 +783,14 @@ def get_data_transforms(self) -> Tuple[A.BasicTransform, A.BasicTransform]: return base_transform, aug_transform - def build(self, - tmp_dir: Optional[str] = None, - overfit_mode: bool = False, - test_mode: bool = False) -> Tuple[Dataset, Dataset, Dataset]: + def build(self, tmp_dir: Optional[str] = None + ) -> Tuple[Dataset, Dataset, Dataset]: """Build and return train, val, and test datasets.""" raise NotImplementedError() def build_dataset(self, split: Literal['train', 'valid', 'test'], - tmp_dir: Optional[str] = None, - overfit_mode: bool = False, - test_mode: bool = False) -> Dataset: + tmp_dir: Optional[str] = None) -> Dataset: """Build and return dataset for a single split.""" raise NotImplementedError() @@ -880,11 +874,10 @@ def validate_group_uris(cls, values: dict) -> dict: 'len(group_train_sz_rel) != len(group_uris).') return values - def _build_dataset( - self, - dirs: Iterable[str], - tf: Optional[A.BasicTransform] = None, - ) -> Tuple[Dataset, Dataset, Dataset]: + def _build_dataset(self, + dirs: Iterable[str], + tf: Optional[A.BasicTransform] = None + ) -> Tuple[Dataset, Dataset, Dataset]: """Make datasets for a single split. 
Args: @@ -933,27 +926,17 @@ def dir_to_dataset(self, data_dir: str, transform: A.BasicTransform) -> Dataset: raise NotImplementedError() - def build(self, - tmp_dir: str, - overfit_mode: bool = False, - test_mode: bool = False) -> Tuple[Dataset, Dataset, Dataset]: + def build(self, tmp_dir: str) -> Tuple[Dataset, Dataset, Dataset]: if self.group_uris is None: - return self._get_datasets_from_uri( - self.uri, - tmp_dir=tmp_dir, - overfit_mode=overfit_mode, - test_mode=test_mode) + return self._get_datasets_from_uri(self.uri, tmp_dir=tmp_dir) if self.uri is not None: log.warning('Both DataConfig.uri and DataConfig.group_uris ' 'specified. Only DataConfig.group_uris will be used.') train_ds, valid_ds, test_ds = self._get_datasets_from_group_uris( - self.group_uris, - tmp_dir=tmp_dir, - overfit_mode=overfit_mode, - test_mode=test_mode) + self.group_uris, tmp_dir=tmp_dir) if self.train_sz is not None or self.train_sz_rel is not None: train_ds = self.random_subset_dataset( @@ -963,17 +946,11 @@ def build(self, def build_dataset(self, split: Literal['train', 'valid', 'test'], - tmp_dir: Optional[str] = None, - overfit_mode: bool = False, - test_mode: bool = False) -> Dataset: + tmp_dir: Optional[str] = None) -> Dataset: if self.group_uris is None: ds = self._get_dataset_from_uri( - self.uri, - split=split, - tmp_dir=tmp_dir, - overfit_mode=overfit_mode, - test_mode=test_mode) + self.uri, split=split, tmp_dir=tmp_dir) return ds if self.uri is not None: @@ -981,11 +958,7 @@ def build_dataset(self, 'specified. 
Only DataConfig.group_uris will be used.') ds = self._get_dataset_from_group_uris( - self.group_uris, - split=split, - tmp_dir=tmp_dir, - overfit_mode=overfit_mode, - test_mode=test_mode) + self.group_uris, split=split, tmp_dir=tmp_dir) if split == 'train': if self.train_sz is not None or self.train_sz_rel is not None: @@ -994,12 +967,8 @@ def build_dataset(self, return ds - def _get_datasets_from_uri( - self, - uri: Union[str, List[str]], - tmp_dir: str, - overfit_mode: bool = False, - test_mode: bool = False) -> Tuple[Dataset, Dataset, Dataset]: + def _get_datasets_from_uri(self, uri: Union[str, List[str]], tmp_dir: str + ) -> Tuple[Dataset, Dataset, Dataset]: """Get image train, validation, & test datasets from a single zip file. Args: @@ -1020,7 +989,7 @@ def _get_datasets_from_uri( test_dirs = [d for d in test_dirs if isdir(d)] base_transform, aug_transform = self.get_data_transforms() - train_tf = (aug_transform if not overfit_mode else base_transform) + train_tf = aug_transform val_tf, test_tf = base_transform, base_transform train_ds, val_ds, test_ds = self._build_datasets( @@ -1032,12 +1001,9 @@ def _get_datasets_from_uri( test_tf=test_tf) return train_ds, val_ds, test_ds - def _get_dataset_from_uri(self, - uri: Union[str, List[str]], + def _get_dataset_from_uri(self, uri: Union[str, List[str]], split: Literal['train', 'valid', 'test'], - tmp_dir: str, - overfit_mode: bool = False, - test_mode: bool = False) -> Dataset: + tmp_dir: str) -> Dataset: """Get image dataset from a single zip file. 
Args: @@ -1053,7 +1019,7 @@ def _get_dataset_from_uri(self, dirs = [d for d in dirs if isdir(d)] base_transform, aug_transform = self.get_data_transforms() - if split == 'train' and not overfit_mode: + if split == 'train': tf = aug_transform else: tf = base_transform @@ -1066,9 +1032,7 @@ def _get_datasets_from_group_uris( uris: Union[str, List[str]], tmp_dir: str, group_train_sz: Optional[int] = None, - group_train_sz_rel: Optional[float] = None, - overfit_mode: bool = False, - test_mode: bool = False, + group_train_sz_rel: Optional[float] = None ) -> Tuple[Dataset, Dataset, Dataset]: train_ds_lst, valid_ds_lst, test_ds_lst = [], [], [] @@ -1082,10 +1046,7 @@ def _get_datasets_from_group_uris( for uri, size in zip(uris, group_sizes): train_ds, valid_ds, test_ds = self._get_datasets_from_uri( - uri, - tmp_dir=tmp_dir, - overfit_mode=overfit_mode, - test_mode=test_mode) + uri, tmp_dir=tmp_dir) if size is not None: if isinstance(size, float): train_ds = self.random_subset_dataset( @@ -1108,10 +1069,7 @@ def _get_dataset_from_group_uris( uris: Union[str, List[str]], tmp_dir: str, group_sz: Optional[int] = None, - group_sz_rel: Optional[float] = None, - overfit_mode: bool = False, - test_mode: bool = False, - ) -> Dataset: + group_sz_rel: Optional[float] = None) -> Dataset: group_sizes = None if group_sz is not None: @@ -1123,12 +1081,7 @@ def _get_dataset_from_group_uris( per_uri_dataset = [] for uri, size in zip(uris, group_sizes): - ds = self._get_dataset_from_uri( - uri, - split=split, - tmp_dir=tmp_dir, - overfit_mode=overfit_mode, - test_mode=test_mode) + ds = self._get_dataset_from_uri(uri, split=split, tmp_dir=tmp_dir) if size is not None: if isinstance(size, float): ds = self.random_subset_dataset(ds, fraction=size) @@ -1458,12 +1411,10 @@ def scene_to_dataset(self, def build_dataset(self, split: Literal['train', 'valid', 'test'], - tmp_dir: Optional[str] = None, - overfit_mode: bool = False, - test_mode: bool = False) -> Dataset: + tmp_dir: Optional[str] = 
None) -> Dataset: base_transform, aug_transform = self.get_data_transforms() - if split == 'train' and not overfit_mode: + if split == 'train': tf = aug_transform else: tf = base_transform @@ -1477,12 +1428,10 @@ def build_dataset(self, return ds - def build(self, - tmp_dir: Optional[str] = None, - overfit_mode: bool = False, - test_mode: bool = False) -> Tuple[Dataset, Dataset, Dataset]: + def build(self, tmp_dir: Optional[str] = None + ) -> Tuple[Dataset, Dataset, Dataset]: base_transform, aug_transform = self.get_data_transforms() - train_tf = (aug_transform if not overfit_mode else base_transform) + train_tf = aug_transform val_tf, test_tf = base_transform, base_transform train_ds, val_ds, test_ds = self._build_datasets( @@ -1495,35 +1444,26 @@ def build(self, return train_ds, val_ds, test_ds -@register_config('learner') +def learner_config_upgrader(cfg_dict: dict, version: int) -> dict: + if version == 4: + # removed in version 5 + cfg_dict.pop('overfit_mode', None) + cfg_dict.pop('test_mode', None) + cfg_dict.pop('predict_mode', None) + return cfg_dict + + +@register_config('learner', upgrader=learner_config_upgrader) class LearnerConfig(Config): """Config for Learner.""" model: Optional[ModelConfig] solver: SolverConfig data: DataConfig - predict_mode: bool = Field( - False, - description='If True, skips training, loads model, and does final eval.' - ) - test_mode: bool = Field( - False, - description= - ('If True, uses test_num_epochs, test_batch_sz, truncated datasets with ' - 'only a single batch, image_sz that is cut in half, and num_workers = 0. 
' - 'This is useful for testing that code runs correctly on CPU without ' - 'multithreading before running full job on GPU.')) - overfit_mode: bool = Field( - False, - description= - ('If True, uses half image size, and instead of doing epoch-based training, ' - 'optimizes the model using a single batch repeatedly for ' - 'overfit_num_steps number of steps.')) eval_train: bool = Field( False, - description= - ('If True, runs final evaluation on training set (in addition to test set). ' - 'Useful for debugging.')) + description='If True, runs final evaluation on training set ' + '(in addition to validation set). Useful for debugging.') save_model_bundle: bool = Field( True, description= @@ -1566,25 +1506,6 @@ def validate_class_loss_weights(cls, values: dict) -> dict: f'the number of classes ({num_classes})') return values - @root_validator(skip_on_failure=True) - def update_for_mode(cls, values: dict) -> dict: - overfit_mode = values.get('overfit_mode') - test_mode = values.get('test_mode') - solver: SolverConfig = values.get('solver') - data: DataConfig = values.get('data') - - if overfit_mode: - data.img_sz = data.img_sz // 2 - if test_mode: - solver.overfit_num_steps = solver.test_overfit_num_steps - - if test_mode: - solver.num_epochs = solver.test_num_epochs - solver.batch_sz = solver.test_batch_sz - data.num_workers = 0 - - return values - def build(self, tmp_dir: Optional[str] = None, model_weights_path: Optional[str] = None, diff --git a/rastervision_pytorch_learner/rastervision/pytorch_learner/regression_learner.py b/rastervision_pytorch_learner/rastervision/pytorch_learner/regression_learner.py index 4a837b896..252ef71d2 100644 --- a/rastervision_pytorch_learner/rastervision/pytorch_learner/regression_learner.py +++ b/rastervision_pytorch_learner/rastervision/pytorch_learner/regression_learner.py @@ -43,9 +43,6 @@ def build_model(self, model_def_path: Optional[str] = None) -> 'nn.Module': ddp_rank=self.ddp_local_rank) return model - def 
on_overfit_start(self): - self.on_train_start() - def on_train_start(self): ys = [] for _, y in self.train_dl: