Remove legacy test, overfit, and predict modes from Learner (#2024)

Co-authored-by: Adeel Hassan <[email protected]>
azavea · Jan 4, 2024 · 9d034ac · 9d034ac
1 parent 2a115da
commit 9d034ac
Show file tree

Hide file tree

Showing 15 changed files with 91 additions and 232 deletions.
diff --git a/rastervision_pytorch_backend/rastervision/pytorch_backend/__init__.py b/rastervision_pytorch_backend/rastervision/pytorch_backend/__init__.py
@@ -2,7 +2,7 @@
 
 
 def register_plugin(registry):
-    registry.set_plugin_version('rastervision.pytorch_backend', 1)
+    registry.set_plugin_version('rastervision.pytorch_backend', 2)
 
 
 import rastervision.pipeline

diff --git a/...pytorch_backend/rastervision/pytorch_backend/examples/chip_classification/spacenet_rio.py b/...pytorch_backend/rastervision/pytorch_backend/examples/chip_classification/spacenet_rio.py
@@ -51,8 +51,7 @@ def get_config(runner,
         test (bool, optional): If True, does the following simplifications:
             (1) Uses only the first 1 scene
             (2) Uses only a 600x600 crop of the scenes
-            (3) Enables test mode in the learner, which makes it use the
-                test_batch_sz and test_num_epochs, among other things.
+            (3) Trains for only 4 epochs.
             Defaults to False.
 
     Returns:
@@ -173,8 +172,7 @@ def make_scene(scene_info) -> SceneConfig:
 
     solver = SolverConfig(
         lr=1e-4,
-        num_epochs=20,
-        test_num_epochs=4,
+        num_epochs=20 if not test else 4,
         batch_sz=32,
         one_cycle=True,
         external_loss_def=external_loss_def)
@@ -183,7 +181,6 @@ def make_scene(scene_info) -> SceneConfig:
         data=data,
         model=model,
         solver=solver,
-        test_mode=test,
         log_tensorboard=True,
         run_tensorboard=False)
 

diff --git a/...on_pytorch_backend/rastervision/pytorch_backend/examples/object_detection/cowc_potsdam.py b/...on_pytorch_backend/rastervision/pytorch_backend/examples/object_detection/cowc_potsdam.py
@@ -47,8 +47,7 @@ def get_config(runner,
         test (bool, optional): If True, does the following simplifications:
             (1) Uses only the first 2 scenes
             (2) Uses only a 2000x2000 crop of the scenes
-            (3) Enables test mode in the learner, which makes it use the
-                test_batch_sz and test_num_epochs, among other things.
+            (3) Trains for only 2 epochs.
             Defaults to False.
 
     Returns:
@@ -181,13 +180,13 @@ def make_scene(id: str) -> SceneConfig:
         model=model,
         solver=SolverConfig(
             lr=1e-4,
-            num_epochs=10,
-            test_num_epochs=2,
+            num_epochs=10 if not test else 2,
             batch_sz=16,
-            one_cycle=True),
+            one_cycle=True,
+        ),
         log_tensorboard=False,
         run_tensorboard=False,
-        test_mode=test)
+    )
 
     predict_options = ObjectDetectionPredictOptions(
         merge_thresh=0.5, score_thresh=0.9)

diff --git a/rastervision_pytorch_backend/rastervision/pytorch_backend/examples/object_detection/xview.py b/rastervision_pytorch_backend/rastervision/pytorch_backend/examples/object_detection/xview.py
@@ -35,8 +35,7 @@ def get_config(runner,
         test (bool, optional): If True, does the following simplifications:
             (1) Uses only the first 2 scenes.
             (2) Uses only a 2000x2000 crop of the scenes.
-            (3) Enables test mode in the learner, which makes it use the
-                test_batch_sz and test_num_epochs, among other things.
+            (3) Trains for only 2 epochs.
             Defaults to False.
 
     Returns:
@@ -114,13 +113,13 @@ def make_scene(scene_info):
         model=ObjectDetectionModelConfig(backbone=Backbone.resnet50),
         solver=SolverConfig(
             lr=1e-4,
-            num_epochs=10,
-            test_num_epochs=2,
+            num_epochs=10 if not test else 2,
             batch_sz=16,
-            one_cycle=True),
+            one_cycle=True,
+        ),
         log_tensorboard=True,
         run_tensorboard=False,
-        test_mode=test)
+    )
 
     return ObjectDetectionConfig(
         root_uri=root_uri,

diff --git a/...orch_backend/rastervision/pytorch_backend/examples/semantic_segmentation/isprs_potsdam.py b/...orch_backend/rastervision/pytorch_backend/examples/semantic_segmentation/isprs_potsdam.py
@@ -64,8 +64,7 @@ def get_config(runner,
         test (bool, optional): If True, does the following simplifications:
             (1) Uses only the first 2 scenes
             (2) Uses only a 600x600 crop of the scenes
-            (3) Enables test mode in the learner, which makes it use the
-                test_batch_sz and test_num_epochs, among other things.
+            (3) Trains for only 2 epochs and uses a batch size of 2.
             Defaults to False.
 
     Returns:
@@ -222,15 +221,14 @@ def make_scene(id) -> SceneConfig:
         data=data,
         model=model,
         solver=SolverConfig(
             lr=1e-4,
-            num_epochs=10,
-            test_num_epochs=2,
-            batch_sz=8,
-            test_batch_sz=2,
-            one_cycle=True),
+            num_epochs=10 if not test else 2,
+            batch_sz=8 if not test else 2,
+            one_cycle=True,
+        ),
         log_tensorboard=True,
         run_tensorboard=False,
-        test_mode=test)
+    )
 
     pipeline = SemanticSegmentationConfig(
         root_uri=root_uri,

diff --git a/...rastervision/pytorch_backend/examples/semantic_segmentation/isprs_potsdam_multi_source.py b/...rastervision/pytorch_backend/examples/semantic_segmentation/isprs_potsdam_multi_source.py
@@ -96,8 +96,7 @@ def get_config(runner,
         test (bool, optional): If True, does the following simplifications:
             (1) Uses only the first 2 scenes
             (2) Uses only a 600x600 crop of the scenes
-            (3) Enables test mode in the learner, which makes it use the
-                test_batch_sz and test_num_epochs, among other things.
+            (3) Trains for only 2 epochs and uses a batch size of 2.
             Defaults to False.
 
     Returns:
@@ -153,21 +152,18 @@ def get_config(runner,
     # --------------------------------------------
     model_config = SemanticSegmentationModelConfig(backbone=Backbone.resnet50)
 
+    num_epochs = NUM_EPOCHS if not test else TEST_MODE_NUM_EPOCHS
+    batch_sz = BATCH_SIZE if not test else TEST_MODE_BATCH_SIZE
     solver_config = SolverConfig(
-        lr=LR,
-        num_epochs=NUM_EPOCHS,
-        batch_sz=BATCH_SIZE,
-        test_num_epochs=TEST_MODE_NUM_EPOCHS,
-        test_batch_sz=TEST_MODE_BATCH_SIZE,
-        one_cycle=ONE_CYCLE)
+        lr=LR, num_epochs=num_epochs, batch_sz=batch_sz, one_cycle=ONE_CYCLE)
 
     backend_config = PyTorchSemanticSegmentationConfig(
         data=data,
         model=model_config,
         solver=solver_config,
         log_tensorboard=LOG_TENSORBOARD,
         run_tensorboard=RUN_TENSORBOARD,
-        test_mode=test)
+    )
 
     # -----------------------------------------------
     # Pass configurations to the pipeline config

diff --git a/...rch_backend/rastervision/pytorch_backend/examples/semantic_segmentation/spacenet_vegas.py b/...rch_backend/rastervision/pytorch_backend/examples/semantic_segmentation/spacenet_vegas.py
@@ -147,8 +147,7 @@ def get_config(runner,
             True.
         test (bool, optional): If True, does the following simplifications:
             (1) Uses only a small subset of training and validation scenes.
-            (2) Enables test mode in the learner, which makes it use the
-                test_batch_sz and test_num_epochs, among other things.
+            (2) Trains for only 2 epochs.
             Defaults to False.
 
     Returns:
@@ -214,15 +213,15 @@ def get_config(runner,
     backend = PyTorchSemanticSegmentationConfig(
         data=data,
         model=SemanticSegmentationModelConfig(backbone=Backbone.resnet50),
         solver=SolverConfig(
             lr=1e-4,
-            num_epochs=5,
-            test_num_epochs=2,
+            num_epochs=5 if not test else 2,
             batch_sz=8,
-            one_cycle=True),
+            one_cycle=True,
+        ),
         log_tensorboard=True,
         run_tensorboard=False,
-        test_mode=test)
+    )
 
     return SemanticSegmentationConfig(
         root_uri=root_uri,

diff --git a/...vision_pytorch_backend/rastervision/pytorch_backend/pytorch_chip_classification_config.py b/...vision_pytorch_backend/rastervision/pytorch_backend/pytorch_chip_classification_config.py
@@ -50,7 +50,6 @@ def get_learner_config(self, pipeline):
             data=self.data,
             model=self.model,
             solver=self.solver,
-            test_mode=self.test_mode,
             output_uri=pipeline.train_uri,
             log_tensorboard=self.log_tensorboard,
             run_tensorboard=self.run_tensorboard,

diff --git a/rastervision_pytorch_backend/rastervision/pytorch_backend/pytorch_learner_backend_config.py b/rastervision_pytorch_backend/rastervision/pytorch_backend/pytorch_learner_backend_config.py
@@ -11,7 +11,17 @@
 log = logging.getLogger(__name__)
 
 
-@register_config('pytorch_learner_backend')
+def pytorch_learner_backend_config_upgrader(cfg_dict: dict,
+                                            version: int) -> dict:
+    if version == 1:
+        # removed in version 2
+        cfg_dict.pop('test_mode', None)
+    return cfg_dict
+
+
+@register_config(
+    'pytorch_learner_backend',
+    upgrader=pytorch_learner_backend_config_upgrader)
 class PyTorchLearnerBackendConfig(BackendConfig):
     """Configure a :class:`.PyTorchLearnerBackend`."""
 
@@ -23,12 +33,6 @@ class PyTorchLearnerBackendConfig(BackendConfig):
     run_tensorboard: bool = Field(
         False,
         description='If True, run Tensorboard server pointing at log files.')
-    test_mode: bool = Field(
-        False,
-        description=
-        ('This field is passed along to the LearnerConfig which is returned by '
-         'get_learner_config(). For more info, see the docs for'
-         'pytorch_learner.learner_config.LearnerConfig.test_mode.'))
     save_all_checkpoints: bool = Field(
         False,
         description=(

diff --git a/rastervision_pytorch_backend/rastervision/pytorch_backend/pytorch_object_detection_config.py b/rastervision_pytorch_backend/rastervision/pytorch_backend/pytorch_object_detection_config.py
@@ -50,7 +50,6 @@ def get_learner_config(self, pipeline):
             data=self.data,
             model=self.model,
             solver=self.solver,
-            test_mode=self.test_mode,
             output_uri=pipeline.train_uri,
             log_tensorboard=self.log_tensorboard,
             run_tensorboard=self.run_tensorboard,

diff --git a/...sion_pytorch_backend/rastervision/pytorch_backend/pytorch_semantic_segmentation_config.py b/...sion_pytorch_backend/rastervision/pytorch_backend/pytorch_semantic_segmentation_config.py
@@ -50,7 +50,6 @@ def get_learner_config(self, pipeline):
             data=self.data,
             model=self.model,
             solver=self.solver,
-            test_mode=self.test_mode,
             output_uri=pipeline.train_uri,
             log_tensorboard=self.log_tensorboard,
             run_tensorboard=self.run_tensorboard,

diff --git a/rastervision_pytorch_learner/rastervision/pytorch_learner/__init__.py b/rastervision_pytorch_learner/rastervision/pytorch_learner/__init__.py
@@ -2,7 +2,7 @@
 
 
 def register_plugin(registry):
-    registry.set_plugin_version('rastervision.pytorch_learner', 4)
+    registry.set_plugin_version('rastervision.pytorch_learner', 5)
 
 
 import rastervision.pipeline

diff --git a/rastervision_pytorch_learner/rastervision/pytorch_learner/learner.py b/rastervision_pytorch_learner/rastervision/pytorch_learner/learner.py
@@ -221,7 +221,7 @@ def __init__(self,
         else:
             self.output_dir_local = get_local_path(self.output_dir, tmp_dir)
             make_dir(self.output_dir_local, force_empty=True)
-            if self.training and not cfg.overfit_mode:
+            if self.training:
                 self.sync_from_cloud()
             log.info(f'Local output dir: {self.output_dir_local}')
             log.info(f'Remote output dir: {self.output_dir}')
@@ -394,25 +394,19 @@ def main(self):
         resume if interrupted), logs stats, plots predictions, and syncs
         results to the cloud.
         """
+        cfg = self.cfg
         if not self.avoid_activating_cuda_runtime:
             log_system_details()
-        log.info(self.cfg)
+        log.info(cfg)
         log.info(f'Using device: {self.device}')
         self.log_data_stats()
         self.run_tensorboard()
 
-        cfg = self.cfg
-        if not cfg.predict_mode:
-            if not self.avoid_activating_cuda_runtime:
-                self.plot_dataloaders(self.cfg.data.preview_batch_limit)
-            if cfg.overfit_mode:
-                self.overfit()
-            else:
-                self.train()
-                if cfg.save_model_bundle:
-                    self.save_model_bundle()
-        else:
-            self.load_checkpoint()
+        if not self.avoid_activating_cuda_runtime:
+            self.plot_dataloaders(cfg.data.preview_batch_limit)
+        self.train()
+        if cfg.save_model_bundle:
+            self.save_model_bundle()
 
         self.stop_tensorboard()
         if cfg.eval_train:
@@ -756,31 +750,6 @@ def on_epoch_end(self, curr_epoch: int, metrics: MetricDict) -> None:
         if (curr_epoch + 1) % self.cfg.solver.sync_interval == 0:
             self.sync_to_cloud()
 
-    def overfit(self):
-        """Optimize model using the same batch repeatedly."""
-        self.on_overfit_start()
-
-        x, y = next(iter(self.train_dl))
-        x = self.to_device(x, self.device)
-        y = self.to_device(y, self.device)
-        batch = (x, y)
-
-        num_steps = self.cfg.solver.overfit_num_steps
-        with tqdm(range(num_steps), desc='Overfitting') as bar:
-            for step in bar:
-                loss = self.train_step(batch, step)['train_loss']
-                loss.backward()
-                self.opt.step()
-
-                if (step + 1) % 25 == 0:
-                    log.info('\nstep: %d', step)
-                    log.info('train_loss: %f', loss)
-
-        self.save_weights(self.last_model_weights_path)
-
-    def on_overfit_start(self):
-        """Hook that is called at start of overfit routine."""
-
     ########################
     # Prediction/inference
     ########################
@@ -1238,23 +1207,15 @@ def setup_data(self, distributed: Optional[bool] = None):
     def build_datasets(self) -> Tuple['Dataset', 'Dataset', 'Dataset']:
         """Build Datasets for train, validation, and test splits."""
         log.info(f'Building datasets ...')
-        cfg = self.cfg
-        train_ds, val_ds, test_ds = self.cfg.data.build(
-            tmp_dir=self.tmp_dir,
-            overfit_mode=cfg.overfit_mode,
-            test_mode=cfg.test_mode)
+        train_ds, val_ds, test_ds = self.cfg.data.build(tmp_dir=self.tmp_dir)
         return train_ds, val_ds, test_ds
 
-    def build_dataset(self, split: Literal['train', 'valid', 'test']
-                      ) -> Tuple['Dataset', 'Dataset', 'Dataset']:
+    def build_dataset(self,
+                      split: Literal['train', 'valid', 'test']) -> 'Dataset':
         """Build Dataset for split."""
         log.info('Building %s dataset ...', split)
         cfg = self.cfg
-        ds = cfg.data.build_dataset(
-            split=split,
-            tmp_dir=self.tmp_dir,
-            overfit_mode=cfg.overfit_mode,
-            test_mode=cfg.test_mode)
+        ds = cfg.data.build_dataset(split=split, tmp_dir=self.tmp_dir)
         return ds
 
     def build_dataloaders(self, distributed: Optional[bool] = None