Skip to content

Commit

Permalink
Remove legacy test, overfit, and predict modes from Learner (#2024)
Browse files Browse the repository at this point in the history
Co-authored-by: Adeel Hassan <[email protected]>
  • Loading branch information
AdeelH and AdeelH authored Jan 4, 2024
1 parent 2a115da commit 9d034ac
Show file tree
Hide file tree
Showing 15 changed files with 91 additions and 232 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


def register_plugin(registry):
# NOTE(review): this is a rendered diff hunk whose -/+ markers were lost.
# Presumably the first call below is the removed line and the second is its
# replacement (plugin version bumped from 1 to 2) -- confirm against the
# actual file.
registry.set_plugin_version('rastervision.pytorch_backend', 1)
registry.set_plugin_version('rastervision.pytorch_backend', 2)


import rastervision.pipeline
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,7 @@ def get_config(runner,
test (bool, optional): If True, does the following simplifications:
(1) Uses only the first 1 scene
(2) Uses only a 600x600 crop of the scenes
(3) Enables test mode in the learner, which makes it use the
test_batch_sz and test_num_epochs, among other things.
(3) Trains for only 4 epochs.
Defaults to False.
Returns:
Expand Down Expand Up @@ -173,8 +172,7 @@ def make_scene(scene_info) -> SceneConfig:

solver = SolverConfig(
lr=1e-4,
num_epochs=20,
test_num_epochs=4,
num_epochs=20 if not test else 4,
batch_sz=32,
one_cycle=True,
external_loss_def=external_loss_def)
Expand All @@ -183,7 +181,6 @@ def make_scene(scene_info) -> SceneConfig:
data=data,
model=model,
solver=solver,
test_mode=test,
log_tensorboard=True,
run_tensorboard=False)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,7 @@ def get_config(runner,
test (bool, optional): If True, does the following simplifications:
(1) Uses only the first 2 scenes
(2) Uses only a 2000x2000 crop of the scenes
(3) Enables test mode in the learner, which makes it use the
test_batch_sz and test_num_epochs, among other things.
(3) Trains for only 2 epochs.
Defaults to False.
Returns:
Expand Down Expand Up @@ -181,13 +180,13 @@ def make_scene(id: str) -> SceneConfig:
model=model,
solver=SolverConfig(
lr=1e-4,
num_epochs=10,
test_num_epochs=2,
num_epochs=10 if not test else 2,
batch_sz=16,
one_cycle=True),
one_cycle=True,
),
log_tensorboard=False,
run_tensorboard=False,
test_mode=test)
)

predict_options = ObjectDetectionPredictOptions(
merge_thresh=0.5, score_thresh=0.9)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,7 @@ def get_config(runner,
test (bool, optional): If True, does the following simplifications:
(1) Uses only the first 2 scenes.
(2) Uses only a 2000x2000 crop of the scenes.
(3) Enables test mode in the learner, which makes it use the
test_batch_sz and test_num_epochs, among other things.
(3) Trains for only 2 epochs.
Defaults to False.
Returns:
Expand Down Expand Up @@ -114,13 +113,13 @@ def make_scene(scene_info):
model=ObjectDetectionModelConfig(backbone=Backbone.resnet50),
solver=SolverConfig(
lr=1e-4,
num_epochs=10,
test_num_epochs=2,
num_epochs=10 if not test else 2,
batch_sz=16,
one_cycle=True),
one_cycle=True,
),
log_tensorboard=True,
run_tensorboard=False,
test_mode=test)
)

return ObjectDetectionConfig(
root_uri=root_uri,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,7 @@ def get_config(runner,
test (bool, optional): If True, does the following simplifications:
(1) Uses only the first 2 scenes
(2) Uses only a 600x600 crop of the scenes
(3) Enables test mode in the learner, which makes it use the
test_batch_sz and test_num_epochs, among other things.
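(3) Trains for only 2 epochs and uses a batch size of 2.
NOTE(review): the SolverConfig shown for this file is built with fixed
num_epochs=10 and batch_sz=8 and does not reference `test` -- confirm
that the test-mode values are actually applied somewhere.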
Defaults to False.
Returns:
Expand Down Expand Up @@ -222,15 +221,10 @@ def make_scene(id) -> SceneConfig:
data=data,
model=model,
solver=SolverConfig(
lr=1e-4,
num_epochs=10,
test_num_epochs=2,
batch_sz=8,
test_batch_sz=2,
one_cycle=True),
lr=1e-4, num_epochs=10, batch_sz=8, one_cycle=True),
log_tensorboard=True,
run_tensorboard=False,
test_mode=test)
)

pipeline = SemanticSegmentationConfig(
root_uri=root_uri,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,7 @@ def get_config(runner,
test (bool, optional): If True, does the following simplifications:
(1) Uses only the first 2 scenes
(2) Uses only a 600x600 crop of the scenes
(3) Enables test mode in the learner, which makes it use the
test_batch_sz and test_num_epochs, among other things.
(3) Trains for only 2 epochs and uses a batch size of 2.
Defaults to False.
Returns:
Expand Down Expand Up @@ -153,21 +152,18 @@ def get_config(runner,
# --------------------------------------------
model_config = SemanticSegmentationModelConfig(backbone=Backbone.resnet50)

num_epochs = NUM_EPOCHS if not test else TEST_MODE_NUM_EPOCHS
batch_sz = BATCH_SIZE if not test else TEST_MODE_BATCH_SIZE
solver_config = SolverConfig(
lr=LR,
num_epochs=NUM_EPOCHS,
batch_sz=BATCH_SIZE,
test_num_epochs=TEST_MODE_NUM_EPOCHS,
test_batch_sz=TEST_MODE_BATCH_SIZE,
one_cycle=ONE_CYCLE)
lr=LR, num_epochs=num_epochs, batch_sz=batch_sz, one_cycle=ONE_CYCLE)

backend_config = PyTorchSemanticSegmentationConfig(
data=data,
model=model_config,
solver=solver_config,
log_tensorboard=LOG_TENSORBOARD,
run_tensorboard=RUN_TENSORBOARD,
test_mode=test)
)

# -----------------------------------------------
# Pass configurations to the pipeline config
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,7 @@ def get_config(runner,
True.
test (bool, optional): If True, does the following simplifications:
(1) Uses only a small subset of training and validation scenes.
(2) Enables test mode in the learner, which makes it use the
test_batch_sz and test_num_epochs, among other things.
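(2) Trains for only 2 epochs.
NOTE(review): the SolverConfig shown for this file is built with a fixed
num_epochs=5 and does not reference `test` -- confirm that the test-mode
value is actually applied somewhere.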
Defaults to False.
Returns:
Expand Down Expand Up @@ -214,15 +213,10 @@ def get_config(runner,
backend = PyTorchSemanticSegmentationConfig(
data=data,
model=SemanticSegmentationModelConfig(backbone=Backbone.resnet50),
solver=SolverConfig(
lr=1e-4,
num_epochs=5,
test_num_epochs=2,
batch_sz=8,
one_cycle=True),
solver=SolverConfig(lr=1e-4, num_epochs=5, batch_sz=8, one_cycle=True),
log_tensorboard=True,
run_tensorboard=False,
test_mode=test)
)

return SemanticSegmentationConfig(
root_uri=root_uri,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ def get_learner_config(self, pipeline):
data=self.data,
model=self.model,
solver=self.solver,
test_mode=self.test_mode,
output_uri=pipeline.train_uri,
log_tensorboard=self.log_tensorboard,
run_tensorboard=self.run_tensorboard,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,17 @@
log = logging.getLogger(__name__)


@register_config('pytorch_learner_backend')
def pytorch_learner_backend_config_upgrader(cfg_dict: dict,
                                            version: int) -> dict:
    """Drop fields that no longer exist from a serialized backend config.

    Args:
        cfg_dict: Serialized config dict. It is modified in place.
        version: Version that ``cfg_dict`` was written with.

    Returns:
        The same ``cfg_dict`` object. When ``version`` is 1 the ``test_mode``
        key is removed if present; for any other version the dict is returned
        untouched.
    """
    if version == 1:
        # removed in version 2
        # pop() with a default so configs that never set the field still pass
        cfg_dict.pop('test_mode', None)
    return cfg_dict


@register_config(
'pytorch_learner_backend',
upgrader=pytorch_learner_backend_config_upgrader)
class PyTorchLearnerBackendConfig(BackendConfig):
"""Configure a :class:`.PyTorchLearnerBackend`."""

Expand All @@ -23,12 +33,6 @@ class PyTorchLearnerBackendConfig(BackendConfig):
run_tensorboard: bool = Field(
False,
description='If True, run Tensorboard server pointing at log files.')
test_mode: bool = Field(
False,
description=
('This field is passed along to the LearnerConfig which is returned by '
'get_learner_config(). For more info, see the docs for'
'pytorch_learner.learner_config.LearnerConfig.test_mode.'))
save_all_checkpoints: bool = Field(
False,
description=(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ def get_learner_config(self, pipeline):
data=self.data,
model=self.model,
solver=self.solver,
test_mode=self.test_mode,
output_uri=pipeline.train_uri,
log_tensorboard=self.log_tensorboard,
run_tensorboard=self.run_tensorboard,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ def get_learner_config(self, pipeline):
data=self.data,
model=self.model,
solver=self.solver,
test_mode=self.test_mode,
output_uri=pipeline.train_uri,
log_tensorboard=self.log_tensorboard,
run_tensorboard=self.run_tensorboard,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


def register_plugin(registry):
# NOTE(review): this is a rendered diff hunk whose -/+ markers were lost.
# Presumably the first call below is the removed line and the second is its
# replacement (plugin version bumped from 4 to 5) -- confirm against the
# actual file.
registry.set_plugin_version('rastervision.pytorch_learner', 4)
registry.set_plugin_version('rastervision.pytorch_learner', 5)


import rastervision.pipeline
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ def __init__(self,
else:
self.output_dir_local = get_local_path(self.output_dir, tmp_dir)
make_dir(self.output_dir_local, force_empty=True)
if self.training and not cfg.overfit_mode:
if self.training:
self.sync_from_cloud()
log.info(f'Local output dir: {self.output_dir_local}')
log.info(f'Remote output dir: {self.output_dir}')
Expand Down Expand Up @@ -394,25 +394,19 @@ def main(self):
resume if interrupted), logs stats, plots predictions, and syncs
results to the cloud.
"""
cfg = self.cfg
if not self.avoid_activating_cuda_runtime:
log_system_details()
log.info(self.cfg)
log.info(cfg)
log.info(f'Using device: {self.device}')
self.log_data_stats()
self.run_tensorboard()

cfg = self.cfg
if not cfg.predict_mode:
if not self.avoid_activating_cuda_runtime:
self.plot_dataloaders(self.cfg.data.preview_batch_limit)
if cfg.overfit_mode:
self.overfit()
else:
self.train()
if cfg.save_model_bundle:
self.save_model_bundle()
else:
self.load_checkpoint()
if not self.avoid_activating_cuda_runtime:
self.plot_dataloaders(cfg.data.preview_batch_limit)
self.train()
if cfg.save_model_bundle:
self.save_model_bundle()

self.stop_tensorboard()
if cfg.eval_train:
Expand Down Expand Up @@ -756,31 +750,6 @@ def on_epoch_end(self, curr_epoch: int, metrics: MetricDict) -> None:
if (curr_epoch + 1) % self.cfg.solver.sync_interval == 0:
self.sync_to_cloud()

def overfit(self):
    """Optimize model using the same batch repeatedly.

    Takes the first batch yielded by ``self.train_dl``, moves it to
    ``self.device`` and runs ``self.cfg.solver.overfit_num_steps`` optimizer
    steps on it, logging the loss every 25 steps. The weights are then saved
    to ``self.last_model_weights_path``.
    """
    self.on_overfit_start()

    x, y = next(iter(self.train_dl))
    x = self.to_device(x, self.device)
    y = self.to_device(y, self.device)
    batch = (x, y)

    num_steps = self.cfg.solver.overfit_num_steps
    with tqdm(range(num_steps), desc='Overfitting') as bar:
        for step in bar:
            # Clear gradients left over from the previous step so each
            # update uses only the current step's gradients.
            # NOTE(review): train_step() is not visible here; presumably it
            # does not reset gradients itself -- confirm.
            self.opt.zero_grad()
            loss = self.train_step(batch, step)['train_loss']
            loss.backward()
            self.opt.step()

            if (step + 1) % 25 == 0:
                log.info('\nstep: %d', step)
                log.info('train_loss: %f', loss)

    self.save_weights(self.last_model_weights_path)

def on_overfit_start(self):
    """Hook that is called at start of overfit routine.

    Does nothing by default.
    """

########################
# Prediction/inference
########################
Expand Down Expand Up @@ -1238,23 +1207,15 @@ def setup_data(self, distributed: Optional[bool] = None):
def build_datasets(self) -> Tuple['Dataset', 'Dataset', 'Dataset']:
"""Build Datasets for train, validation, and test splits."""
log.info(f'Building datasets ...')
# NOTE(review): rendered diff hunk with lost -/+ markers. Presumably the
# `cfg = self.cfg` line and the multi-line build() call that passes
# overfit_mode/test_mode are the removed lines, and the one-line build()
# call after them is the replacement -- confirm against the actual file.
cfg = self.cfg
train_ds, val_ds, test_ds = self.cfg.data.build(
tmp_dir=self.tmp_dir,
overfit_mode=cfg.overfit_mode,
test_mode=cfg.test_mode)
train_ds, val_ds, test_ds = self.cfg.data.build(tmp_dir=self.tmp_dir)
return train_ds, val_ds, test_ds

def build_dataset(self, split: Literal['train', 'valid', 'test']
) -> 'Dataset':
"""Build Dataset for split."""
# The return annotation previously claimed a 3-tuple of Datasets, but this
# function returns the single object assigned to `ds`.
log.info('Building %s dataset ...', split)
cfg = self.cfg
# NOTE(review): rendered diff hunk with lost -/+ markers. Presumably the
# multi-line build_dataset() call that passes overfit_mode/test_mode is the
# removed code and the one-line call after it is the replacement -- confirm
# against the actual file.
ds = cfg.data.build_dataset(
split=split,
tmp_dir=self.tmp_dir,
overfit_mode=cfg.overfit_mode,
test_mode=cfg.test_mode)
ds = cfg.data.build_dataset(split=split, tmp_dir=self.tmp_dir)
return ds

def build_dataloaders(self, distributed: Optional[bool] = None
Expand Down
Loading

0 comments on commit 9d034ac

Please sign in to comment.