diff --git a/test/base/trainer/test_accum_optimizer.py b/test/base/trainer/test_accum_optimizer.py index 0248764..ca99507 100644 --- a/test/base/trainer/test_accum_optimizer.py +++ b/test/base/trainer/test_accum_optimizer.py @@ -21,7 +21,7 @@ from test.util.store_logs_callback import StoreLogsCallback from tfaip.base import TrainerParams -from tfaip.scenario.tutorial.full.data import DataParams +from tfaip.scenario.tutorial.full.data.data_params import DataParams from tfaip.scenario.tutorial.full.scenario import TutorialScenario diff --git a/test/base/trainer/test_early_stopping.py b/test/base/trainer/test_early_stopping.py index 3b400f3..e51b5a5 100644 --- a/test/base/trainer/test_early_stopping.py +++ b/test/base/trainer/test_early_stopping.py @@ -20,7 +20,7 @@ from tensorflow.python.keras.backend import clear_session from tfaip.base import TrainerParams -from tfaip.scenario.tutorial.full.data import DataParams +from tfaip.scenario.tutorial.full.data.data_params import DataParams from tfaip.scenario.tutorial.full.scenario import TutorialScenario import logging logging.basicConfig(level=logging.DEBUG) diff --git a/test/base/trainer/test_ema.py b/test/base/trainer/test_ema.py index a6e8e36..1d9efac 100644 --- a/test/base/trainer/test_ema.py +++ b/test/base/trainer/test_ema.py @@ -21,7 +21,7 @@ from test.util.store_logs_callback import StoreLogsCallback from tfaip.base import TrainerParams -from tfaip.scenario.tutorial.full.data import DataParams +from tfaip.scenario.tutorial.full.data.data_params import DataParams from tfaip.scenario.tutorial.full.scenario import TutorialScenario diff --git a/test/base/trainer/test_multiple_lav_lists.py b/test/base/trainer/test_multiple_lav_lists.py index 05ad039..eedc6db 100644 --- a/test/base/trainer/test_multiple_lav_lists.py +++ b/test/base/trainer/test_multiple_lav_lists.py @@ -22,7 +22,7 @@ from test.util.store_logs_callback import StoreLogsCallback from tfaip.base import TrainerParams -from tfaip.scenario.tutorial.full.data import DataParams +from tfaip.scenario.tutorial.full.data.data_params import DataParams from tfaip.scenario.tutorial.full.scenario import TutorialScenario diff --git a/test/scenario/tutorial/test_tutorial_full.py b/test/scenario/tutorial/test_tutorial_full.py index 1d83a88..51104a4 100644 --- a/test/scenario/tutorial/test_tutorial_full.py +++ b/test/scenario/tutorial/test_tutorial_full.py @@ -21,7 +21,8 @@ from test.util.training import resume_training, single_train_iter, lav_test_case, warmstart_training_test_case from tfaip.base.data.databaseparams import DataGeneratorParams -from tfaip.scenario.tutorial.full.data import DataParams, Data +from tfaip.scenario.tutorial.full.data.data import Data +from tfaip.scenario.tutorial.full.data.data_params import DataParams from tfaip.scenario.tutorial.full.scenario import TutorialScenario @@ -66,7 +67,7 @@ def check(data): class TestTutorialTrain(unittest.TestCase): - def setUp(self) -> None: + def tearDown(self) -> None: clear_session() def test_single_train_iter(self): diff --git a/test/scripts/test_lav.py b/test/scripts/test_lav.py index 5ed13b8..3ced831 100644 --- a/test/scripts/test_lav.py +++ b/test/scripts/test_lav.py @@ -46,7 +46,6 @@ def test_multi_lav_tutorial(self): '--data_params', 'train.batch_size=2', ]) check_call(['tfaip-multi-lav', - '--scenario', 'tutorial.full', '--export_dirs', os.path.join(d, 'best'), os.path.join(d, 'best'), '--data', 'limit=10', ]) diff --git a/test/util/training.py b/test/util/training.py index 592c968..5ad0a56 100644 --- a/test/util/training.py +++ b/test/util/training.py @@ -17,6 +17,7 @@ # ============================================================================== import json import os +import sys import tempfile import time import unittest @@ -31,7 +32,11 @@ from tfaip.util.random import set_global_random_seed -def warmstart_training_test_case(test: unittest.TestCase, scenario, scenario_params: ScenarioBaseParams, debug=True, +debug_test = sys.flags.debug + + +def warmstart_training_test_case(test: unittest.TestCase, scenario, scenario_params: ScenarioBaseParams, + debug=debug_test, delta=None): # First train a normal iteration and store the results of metrics and losses with a fixed seed # Then reload the model as warmstart, train an epoch but with a learning rate of 0 @@ -77,7 +82,7 @@ def warmstart_training_test_case(test: unittest.TestCase, scenario, scenario_par test.assertAlmostEqual(v, initial_logs[k], delta=delta) -def single_train_iter(test: unittest.TestCase, scenario, scenario_params: ScenarioBaseParams, debug=True): +def single_train_iter(test: unittest.TestCase, scenario, scenario_params: ScenarioBaseParams, debug=debug_test): scenario_params.debug_graph_construction = debug scenario_params.debug_graph_n_examples = 1 trainer_params = TrainerParams( @@ -95,7 +100,7 @@ def single_train_iter(test: unittest.TestCase, scenario, scenario_params: Scenar trainer.train() -def lav_test_case(test: unittest.TestCase, scenario: Type[ScenarioBase], scenario_params, debug=True, +def lav_test_case(test: unittest.TestCase, scenario: Type[ScenarioBase], scenario_params, debug=False, delta=None): with tempfile.TemporaryDirectory() as tmp_dir: trainer_params = TrainerParams( @@ -147,7 +152,7 @@ def lav_test_case(test: unittest.TestCase, scenario: Type[ScenarioBase], scenari test.assertAlmostEqual(bs1_results[k], bs5_results[k], delta=delta, msg=f"on key {k}") -def resume_training(test: unittest.TestCase, scenario, scenario_params, delta=None): +def resume_training(test: unittest.TestCase, scenario, scenario_params, delta=None, debug=debug_test): # simulate by setting epochs to 1, then loading the trainer_params and setting epochs to 2 with tempfile.TemporaryDirectory() as tmp_dir: store_logs_callback = StoreLogsCallback() @@ -156,6 +161,7 @@ def resume_training(test: unittest.TestCase, scenario, scenario_params, delta=No epochs=1, samples_per_epoch=scenario_params.data_params.train.batch_size, skip_model_load_test=True, # not required in this test + force_eager=debug, export_final=False, export_best=False, scenario_params=scenario_params, diff --git a/tfaip/__init__.py b/tfaip/__init__.py index cdfa874..835ffb4 100644 --- a/tfaip/__init__.py +++ b/tfaip/__init__.py @@ -15,4 +15,4 @@ # You should have received a copy of the GNU General Public License along with # tfaip. If not, see http://www.gnu.org/licenses/. # ============================================================================== -__version__ = "1.0.0" +__version__ = "1.0.1" diff --git a/tfaip/base/data/databaseparams.py b/tfaip/base/data/databaseparams.py index 931397c..23e319e 100644 --- a/tfaip/base/data/databaseparams.py +++ b/tfaip/base/data/databaseparams.py @@ -22,7 +22,7 @@ from dataclasses_json import dataclass_json -from tfaip.base.data.pipeline.datapipeline import SamplePipelineParams +from tfaip.base.data.pipeline.sample.params import SamplePipelineParams from tfaip.util.argumentparser import dc_meta logger = logging.getLogger(__name__) diff --git a/tfaip/base/data/pipeline/datagenerator.py b/tfaip/base/data/pipeline/datagenerator.py new file mode 100644 index 0000000..068ae56 --- /dev/null +++ b/tfaip/base/data/pipeline/datagenerator.py @@ -0,0 +1,35 @@ +from abc import ABC, abstractmethod +from random import shuffle +from typing import Iterable, List + +from tfaip.base import DataGeneratorParams +from tfaip.base.data.pipeline.definitions import PipelineMode, Sample + + +class DataGenerator(ABC): + def __init__(self, mode: PipelineMode, params: 'DataGeneratorParams'): + params.validate() + self.mode = mode + self.params = params + + @abstractmethod + def __len__(self): + raise NotImplementedError + + @abstractmethod + def generate(self) -> Iterable[Sample]: + raise NotImplementedError + + +class RawDataGenerator(DataGenerator): + def __init__(self, raw_data: List[Sample], mode: PipelineMode, params: 'DataGeneratorParams'): + super(RawDataGenerator, self).__init__(mode, params) + self.raw_data = raw_data + + def __len__(self): + return len(self.raw_data) + + def generate(self) -> Iterable[Sample]: + if self.mode == PipelineMode.Training: + shuffle(self.raw_data) + return self.raw_data diff --git a/tfaip/base/data/pipeline/datapipeline.py b/tfaip/base/data/pipeline/datapipeline.py index 37e6f50..bb9682f 100644 --- a/tfaip/base/data/pipeline/datapipeline.py +++ b/tfaip/base/data/pipeline/datapipeline.py @@ -18,18 +18,15 @@ import copy import gc from abc import ABC, abstractmethod -from dataclasses import dataclass, field from functools import partial -from random import shuffle from typing import TYPE_CHECKING, List, Iterable, Optional, Callable, Type import logging -from dataclasses_json import dataclass_json - -from tfaip.base.data.pipeline.dataprocessor import DataProcessorFactory, SequenceProcessor, DataProcessor -from tfaip.base.data.pipeline.definitions import Sample, PipelineMode, DataProcessorFactoryParams, \ - GENERAL_PROCESSOR -from tfaip.base.data.pipeline.parallelpipeline import ParallelDataProcessorPipeline +from tfaip.base.data.pipeline.datagenerator import DataGenerator, RawDataGenerator +from tfaip.base.data.pipeline.dataprocessor import SequenceProcessor, DataProcessor +from tfaip.base.data.pipeline.definitions import Sample, PipelineMode +from tfaip.base.data.pipeline.sample.params import SamplePipelineParams +from tfaip.base.data.pipeline.sample.processorpipeline import SampleProcessorPipeline, ParallelSampleProcessingPipeline from tfaip.base.data.pipeline.tfdatasetgenerator import TFDatasetGenerator from tfaip.util.multiprocessing.join import JoinableHolder @@ -41,78 +38,6 @@ logger = logging.getLogger(__name__) -class SampleConsumer: - pass - - -def create_processor_fn(factory: DataProcessorFactory, processors: List[DataProcessorFactoryParams], params, mode: PipelineMode) -> SequenceProcessor: - return factory.create_sequence(processors, params, mode) - - -@dataclass_json -@dataclass -class SamplePipelineParams: - run_parallel: bool = True - sample_processors: List[DataProcessorFactoryParams] = field(default_factory=list) - - -class SampleProcessorPipeline: - def __init__(self, data_pipeline: 'DataPipeline', processor_fn: Optional[Callable[[], SequenceProcessor]] = None): - self.data_pipeline = data_pipeline - self.create_processor_fn = processor_fn - - def apply(self, samples: Iterable[Sample]) -> Iterable[Sample]: - if not self.create_processor_fn: - for sample in samples: - yield sample - else: - processor = self.create_processor_fn() - for sample in samples: - r = processor.apply_on_sample(sample) - if r is not None: - yield r - - -class ParallelSampleProcessingPipeline(SampleProcessorPipeline): - def apply(self, samples: Iterable[Sample]) -> Iterable[Sample]: - parallel_pipeline = ParallelDataProcessorPipeline(self.data_pipeline, samples, - create_processor_fn=self.create_processor_fn, - auto_repeat_input=False) - for x in parallel_pipeline.output_generator(): - yield x - - parallel_pipeline.join() - - -class DataGenerator(ABC): - def __init__(self, mode: PipelineMode, params: 'DataGeneratorParams'): - params.validate() - self.mode = mode - self.params = params - - @abstractmethod - def __len__(self): - raise NotImplementedError - - @abstractmethod - def generate(self) -> Iterable[Sample]: - raise NotImplementedError - - -class RawDataGenerator(DataGenerator): - def __init__(self, raw_data: List[Sample], mode: PipelineMode, params: 'DataGeneratorParams'): - super(RawDataGenerator, self).__init__(mode, params) - self.raw_data = raw_data - - def __len__(self): - return len(self.raw_data) - - def generate(self) -> Iterable[Sample]: - if self.mode == PipelineMode.Training: - shuffle(self.raw_data) - return self.raw_data - - def _create_sequence_processor_fn(factory, *args) -> Callable[[], SequenceProcessor]: return factory.create_sequence(*args) @@ -161,7 +86,9 @@ def auto_batch(self): def create_data_generator(self) -> DataGenerator: raise NotImplementedError - def flat_input_processors(self, preload=False, non_preloadable_params=[]) -> List[DataProcessor]: + def flat_input_processors(self, preload=False, non_preloadable_params=None) -> List[DataProcessor]: + if non_preloadable_params is None: + non_preloadable_params = [] factory = self.data.__class__.data_processor_factory() params: SamplePipelineParams = self._input_processors @@ -208,9 +135,6 @@ def create_output_pipeline(self) -> Optional[SampleProcessorPipeline]: return SampleProcessorPipeline(self, self._sequence_processor_fn(params)) return SampleProcessorPipeline(self) - def create_data_consumer(self) -> SampleConsumer: - return SampleConsumer() - def __enter__(self): from tfaip.base.data.pipeline.runningdatapipeline import RunningDataPipeline return RunningDataPipeline(self) @@ -254,7 +178,6 @@ def __init__(self, super(RawDataPipeline, self).__init__(mode, data_base, generator_params, input_processors, output_processors) self.samples = samples - def to_mode(self, mode: PipelineMode) -> 'DataPipeline': return self.__class__(self.samples, mode, self.data, self.generator_params, self._input_processors, self._output_processors) diff --git a/tfaip/base/data/pipeline/sample/__init__.py b/tfaip/base/data/pipeline/sample/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tfaip/base/data/pipeline/sample/params.py b/tfaip/base/data/pipeline/sample/params.py new file mode 100644 index 0000000..7e96878 --- /dev/null +++ b/tfaip/base/data/pipeline/sample/params.py @@ -0,0 +1,13 @@ +from dataclasses import dataclass, field +from typing import List + +from dataclasses_json import dataclass_json + +from tfaip.base.data.pipeline.definitions import DataProcessorFactoryParams + + +@dataclass_json +@dataclass +class SamplePipelineParams: + run_parallel: bool = True + sample_processors: List[DataProcessorFactoryParams] = field(default_factory=list) diff --git a/tfaip/base/data/pipeline/sample/processorpipeline.py b/tfaip/base/data/pipeline/sample/processorpipeline.py new file mode 100644 index 0000000..3acd9ec --- /dev/null +++ b/tfaip/base/data/pipeline/sample/processorpipeline.py @@ -0,0 +1,40 @@ +from typing import List, Optional, Callable, Iterable, TYPE_CHECKING + +from tfaip.base.data.pipeline.dataprocessor import DataProcessorFactory, SequenceProcessor +from tfaip.base.data.pipeline.definitions import DataProcessorFactoryParams, Sample, PipelineMode +from tfaip.base.data.pipeline.parallelpipeline import ParallelDataProcessorPipeline + +if TYPE_CHECKING: + from tfaip.base.data.pipeline.datapipeline import DataPipeline + + +def create_processor_fn(factory: DataProcessorFactory, processors: List[DataProcessorFactoryParams], params, mode: PipelineMode) -> SequenceProcessor: + return factory.create_sequence(processors, params, mode) + + +class SampleProcessorPipeline: + def __init__(self, data_pipeline: 'DataPipeline', processor_fn: Optional[Callable[[], SequenceProcessor]] = None): + self.data_pipeline = data_pipeline + self.create_processor_fn = processor_fn + + def apply(self, samples: Iterable[Sample]) -> Iterable[Sample]: + if not self.create_processor_fn: + for sample in samples: + yield sample + else: + processor = self.create_processor_fn() + for sample in samples: + r = processor.apply_on_sample(sample) + if r is not None: + yield r + + +class ParallelSampleProcessingPipeline(SampleProcessorPipeline): + def apply(self, samples: Iterable[Sample]) -> Iterable[Sample]: + parallel_pipeline = ParallelDataProcessorPipeline(self.data_pipeline, samples, + create_processor_fn=self.create_processor_fn, + auto_repeat_input=False) + for x in parallel_pipeline.output_generator(): + yield x + + parallel_pipeline.join() diff --git a/tfaip/base/lav/multilav.py b/tfaip/base/lav/multilav.py index 2369d64..30bc7b4 100644 --- a/tfaip/base/lav/multilav.py +++ b/tfaip/base/lav/multilav.py @@ -43,6 +43,7 @@ def __init__(self, data_gen_params: DataGeneratorParams, predictor_fn: Callable[[List[str], PredictorParams], MultiModelPredictor], evaluator: Evaluator, + predictor_params: PredictorParams ): assert params.model_path self._params = params @@ -51,6 +52,10 @@ def __init__(self, self._evaluator = evaluator self.device_config = DeviceConfig(self._params.device_params) self.benchmark_results = PredictorBenchmarkResults() + self.predictor_params = predictor_params + predictor_params.silent = True + predictor_params.progress_bar = True + predictor_params.include_targets = True @distribute_strategy def run(self, @@ -60,11 +65,8 @@ def run(self, if callbacks is None: callbacks = [] - predictor_params = PredictorParams(self._params.device_params, - silent=True, progress_bar=True, run_eagerly=run_eagerly, - include_targets=True, - ) - predictor = self._predictor_fn(self._params.model_path, predictor_params) + self.predictor_params.run_eagerly = run_eagerly + predictor = self._predictor_fn(self._params.model_path, self.predictor_params) lav_pipeline = predictor.data.get_pipeline(PipelineMode.Evaluation, self._data_gen_params) for cb in callbacks: diff --git a/tfaip/base/scenario/scenariobase.py b/tfaip/base/scenario/scenariobase.py index 67438ec..f436727 100644 --- a/tfaip/base/scenario/scenariobase.py +++ b/tfaip/base/scenario/scenariobase.py @@ -252,12 +252,13 @@ def create_lav(cls, lav_params: 'LAVParams', scenario_params: 'ScenarioBaseParam ) @classmethod - def create_multi_lav(cls, lav_params: 'LAVParams', scenario_params: 'ScenarioBaseParams'): + def create_multi_lav(cls, lav_params: 'LAVParams', scenario_params: 'ScenarioBaseParams', predictor_params: Optional['PredictorParams']=None): return MultiLAV( lav_params, scenario_params.data_params.val, cls.create_multi_predictor, cls.create_evaluator(scenario_params.evaluator_params), + predictor_params=predictor_params if predictor_params is not None else cls.multi_predictor_cls().get_params_cls()(), ) @classmethod diff --git a/tfaip/base/trainer/callbacks/progbar.py b/tfaip/base/trainer/callbacks/progbar.py index 1a2c5bd..3675b44 100644 --- a/tfaip/base/trainer/callbacks/progbar.py +++ b/tfaip/base/trainer/callbacks/progbar.py @@ -11,6 +11,11 @@ def __init__(self, delta_time=5, **kwargs): self._delta_time = delta_time # Output every 5 secs, by default self._last_time = time.time() + def on_epoch_begin(self, epoch, logs=None): + self._last_time = time.time() + self._time_remaining = 0 + super(TFAIPProgbarLogger, self).on_epoch_begin(epoch, logs) + def _batch_update_progbar(self, batch, logs=None): super(TFAIPProgbarLogger, self)._batch_update_progbar(batch, logs) if self.verbose == 2: diff --git a/tfaip/scenario/tutorial/full/data/__init__.py b/tfaip/scenario/tutorial/full/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tfaip/scenario/tutorial/full/data.py b/tfaip/scenario/tutorial/full/data/data.py similarity index 50% rename from tfaip/scenario/tutorial/full/data.py rename to tfaip/scenario/tutorial/full/data/data.py index a1543cd..a599c5f 100644 --- a/tfaip/scenario/tutorial/full/data.py +++ b/tfaip/scenario/tutorial/full/data/data.py @@ -15,70 +15,54 @@ # You should have received a copy of the GNU General Public License along with # tfaip. If not, see http://www.gnu.org/licenses/. # ============================================================================== -import glob -from dataclasses import dataclass, field import logging -from typing import Iterable, Type +from typing import Type import tensorflow as tf import tensorflow.keras as keras -from dataclasses_json import dataclass_json from tfaip.base.data.data import DataBaseParams, DataBase from tfaip.base.data.databaseparams import DataGeneratorParams -from tfaip.base.data.pipeline.datapipeline import DataPipeline, DataGenerator, RawDataGenerator, RawDataPipeline +from tfaip.base.data.pipeline.datapipeline import RawDataPipeline, SamplePipelineParams from tfaip.base.data.listfile.listfiledata import ListFilePipelineParams from tfaip.base.data.pipeline.dataprocessor import DataProcessorFactory -from tfaip.base.data.pipeline.definitions import PipelineMode, Sample -from tfaip.util.argumentparser import dc_meta -from tfaip.util.imaging.io import load_image_from_img_file +from tfaip.base.data.pipeline.definitions import PipelineMode, DataProcessorFactoryParams +from tfaip.scenario.tutorial.full.data.data_params import DataParams +from tfaip.scenario.tutorial.full.data.data_pipeline import TutorialPipeline, to_samples +from tfaip.scenario.tutorial.full.data.processors.normalize import NormalizeProcessor logger = logging.getLogger(__name__) -@dataclass_json -@dataclass -class DataParams(DataBaseParams): - dataset: str = field(default='mnist', metadata=dc_meta( - help="The dataset to select (chose also fashion_mnist)." - )) - - -def to_samples(samples): - return [Sample(inputs={'img': img}, targets={'gt': gt.reshape((1,))}) for img, gt in zip(*samples)] - - class Data(DataBase): @classmethod def data_processor_factory(cls) -> DataProcessorFactory: - return DataProcessorFactory([]) + # List all available processors here + return DataProcessorFactory([NormalizeProcessor]) @classmethod - def data_pipeline_cls(cls) -> Type[DataPipeline]: - class TutorialPipeline(DataPipeline): - def create_data_generator(self) -> DataGenerator: - if self.mode == PipelineMode.Training: - return RawDataGenerator(to_samples(self.data.train), self.mode, self.generator_params) - elif self.mode == PipelineMode.Evaluation: - return RawDataGenerator(to_samples(self.data.test), self.mode, self.generator_params) - elif self.mode == PipelineMode.Prediction: - if isinstance(self.generator_params, ListFilePipelineParams): - # Instead of loading images to a raw pipeline, you should create a custom preprocessing pipeline - # That is used during training and prediction - assert self.generator_params.list, "No images provided" - return RawDataGenerator( - [Sample(inputs={'img': img}) for img in map(load_image_from_img_file, glob.glob(self.generator_params.list))], - self.mode, self.generator_params) - else: - return RawDataGenerator(to_samples(self.data.test), self.mode, self.generator_params) - elif self.mode == PipelineMode.Targets: - return RawDataGenerator(to_samples(self.data.test), self.mode, self.generator_params) + def data_pipeline_cls(cls) -> Type[TutorialPipeline]: return TutorialPipeline @classmethod def prediction_generator_params_cls(cls) -> Type[DataGeneratorParams]: return ListFilePipelineParams + @classmethod + def get_default_params(cls) -> DataBaseParams: + params = super(Data, cls).get_default_params() + # Define the default python input pipeline by specifying the list of processors + # A DataProcessorFactoryParams requires the name of the class registered above in data_processor_factory + # The second argument is the mode when to apply (Training (e.g., data augmentation), Prediction, Evaluation + # (=validation during training), Targets (only produce GroundTruth)), the third parameter are optional args. + params.pre_processors_ = SamplePipelineParams( + run_parallel=True, # Run the pipeline in parallel (by spawning subprocesses) + sample_processors=[ + DataProcessorFactoryParams(NormalizeProcessor.__name__) + ]) + + return params + @staticmethod def get_params_cls(): return DataParams @@ -97,7 +81,7 @@ def _input_layer_specs(self): def _target_layer_specs(self): return {'gt': tf.TensorSpec(shape=[1], dtype='uint8')} - def _list_lav_dataset(self) -> Iterable[DataPipeline]: + def _list_lav_dataset(self): # Create two evaluation datasets using test and train data test = RawDataPipeline(to_samples(self.test), PipelineMode.Evaluation, self, self._params.val) train = RawDataPipeline(to_samples(self.train), PipelineMode.Evaluation, self, self._params.val) diff --git a/tfaip/scenario/tutorial/full/data/data_params.py b/tfaip/scenario/tutorial/full/data/data_params.py new file mode 100644 index 0000000..1abe639 --- /dev/null +++ b/tfaip/scenario/tutorial/full/data/data_params.py @@ -0,0 +1,14 @@ +from dataclasses import dataclass, field + +from dataclasses_json import dataclass_json + +from tfaip.base import DataBaseParams +from tfaip.util.argumentparser import dc_meta + + +@dataclass_json +@dataclass +class DataParams(DataBaseParams): + dataset: str = field(default='mnist', metadata=dc_meta( + help="The dataset to select (chose also fashion_mnist)." + )) diff --git a/tfaip/scenario/tutorial/full/data/data_pipeline.py b/tfaip/scenario/tutorial/full/data/data_pipeline.py new file mode 100644 index 0000000..20e8505 --- /dev/null +++ b/tfaip/scenario/tutorial/full/data/data_pipeline.py @@ -0,0 +1,31 @@ +import glob + +from tfaip.base import ListFilePipelineParams +from tfaip.base.data.pipeline.datapipeline import DataPipeline, DataGenerator, RawDataGenerator +from tfaip.base.data.pipeline.definitions import PipelineMode, Sample +from tfaip.util.imaging.io import load_image_from_img_file + + +def to_samples(samples): + return [Sample(inputs={'img': img}, targets={'gt': gt.reshape((1,))}) for img, gt in zip(*samples)] + + +class TutorialPipeline(DataPipeline): + def create_data_generator(self) -> DataGenerator: + if self.mode == PipelineMode.Training: + return RawDataGenerator(to_samples(self.data.train), self.mode, self.generator_params) + elif self.mode == PipelineMode.Evaluation: + return RawDataGenerator(to_samples(self.data.test), self.mode, self.generator_params) + elif self.mode == PipelineMode.Prediction: + if isinstance(self.generator_params, ListFilePipelineParams): + # Instead of loading images to a raw pipeline, you should create a custom preprocessing pipeline + # That is used during training and prediction + assert self.generator_params.list, "No images provided" + return RawDataGenerator( + [Sample(inputs={'img': img}) for img in + map(load_image_from_img_file, glob.glob(self.generator_params.list))], + self.mode, self.generator_params) + else: + return RawDataGenerator(to_samples(self.data.test), self.mode, self.generator_params) + elif self.mode == PipelineMode.Targets: + return RawDataGenerator(to_samples(self.data.test), self.mode, self.generator_params) diff --git a/tfaip/scenario/tutorial/full/data/processors/__init__.py b/tfaip/scenario/tutorial/full/data/processors/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tfaip/scenario/tutorial/full/data/processors/normalize.py b/tfaip/scenario/tutorial/full/data/processors/normalize.py new file mode 100644 index 0000000..69f6803 --- /dev/null +++ b/tfaip/scenario/tutorial/full/data/processors/normalize.py @@ -0,0 +1,13 @@ +from tfaip.base.data.pipeline.dataprocessor import DataProcessor +from tfaip.base.data.pipeline.definitions import Sample + + +class NormalizeProcessor(DataProcessor): + """ + Example class to show how to use processors that are run in parallel in the samples in the input pipeline. + This processor will normalize and center the input sample in the range of [-1, 1] (we know the input is in [0, 255] + """ + def apply(self, sample: Sample) -> Sample: + inputs = sample.inputs.copy() + inputs['img'] = ((inputs['img'] / 255) - 0.5) * 2 + return sample.new_inputs(inputs) diff --git a/tfaip/scenario/tutorial/full/model.py b/tfaip/scenario/tutorial/full/model.py index dfbb2bd..8dd0342 100644 --- a/tfaip/scenario/tutorial/full/model.py +++ b/tfaip/scenario/tutorial/full/model.py @@ -23,6 +23,7 @@ import numpy as np from tfaip.base.imports import ModelBaseParams, ModelBase, MetricDefinition, MultiMetricDefinition +from tfaip.base.model.graphbase import GraphBase from tfaip.base.model.metric.multi import MultiMetric from tfaip.base.model.util.graph_enum import create_graph_enum from tfaip.base.trainer.callbacks.tensor_board_data_handler import TensorBoardDataHandler @@ -46,6 +47,8 @@ class ModelParams(ModelBaseParams): class TutorialModel(ModelBase): @staticmethod def get_params_cls(): + # Return the class type of the model params for this model + # These are the params defined above return ModelParams def create_graph(self, params) -> 'GraphBase': @@ -55,36 +58,53 @@ def _best_logging_settings(self): return "max", "acc" def _loss(self, inputs, outputs) -> Dict[str, AnyTensor]: + # Loss functions for the model (if several, you can weight them by overriding _loss_weights) return {'loss': tf.keras.layers.Lambda( lambda x: tf.keras.metrics.sparse_categorical_crossentropy(*x, from_logits=True), name='loss')( (inputs['gt'], outputs['logits']))} def _extended_metric(self, inputs, outputs) -> Dict[str, tf.keras.layers.Layer]: + # Extended metric is an alternative to _metric if a metric requires more than one target or output + # Override _sample_weights to provide a weighting factor for different samples in a batch return {'acc': tf.keras.layers.Lambda(lambda x: tf.keras.metrics.sparse_categorical_accuracy(*x), name='acc')( (inputs['gt'], outputs['pred']))} def _metric(self): + # Return a dict of metrics. The MetricDefinition defines the target and output which is passed to the + # respective metric. If more than one target or output is required to compute a (custom) metric, use + # _extended_metric instead return {'simple_acc': MetricDefinition("gt", "class", keras.metrics.Accuracy())} def _multi_metric(self) -> Dict[str, MultiMetricDefinition]: # Example showing how to manipulate true and pred for sub metrics class MyMultiMetric(MultiMetric): def _precompute_values(self, y_true, y_pred, sample_weight): + # Compute some intermediate values that will be used in the sub metrics + # Here, the Identity is returned, and applied to the default keras Accuracy metrics (see below) return y_true, y_pred, sample_weight return {'multi_metric': MultiMetricDefinition('gt', 'class', MyMultiMetric([keras.metrics.Accuracy(name='macc1'), keras.metrics.Accuracy(name='macc2')]))} def _print_evaluate(self, inputs, outputs: Dict[str, AnyNumpy], targets: Dict[str, AnyNumpy], data, print_fn=print): + # Print informative text during validation correct = outputs['class'] == targets['gt'] print_fn(f"PRED/GT: {outputs['class']}{'==' if correct else '!='}{targets['gt']} (p = {outputs['pred'][outputs['class']]})") def _create_tensorboard_handler(self) -> 'TensorBoardDataHandler': + # This tensorboard handler shows how to write image data (last batch of validation) to the Tensorboard + # The image is the output of the conv layers + # See TensorBoardDataHandler for further options class TutorialTBHandler(TensorBoardDataHandler): def _outputs_for_tensorboard(self, inputs, outputs) -> Dict[str, AnyTensor]: + # List the outputs of the model that are used for the Tensorboard + # Here, access the 'conv_out' return {k: v for k, v in outputs.items() if k in ['conv_out']} def handle(self, name, name_for_tb, value, step): + # Override handle to state, that something other than writing a scalar must be performed + # for a output. Value is the output of the network as numpy array if name == 'conv_out': + # Create the image data as numpt array b, w, h, c = value.shape ax_dims = int(np.ceil(np.sqrt(c))) out_conv_v = np.zeros([b, w * ax_dims, h * ax_dims, 1]) @@ -92,8 +112,11 @@ def handle(self, name, name_for_tb, value, step): x = i % ax_dims y = i // ax_dims out_conv_v[:,x*w:(x+1)*w,y*h:(y+1)*h, 0] = value[:,:,:,i] + + # Write the image (use 'name_for_tb' and step) tf.summary.image(name_for_tb, out_conv_v, step=step) else: + # The default case, write a scalar super(TutorialTBHandler, self).handle(name, name_for_tb, value, step) return TutorialTBHandler() diff --git a/tfaip/scenario/tutorial/full/predictor.py b/tfaip/scenario/tutorial/full/predictor.py index 0c058e6..31b3c72 100644 --- a/tfaip/scenario/tutorial/full/predictor.py +++ b/tfaip/scenario/tutorial/full/predictor.py @@ -20,6 +20,10 @@ class TutorialVoter(MultiModelVoter): + """ + This MultiModelVoter performs a majority vote of several predictions. + Alternatively, one could sum up all probabilities of the classes and pick the argmax. + """ def vote(self, sample: Sample) -> Sample: # sample.outputs is a list of the output of each model # just do a majority voting @@ -33,5 +37,10 @@ def vote(self, sample: Sample) -> Sample: class TutorialMultiModelPredictor(MultiModelPredictor): + """ + Tutorial class for a MultiModelPredictor to show how to implement a voting mechanism to vote the output of + multiple models. + """ def create_voter(self, data_params: 'DataBaseParams') -> MultiModelVoter: + # Create an instance of the voter return TutorialVoter() diff --git a/tfaip/scenario/tutorial/full/scenario.py b/tfaip/scenario/tutorial/full/scenario.py index 2966d63..bc04210 100644 --- a/tfaip/scenario/tutorial/full/scenario.py +++ b/tfaip/scenario/tutorial/full/scenario.py @@ -24,7 +24,6 @@ from tfaip.base.data.pipeline.definitions import Sample from tfaip.base.evaluator.evaluator import Evaluator from tfaip.base.imports import ScenarioBase, ScenarioBaseParams, ModelBase -from tfaip.scenario.tutorial.full.data import Data from tfaip.scenario.tutorial.full.model import TutorialModel from tfaip.scenario.tutorial.full.predictor import TutorialMultiModelPredictor from tfaip.util.typing import AnyNumpy @@ -43,6 +42,7 @@ def model_cls(cls) -> Type['ModelBase']: @classmethod def data_cls(cls) -> Type['DataBase']: + from tfaip.scenario.tutorial.full.data.data import Data return Data @staticmethod diff --git a/tfaip/scripts/lav.py b/tfaip/scripts/lav.py index cb1bd6e..69a2764 100644 --- a/tfaip/scripts/lav.py +++ b/tfaip/scripts/lav.py @@ -50,7 +50,9 @@ def parse_args(args=None): from tfaip.base.imports import ScenarioBase parser = TFAIPArgumentParser() parser.add_argument('--export_dir', required=True) - parser.add_argument('--run_eagerly', action='store_true', help="Run the graph in eager mode. This is helpful for debugging. Note that all custom layers must be added to ModelBase!") + parser.add_argument('--run_eagerly', action='store_true', + help="Run the graph in eager mode. This is helpful for debugging. " + "Note that all custom layers must be added to ModelBase!") parser.add_argument('--dump', type=str, help='Dump the predictions and results to the given filepath') args, unknown_args = parser.parse_known_args(args) diff --git a/tfaip/scripts/lav_multi.py b/tfaip/scripts/lav_multi.py index b56a8f3..9c9e6fb 100644 --- a/tfaip/scripts/lav_multi.py +++ b/tfaip/scripts/lav_multi.py @@ -30,7 +30,7 @@ def run(): main(*parse_args()) -def main(args, scenario_cls, scenario_params): +def main(args, scenario_cls, scenario_params, predictor_params): callbacks = [] if args.dump: callbacks.append(DumpResultsCallback(args.dump)) @@ -40,7 +40,7 @@ def main(args, scenario_cls, scenario_params): logger.info("lav_params=" + lav_params.to_json(indent=2)) # create the lav and run it - lav = scenario_cls.create_multi_lav(lav_params, scenario_params) + lav = scenario_cls.create_multi_lav(lav_params, scenario_params, predictor_params) for i, r in enumerate(lav.run(run_eagerly=args.run_eagerly, callbacks=callbacks)): print(json.dumps(r, indent=2)) lav.benchmark_results.pretty_print() @@ -50,22 +50,24 @@ def parse_args(args=None): from tfaip.base.scenario.scenariobase import ScenarioBase parser = TFAIPArgumentParser() - parser.add_argument('--scenario', type=str, required=True) parser.add_argument('--export_dirs', required=True, nargs='+') parser.add_argument('--run_eagerly', action='store_true', help="Run the graph in eager mode. This is helpful for debugging. Note that all custom layers must be added to ModelBase!") parser.add_argument('--dump', type=str, help='Dump the predictions and results to the given filepath') args, unknown_args = parser.parse_known_args(args) - scenario, scenario_params = ScenarioBase.from_path(args.export_dirs[0]) + scenario, scenario_params = ScenarioBase.from_path(args.export_dirs[0]) # scenario based on first model pipeline_params = scenario_params.data_params.val lav_params = scenario.lav_cls().get_params_cls()() lav_params.model_path = args.export_dirs + predictor_params = scenario.multi_predictor_cls().get_params_cls()() parser = TFAIPArgumentParser() add_args_group(parser, group='lav_params', default=lav_params, params_cls=scenario.lav_cls().get_params_cls()) + add_args_group(parser, group='predictor_params', default=predictor_params, params_cls=scenario.multi_predictor_cls().get_params_cls(), + exclude_field_names={'device_params', 'silent', 'progress_bar', 'run_eagerly', 'include_targets'}) add_args_group(parser, group='data', default=pipeline_params, params_cls=pipeline_params.__class__) - return parser.parse_args(unknown_args, namespace=args), scenario, scenario_params + return parser.parse_args(unknown_args, namespace=args), scenario, scenario_params, predictor_params if __name__ == '__main__': diff --git a/tfaip/scripts/train.py b/tfaip/scripts/train.py index 313a3e7..ea52765 100644 --- a/tfaip/scripts/train.py +++ b/tfaip/scripts/train.py @@ -56,7 +56,6 @@ def parse_args(args=None): default_trainer_params = scenario_def.scenario.default_trainer_params() add_args_group(p, group='trainer_params', default=default_trainer_params, params_cls=default_trainer_params.__class__) - return parser.parse_args(args) diff --git a/tfaip/util/argumentparser/parser.py b/tfaip/util/argumentparser/parser.py index 6dbc84b..0528326 100644 --- a/tfaip/util/argumentparser/parser.py +++ b/tfaip/util/argumentparser/parser.py @@ -165,9 +165,14 @@ def is_list_field(field): ) -def make_store_dataclass_action(data_cls: Any, required_fields: List): +def make_store_dataclass_action(data_cls: Any, required_fields: List, exclude_field_names: List[str] = None): + if exclude_field_names is None: + exclude_field_names = [] safe_separator = ":" all_fields = fields_to_dict(data_cls, safe_separator=safe_separator) + for name in exclude_field_names: + if name in all_fields: + del all_fields[name] required_fields.extend([field for name, field in all_fields.items() if field.metadata.get('required', False)]) class DataClassAction(Action): @@ -335,18 +340,23 @@ def argument_list_to_str(arguments): return "[" + ', '.join(f"{name}" for name in arguments) + "]" -def add_args_group(parser: TFAIPArgumentParser, group: str, params_cls: Any, default=None): +def add_args_group(parser: TFAIPArgumentParser, group: str, params_cls: Any, default=None, exclude_field_names=None): + if exclude_field_names is None: + exclude_field_names = [] assert(isinstance(parser, TFAIPArgumentParser)) default = default if default else params_cls() params_cls = default.__class__ parser.add_argument("--" + group, - action=make_store_dataclass_action(params_cls, parser.get_required_fields("--" + group)), + action=make_store_dataclass_action(params_cls, parser.get_required_fields("--" + group), + exclude_field_names=exclude_field_names), default=default, nargs='*', metavar="KEY=VAL", help=generate_help(params_cls)) for name, field in params_cls.__dataclass_fields__.items(): + if name in exclude_field_names: + continue if field_is_dataclass(field) and not name.endswith("_"): if 'arg_mode' not in field.metadata or field.metadata['arg_mode'] == 'flat': add_args_group(parser, group=name, params_cls=extract_dataclass_from_field(field), default=getattr(default, name))