diff --git a/padocc/core/project.py b/padocc/core/project.py index cccf921..588370a 100644 --- a/padocc/core/project.py +++ b/padocc/core/project.py @@ -7,7 +7,7 @@ import logging from .errors import error_handler -from .utils import extract_file, BypassSwitch +from .utils import extract_file, BypassSwitch, apply_substitutions from .logs import reset_file_handler from .mixins import DirectoryMixin, EvaluationsMixin @@ -267,7 +267,12 @@ def _configure_filelist(self): ) if pattern.endswith('.txt'): - self.allfiles.set(extract_file(pattern)) + content = extract_file(pattern) + if 'substitutions' in self.base_cfg: + content, status = apply_substitutions('datasets', subs=self.base_cfg['substitutions'], content=content) + if status: + self.logger.warning(status) + self.allfiles.set(content) else: # Pattern is a wildcard set of files if 'latest' in pattern: @@ -279,11 +284,13 @@ def _setup_config( self, pattern : str = None, update : str = None, - remove : str = None + remove : str = None, + substitutions: dict = None, ) -> None: """ Create base cfg json file with all required parameters. """ + self.logger.debug('Constructing the config file.') if pattern or update or remove: config = { @@ -292,6 +299,8 @@ def _setup_config( 'updates':update, 'removals':remove, } + if substitutions: + config['substitutions'] = substitutions self.base_cfg.set(config) @property diff --git a/padocc/core/utils.py b/padocc/core/utils.py index 6f57df5..65f9992 100644 --- a/padocc/core/utils.py +++ b/padocc/core/utils.py @@ -232,4 +232,17 @@ def find_closest(num, closest): if abs(d-closest) < min_diff: min_diff = abs(d-closest) closest_div = d - return closest_div \ No newline at end of file + return closest_div + +def apply_substitutions(subkey: str, subs: dict = None, content: list = None): + if not subs: + return content, "" + + if subkey not in subs: + return content, f"Subkey {subkey} is not valid for substitutions" + + content = '\n'.join(content) + for f, r in subs[subkey].items(): + content = content.replace(f,r) + + return content.split('\n') , "" \ No newline at end of file diff --git a/padocc/operations/mixins.py b/padocc/operations/mixins.py index 7612fd6..fd2b6d3 100644 --- a/padocc/operations/mixins.py +++ b/padocc/operations/mixins.py @@ -12,9 +12,9 @@ FalseLogger, LoggedOperation ) -from padocc.core.utils import extract_file, times +from padocc.core.utils import extract_file, times, apply_substitutions -from ..core.project import ProjectOperation +from padocc.core.project import ProjectOperation config = { 'proj_code': None, @@ -117,7 +117,9 @@ def _init_group(self, datasets : list, substitutions: dict = None): self.logger.info('Creating project directories') # Group config is the contents of datasets.csv if substitutions: - datasets = _apply_substitutions('init_file',subs=substitutions, content=datasets) + datasets, status = apply_substitutions('init_file',subs=substitutions, content=datasets) + if status: + self.logger.warning(status) self.datasets.set(datasets) @@ -136,8 +138,14 @@ def _open_json(file): proj_code = ds_values[0] pattern = ds_values[1] - if ds_values[1].endswith('.txt') and substitutions: - pattern = _apply_substitutions('dataset_file', subs=substitutions, content=[pattern])[0] + if pattern.endswith('.txt') and substitutions: + pattern, status = apply_substitutions('dataset_file', subs=substitutions, content=[pattern]) + pattern = pattern[0] + if status: + self.logger.warning(status) + + if substitutions: + cfg_values['substitutions'] = substitutions cfg_values['pattern'] = pattern proj_codes.append(proj_code) @@ -730,16 +738,5 @@ def _get_input( return config -def _apply_substitutions(subkey: str, subs: dict = None, content: list = None): - if not subs: - return content, "" - - if subkey not in subs: - return content, f"Subkey {subkey} is not valid for substitutions" - - content = '\n'.join(content) - for f, r in subs[subkey].items(): - content = content.replace(f,r) - return content.split('\n') \ No newline at end of file