diff --git a/docs/source/config.rst b/docs/source/config.rst index 9a3011f6..a701fab5 100644 --- a/docs/source/config.rst +++ b/docs/source/config.rst @@ -396,7 +396,7 @@ Postprocessing ``sync`` Sync archive to a remote directory using rsync. Make sure that the - configured path to sync output to, i.e. ``path`` is the correct location + configured path to sync output to, i.e. ``path``, is the correct location before enabling automatic syncing or before running ``payu sync``. If postscript is also configured, the latest output and restart files will @@ -409,7 +409,7 @@ Postprocessing ``queue`` (*Default:* ``copyq``) PBS queue used to submit the sync job. - ``walltime`` + ``walltime`` (*Default:* ``10:00:00``) Time required to run the job. ``mem`` (*Default:* ``2GB``) @@ -419,7 +419,7 @@ Postprocessing Number of ncpus required for the job. ``path`` - Destination path to copy archive outputs to. This must be a unique + Destination path to sync archive outputs to. This must be a unique absolute path for your experiment, otherwise, outputs will be overwritten. @@ -427,13 +427,22 @@ Postprocessing Sync permanently archived restarts, which are determined by ``restart_freq``. - ``rsync_flags`` - Additional flags to add to rsync commands used for syncing files. Note - that these will be added to the default flags ``-vrltoD --safe-links``. - + ``rsync_flags`` (*Default:* ``-vrltoD --safe-links``) + Flags passed to rsync when syncing files; these replace the defaults. + ``exclude`` - Patterns to exclude from rsync commands. This can be a single pattern or - a list of patterns. This is equivalent to rsync's ``--exclude PATTERN``. + Patterns to exclude from rsync commands. This is equivalent to rsync's + ``--exclude PATTERN``. This can be a single pattern or a list of + patterns. If a pattern includes any special characters, + e.g. ``.*+?|[]{}()``, it will need to be quoted. 
For example:: + + exclude: + - 'iceh.????-??-??.nc' + - '*-IN-PROGRESS' + + ``exclude_uncollated`` (*Default:* ``True`` if collation is enabled) + Flag to exclude uncollated files from being synced. This is equivalent + to adding ``--exclude *.nc.*``. ``extra_paths`` List of ``glob`` patterns which match extra paths to sync to remote @@ -443,8 +452,8 @@ Postprocessing ``remove_local_files`` (*Default:* ``False``) Remove local files once they are successfully synced to the remote archive. Files in protected paths will not be deleted. Protected paths - include the last output, the last saved restart (determined by - ``restart_freq``), and any subsequent restarts. + include the ``extra_paths`` (if defined), last output, the last saved + restart (determined by ``restart_freq``), and any subsequent restarts. ``remove_local_dirs`` (*Default:* ``False``) Remove local directories once a directory has been successfully synced. diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 9ba721a6..773bbf55 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -300,7 +300,8 @@ Then run:: payu sync -To sync all restarts including the latest restarts, use the --sync-restarts -flag:: +By default ``payu sync`` will not sync the latest restarts that may be pruned +at a later date. 
To sync all restarts including the latest restarts, use the +``--sync-restarts`` flag:: payu sync --sync-restarts diff --git a/payu/subcommands/sync_cmd.py b/payu/subcommands/sync_cmd.py index 9618717e..253b3da5 100644 --- a/payu/subcommands/sync_cmd.py +++ b/payu/subcommands/sync_cmd.py @@ -33,6 +33,7 @@ def runcmd(model_type, config_path, lab_path, dir_path, sync_restarts, default_ncpus = 1 default_queue = 'copyq' default_mem = '2GB' + default_walltime = '10:00:00' pbs_config['queue'] = sync_config.get('queue', default_queue) @@ -40,6 +41,8 @@ def runcmd(model_type, config_path, lab_path, dir_path, sync_restarts, pbs_config['mem'] = sync_config.get('mem', default_mem) + pbs_config['walltime'] = sync_config.get('walltime', default_walltime) + sync_jobname = sync_config.get('jobname') if not sync_jobname: pbs_jobname = pbs_config.get('jobname') @@ -53,31 +56,7 @@ def runcmd(model_type, config_path, lab_path, dir_path, sync_restarts, pbs_config['jobname'] = sync_jobname[:15] - # Replace (or remove) walltime - walltime = sync_config.get('walltime') - if walltime: - pbs_config['walltime'] = walltime - else: - # Remove walltime if set - try: - pbs_config.pop('walltime') - except KeyError: - pass - - # Disable hyperthreading - qsub_flags = [] - iflags = iter(pbs_config.get('qsub_flags', '').split()) - for flag in iflags: - if flag == '-l': - try: - flag += ' ' + next(iflags) - except StopIteration: - break - - if 'hyperthread' not in flag: - qsub_flags.append(flag) - - pbs_config['qsub_flags'] = ' '.join(qsub_flags) + pbs_config['qsub_flags'] = sync_config.get('qsub_flags', '') cli.submit_job('payu-sync', pbs_config, pbs_vars) diff --git a/payu/sync.py b/payu/sync.py index 8ebba8ea..88026074 100644 --- a/payu/sync.py +++ b/payu/sync.py @@ -121,59 +121,30 @@ def add_extra_source_paths(self): def set_destination_path(self): "set or create destination path to sync archive to" - # Remote archive user - default_user = getpass.getuser() - remote_user = self.config.get('user', 
default_user) - # Remote path to sync output to dest_path = self.config.get('path', None) + if dest_path is None: + print("There is no configured path to sync output to. " + "In config.yaml, set:\n" + " sync:\n path: PATH/TO/REMOTE/ARCHIVE\n" + "Replace PATH/TO/REMOTE/ARCHIVE with a unique absolute path " + "to sync outputs to. Ensure path is unique to avoid " + "overwriting existing output!") + raise ValueError("payu: error: Sync path is not defined.") + if not self.remote_syncing: - if dest_path is None: - # Automate destination path to: - # /g/data/{project}/{user}/{model}/{experiment_name}/archive - project = self.expt.config.get('project', - os.environ['PROJECT']) - dest_path = os.path.join('/', 'g', 'data', project, - remote_user, self.expt.model_name, - self.expt.name, 'archive') - - # Create destination directory if not already exists + # Create local destination directory if it does not exist mkdir_p(dest_path) else: - # Top-level path is implicitly set by the SSH key - # (Usually /projects/[group]) - - # Remote mkdir is currently not possible, so any new subdirectories - # must be created before auto-archival - if dest_path is None: - os.path.join(self.expt.model_name, self.expt.name, 'archive') - dest_path = f'{remote_user}@{self.remote_url}:{dest_path}' + # Syncing to remote machine + remote_user = self.config.get('user', None) + if remote_user is not None: + dest_path = f'{remote_user}@{self.remote_url}:{dest_path}' + else: + dest_path = f'{self.remote_url}:{dest_path}' self.destination_path = dest_path - def set_base_rsync_cmd(self): - """Set base rsync command with default rsync flags, any configured - additional flags, rsync protocol, and ssh-key (if remote syncing)""" - rsync_cmd = f'rsync -vrltoD --safe-links' - - # Add any additional rsync flags - additional_rsync_flags = self.config.get('rsync_flags', None) - if additional_rsync_flags: - rsync_cmd += f' {additional_rsync_flags}' - - # Add rsync protocol, if defined - rsync_protocol = 
self.config.get('rsync_protocol', None) - if rsync_protocol: - rsync_cmd += f' --protocol={rsync_protocol}' - - # Add remote host rsync options - if self.remote_syncing: - ssh_key_path = os.path.join(os.getenv('HOME'), '.ssh', - 'id_rsa_file_transfer') - rsync_cmd += f' -e "ssh -i {ssh_key_path}"' - - self.base_rsync_cmd = rsync_cmd - def set_excludes_flags(self): """Add lists of patterns of filepaths to exclude from sync commands""" # Get any excludes @@ -182,10 +153,22 @@ def set_excludes_flags(self): exclude = [exclude] excludes = ' '.join(['--exclude ' + pattern for pattern in exclude]) - if "--exclude *.nc.*" not in excludes: - # TODO: Useful enough to keep?? - # Uncollated files are always excluded + + # Default to exclude uncollated files if collation is enabled + # This can be overridden using exclude_uncollated config flag + exclude_uncollated = self.config.get('exclude_uncollated', None) + + if exclude_uncollated is None: + collate_config = self.expt.config.get('collate', {}) + collating = collate_config.get('enable', True) + if collating: + exclude_uncollated = True + + exclude_flag = "--exclude *.nc.*" + if (exclude_uncollated and exclude_flag not in excludes + and exclude_flag not in self.config.get('rsync_flags', [])): excludes += " --exclude *.nc.*" + self.excludes = excludes def build_cmd(self, source_path): @@ -272,9 +255,13 @@ def run(self): # Set rsync command components self.set_destination_path() - self.set_base_rsync_cmd() self.set_excludes_flags() + # Set base rsync command + default_flags = '-vrltoD --safe-links' + rsync_flags = self.config.get('rsync_flags', default_flags) + self.base_rsync_cmd = f'rsync {rsync_flags}' + # Set remove local files/dirs options remove_files = self.config.get('remove_local_files', False) self.remove_files = '--remove-source-files' if remove_files else '' diff --git a/test/test_sync.py b/test/test_sync.py index 59b1b8cc..f42b5566 100644 --- a/test/test_sync.py +++ b/test/test_sync.py @@ -184,35 +184,65 @@ def 
test_restarts_to_sync(add_config, envt_vars, del os.environ[envt_var] -def test_rsync_components(): +def test_set_destination_path(): additional_config = { "sync": { - "rsync_flags": "--compress", - "rsync_protocol": 29, "url": "test.domain", "user": "test-usr", "path": "remote/path", - "exclude": ["iceh.????-??-??.nc", "*-DEPRECATED"] }} sync = setup_sync(additional_config=additional_config) - # Test base_rsync_cmd - sync.set_base_rsync_cmd() - - home_path = os.getenv('HOME') - expected_cmd = ('rsync -vrltoD --safe-links --compress --protocol=29' - f' -e "ssh -i {home_path}/.ssh/id_rsa_file_transfer"') - - assert sync.base_rsync_cmd == expected_cmd - # Test destination_path sync.set_destination_path() assert sync.destination_path == "test-usr@test.domain:remote/path" - # Test excludes + # Test value error raised when path is not set + sync = setup_sync(additional_config={}) + with pytest.raises(ValueError): + sync.set_destination_path() + + +@pytest.mark.parametrize( + "add_config, expected_excludes", + [ + ( + { + "sync": { + "exclude": ["iceh.????-??-??.nc", "*-DEPRECATED"] + }, + "collate": { + "enable": True + } + }, ("--exclude iceh.????-??-??.nc --exclude *-DEPRECATED" + " --exclude *.nc.*") + ), + ( + { + "sync": { + "exclude_uncollated": False + }, + "collate": { + "enable": True + } + }, "" + ), + ( + { + "sync": { + "exclude": "*-DEPRECATED" + }, + "collate": { + "enable": False + } + }, "--exclude *-DEPRECATED" + ) + ]) +def test_set_excludes_flags(add_config, expected_excludes): + sync = setup_sync(additional_config=add_config) + + # Test setting excludes sync.set_excludes_flags() - expected_excludes = "--exclude iceh.????-??-??.nc --exclude *-DEPRECATED" - expected_excludes += " --exclude *.nc.*" assert sync.excludes == expected_excludes