From ac7d574efd60366bd1b9e7815482fe66ad2882ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michael=20J=C3=A4hn?=
Date: Thu, 1 Feb 2024 17:55:10 +0100
Subject: [PATCH] Update 'how to run'

---
 docs/howtorun.rst | 54 ++++++++++++++++++++++++++--------------------
 run_chain.py      | 55 +++++++++++++++++++++--------------------------
 2 files changed, 55 insertions(+), 54 deletions(-)

diff --git a/docs/howtorun.rst b/docs/howtorun.rst
index d9f83220..ea58e0a7 100644
--- a/docs/howtorun.rst
+++ b/docs/howtorun.rst
@@ -23,51 +23,50 @@ contain additional runscripts to be submitted via ``sbatch``.
 
 .. hint::
     Technically, you can run several cases (instead of a single case) in one command,
-    which is useful for nested run, for example. This can be achieved by running
+    which is useful for nested runs, for example. This can be achieved by running
     ``./run_chain.py <case1> <case2>``. With that, the full chain is executed for
     ``case1`` first, and afterwards for ``case2``.
 
-Without specifiying a job list, the default joblist defined in
-``config/models.yaml`` will be executed.
-
 There are several optional arguments available to change the behavior of the chain:
 
+    $ ./run_chain.py -h
+
 * ``-h``, ``--help``
-    Show a help message and exit.
+    Show this help message and exit.
 * ``-j [JOB_LIST ...]``, ``--jobs [JOB_LIST ...]``
     List of job names to be executed.
-    A job is a .py-file in jobs/ with a ``main()`` function, which
+    A job is a ``.py`` file in ``jobs/`` with a ``main()`` function, which
     handles one aspect of the Processing Chain, for example copying
    ``meteo`` input data or launching a job for ``int2lm``.
     Jobs are executed in the order in which they are given here. If no jobs are
-    given, default jobs will be executedas defined
-    in config/models.yaml.
+    given, default jobs will be executed as defined
+    in ``config/models.yaml``.
 * ``-f``, ``--force``
-    Force the processing chain to redo all specified
+    Force the Processing Chain to redo all specified
     jobs, even if they have been started already or were finished
     previously. WARNING: Only logfiles get deleted,
     other effects of a given job (copied files etc.) are simply overwritten. This
-    may cause errors.
-* ``-t NTRY``, ``--try NTRY``
-    Amount of time the cosmo job is re-tried before crashing. Default is 1.
+    may cause errors or unexpected behavior.
 * ``-r``, ``--resume``
-    Resume the processing chain by restarting the
+    Resume the Processing Chain by restarting the
     last unfinished job. WARNING: Only the logfile gets deleted,
     other effects of a given job (copied files etc.) are simply overwritten. This
-    may cause errors.
+    may cause errors or unexpected behavior.
 
 What it Does
 ------------
-The script ``run_chain.py`` reads the command line arguments and the config file.
+The script ``run_chain.py`` reads the command line arguments and the config file
+from the specified case.
 It then calls the function :func:`run_chain.restart_runs`, which divides the
 simulation time according to the specified restart steps. Then it calls
-:func:`run_chain.run_chain` for each sub-run. This function sets up the directory
-structure of the chain and then starts the specified :ref:`jobs`
-sequentially.
+:func:`run_chain.run_chunk` for each part (chunk) of the simulation workflow.
+This function sets up the directory structure of the chain and then submits the
+specified :ref:`jobs` via ``sbatch`` to the Slurm workload manager,
+taking job dependencies into account.
 Test Cases
 ----------
 
@@ -89,6 +88,10 @@ the script::
 This will run all the individual scripts in ``jenkins/scripts/``, which
 can also be launched separately if desired.
 
+These cases undergo regular testing to ensure that the Processing Chain runs
+correctly. A corresponding Jenkins plan is launched on a weekly basis and
+when triggered within a GitHub pull request.
+
 Directory Structure
 -------------------
 
@@ -108,6 +111,11 @@ run looks like this::
     ├── cfg.int2lm_input/
     ├── cfg.int2lm_work/
     └── cfg.int2lm_output/
+
+As one can see, it creates working directories for both the ``int2lm`` preprocessor
+and ``cosmo``. Additionally (and this is always the case), the ``checkpoints``
+directory holds all the job logfiles. Whenever a job has successfully finished,
+the logfile is copied from the ``working`` to the ``finished`` sub-directory.
 
 Running the ``cosmo-ghg-test`` case therefore produces the following
 directories and files (showing four levels of directories deep)::
@@ -124,7 +132,7 @@ directories and files (showing four levels of directories deep)::
     │   │   │   ├── online_vprm
     │   │   │   ├── post_cosmo
     │   │   │   ├── post_int2lm
-    │   │   │   └── prepare_data
+    │   │   │   └── prepare_cosmo
     │   │   └── working/
     │   │       ├── biofluxes
     │   │       ├── cosmo
@@ -134,7 +142,7 @@ directories and files (showing four levels of directories deep)::
     │   │       ├── online_vprm
     │   │       ├── post_cosmo
     │   │       ├── post_int2lm
-    │   │       └── prepare_data
+    │   │       └── prepare_cosmo
     │   ├── cosmo/
     │   │   ├── input/
     │   │   │   ├── oem/
@@ -177,7 +185,7 @@ directories and files (showing four levels of directories deep)::
     │   │   ├── online_vprm
     │   │   ├── post_cosmo
     │   │   ├── post_int2lm
-    │   │   └── prepare_data
+    │   │   └── prepare_cosmo
     │   └── working/
     │       ├── biofluxes
     │       ├── cosmo
@@ -187,7 +195,7 @@ directories and files (showing four levels of directories deep)::
     │       ├── online_vprm
     │       ├── post_cosmo
     │       ├── post_int2lm
-    │       └── prepare_data
+    │       └── prepare_cosmo
     ├── cosmo/
     │   ├── input/
     │   │   ├── oem
@@ -222,7 +230,7 @@ directories and files (showing four levels of directories deep)::
 
 -------------------------------------------
 
-.. autofunction:: run_chain.run_chain
+.. autofunction:: run_chain.run_chunk
 
 -------------------------------------------
 
diff --git a/run_chain.py b/run_chain.py
index 743b54c2..7e9e5b42 100755
--- a/run_chain.py
+++ b/run_chain.py
@@ -16,18 +16,18 @@
 
 
 def parse_arguments():
-    """Parse command line arguments for the processing chain script.
+    """Parse command line arguments for the Processing Chain script.
 
     Parses and retrieves command line arguments, allowing users to specify
     run identifiers, jobs to execute, and various options to control the
-    execution of the processing chain.
+    execution of the Processing Chain.
 
     Returns
     -------
     argparse.Namespace
         A namespace object containing parsed command line arguments.
     """
-    parser = argparse.ArgumentParser(description="Run the processing chain.")
+    parser = argparse.ArgumentParser(description="Run the Processing Chain.")
 
     parser.add_argument("casenames",
                         nargs='+',
@@ -36,9 +36,9 @@ def parse_arguments():
                         "to be in cases/<casename>/. The runs are executed "
                         "sequentially in the order they're given here.")
 
-    jobs_help = ("List of job-names to be executed. A job is a .py-"
+    jobs_help = ("List of job names to be executed. A job is a .py "
                  "file in jobs/ with a main()-function which "
-                 "handles one aspect of the processing chain, for "
+                 "handles one aspect of the Processing Chain, for "
                  "example copying meteo-input data or launching a "
                  "job for int2lm. "
" "Jobs are executed in the order in which they are " @@ -52,27 +52,20 @@ def parse_arguments(): help=jobs_help, default=None) - force_help = ("Force the processing chain to redo all specified jobs," + force_help = ("Force the Processing Chain to redo all specified jobs," " even if they have been started already or were finished" " previously. WARNING: Only logfiles get deleted," " other effects of a given job (copied files etc.)" - " are simply overwritten. This may cause errors.") + " are simply overwritten. This may cause errors" + " or unexpected behavior.") parser.add_argument("-f", "--force", action='store_true', help=force_help) - tries_help = ("Amount of time the cosmo job is re-tried before crashing." - " Default is 1.") - parser.add_argument("-t", - "--try", - help=tries_help, - dest="ntry", - type=int, - default=1) - resume_help = ( - "Resume the processing chain by restarting the last unfinished job." + "Resume the Processing Chain by restarting the last unfinished job." " WARNING: Only the logfile gets deleted," " other effects of a given job (copied files etc.)" - " are simply overwritten. This may cause errors.") + " are simply overwritten. This may cause errors." + " or unexpected behavior.") parser.add_argument("-r", "--resume", help=resume_help, @@ -90,7 +83,7 @@ def __init__(self, casename): """Initialize an instance of the Config class. Initializes an instance of the Config class with user-specific - and default attributes. The class represents a processing chain for a + and default attributes. The class represents a Processing Chain for a particular case, and its attributes are populated based on the provided `casename`. @@ -98,7 +91,7 @@ def __init__(self, casename): ---------- casename : str The identifier for the case, typically specifying the configuration - and settings to be used in the processing chain. + and settings to be used in the Processing Chain. Attributes ---------- @@ -107,13 +100,13 @@ def __init__(self, casename): email : str The user's email address, initially set to None and updated using the `set_email` method. casename : str - The specified case name for the processing chain. + The specified case name for the Processing Chain. chain_src_dir : str - The source directory for the processing chain, typically the current working directory. + The source directory for the Processing Chain, typically the current working directory. case_path : str The path to the case directory under 'cases/' for the specified `casename`. work_root : str - The root directory for processing chain execution, typically located under the source directory. + The root directory for Processing Chain execution, typically located under the source directory. Notes ----- @@ -414,15 +407,15 @@ def create_vars_from_dicts(self): def run_chain(work_root, model_cfg, cfg, startdate_sim, enddate_sim, job_names, force, resume): - """Run the processing chain, managing job execution and logging. + """Run the Processing Chain, managing job execution and logging. - This function sets up and manages the execution of a processing chain, handling + This function sets up and manages the execution of a Processing Chain, handling job execution, logging, and various configuration settings. Parameters ---------- work_root : str - The path to the directory where the processing chain writes files during execution. + The path to the directory where the Processing Chain writes files during execution. model_cfg : dict Configuration settings for the modeling framework. 
     cfg : Config
@@ -623,7 +616,7 @@ def run_chain(work_root, model_cfg, cfg, startdate_sim, enddate_sim, job_names,
 def restart_runs(work_root, model_cfg, cfg, job_names, force, resume):
     """Start subchains in specified intervals and manage restarts.
 
-    This function slices the total runtime of the processing chain according to the
+    This function slices the total runtime of the Processing Chain according to the
     `cfg.restart_step_hours` configuration. It calls `run_chain()` for each
     specified interval.
 
@@ -676,7 +669,7 @@ def restart_runs(work_root, model_cfg, cfg, job_names, force, resume):
 def restart_runs_spinup(work_root, model_cfg, cfg, job_names, force, resume):
     """Start subchains in specified intervals and manage restarts with spin-up.
 
-    This function slices the total runtime of the processing chain according to the
+    This function slices the total runtime of the Processing Chain according to the
     `cfg.restart_step_hours` configuration. It calls `run_chain()` for each
     specified interval, managing restarts with spin-up.
 
@@ -758,9 +751,9 @@ def load_model_config_yaml(yamlfile):
 
 
 if __name__ == '__main__':
-    """Main script for running a processing chain.
+    """Main script for running a Processing Chain.
 
-    This script handles the execution of a processing chain for one or more specified cases. It loads model configurations, prepares the environment, and starts the chain based on the provided settings.
+    This script handles the execution of a Processing Chain for one or more specified cases. It loads model configurations, prepares the environment, and starts the chain based on the provided settings.
 
     Parameters
     ----------
@@ -827,4 +820,4 @@ def load_model_config_yaml(yamlfile):
                              force=args.force,
                              resume=args.resume)
 
-    print('>>> Finished the processing chain successfully <<<')
+    print('>>> Finished the Processing Chain successfully <<<')
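
A note on the chunking described in the updated "What it Does" section: the time
slicing performed by ``restart_runs`` can be pictured with a short sketch. This is
illustrative only and not the code from ``run_chain.py``; the function name
``chunk_bounds`` and the example dates are made up for the demonstration::

    # Sketch: slice a simulation period into restart chunks, in the spirit of
    # restart_runs() dividing [startdate, enddate] by cfg.restart_step_hours.
    from datetime import datetime, timedelta

    def chunk_bounds(startdate, enddate, restart_step_hours):
        """Yield (chunk_start, chunk_end) pairs covering the full period."""
        step = timedelta(hours=restart_step_hours)
        chunk_start = startdate
        while chunk_start < enddate:
            # The last chunk may be shorter than a full restart step.
            chunk_end = min(chunk_start + step, enddate)
            yield chunk_start, chunk_end
            chunk_start = chunk_end

    # A 24-hour simulation with 12-hourly restarts yields two chunks.
    for start, end in chunk_bounds(datetime(2015, 1, 1), datetime(2015, 1, 2), 12):
        print(f"chunk: {start:%Y-%m-%d %H:%M} -> {end:%Y-%m-%d %H:%M}")

Each such chunk then gets its own directory structure and job submissions, as the
documentation above describes.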
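
Likewise, the dependency handling mentioned in the updated docs ("submits the
specified jobs via ``sbatch`` ... taking job dependencies into account") follows
the usual Slurm pattern of chaining job IDs. The helper ``submit`` and the
runscript names below are hypothetical; only ``--parsable`` and
``--dependency=afterok:...`` are standard ``sbatch`` options::

    # Sketch: submit runscripts so that each job starts only after its
    # dependencies have completed successfully (Slurm 'afterok' dependency).
    import subprocess

    def submit(runscript, dep_ids=None):
        """Submit a runscript via sbatch and return its Slurm job id."""
        cmd = ["sbatch", "--parsable"]  # --parsable prints just the job id
        if dep_ids:
            cmd.append("--dependency=afterok:" + ":".join(dep_ids))
        cmd.append(runscript)
        result = subprocess.run(cmd, check=True, capture_output=True, text=True)
        return result.stdout.strip()

    # Hypothetical chain: prepare -> int2lm -> cosmo
    prep_id = submit("prepare_cosmo.job")
    int2lm_id = submit("int2lm.job", dep_ids=[prep_id])
    cosmo_id = submit("cosmo.job", dep_ids=[int2lm_id])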