# ACDD_conf.yaml

################################################################
# USER OPTIONS
# Settings to manage cmorisation and set tables/variables to process
cmor:
    # If test is true only the setup is run, the job is not launched automatically
    test: false
    # working directory, if default the current directory is used
    appdir: default
    # output directory for all generated data (CMORISED files & logs)
    # if default it is set to /scratch/$project/$user/MOPPER_OUTPUT<exp>
    outpath: default
    # if true override files already existing in outpath
    override: true
    # location of input data must point to dir above experiment;
    #  and experiment subdir must contain atmos/[,ocean/, ice/]
    datadir: /g/data/...
    # from exp_to_process: local name of experiment
    exp: expname
    # Interval to cmorise inclusive of end_date
    # NB this will be used to select input files to include.
    # Use also hhmm if you want more control on subdaily data
    # start_date = "20220222T0000"
    # sometimes this can be defined at end of timestep so to get all data for your last day
    # you should use 0000 time of next day
    start_date: "19800101"
    end_date: "20201231"
    # select one of: [CM2, ESM1.5, OM2[-025], AUS2200]
    # if adding a new version other defaults might need to be set
    # see documentation
    access_version: CM2
    # reference date for time units (set as 'default' to use start_date)
    # quoted so it is loaded as a string, like start_date/end_date,
    # rather than being converted to a date object by the YAML loader
    reference_date: "1970-01-01"
    # Path and file templates can be changed based on the experiment.
    # The example below should be considered a minimum requirement.
    # Consider adding "table_id" if using the "all tables" option to list
    # the variables to process as variables can be present at same frequency
    # in more than one table
    path_template: "{product_version}/{frequency}"
    # date_range is automatically added at the end of filename
    file_template: "{variable_id}_{source_id}_{experiment_id}_{frequency}"
    # maximum file size in MB: this is meant as uncompressed, compression might reduce it by 50%
    max_size: 8192
    # deflate_level sets the internal compression level,
    # level 4-6 good compromise between reducing size and write/read speed
    # shuffle 0: off 1:on Shuffle reduces size without impacting speed
    deflate_level: 4
    shuffle: 1
    # Variables to CMORise:
    # CMOR table/variable to process; default is 'all'.
    # 'all' will use all the tables listed in the mapping file
    # Or create a yaml file listing variables to process (VAR_SUBSET[_LIST]).
    # each line: <table: [var1, var2, var3 ..]>
    tables: CMIP6_Amon
    variable_to_process: tas
    var_subset: false
    var_subset_list: ""
    # if subhr data is included specify actual frequency as ##min
    subhr: 10min
    # model vertical levels number
    levnum: 85
    # Mappings, vocab and tables settings
    # default=data/dreq/cmvme_all_piControl_3_3.csv
    # Leave as set unless publishing for CMIP6
    dreq: default
    force_dreq: false
    dreq_years: false
    # mapping file created with cli_db.py based on the actual model output
    master_map: "localdata/map_expname.csv"
    # CMOR tables path, these define what variables can be extracted
    # see documentation to add new tables/variables
    # use this to indicate the path used for new or modified tables
    # these will be used in preference to the package tables
    tables_path: ""
    # Ancil files are needed only for specific variables when
    # grid information is not fully available from output
    # you can usually leave these empty if processing only atmos
    # ancillary files relative path to <appdir> or fullpath
    ancils_path: "localdata/ancils"
    # grid ocean is the auscom grid file to retrieve lat/lon and their vertices
    # example is for om2, cm2, esm1.5 versions
    grid_ocean: "grid_spec.auscom.20110618.nc"
    grid_seaIce: ""
    mask_ocean: ""
    # to supply land area fraction if not available in output
    land_frac: ""
    # to supply tile area fraction if not available in output
    tile_frac: ""
    # used for atmospheric hybrid coordinates
    orog: ""
    # defines Controlled Vocabularies and required attributes
    # leave ACDD to follow NCI publishing requirements
    _control_vocabulary_file: "ACDD_CV.json"
    # leave this empty (null) unless is CMIP6
    _cmip6_option: null
    _AXIS_ENTRY_FILE: "ACDD_coordinate.json"
    _FORMULA_VAR_FILE: "ACDD_formula_terms.json"
    grids: "ACDD_grids.json"
    # Additional NCI information:
    # NCI project to charge compute; $PROJECT = your default project
    project: v45
    # additional NCI projects to be included in the storage flags,
    # given as a comma separated YAML list, e.g. [proj1, proj2]
    addprojs: []
    # queue and memory (GB) per CPU (depends on queue),
    # hugemem is recommended for high resolution data and/or derived variables
    # hugemem requires a minimum of 6 cpus this is handled by the code
    queue: hugemem
    mem_per_cpu: 32
    max_cpus: 24
    # Mopper uses multiprocessing to produce files in parallel, usually 1 cpu per worker
    # is a good compromise, occasionally you might want to pass a higher number
    # if running out of memory
    cpuxworker: 1
    # walltime in "hh:mm:ss"
    # keep it quoted: an unquoted value made of digits and colons can be
    # read as a number by YAML 1.1 loaders
    walltime: '8:00:00'
    mode: custom
    # conda_env to use by default hh5 analysis3-unstable
    # as this has the code and all dependencies installed
    # you can override that by supplying the env to pass to "source"
    # Ex
    # conda_env: <custom-env-path>/bin/activate
    # to allow other settings use "test: true" and modify mopper_job.sh manually
    conda_env: default
    
#
# Global attributes: these will be added to each file; comment out unwanted ones
# Using ACDD CV vocab to check validity of global attributes
# see data/custom-cmor-tables/ACDD_CV.json
# For CMIP6 global attributes explanation:
# https://docs.google.com/document/d/1h0r8RZr_f3-8egBMMh7aqLwy3snpD6_MrDz1q8n5XUk/edit
attrs:
    Conventions: "CF-1.7, ACDD-1.3"
    title: "ACCESS CM2  historical simulation ...."
    experiment_id: exp-id
    # Use to provide a short description of the experiment.
    # It will be written to file as "summary"
    exp_description: "A global simulation of ...."
    product_version: v1.0
    date_created: "2023-05-12"
    # NB source and source_id need to be defined in ACDD_CV.json
    # if using new different model configuration
    # currently available: AUS2200, ACCESS-ESM1-5, ACCESS-CM2,
    #                      ACCESS-OM2, ACCESS-OM2-025
    source_id: 'ACCESS-CM2'
    # model description matching source_id; placeholder value,
    # replace it with one of the full descriptions below if applicable
    source: "ACCESS - CM2 ..."
    # ACCESS-CM2 description
    #source: "ACCESS-CM2 (2019): aerosol: UKCA-GLOMAP-mode, atmos: MetUM-HadGEM3-GA7.1 (N96; 192 x 144 longitude/latitude; 85 levels; top level 85 km), atmosChem: none, land: CABLE2.5, landIce: none, ocean: ACCESS-OM2 (GFDL-MOM5, tripolar primarily 1deg; 360 x 300 longitude/latitude; 50 levels; top grid cell 0-10 m), ocnBgchem: none, seaIce: CICE5.1.2 (same grid as ocean)"
    # ACCESS-ESM1.5 description
    #source: "ACCESS-ESM1.5 (2019): aerosol: CLASSIC (v1.0), atmos: HadGAM2 (r1.1, N96; 192 x 145 longitude/latitude; 38 levels; top level 39255 m), atmosChem: none, land: CABLE2.4, landIce: none, ocean: ACCESS-OM2 (MOM5, tripolar primarily 1deg; 360 x 300 longitude/latitude; 50 levels; top grid cell 0-10 m), ocnBgchem: WOMBAT (same grid as ocean), seaIce: CICE4.1 (same grid as ocean)"
    license: "https://creativecommons.org/licenses/by/4.0/"
    institution: University of ...
    # not required
    organisation: Centre of Excellence for Climate Extremes
    # see here: https://acdguide.github.io/Governance/tech/keywords.html
    # use of FOR codes is recommended
    keywords: "Climate change processes, Adverse weather events, Cloud physics"
    references: ""
    # contact email of person running post-processing or author
    contact: <contact-email>
    creator_name: <main-author-name>
    creator_email: <main-author-email>
    creator_url: <main-author-researcher-id>
    # not required details of any contributor including who ran post processing
    # if different from creator. If more than one separate with commas
    # see here for datacite contributor role definitions:
    # https://datacite-metadata-schema.readthedocs.io/en/4.5_draft/properties/recommended_optional/property_contributor.html#a-contributortype
    contributor_name: <contributor1>, <contributor2>
    contributor_role: data_curator, data_curator
    contributor_email: <contributor1-email>, <contributor2-email>
    contributor_url: <contributor1-researcher-id>, <contributor2-researcher-id>
    # Not required use if publishing, otherwise comment out
    #publisher_name:
    #publisher_email:
    # The following refer to the entire dataset rather than the specific file
    # dates are quoted so they are loaded as strings (like date_created)
    # rather than being converted to date objects by the YAML loader
    time_coverage_start: "1980-01-01"
    time_coverage_end: "2020-12-31"
    geospatial_lat_min: -90.0
    geospatial_lat_max: 90.0
    geospatial_lon_min: -180.0
    geospatial_lon_max: 180.0
    # The following attributes will be added automatically:
    # experiment, frequency, realm, variable
    # Add below whatever other global attributes you want to add
    forcing: "GHG, Oz, SA, Sl, Vl, BC, OC, (GHG = CO2, N2O, CH4, CFC11, CFC12, CFC113, HCFC22, HFC125, HFC134a)"
    calendar: "proleptic_gregorian"
    # N96 is 192 longitude x 144 latitude points, i.e. 144 x 192 as lat x lon
    grid: "native atmosphere N96 grid (144 x 192 latxlon)"
    # use the nearest value from the cmip6 vocabulary, here 250 km
    nominal_resolution: "250 km"
    #
    # Parent experiment details if any
    # if parent=false, all parent fields are automatically set to "no parent".
    # If true, defined values are used.
    parent: false
    # CMOR will add a tracking_id if you want to define a prefix add here
    # (null means no prefix is defined)
    tracking_id_prefix: null
    comment: "post-processed using ACCESS-MOPPeR v1.2.0 https://doi.org/10.5281/zenodo.14322348"