From e7dda2167c8fc0a2c0145c978f99d16c7b326bd8 Mon Sep 17 00:00:00 2001 From: ShixuanZhang Date: Mon, 16 Sep 2024 17:55:18 -0500 Subject: [PATCH] Add PCMDI Diags to zppy --- .pre-commit-config.yaml | 2 +- conda/chrisalis.yml | 34 + setup.py | 2 +- zppy/__main__.py | 4 + zppy/defaults/default.ini | 214 +++ zppy/pcmdi_diags.py | 158 ++ zppy/templates/coupled_global.py | 872 ++++++++++ zppy/templates/global_time_series.bash | 0 .../e3sm_to_cmip/default_metadata.json | 0 .../e3sm_to_cmip/vrt_remap_plev19.nc | Bin 0 -> 1260 bytes zppy/templates/pcmdi_diags.bash | 1544 +++++++++++++++++ .../templates/pcmdi_diags/cmip_variables.json | 121 ++ .../pcmdi_diags/derived_variable.json | 26 + .../pcmdi_diags/mean_climate_plot_driver.py | 670 +++++++ .../pcmdi_diags/mean_climate_plot_parser.py | 373 ++++ .../pcmdi_diags/observation_to_cmip.py | 85 + .../pcmdi_diags/plot_mean_climate.py | 84 + .../pcmdi_diags/post_merge_clim_jsons.py | 164 ++ zppy/templates/pcmdi_diags/process_sftlf.py | 61 + .../pcmdi_diags/reference_alias.json | 340 ++++ zppy/templates/pcmdi_diags/regions_specs.json | 263 +++ zppy/templates/ts.bash | 24 +- 22 files changed, 5037 insertions(+), 4 deletions(-) create mode 100644 conda/chrisalis.yml mode change 100644 => 100755 zppy/defaults/default.ini create mode 100644 zppy/pcmdi_diags.py create mode 100644 zppy/templates/coupled_global.py mode change 100644 => 100755 zppy/templates/global_time_series.bash mode change 100644 => 100755 zppy/templates/inclusions/e3sm_to_cmip/default_metadata.json create mode 100755 zppy/templates/inclusions/e3sm_to_cmip/vrt_remap_plev19.nc create mode 100755 zppy/templates/pcmdi_diags.bash create mode 100755 zppy/templates/pcmdi_diags/cmip_variables.json create mode 100755 zppy/templates/pcmdi_diags/derived_variable.json create mode 100755 zppy/templates/pcmdi_diags/mean_climate_plot_driver.py create mode 100755 zppy/templates/pcmdi_diags/mean_climate_plot_parser.py create mode 100755 zppy/templates/pcmdi_diags/observation_to_cmip.py create mode 100755 zppy/templates/pcmdi_diags/plot_mean_climate.py create mode 100755 zppy/templates/pcmdi_diags/post_merge_clim_jsons.py create mode 100755 zppy/templates/pcmdi_diags/process_sftlf.py create mode 100755 zppy/templates/pcmdi_diags/reference_alias.json create mode 100755 zppy/templates/pcmdi_diags/regions_specs.json mode change 100644 => 100755 zppy/templates/ts.bash diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e5c62ff2..e9f3bb3e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,5 +1,5 @@ exclude: "docs|node_modules|migrations|.git|.tox" -default_stages: [commit] +default_stages: [pre-commit] fail_fast: true repos: diff --git a/conda/chrisalis.yml b/conda/chrisalis.yml new file mode 100644 index 00000000..cdefec7f --- /dev/null +++ b/conda/chrisalis.yml @@ -0,0 +1,34 @@ +name: zppy_dev +channels: + - conda-forge + - defaults +dependencies: + # Base + # ================= + - python=3.9.13 + - pip=22.2.2 + - configobj=5.0.6 + - jinja2=3.1.2 + - mache>=1.5.0 + - mpas_tools>=0.15.0 + - pillow=9.2.0 + # Developer Tools + # ================= + # If versions are updated, also update 'rev' in `.pre-commit-config.yaml` + - black=22.8.0 # version from https://anaconda.org/conda-forge/black + - flake8=5.0.4 # version from https://anaconda.org/conda-forge/flake8 + # This line also implicitly installs isort + - flake8-isort=4.2.0 # version from https://anaconda.org/conda-forge/flake8-isort + - mypy=0.982 # version from https://anaconda.org/conda-forge/mypy + - pre-commit=2.20.0 # version from https://anaconda.org/conda-forge/pre-commit + - tbump=6.9.0 + # Documentation + # If versions are updated, also update in `.github/workflows/build_workflow.yml` + # ================= + - sphinx=5.2.3 + - sphinx-multiversion=0.2.4 + - sphinx_rtd_theme=1.0.0 + # Need to pin docutils because 0.17 has a bug with unordered lists + # https://github.com/readthedocs/sphinx_rtd_theme/issues/1115 + - docutils=0.16 +prefix: /home/ac.szhang/.conda/envs/zppy_dev diff --git a/setup.py b/setup.py index c1f32060..cfac3f30 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ def package_files(directory, prefixes, extensions): data_files = package_files( - "zppy", prefixes=[], extensions=["bash", "csh", "cfg", "ini", "sh", "json"] + "zppy", prefixes=[], extensions=["bash", "csh", "cfg", "ini", "sh", "json", "py"] ) setup( diff --git a/zppy/__main__.py b/zppy/__main__.py index 50e6d572..a4f90cfd 100644 --- a/zppy/__main__.py +++ b/zppy/__main__.py @@ -16,6 +16,7 @@ from zppy.ilamb import ilamb from zppy.logger import _setup_custom_logger from zppy.mpas_analysis import mpas_analysis +from zppy.pcmdi_diags import pcmdi_diags from zppy.tc_analysis import tc_analysis from zppy.ts import ts from zppy.utils import check_status, submit_script @@ -243,6 +244,9 @@ def _launch_scripts(config: ConfigObj, script_dir, job_ids_file, plugins) -> Non # ilamb tasks existing_bundles = ilamb(config, script_dir, existing_bundles, job_ids_file) + # pcmdi_diags tasks + existing_bundles = pcmdi_diags(config, script_dir, existing_bundles, job_ids_file) + # zppy external plugins for plugin in plugins: # Get plugin module function diff --git a/zppy/defaults/default.ini b/zppy/defaults/default.ini old mode 100644 new mode 100755 index 83120818..a3cf960d --- a/zppy/defaults/default.ini +++ b/zppy/defaults/default.ini @@ -107,9 +107,11 @@ input_component = string(default="") [ts] area_nm = string(default="area") cmip_metadata = string(default="inclusions/e3sm_to_cmip/default_metadata.json") +cmip_plevdata = string(default="inclusions/e3sm_to_cmip/vrt_remap_plev19.nc") # Days per file dpf = integer(default=30) extra_vars = string(default="") +interp_vars = string(default="U,V,T,Q,RELHUM,OMEGA,Z3") # Time-steps per day tpd = integer(default=1) ts_fmt = string(default="ts_only") @@ -119,11 +121,13 @@ input_component = string(default="") [[__many__]] area_nm = string(default=None) cmip_metadata = string(default=None) + cmip_plevdata = string(default=None) dpf = integer(default=None) extra_vars = string(default=None) tpd = integer(default=None) ts_fmt = string(default=None) input_component = string(default=None) + interp_vars = string(default=None) [tc_analysis] # NOTE: always overrides value in [default] @@ -131,6 +135,216 @@ input_files = string(default="eam.h2") # The scratch directory scratch = string(default="") +[pcmdi_diags] +backend = string(default="mpl") +cfg = string(default="") +# File of cmip variable lists (cmip6 convention) +cmip_variables = string(default="pcmdi_diags/cmip_variables.json") +# File of specified regions for mean climate calculation +regions_specs = string(default="pcmdi_diags/regions_specs.json") +# File of derived variables +derived_variable = string(default="pcmdi_diags/derived_variable.json") +# File of observation data name for mean climate calculation +reference_alias = string(default="pcmdi_diags/reference_alias.json") +# File of fuction to generate land/sea mask +process_sftlf = string(default="pcmdi_diags/process_sftlf.py") +# File of fuction to generate mean climate metrics figure +clim_plot_parser = string(default="pcmdi_diags/mean_climate_plot_parser.py") +# File of module to plot mean climate metrics figure +clim_plot_driver = string(default="pcmdi_diags/mean_climate_plot_driver.py") +# Path to observation time-series data +# Required for "mean_climate","variability_mode","enso" +obs_ts = string(default="") +# observational data sets (see reference_alias.json) +# observation data tag in reference_alias +obs_sets = string(default="default") +# options specific for constructing pcmdi preferred file name conventions +# required for "model_vs_obs" comparison +cmip_name = string(default="e3sm.historical.v3-LR.0051") +# required for "model_vs_model" comparison +cmip_name_ref = string(default="e3sm.historical.v3-LR.0051") +# options shared by pcmdi +pmp_debug = string(default=False) +# flag to process the land/sea mask within pcmdi +generate_sftlf = string(default=True) +# variables to be used by the pcmdi diagnostics +# needs to setup for each subsections +vars = string(default="") +# sets of diagnostics from pcmdi package +sets = string_list(default=list("mean_climate","variability_mode_atm","variability_mode_cpl","enso")) +# options to identify subset of pcmdi drivers ("mean_climate","variability_mode","enso") +subset = string(default="") +#options for cmip model metrics data from pcmdi +#group of pcmdi generated cmip metrics data (mip.exp.version) +pcmdi_data_set=string(default="cmip6.historical.v20220928") +# path to pcmdi generated cmip metrics data +pcmdi_data_path=string(default="") +########################################################################################## +# below followed the setup in e3sm_diag but used for PCMDI workflow +########################################################################################## +# See url +multiprocessing = boolean(default=True) +# See url +num_workers = integer(default=24) +# See url +figure_format = string(default="png") +# comparision type (same as e3sm_diag) +run_type = string(default="model_vs_obs") +# Used to label the results directory +# Options are "model_vs_obs" and "model_vs_model" +tag = string(default="model_vs_obs") +########################################################################################### +# Required for run_type="model_vs_model" runs, different from e3sm_diag, +# model_vs_model in pcmdi referred to the comparision of two model simulations +# with observations and cmip models. +########################################################################################### +#path for reference model data (time series) +reference_data_path_ts = string(default="") +# pcmdi_diags.py will set to match `years` if not specified +ref_years = string_list(default=list("")) +# End year (i.e., the last year to use) for the reference data +ref_end_yr = string(default="") +# Final year (i.e., the last available year) for the reference data +ref_final_yr = string(default="") +# Start year for the reference data +ref_start_yr = string(default="") +# reference model name +ref_name = string(default="") +# The years increment for reference data +ts_num_years_ref = integer(default=5) +# Set to true to swap test and ref when run_type="model_vs_model" +swap_test_ref = boolean(default=False) +########################################################################################## +# options for pcmdi mode varibility diagnostics +# vars = "psl" for atm_modes +# vars = "ts" for cpl_modes +######################################################################################### +#name of atmospheric modes varibility +atm_modes = string_list(default=list("NAM","NAO","PNA","NPO","SAM","PSA1","PSA2")) +#name of coupled modes varibility +cpl_modes = string_list(default=list("PDO","NPGO","AMO")) +#keywards for unit conversion in pcmdi (model) +ModUnitsAdjust = string(default="") +#keywards for unit conversion in pcmdi (observation) +ObsUnitsAdjust = string(default="") +#frequency of the model data +frequency = string(default="mo") +#options specific for mode varibility metrics in pcmdi +seasons = string(default="monthly") +landmask = string(default=False) +RmDomainMean = string(default=True) +EofScaling = string(default=False) +ConvEOF = string(default=True) +CBF = string(default=True) +cmec = string(default=True) +update_json = string(default=False) +plot_obs = string(default=True) +plot = string(default=True) +nc_out_obs = string(default=True) +nc_out = string(default=True) +########################################################################################## +# options for pcmdi enso diagnostics +# vars = "psl,pr,prsn,ts,tas,tauu,tauv,hflx,hfss,rlds,rsds,rlus,rlut,rsdt" +########################################################################################## +groups = string_list(default=list("ENSO_perf","ENSO_proc","ENSO_tel")) +########################################################################################## +# optional for mean climate diagnostics +# vars = "pr,prw,psl,rlds,rldscs,rltcre,rstcre,rlut,rlutcs,rsds,rsdscs,rsdt,rsus,rsuscs, +# rlus,rsut,rtmt,sfcWind,tas,tauu,tauv,ts,ta-200,ta-850,ua-200,ua-850,va-200, +# va-850,zg-500" +########################################################################################## +# model data grid after remapping +grid = string(default="180x360_aave") +#flag to turn on regional mean climate metrics +regional = string(default="y") +#default regions for mean climate metrics data +regions = string(default="global,ocean,land,NHEX,SHEX,TROPICS,NHEX_ocean,SHEX_ocean,NHEX_land,SHEX_land,ocean_50S50N") +# save derived climatology data +save_test_clims = string(default=True) +# method to determine the way to process mean climate data +# default used nco instead of pcmdi built-in function +climatology_process_method = string(default="nco") +# Regridding by pcmdi (default is to regrid data to 2.5x2.5 grid for diagnostic metrics) +# Required for mean climate +# OPTIONS: '2.5x2.5' or an actual cdms2 grid object +target_grid = string(default="2.5x2.5") +# OPTIONS: String for description on the selected grid +target_grid_string = string(default="2p5x2p5") +# OPTIONS: 'regrid2','esmf' +regrid_tool = string(default="esmf") +# OPTIONS: 'linear','conservative', only if tool is esmf +regrid_method = string(default="regrid2") +# OPTIONS: 'linear','conservative', only if tool is esmf +regrid_method_ocn = string(default="conservative") +# setup for parallel coordinate plots (hide makers for sigle model) +parcord_show_markers = string(default=False) +# setup for portrait plots (add vertical line to separate test and reference models) +portrait_vertical_line = string(default=True) + + [[__many__]] + backend = string(default=None) + cfg = string(default=None) + vars = string(default=None) + grid = string(default=None) + cmip_metadata = string(default=None) + cmip_variables = string(default=None) + pcmdi_data_set = string(default=None) + pcmdi_data_path = string(default=None) + derived_variable = string(default=None) + reference_alias = string(default=None) + regions_specs = string(default=None) + process_sftlf = string(default=None) + multiprocessing = boolean(default=None) + num_workers = integer(default=None) + obs_ts = string(default=None) + figure_format = string(default=None) + ref_end_yr = string(default=None) + ref_final_yr = string(default=None) + ref_name = string(default=None) + ref_start_yr = string(default=None) + ref_years = string_list(default=None) + reference_data_path_ts = string(default=None) + run_type = string(default=None) + sets = string_list(default=None) + swap_test_ref = boolean(default=None) + tag = string(default=None) + ts_num_years_ref = integer(default=None) + climatology_process_method = string(default=None) + target_grid = string(default=None) + target_grid_string = string(default=None) + regrid_tool = string(default=None) + regrid_method = string(default=None) + regrid_method_ocn = string(default=None) + obs_sets = string(default=None) + regions = string(default=None) + regional = string(default=None) + save_test_clims = string(default=None) + seasons = string(default=None) + RmDomainMean = string(default=None) + EofScaling = string(default=None) + ConvEOF = string(default=None) + CBF = string(default=None) + cmec = string(default=None) + update_json = string(default=None) + subset = string(default=None) + landmask = string(default=None) + frequency = string(default=None) + generate_sftlf = string(default=None) + atm_modes = string_list(default=None) + cpl_modes = string_list(default=None) + groups = string_list(default=None) + ModUnitsAdjust = string(default=None) + ObsUnitsAdjust = string(default=None) + cmip_name = string(default=None) + cmip_name_ref = string(default=None) + pmp_debug = string(default=None) + nc_out_obs = string(default=None) + nc_out = string(default=None) + plot_obs = string(default=None) + plot = string(default=None) + parcord_show_markers = string(default=None) + portrait_vertical_line = string(default=None) + [e3sm_diags] # See https://e3sm-project.github.io/e3sm_diags/_build/html/master/available-parameters.html backend = string(default="mpl") diff --git a/zppy/pcmdi_diags.py b/zppy/pcmdi_diags.py new file mode 100644 index 00000000..cc38d5d7 --- /dev/null +++ b/zppy/pcmdi_diags.py @@ -0,0 +1,158 @@ +import os +import pprint +from typing import List + +import jinja2 + +from zppy.bundle import handle_bundles +from zppy.utils import ( + add_dependencies, + check_status, + get_tasks, + get_years, + make_executable, + print_url, + submit_script, +) + + +# ----------------------------------------------------------------------------- +def pcmdi_diags(config, script_dir, existing_bundles, job_ids_file): + + # Initialize jinja2 template engine + templateLoader = jinja2.FileSystemLoader( + searchpath=config["default"]["templateDir"] + ) + templateEnv = jinja2.Environment(loader=templateLoader) + template = templateEnv.get_template("pcmdi_diags.bash") + + # --- List of pcmdi_diags tasks --- + tasks = get_tasks(config, "pcmdi_diags") + if len(tasks) == 0: + return existing_bundles + + # --- Generate and submit pcmdi_diags scripts --- + dependencies: List[str] = [] + + for c in tasks: + + c["scriptDir"] = script_dir + + if "ts_num_years" in c.keys(): + c["ts_num_years"] = int(c["ts_num_years"]) + + # procedure type for e3sm_to_cmip + c["cmor_tables_prefix"] = c["diagnostics_base_path"] + + # Loop over year sets + year_sets = get_years(c["ts_years"]) + if ("ref_years" in c.keys()) and (c["ref_years"] != [""]): + ref_year_sets = get_years(c["ref_years"]) + else: + ref_year_sets = year_sets + for s, rs in zip(year_sets, ref_year_sets): + c["year1"] = s[0] + c["year2"] = s[1] + if ("last_year" in c.keys()) and (c["year2"] > c["last_year"]): + continue # Skip this year set + c["ref_year1"] = rs[0] + c["ref_year2"] = rs[1] + if c["subsection"]: + c["sub"] = c["subsection"] + else: + c["sub"] = c["grid"] + # Make a guess for observation paths, if need be + if ("ts_num_years" in c.keys()) and (c["obs_ts"] == ""): + c["obs_ts"] = ( + f"{c['diagnostics_base_path']}/observations/Atm/time-series/" + ) + if c["run_type"] == "model_vs_obs": + prefix = "pcmdi_diags_%s_%s_%04d-%04d" % ( + c["sub"], + c["tag"], + c["year1"], + c["year2"], + ) + elif c["run_type"] == "model_vs_model": + prefix = "pcmdi_diags_%s_%s_%04d-%04d_vs_%04d-%04d" % ( + c["sub"], + c["tag"], + c["year1"], + c["year2"], + c["ref_year1"], + c["ref_year2"], + ) + reference_data_path = ( + c["reference_data_path"].split("/post")[0] + "/post" + ) + if ("ts_num_years" in c.keys()) and (c["reference_data_path_ts"] == ""): + c["reference_data_path_ts"] = ( + f"{reference_data_path}/atm/{c['grid']}/cmip_ts/monthly" + ) + else: + raise ValueError("Invalid run_type={}".format(c["run_type"])) + print(prefix) + c["prefix"] = prefix + scriptFile = os.path.join(script_dir, "%s.bash" % (prefix)) + statusFile = os.path.join(script_dir, "%s.status" % (prefix)) + settingsFile = os.path.join(script_dir, "%s.settings" % (prefix)) + skip = check_status(statusFile) + if skip: + continue + + # Create script + with open(scriptFile, "w") as f: + f.write(template.render(**c)) + make_executable(scriptFile) + + # Iterate from year1 to year2 incrementing by the number of years per time series file. + if "ts_num_years" in c.keys(): + for yr in range(c["year1"], c["year2"], c["ts_num_years"]): + start_yr = yr + end_yr = yr + c["ts_num_years"] - 1 + if ( + ("mean_climate" in c["sets"]) + or ("variability_mode_atm" in c["sets"]) + or ("variability_mode_cpl" in c["sets"]) + or ("enso" in c["sets"]) + ): + add_dependencies( + dependencies, + script_dir, + "ts", + "atm_monthly_180x360_aave", + start_yr, + end_yr, + c["ts_num_years"], + ) + with open(settingsFile, "w") as sf: + p = pprint.PrettyPrinter(indent=2, stream=sf) + p.pprint(c) + p.pprint(s) + + export = "ALL" + existing_bundles = handle_bundles( + c, + scriptFile, + export, + dependFiles=dependencies, + existing_bundles=existing_bundles, + ) + if not c["dry_run"]: + if c["bundle"] == "": + # Submit job + submit_script( + scriptFile, + statusFile, + export, + job_ids_file, + dependFiles=dependencies, + ) + + else: + print("...adding to bundle '%s'" % (c["bundle"])) + + print(f" environment_commands={c['environment_commands']}") + print_url(c, "pcmdi_diags") + + return existing_bundles diff --git a/zppy/templates/coupled_global.py b/zppy/templates/coupled_global.py new file mode 100644 index 00000000..9fc41401 --- /dev/null +++ b/zppy/templates/coupled_global.py @@ -0,0 +1,872 @@ +# Script to plot some global atmosphere and ocean time series +import glob +import math +import sys +import traceback +from typing import Any, Dict, List, Optional, Tuple + +import cftime +import matplotlib as mpl +import matplotlib.backends.backend_pdf +import matplotlib.pyplot as plt +import numpy as np +import xarray +from netCDF4 import Dataset +from readTS import TS + +mpl.use("Agg") + + +# ---additional function to get moc time series +def getmoc(dir_in): + files = sorted(glob.glob(dir_in + "mocTimeSeries*.nc")) + nfiles = len(files) + print(dir_in, nfiles, "moc files in total") + var = np.array([]) + time = np.array([]) + for i in range(nfiles): + # Open input file + fin = Dataset(files[i], "r") + time0 = fin["year"][:] + var0 = fin["mocAtlantic26"][:] + for iyear in range(int(time0[0]), int(time0[-1]) + 1): + if i > 0 and iyear <= time[-1]: + print( + "the amoc value for year", + iyear, + "has been included in the moc time series from another moc file", + files[i - 1], + time[-1], + "Skipping...", + ) + else: + imon = np.where(time0 == iyear)[0] + if len(imon) == 12: + var = np.append(var, np.mean(var0[imon])) + time = np.append(time, iyear) + else: + print("error in input file :", files[i]) + + return time, var + + +# ----------------------------------------------------------------------------- +# Function to add horizontal line showing average value over a specified period +def add_line(year, var, year1, year2, ax, format="%4.2f", lw=1, color="b"): + + i1 = (np.abs(year - year1)).argmin() + i2 = (np.abs(year - year2)).argmin() + + tmp = np.average(var[i1 : i2 + 1]) + ax.plot((year[i1], year[i2]), (tmp, tmp), lw=lw, color=color, label="average") + ax.text(ax.get_xlim()[1] + 1, tmp, format % tmp, va="center", color=color) + + return + + +# ----------------------------------------------------------------------------- +# Function to add line showing linear trend over a specified period +def add_trend( + year, + var, + year1, + year2, + ax, + format="%4.2f", + lw=1, + color="b", + verbose=False, + ohc=False, + vol=False, +): + + i1 = (np.abs(year - year1)).argmin() + i2 = (np.abs(year - year2)).argmin() + x = year[i1 : i2 + 1] + y = var[i1 : i2 + 1] + + fit = np.polyfit(x, y, 1) + if verbose: + print(fit) + fit_fn = np.poly1d(fit) + ax.plot(x, fit_fn(x), lw=lw, ls="--", c=color, label="trend") + if ohc: + # Earth radius 6371229. from MPAS-O output files + heat_uptake = fit[0] / (4.0 * math.pi * (6371229.0) ** 2 * 365.0 * 86400.0) + ax.text( + ax.get_xlim()[1] + 1, + fit_fn(x[-1]), + "%+4.2f W m$^{-2}$" % (heat_uptake), + color=color, + ) + if vol: + # Earth radius 6371229. from MPAS-O output files + # sea_lvl = fit[0] / ( 4.0*math.pi*(6371229.)**2*0.7) #for oceanic portion of the Earth surface + ax.text( + ax.get_xlim()[1] + 1, + fit_fn(x[-1]), + "%+5.4f mm yr$^{-1}$" % (fit[0]), + color=color, + ) + + return + + +# ----------------------------------------------------------------------------- +# Function to get ylim +def get_ylim(standard_range, extreme_values): + if len(extreme_values) > 0: + has_extreme_values = True + extreme_min = np.amin(extreme_values) + extreme_max = np.amax(extreme_values) + else: + has_extreme_values = False + extreme_min = None + extreme_max = None + if len(standard_range) == 2: + has_standard_range = True + standard_min = standard_range[0] + standard_max = standard_range[1] + else: + has_standard_range = False + standard_min = None + standard_max = None + if has_extreme_values and has_standard_range: + # Use at least the standard range, + # perhaps a wider window to include extremes + if standard_min <= extreme_min: + ylim_min = standard_min + else: + ylim_min = extreme_min + if standard_max >= extreme_max: + ylim_max = standard_max + else: + ylim_max = extreme_max + elif has_extreme_values and not has_standard_range: + ylim_min = extreme_min + ylim_max = extreme_max + elif has_standard_range and not has_extreme_values: + ylim_min = standard_min + ylim_max = standard_max + else: + raise ValueError("Not enough range information supplied") + return [ylim_min, ylim_max] + + +# ----------------------------------------------------------------------------- +# Plotting functions + + +# 1 +def plot_net_toa_flux_restom(ax, xlim, exps, rgn): + print("Plot 1: plot_net_toa_flux_restom") + param_dict = { + "2nd_var": False, + "axhline_y": 0, + "check_exp_ocean": False, + "check_exp_vol": False, + "check_exp_year": True, + "default_ylim": [-1.5, 1.5], + "do_add_line": True, + "do_add_trend": True, + "format": "%4.2f", + "glb_only": False, + "lw": 1.0, + "ohc": False, + "set_axhline": True, + "set_legend": True, + "shorten_year": False, + "title": "Net TOA flux (restom)", + "use_getmoc": False, + "var": lambda exp: np.array(exp["annual"]["RESTOM"][0]), + "verbose": False, + "vol": False, + "ylabel": "W m-2", + } + plot(ax, xlim, exps, param_dict, rgn) + + +# 2 +def plot_global_surface_air_temperature(ax, xlim, exps, rgn): + print("Plot 2: plot_global_surface_air_temperature") + if rgn == "glb": + region_title = "Global" + elif rgn == "n": + region_title = "Northern Hemisphere" + elif rgn == "s": + region_title = "Southern Hemisphere" + else: + raise RuntimeError(f"Invalid rgn={rgn}") + param_dict = { + "2nd_var": False, + "axhline_y": None, + "check_exp_ocean": False, + "check_exp_vol": False, + "check_exp_year": True, + "default_ylim": [13, 15.5], + "do_add_line": True, + "do_add_trend": True, + "format": "%4.2f", + "glb_only": False, + "lw": 1.0, + "ohc": False, + "set_axhline": False, + "set_legend": True, + "shorten_year": False, + "title": f"{region_title} surface air temperature", + "use_getmoc": False, + "var": lambda exp: np.array(exp["annual"]["TREFHT"][0]) - 273.15, + "verbose": False, + "vol": False, + "ylabel": "degC", + } + plot(ax, xlim, exps, param_dict, rgn) + + +# 3 +def plot_toa_radiation(ax, xlim, exps, rgn): + print("Plot 3: plot_toa_radiation") + param_dict = { + "2nd_var": True, + "axhline_y": None, + "check_exp_ocean": False, + "check_exp_vol": False, + "check_exp_year": False, + "default_ylim": [235, 245], + "do_add_line": False, + "do_add_trend": False, + "format": None, + "glb_only": False, + "lw": 1.0, + "ohc": False, + "set_axhline": False, + "set_legend": False, + "shorten_year": False, + "title": "TOA radiation: SW (solid), LW (dashed)", + "use_getmoc": False, + "var": lambda exp: np.array(exp["annual"]["FSNTOA"][0]), + "verbose": None, + "vol": None, + "ylabel": "W m-2", + } + plot(ax, xlim, exps, param_dict, rgn) + + +# 4 +def plot_net_atm_energy_imbalance(ax, xlim, exps, rgn): + print("Plot 4: plot_net_atm_energy_imbalance") + param_dict = { + "2nd_var": False, + "axhline_y": None, + "check_exp_ocean": False, + "check_exp_vol": False, + "check_exp_year": True, + "default_ylim": [-0.3, 0.3], + "do_add_line": True, + "do_add_trend": False, + "format": "%4.2f", + "glb_only": False, + "lw": 1.0, + "ohc": False, + "set_axhline": False, + "set_legend": True, + "shorten_year": False, + "title": "Net atm energy imbalance (restom-ressurf)", + "use_getmoc": False, + "var": lambda exp: np.array(exp["annual"]["RESTOM"][0]) + - np.array(exp["annual"]["RESSURF"][0]), + "verbose": False, + "vol": False, + "ylabel": "W m-2", + } + plot(ax, xlim, exps, param_dict, rgn) + + +# 5 +def plot_change_ohc(ax, xlim, exps, rgn): + print("Plot 5: plot_change_ohc") + param_dict = { + "2nd_var": False, + "axhline_y": 0, + "check_exp_ocean": True, + "check_exp_vol": False, + "check_exp_year": False, + "default_ylim": [-0.3e24, 0.9e24], + "do_add_line": False, + "do_add_trend": True, + "format": "%4.2f", + "glb_only": True, + "lw": 1.5, + "ohc": True, + "set_axhline": True, + "set_legend": True, + "shorten_year": True, + "title": "Change in ocean heat content", + "use_getmoc": False, + "var": lambda exp: np.array(exp["annual"]["ohc"]), + "verbose": False, + "vol": False, + "ylabel": "J", + } + plot(ax, xlim, exps, param_dict, rgn) + + +# 6 +def plot_max_moc(ax, xlim, exps, rgn): + print("Plot 6: plot_max_moc") + param_dict = { + "2nd_var": False, + "axhline_y": 10, + "check_exp_ocean": False, + "check_exp_vol": False, + "check_exp_year": False, + "default_ylim": [4, 22], + "do_add_line": False, + "do_add_trend": True, + "format": "%4.2f", + "glb_only": True, + "lw": 1.5, + "ohc": False, + "set_axhline": True, + "set_legend": True, + "shorten_year": False, + "title": "Max MOC Atlantic streamfunction at 26.5N", + "use_getmoc": True, + "var": None, + "verbose": True, + "vol": None, + "ylabel": "Sv", + } + plot(ax, xlim, exps, param_dict, rgn) + + +# 7 +def plot_change_sea_level(ax, xlim, exps, rgn): + print("Plot 7: plot_change_sea_level") + param_dict = { + "2nd_var": False, + "axhline_y": None, + "check_exp_ocean": False, + "check_exp_vol": True, + "check_exp_year": True, + "default_ylim": [4, 22], + "do_add_line": False, + "do_add_trend": True, + "format": "%5.3f", + "glb_only": True, + "lw": 1.5, + "ohc": False, + "set_axhline": False, + "set_legend": True, + "shorten_year": True, + "title": "Change in sea level", + "use_getmoc": False, + "var": lambda exp: ( + 1e3 + * np.array(exp["annual"]["volume"]) + / (4.0 * math.pi * (6371229.0) ** 2 * 0.7) + ), + "verbose": True, + "vol": True, + "ylabel": "mm", + } + plot(ax, xlim, exps, param_dict, rgn) + + +# 8 +def plot_net_atm_water_imbalance(ax, xlim, exps, rgn): + print("Plot 8: plot_net_atm_water_imbalance") + param_dict = { + "2nd_var": False, + "axhline_y": None, + "check_exp_ocean": False, + "check_exp_vol": False, + "check_exp_year": False, + "default_ylim": [-1, 1], + "do_add_line": True, + "do_add_trend": False, + "format": "%5.4f", + "glb_only": False, + "lw": 1.0, + "ohc": False, + "set_axhline": False, + "set_legend": True, + "shorten_year": False, + "title": "Net atm water imbalance (evap-prec)", + "use_getmoc": False, + "var": lambda exp: ( + 365 + * 86400 + * ( + np.array(exp["annual"]["QFLX"][0]) + - 1e3 + * ( + np.array(exp["annual"]["PRECC"][0]) + + np.array(exp["annual"]["PRECL"][0]) + ) + ) + ), + "verbose": False, + "vol": False, + "ylabel": "mm yr-1", + } + plot(ax, xlim, exps, param_dict, rgn) + + +# Generic plot function +def plot_generic(ax, xlim, exps, var_name, rgn): + print(f"plot_generic for {var_name}") + param_dict = { + "2nd_var": False, + "axhline_y": 0, + "check_exp_ocean": False, + "check_exp_vol": False, + "check_exp_year": True, + "default_ylim": [], + "do_add_line": True, + "do_add_trend": True, + "format": "%4.2f", + "glb_only": False, + "lw": 1.0, + "ohc": False, + "set_axhline": False, + "set_legend": True, + "shorten_year": False, + "title": var_name, + "use_getmoc": False, + "var": lambda exp: np.array(exp["annual"][var_name][0]), + "verbose": False, + "vol": False, + "ylabel": lambda exp: np.array(exp["annual"][var_name][1]), + } + plot(ax, xlim, exps, param_dict, rgn) + + +# FIXME: C901 'plot' is too complex (19) +def plot(ax, xlim, exps, param_dict, rgn): # noqa: C901 + if param_dict["glb_only"] and (rgn != "glb"): + return + ax.set_xlim(xlim) + extreme_values = [] + for exp in exps: + # Relevant to "Plot 5: plot_change_ohc" + if param_dict["check_exp_ocean"] and (exp["ocean"] is None): + continue + # Relevant to "Plot 7: plot_change_sea_level" + # This must be checked before plot 6, + # otherwise, `param_dict["var"]` will be run, + # but `exp["annual"]["volume"]` won't exist. + if param_dict["check_exp_vol"] and (exp["vol"] is None): + continue + # Relevant to "Plot 6: plot_max_moc" + if param_dict["use_getmoc"]: + if exp["moc"]: + [year, var] = getmoc(exp["moc"]) + else: + continue + else: + year = np.array(exp["annual"]["year"]) + exp["yoffset"] + var = param_dict["var"](exp) + extreme_values.append(np.amax(var)) + extreme_values.append(np.amin(var)) + if param_dict["shorten_year"]: + year = year[: len(var)] + try: + ax.plot( + year, + var, + lw=param_dict["lw"], + marker=None, + c=exp["color"], + label=exp["name"], + ) + except Exception: + raise RuntimeError(f"{param_dict['title']} could not be plotted.") + if param_dict["2nd_var"]: + # Specifically for plot_toa_radiation + # TODO: if more plots require a 2nd variable, we can change `var` to be a list, + # but that will be a more significant refactoring. + var = np.array(exp["annual"]["FLUT"][0]) + ax.plot(year, var, lw=1.0, marker=None, ls=":", c=exp["color"]) + continue + if param_dict["check_exp_year"] and exp["yr"] is None: + continue + elif param_dict["do_add_line"] or param_dict["do_add_trend"]: + for yrs in exp["yr"]: + if param_dict["do_add_line"]: + add_line( + year, + var, + yrs[0], + yrs[1], + format=param_dict["format"], + ax=ax, + lw=2 * param_dict["lw"], + color=exp["color"], + ) + if param_dict["do_add_trend"]: + add_trend( + year, + var, + yrs[0], + yrs[1], + format=param_dict["format"], + ax=ax, + lw=2 * param_dict["lw"], + color=exp["color"], + ohc=param_dict["ohc"], + verbose=param_dict["verbose"], + vol=param_dict["vol"], + ) + ylim = get_ylim(param_dict["default_ylim"], extreme_values) + ax.set_ylim(ylim) + if param_dict["set_axhline"]: + ax.axhline(y=param_dict["axhline_y"], lw=1, c="0.5") + ax.set_title(param_dict["title"]) + ax.set_xlabel("Year") + units = param_dict["ylabel"] + c = callable(units) + if c: + units = units(exps[0]) + ax.set_ylabel(units) + if param_dict["set_legend"]: + ax.legend(loc="best") + + +PLOT_DICT = { + "net_toa_flux_restom": plot_net_toa_flux_restom, + "global_surface_air_temperature": plot_global_surface_air_temperature, + "toa_radiation": plot_toa_radiation, + "net_atm_energy_imbalance": plot_net_atm_energy_imbalance, + "change_ohc": plot_change_ohc, # only glb + "max_moc": plot_max_moc, # only glb + "change_sea_level": plot_change_sea_level, # only glb + "net_atm_water_imbalance": plot_net_atm_water_imbalance, +} + + +def param_get_list(param_value): + if param_value == "None": + return [] + else: + return param_value.split(",") + + +def set_var( + exp: Dict[str, Any], + exp_key: str, + var_list: List[str], + valid_vars: List[str], + invalid_vars: List[str], + rgn: str, +) -> None: + if exp[exp_key] is not None: + ts = TS(exp[exp_key]) + for var in var_list: + try: + v: xarray.core.dataarray.DataArray + units: Optional[str] + v, units = ts.globalAnnual(var) + valid_vars.append(str(var)) + except Exception as e: + print(e) + print(f"globalAnnual failed. Invalid var = {var}") + invalid_vars.append(str(var)) + continue + if v.sizes["rgn"] > 1: + # number of years x 3 regions = v.shape + # 3 regions = global, northern hemisphere, southern hemisphere + # We get here if we used the updated `ts` task + # (using `rgn_avg` rather than `glb_avg`). + if rgn == "glb": + n = 0 + elif rgn == "n": + n = 1 + elif rgn == "s": + n = 2 + else: + raise RuntimeError(f"Invalid rgn={rgn}") + v = v.isel(rgn=n) # Just use nth region + elif rgn != "glb": + # v only has one dimension -- glb. + # Therefore it is not possible to get n or s plots. + raise RuntimeError( + f"var={var} only has global data. Cannot process rgn={rgn}" + ) + exp["annual"][var] = (v, units) + if "year" not in exp["annual"]: + years: np.ndarray[cftime.DatetimeNoLeap] = v.coords["time"].values + exp["annual"]["year"] = [x.year for x in years] + del ts + + +def make_plot_pdfs( + figstr, rgn, component, xlim, exps, plot_list, valid_plots, invalid_plots +): + num_plots = len(plot_list) + if num_plots == 0: + return + nrows = 4 + ncols = 2 + plots_per_page = nrows * ncols + num_pages = math.ceil(num_plots / plots_per_page) + + counter = 0 + # https://stackoverflow.com/questions/58738992/save-multiple-figures-with-subplots-into-a-pdf-with-multiple-pages + pdf = matplotlib.backends.backend_pdf.PdfPages(f"{figstr}_{rgn}_{component}.pdf") + for page in range(num_pages): + fig = plt.figure(1, figsize=[13.5, 16.5]) + fig.suptitle(f"{figstr}_{rgn}_{component}") + for j in range(plots_per_page): + # The final page doesn't need to be filled out with plots. + if counter >= num_plots: + break + ax = plt.subplot(nrows, ncols, j + 1) + if component == "original": + try: + plot_function = PLOT_DICT[plot_list[counter]] + except KeyError: + raise KeyError(f"Invalid plot name: {plot_list[counter]}") + try: + plot_function(ax, xlim, exps, rgn) + valid_plots.append(plot_list[counter]) + except Exception: + traceback.print_exc() + plot_name = plot_list[counter] + required_vars = [] + if plot_name == "net_toa_flux_restom": + required_vars = ["RESTOM"] + elif plot_name == "net_atm_energy_imbalance": + required_vars = ["RESTOM", "RESSURF"] + elif plot_name == "global_surface_air_temperature": + required_vars = ["TREFHT"] + elif plot_name == "toa_radiation": + required_vars = ["FSNTOA", "FLUT"] + elif plot_name == "net_atm_water_imbalance": + required_vars = ["PRECC", "PRECL", "QFLX"] + print( + f"Failed plot_function for {plot_name}. Check that {required_vars} are available." + ) + invalid_plots.append(plot_name) + counter += 1 + else: + try: + plot_name = plot_list[counter] + plot_generic(ax, xlim, exps, plot_name, rgn) + valid_plots.append(plot_name) + except Exception: + traceback.print_exc() + print(f"plot_generic failed. Invalid plot={plot_name}") + invalid_plots.append(plot_name) + counter += 1 + + fig.tight_layout() + pdf.savefig(1) + if num_pages > 1: + fig.savefig(f"{figstr}_{rgn}_{component}_{page}.png", dpi=150) + else: + fig.savefig(f"{figstr}_{rgn}_{component}.png", dpi=150) + plt.clf() + pdf.close() + + +# ----------------------------------------------------------------------------- +# FIXME: C901 'run' is too complex (19) +def run(parameters, rgn): # noqa: C901 + # These are the "Tableau 20" colors as RGB. + t20: List[Tuple[float, float, float]] = [ + (31, 119, 180), + (174, 199, 232), + (255, 127, 14), + (255, 187, 120), + (44, 160, 44), + (152, 223, 138), + (214, 39, 40), + (255, 152, 150), + (148, 103, 189), + (197, 176, 213), + (140, 86, 75), + (196, 156, 148), + (227, 119, 194), + (247, 182, 210), + (127, 127, 127), + (199, 199, 199), + (188, 189, 34), + (219, 219, 141), + (23, 190, 207), + (158, 218, 229), + ] + # Scale the RGB values to the [0, 1] range, which is the format matplotlib accepts. + for i in range(len(t20)): + r, g, b = t20[i] + t20[i] = (r / 255.0, g / 255.0, b / 255.0) + + # "Tableau 10" uses every other color + t10 = [] + for i in range(0, len(t20), 2): + t10.append(t20[i]) + + # ----------------------------------------------------------------------------- + # --- Atmos data --- + + # Experiments + case_dir = parameters[1] + experiment_name = parameters[2] + figstr = parameters[3] + year1 = int(parameters[4]) + year2 = int(parameters[5]) + color = parameters[6] + ts_num_years = parameters[7] + plots_original = param_get_list(parameters[8]) + if parameters[9].lower() == "false": + atmosphere_only = False + else: + atmosphere_only = True + plots_atm = param_get_list(parameters[10]) + plots_ice = param_get_list(parameters[11]) + plots_lnd = param_get_list(parameters[12]) + plots_ocn = param_get_list(parameters[13]) + vars_original = [] + if "net_toa_flux_restom" or "net_atm_energy_imbalance" in plots_original: + vars_original.append("RESTOM") + if "net_atm_energy_imbalance" in plots_original: + vars_original.append("RESSURF") + if "global_surface_air_temperature" in plots_original: + vars_original.append("TREFHT") + if "toa_radiation" in plots_original: + vars_original.append("FSNTOA") + vars_original.append("FLUT") + if "net_atm_water_imbalance" in plots_original: + vars_original.append("PRECC") + vars_original.append("PRECL") + vars_original.append("QFLX") + use_atmos = plots_atm or plots_original + has_original_ocn_plots = ( + ("change_ohc" in plots_original) + or ("max_moc" in plots_original) + or ("change_sea_level" in plots_original) + ) + use_ocn = plots_ocn or (not atmosphere_only and has_original_ocn_plots) + exps: List[Dict[str, Any]] = [ + { + "atmos": ( + f"{case_dir}/post/atm/glb/ts/monthly/{ts_num_years}yr/" + if use_atmos + else None + ), + "ice": ( + f"{case_dir}/post/ice/glb/ts/monthly/{ts_num_years}yr/" + if plots_ice + else None + ), + "land": ( + f"{case_dir}/post/lnd/glb/ts/monthly/{ts_num_years}yr/" + if plots_lnd + else None + ), + "ocean": ( + f"{case_dir}/post/ocn/glb/ts/monthly/{ts_num_years}yr/" + if use_ocn + else None + ), + "moc": ( + f"{case_dir}/post/ocn/glb/ts/monthly/{ts_num_years}yr/" + if use_ocn + else None + ), + "vol": ( + f"{case_dir}/post/ocn/glb/ts/monthly/{ts_num_years}yr/" + if use_ocn + else None + ), + "name": experiment_name, + "yoffset": 0.0, + "yr": ([year1, year2],), + "color": f"{color}", + } + ] + + valid_vars: List[str] = [] + invalid_vars: List[str] = [] + + # Read data + exp: Dict[str, Any] + for exp in exps: + exp["annual"] = {} + + # Use vars_original rather than plots_original, + # since the plots have different names than the variables + set_var(exp, "atmos", vars_original, valid_vars, invalid_vars, rgn) + set_var(exp, "atmos", plots_atm, valid_vars, invalid_vars, rgn) + set_var(exp, "ice", plots_ice, valid_vars, invalid_vars, rgn) + set_var(exp, "land", plots_lnd, valid_vars, invalid_vars, rgn) + set_var(exp, "ocean", plots_ocn, valid_vars, invalid_vars, rgn) + + # Optionally read ohc + if exp["ocean"] is not None: + ts = TS(exp["ocean"]) + exp["annual"]["ohc"], _ = ts.globalAnnual("ohc") + # annomalies with respect to first year + exp["annual"]["ohc"][:] = exp["annual"]["ohc"][:] - exp["annual"]["ohc"][0] + + if exp["vol"] is not None: + ts = TS(exp["vol"]) + exp["annual"]["volume"], _ = ts.globalAnnual("volume") + # annomalies with respect to first year + exp["annual"]["volume"][:] = ( + exp["annual"]["volume"][:] - exp["annual"]["volume"][0] + ) + + print( + f"{rgn} region globalAnnual was computed successfully for these variables: {valid_vars}" + ) + print( + f"{rgn} region globalAnnual could not be computed for these variables: {invalid_vars}" + ) + + # ----------------------------------------------------------------------------- + # --- Generate plots --- + + xlim = [float(year1), float(year2)] + + valid_plots: List[str] = [] + invalid_plots: List[str] = [] + + make_plot_pdfs( + figstr, rgn, "original", xlim, exps, plots_original, valid_plots, invalid_plots + ) + make_plot_pdfs( + figstr, rgn, "atm", xlim, exps, plots_atm, valid_plots, invalid_plots + ) + make_plot_pdfs( + figstr, rgn, "ice", xlim, exps, plots_ice, valid_plots, invalid_plots + ) + make_plot_pdfs( + figstr, rgn, "lnd", xlim, exps, plots_lnd, valid_plots, invalid_plots + ) + make_plot_pdfs( + figstr, rgn, "ocn", xlim, exps, plots_ocn, valid_plots, invalid_plots + ) + + print(f"These {rgn} region plots generated successfully: {valid_plots}") + print( + f"These {rgn} region plots could not be generated successfully: {invalid_plots}" + ) + + +def run_by_region(parameters): + regions = parameters[14].split(",") + for rgn in regions: + if rgn.lower() in ["glb", "global"]: + rgn = "glb" + elif rgn.lower() in ["n", "north", "northern"]: + rgn = "n" + elif rgn.lower() in ["s", "south", "southern"]: + rgn = "s" + else: + raise RuntimeError(f"Invalid rgn={rgn}") + run(parameters, rgn) + + +if __name__ == "__main__": + run_by_region(sys.argv) diff --git a/zppy/templates/global_time_series.bash b/zppy/templates/global_time_series.bash old mode 100644 new mode 100755 diff --git a/zppy/templates/inclusions/e3sm_to_cmip/default_metadata.json b/zppy/templates/inclusions/e3sm_to_cmip/default_metadata.json old mode 100644 new mode 100755 diff --git a/zppy/templates/inclusions/e3sm_to_cmip/vrt_remap_plev19.nc b/zppy/templates/inclusions/e3sm_to_cmip/vrt_remap_plev19.nc new file mode 100755 index 0000000000000000000000000000000000000000..2b71cc296ff2f207b80961668af618cecf911c7e GIT binary patch literal 1260 zcma)6QE${Z5cZaBizpJDctk=Lssx-iah$ZkwN@t*Mpq+L{V6^u3af0v<-VB`cL(xUBVs*tzuH z0qVP%tWb-(pW}#cM?4;cJG<#GHJ^x5gIn+HJTxkagxyU;mFvnHJW7_m_Vd-UM4Q%iQwCt(YJ@xiP%r~ zPM;HIvIun;B?%9r_7}kynhac#OWu}t>SQ@PISbLmpRm%|XDah&JlC#bGAk6*rd;@3 zddZ|O7@3f+Tug2%9NsF!8oxMtWT6?B0vp4WsW_`S`~NN9^vEm<;fmNHP@Mm` zT~_F_FKX$uoXl0lYUhPhg)9Z8ln>6zO3odoN2m27PmS{F&WIjCnI#mmqILUcpc~;< zIG{K0+d`Fcs^}d$^S&H1W-=RA$I#HeM^kv;cVw#lY%vKlTc8orEsV1U^y8dC`Nt3E z$b(kV{hM^&0qIAi^WZ$``~(x{-=~E9^N^4~?-H_rsUklRa!3go{z1r%%Y {{ prefix }}.status + +# Basic definitions +case="{{ case }}" +www="{{ www }}" +y1={{ year1 }} +y2={{ year2 }} +Y1="{{ '%04d' % (year1) }}" +Y2="{{ '%04d' % (year2) }}" +{% if run_type == "model_vs_model" %} +ref_Y1="{{ '%04d' % (ref_year1) }}" +ref_Y2="{{ '%04d' % (ref_year2) }}" +{%- endif %} +run_type="{{ run_type }}" +tag="{{ tag }}" + +results_dir=${tag}_${Y1}-${Y2} + +ref_name={{ ref_name }} + +#info for pcmdi specific data structure +case_id=v$(date '+%Y%m%d') + +# Create temporary workdir +workdir=`mktemp -d tmp.${id}.XXXX` +cd ${workdir} + +# Create results directory +if [ ! -d ${results_dir} ];then + mkdir -p ${results_dir} +fi +#directory to save land/sea mask generated by pcmdi +fixed_dir="${results_dir}/fixed" +if [ ! -d ${fixed_dir} ];then + mkdir -p ${fixed_dir} +fi + +# Prepare data files for pcmdi diagnostics, which is achieved by two steps: +# (a) convert e3sm output to cmip type, which used the "e3sm_to_cmip" function +# available at zppy (modifications are made to process more variables and +# 3D fileds at fixed pressure levels). +# (b) locate observations in e3sm diagnostics and convert them to the pcmdi preferred +# data format +#file to specify reference data used to derive the diagnostic metrics +cat > reference_alias.json << EOF +{% include reference_alias %} +EOF +#regions specified to derive global/regional mean metrics +cat > regions_specs.json << EOF +{% include regions_specs %} +EOF +#file include derived variables +cat > derived_variable.json << EOF +{% include derived_variable %} +EOF +#file to genereate land/sea mask data if not available +cat > process_sftlf.py << EOF +{% include process_sftlf %} +EOF + +{%- if ("mean_climate" in sets) %} +#file to genereate figures for mean climate metrics(temporary) +cat > mean_climate_plot_parser.py << EOF +{% include clim_plot_parser %} +EOF +#file to genereate figures for mean climate metrics(temporary) +cat > mean_climate_plot_driver.py << EOF +{% include clim_plot_driver %} +EOF +{%- endif %} + +# script for pcmdi pre-processing +cat > collect_data.py << EOF +import os +import subprocess +import time +import psutil +import json +import sys +import glob +import collections +import cdms2 +import gc +import numpy as np +from re import split +from itertools import chain +from shutil import copyfile +from subprocess import Popen, PIPE, call + +def childCount(): + current_process = psutil.Process() + children = current_process.children() + return(len(children)) + +def combine_time_series(variables, start_yr, end_yr, num_years, + cmip_name, dir_source, out_dic_file, outpath, + multiprocessing, num_workers): + #special case treatment (variables not in cmip cmor list) + altmod_dic = {"sst" : "ts", + "taux" : "tauu", + "tauy" : "tauv", + "rstcre" : "SWCF", + "rltcre" : "LWCF"} + # list of model data dictionary + var_list = []; lstcm0 = []; lstcm1 = [] + mod_out = collections.OrderedDict() + for key in variables: + if "_" in key or "-" in key: + var = split("_|-", key)[0] + else: + var = key + varin = var + if var in ["areacella", "sftlf", "orog"]: + fpaths = sorted(glob.glob(os.path.join(dir_source,var+"_*.nc"))) + for fpath in fpaths: + if os.path.exists(fpath): + output = os.path.join(outpath,"{}_fx_{}.nc".format(var,product)) + copyfile(fpath,output) + del(fpaths) + else: + fpaths = sorted(glob.glob(os.path.join(dir_source,varin+"_*.nc"))) + ######################################################################################### + #code below attempts to address special scenarios + if len(fpaths) < 1 and var in altmod_dic.keys(): + varin = altmod_dic.get(var,var) + if varin == "SWCF" or varin == "LWCF": + dir_source1 = "/".join(dir_source.split("/")[0:-2])+"/ts/monthly/{{ts_num_years}}yr" + fpaths = sorted(glob.glob(os.path.join(dir_source1,varin+"_*.nc"))) + else: + fpaths = sorted(glob.glob(os.path.join(dir_source,varin+"_*.nc"))) + ######################################################################################### + if len(fpaths) > 0: + tableId = fpaths[0].split("/")[-1].split("_")[1] + if tableId not in [ "Amon", "Lmon", "Omon", "SImon" ]: + tableId = "Amon" + yms = '{:04d}01'.format(start_yr) + yme = '{:04d}12'.format(end_yr) + fname = "{}.{}.{}.{}.{}.{}.{}-{}.nc".format( + cmip_name.split(".")[0], + cmip_name.split(".")[1], + cmip_name.split(".")[2].replace(".","-"), + cmip_name.split(".")[3], + tableId,var,yms,yme) + output = os.path.join(outpath,fname) + if (var not in var_list) or (not os.path.exists(output)): + var_list.append(var) + cmd_list = [] + cmd_list.append("ncrcat -v {} -d time,{}-01-01,{}-12-31".format(varin,yms[0:4],yme[0:4])) + for fpath in fpaths: + cmd_list.append(fpath) + cmd_list.append(output) + cdm0 = (" ".join(cmd_list)) + lstcm0.append(cdm0) + del(cmd_list,cdm0) + if varin != var: + cmd_extra = "ncrename -v {},{} {}".format(varin,var,output) + lstcm1.append(cmd_extra) + del(cmd_extra) + ############################################################ + #record the test model data information + mod_out[var] = { "mip" : cmip_name.split(".")[0], + "exp" : cmip_name.split(".")[1], + "model" : cmip_name.split(".")[2].replace(".","-"), + "realization": cmip_name.split(".")[3], + "tableId" : tableId, + "file_path" : output, + "template" : fname, + "start_yymm" : yms, + "end_yymm" : yme, + "varin" : varin } + del(tableId,yms,yme,fname,output) + del(fpaths) + del(var,varin) + gc.collect() + # Save test model data information required for next step + json.dump(mod_out, + open(out_dic_file, "w"), + sort_keys=True, + indent=4, + separators=(",", ": ")) + del(mod_out,variables,altmod_dic) + + #finally process the data in parallel + if not os.path.exists(outpath): + os.makedirs(outpath,mode=0o777) + lstall = list(chain(lstcm0,lstcm1)) + lensub = [len(lstcm0),len(lstcm1)] + lensub = np.cumsum(lensub) - 1 + print("Number of jobs starting is ", str(len(lstall))) + procs = [] + for i,p in enumerate(lstall): + print('running %s' % (str(p))) + proc = Popen(p, stdout=PIPE, shell=True) + if multiprocessing == True: + procs.append(proc) + while (childCount() > num_workers): + time.sleep(0.25) + [pp.communicate() for pp in procs] # this will get the exit code + procs = [] + else: + if (i == len(lstall)-1): + try: + outs, errs = proc.communicate() + if proc.returncode == 0: + print("stdout = {}; stderr = {}".format(str(outs),str(errs))) + else: + exit("ERROR: subprocess {} failed".format(str(lstall[i]))) + except: + break + else: + return_code = proc.communicate() + if return_code != 0: + exit("Failed to run {}".format(str(p))) + del(lstall,lensub,lstcm0,lstcm1) + + #set a delay to esure all process fully done + time.sleep(1) + print("done submitting") + + if len(var_list) > 0: + print("# of variables available for diagnostics: ", len(var_list)) + else: + exit("ERROR: can not found model variables to process....") + + return var_list + +def locate_ts_observation (variables, obs_sets, start_yr, end_yr, + input_path, out_dic_file, outpath, + multiprocessing, num_workers): + # fixed observational name convention to be consistent with cmip + mip = "obs"; realization = "00"; tableId = "Amon" + # special case treatment (these obs vars are inconsistent with cmor vars) + altobs_dic = { "pr" : "PRECT", + "sst" : "ts", + "sfcWind" : "si10", + "taux" : "tauu", + "tauy" : "tauv", + "rltcre" : "toa_cre_lw_mon", + "rstcre" : "toa_cre_sw_mon", + "rtmt" : "toa_net_all_mon"} + + # find and process observational data avaiable in e3sm_diags + var_list = []; lstcm0 = []; lstcm1 = [] + obs_dic = json.load(open(os.path.join('.','reference_alias.json'))) + obs_out = collections.OrderedDict() + for i,key in enumerate(variables): + if "_" in key or "-" in key: + var = key.split("_|-", var)[0] + else: + var = key + if len(obs_sets) != len(variables): + option = obs_sets[0] + else: + option = obs_sets[i] + if "default" in obs_sets or "alternate" in obs_sets: + obstag = obs_dic[var][option] + else: + inv_map = {v: k for k, v in obs_dic[var].items()} + if len(obs_sets) != len(variables): + obstag = obs_sets[0] + else: + obstag = obs_sets[i] + option = inv_map[obstag] + del(inv_map) + varin = var + if "ceres_ebaf" in obstag: + fpaths = sorted(glob.glob(os.path.join(input_path, + obstag.replace('ceres_ebaf','ceres_ebaf*'), + varin+"_*.nc"))) + if len(fpaths) < 1 and var in altobs_dic.keys(): + varin = altobs_dic.get(var,var) + fpaths = sorted(glob.glob(os.path.join(input_path, + obstag.replace('ceres_ebaf','ceres_ebaf*'), + varin+"_*.nc"))) + else: + fpaths = sorted(glob.glob(os.path.join(input_path,obstag,var+"_*.nc"))) + if len(fpaths) < 1 and var in altobs_dic.keys(): + varin = altobs_dic.get(var,var) + fpaths = sorted(glob.glob(os.path.join(input_path,obstag,varin+"_*.nc"))) + + if len(fpaths) > 0 and os.path.exists(fpaths[0]): + template = fpaths[0].split("/")[-1] + obsname = fpaths[0].split("/")[-2] + fyms = template.split("_")[-2][0:6] + fyme = template.split("_")[-1][0:6] + yms = '{:04d}{:02d}'.format(start_yr,1) + yme = '{:04d}{:02d}'.format(end_yr,12) + if int(yms) < int(fyms): + yms = fyms + if int(yme) > int(fyme): + yme = fyme + + #rename file following cmip-like convention + fname = "{}.{}.{}.{}.{}.{}.{}-{}.nc".format( + mip,option,obsname.replace(".","-"),realization,tableId,var,yms,yme) + output = os.path.join(outpath,fname) + if (var not in var_list) or (not os.path.exists(output)): + var_list.append(var) + cmd = "ncrcat -v {} -d time,{}-01-01,{}-12-31 {} {}".format( + varin,yms[0:4],yme[0:4],fpaths[0],output) + lstcm0.append(cmd); del(cmd) + if var != varin: + cmd_extra = "ncrename -v {},{} {}".format(varin,var,output) + lstcm1.append(cmd_extra) + del(cmd_extra) + + #record the observation information + obs_out[var] = { "mip" : mip, + "exp" : option, + "realization" : realization, + "tableId" : tableId, + "model" : obsname, + "file_path" : output, + "template" : fname, + "start_yymm" : yms, + "end_yymm" : yme, + "varin" : varin} + del(template,obsname,fyms,fyme,yms,yme,fname,output) + else : + print("warning: reference data not found for", var) + del(var,varin,option,obstag) + gc.collect() + + # Save observational information required for next step + json.dump(obs_out, + open(out_dic_file,"w"), + sort_keys=True, + indent=4, + separators=(",", ": ")) + del(obs_dic,obs_out,obs_sets,altobs_dic) + + #finally process the data in parallel + if not os.path.exists(outpath): + os.makedirs(outpath,mode=0o777) + lstall = list(chain(lstcm0,lstcm1)) + lensub = [len(lstcm0),len(lstcm1)] + lensub = np.cumsum(lensub) - 1 + print("Number of jobs starting is ", str(len(lstall))) + procs = [] + for i,p in enumerate(lstall): + print('running %s' % (str(p))) + proc = Popen(p, stdout=PIPE, shell=True) + if multiprocessing == True: + procs.append(proc) + while (childCount() > num_workers): + time.sleep(0.25) + [pp.communicate() for pp in procs] # this will get the exit code + procs = [] + else: + if (i == len(lstall)-1): + try: + outs, errs = proc.communicate() + if proc.returncode == 0: + print("stdout = {}; stderr = {}".format(str(outs),str(errs))) + else: + exit("ERROR: subprocess {} failed".format(str(lstall[i]))) + except: + break + else: + return_code = proc.communicate() + if return_code != 0: + exit("Failed to run {}".format(str(p))) + del(lstall,lensub,lstcm0,lstcm1) + + #set a delay to avoid delay in writing process + time.sleep(1) + print("done submitting") + + if len(var_list) > 0: + print("# of variables in observations: ", len(var_list)) + else: + exit("ERROR: can not found model variables to process....") + + return var_list + +def main(): + #basic information + start_yr = int('${Y1}') + end_yr = int('${Y2}') + num_years = end_yr - start_yr + 1 + + multiprocessing = {{multiprocessing}} + num_workers = {{num_workers}} + + # Model + # Test data directory +{% if run_type == "model_vs_obs" %} + test_data_dir = 'ts' +{% elif run_type == "model_vs_model" %} + test_data_dir = 'ts_test' +{%- endif %} + test_name = '${case}' + test_start_yr = start_yr + test_end_yr = end_yr + test_dir_source='{{ output }}/post/atm/{{ grid }}/cmip_ts/monthly' + + #info for pcmdi data structure + test_cmip_name = '{{cmip_name}}' + + #Ref +{% if run_type == "model_vs_obs" %} + # Obs + reference_dir_source = '{{ obs_ts }}' + ref_data_dir = 'ts_ref' + ref_start_yr = {{ ref_start_yr }} + ref_end_yr = ref_start_yr + num_years - 1 + if (ref_end_yr <= {{ ref_final_yr }}): + ref_end_yr = ref_end_yr + else: + ref_end_yr = {{ ref_final_yr }} +{% elif run_type == "model_vs_model" %} + # Reference + reference_dir_source = '{{ reference_data_path_ts }}' + ref_data_dir = 'ts_ref' + ref_name = '${ref_name}' + short_ref_name = '{{ short_ref_name }}' + ref_start_yr = {{ ref_start_yr }} + ref_end_yr = {{ ref_final_yr }} + #info for pcmdi data structure + ref_cmip_name = '{{ cmip_name_ref }}' + + # Optionally, swap test and reference model + if {{ swap_test_ref }}: + test_data_dir, ref_data_dir = ref_data_dir, test_data_dir + test_name, ref_name = ref_name, test_name + short_test_name, short_ref_name = short_ref_name, short_test_name + ref_cmip_name, test_cmip_name = test_cmip_name, ref_cmip_name +{%- endif %} + + ################################################################ + # process test model data for comparision + ################################################################ + # variable list in configuration file # + variables = list("{{ vars }}".split(",")) + print("process test model data for comparision") + test_dic_file = os.path.join("${results_dir}",'{}_{{sub}}_mon_catalogue.json'.format(test_data_dir)) + cmor_vars = combine_time_series(variables,test_start_yr,test_end_yr, + int({{ts_num_years}}),test_cmip_name, + test_dir_source,test_dic_file,test_data_dir, + multiprocessing,num_workers) + ################################################################ + # process reference data for comparison + ################################################################ + print("process reference obs/model data for comparision") +{% if run_type == "model_vs_obs" %} + obs_sets = list('{{ obs_sets }}'.split(",")) + refr_dic_file = os.path.join("${results_dir}",'{}_{{sub}}_mon_catalogue.json'.format(ref_data_dir)) + refr_vars = locate_ts_observation(cmor_vars,obs_sets, + ref_start_yr,ref_end_yr, + reference_dir_source, + refr_dic_file,ref_data_dir, + multiprocessing,num_workers) + + print("# of variables in test model: ", len(cmor_vars)) + print("# of variables in reference model: ", len(refr_vars)) + del(refr_vars,cmor_vars) +{% elif run_type == "model_vs_model" %} + refr_dic_file = os.path.join("${results_dir}",'{}_{{sub}}_mon_catalogue.json'.format(ref_data_dir)) + refr_vars = combine_time_series(cmor_vars,ref_start_yr,ref_end_yr, + int({{ts_num_years_ref}}),ref_cmip_name, + ref_dir_source,refr_dic_file,ref_data_dir, + multiprocessing,num_workers) + + print("# of variables in test model: ", len(cmor_vars)) + print("# of variables in reference model: ", len(refr_vars)) + del(refr_vars,cmor_vars) +{%- endif %} + +if __name__ == "__main__": + main() + +EOF + +################################ +# Pcmdi pre-processing to link +# required data to work directory +command="python -u collect_data.py" +time ${command} +if [ $? != 0 ]; then + cd {{ scriptDir }} + echo 'ERROR (9)' > {{ prefix }}.status + exit 9 +fi + +################################################################ +# generate input parameter for pcmdi metrics driver +{%- if ("mean_climate" in sets) or ("variability_mode" in sets) or ("enso" in sets) %} +cat > parameterfile.py << EOF +import os +import sys +import json + +#basic information +start_yr = int('${Y1}') +end_yr = int('${Y2}') +num_years = end_yr - start_yr + 1 + +# Model +# Test data path +{% if run_type == "model_vs_obs" %} +test_data_dir = 'ts' +{% elif run_type == "model_vs_model" %} +test_data_dir = 'ts_test' +{%- endif %} +test_name = '${case}' +test_start_yr = start_yr +test_end_yr = end_yr +test_dir_source='{{ output }}/post/atm/{{ grid }}/cmip_ts/monthly' +test_cmip_name = '{{ cmip_name }}' + +# Ref +{% if run_type == "model_vs_obs" %} +# Obs +reference_dir_source = '{{ obs_ts }}' +ref_data_dir = 'ts_ref' +ref_start_yr = {{ ref_start_yr }} +ref_end_yr = ref_start_yr + num_years - 1 +if (ref_end_yr <= {{ ref_final_yr }}): + ref_end_yr = ref_end_yr +else: + ref_end_yr = {{ ref_final_yr }} +{% elif run_type == "model_vs_model" %} +# Reference +reference_dir_source = '{{ reference_data_path_ts }}' +ref_data_dir = 'ts_ref' +ref_name = '${ref_name}' +short_ref_name = '{{ short_ref_name }}' +ref_start_yr = {{ ref_start_yr }} +ref_end_yr = {{ ref_final_yr }} +ref_cmip_name = '{{ cmip_name_ref }}' + +# Optionally, swap test and reference model +if {{ swap_test_ref }}: + test_data_dir, ref_data_dir = ref_data_dir, test_data_dir + test_name, ref_name = ref_name, test_name + short_test_name, short_ref_name = short_ref_name, short_test_name + ref_cmip_name, test_cmip_name = test_cmip_name, ref_cmip_name +{%- endif %} + +# shared options +case_id = "${case_id}" + +# Record NetCDF output +nc_out_obs = {{ nc_out_obs }} +nc_out = {{ nc_out }} +if nc_out: + ext = ".nc" +else: + ext = ".xml" + +user_notes = 'Provenance and results' +parallel = False +debug = {{ pmp_debug }} + +# Generate plots +plot = {{ plot }} +plot_obs = {{ plot_obs }} # optional + +# Additional settings +run_type = '{{ run_type }}' +figure_format = '{{ figure_format }}' + +{%- if "mean_climate" in subset %} +############################################################# +#parameter setup specific for mean climate metrics +############################################################# +mip = test_cmip_name.split(".")[0] +exp = test_cmip_name.split(".")[1] +product = test_cmip_name.split(".")[2] +realm = test_cmip_name.split(".")[3] +realization = realm + +{% if run_type == "model_vs_obs" %} +test_data_set = [ test_cmip_name.split(".")[2] ] +{% elif run_type == "model_vs_model" %} +test_data_set = [ test_cmip_name.split(".")[2], ref_cmip_name.split(".")[2] ] +{%- endif %} + +modver = "${case_id}" + +# Generate CMEC compliant json +cmec = {{ cmec }} + +# SIMULATION PARAMETER +period = "{:04d}{:02d}-{:04d}{:02d}".format(test_start_yr,1,test_end_yr,12) + +# INTERPOLATION OPTIONS +target_grid = '{{ target_grid }}' # OPTIONS: '2.5x2.5' or an actual cdms2 grid object +targetGrid = target_grid +target_grid_string = '{{ target_grid_string }}' +regrid_tool = '{{ regrid_tool }}' # OPTIONS: 'regrid2','esmf' +regrid_method = '{{ regrid_method }}' # OPTIONS: 'linear','conservative', only if tool is esmf +regrid_tool_ocn = '{{ regrid_tool_ocn }}' # OPTIONS: "regrid2","esmf" +regrid_method_ocn = ( '{{ regrid_method_ocn }}' ) # OPTIONS: 'linear','conservative', only if tool is esmf + +# SAVE INTERPOLATED MODEL CLIMATOLOGIES ? +save_test_clims = {{ save_test_clims }} + +# CUSTOMIZE REGIONS VALUES NAMES +regions_values = {"land":100.,"ocean":0.} + +#defined regions +regions_specs = json.load(open(os.path.join(".",'regions_specs.json'))) +for kk in regions_specs.keys(): + if "domain" in regions_specs[kk].keys(): + if "latitude" in regions_specs[kk]['domain'].keys(): + regions_specs[kk]['domain']['latitude'] = tuple(regions_specs[kk]['domain']['latitude']) + if "longitude" in regions_specs[kk]['domain'].keys(): + regions_specs[kk]['domain']['longitude'] = tuple(regions_specs[kk]['domain']['longitude']) + +#region specified for each variable +regions =json.load(open(os.path.join("${results_dir}",'var_region_{{sub}}_catalogue.json'))) + +####################################### +# DATA LOCATION: MODELS, OBS AND METRICS OUTPUT +# --------------------------------------------- +# Templates for model climatology files +test_data_path = os.path.join( + "${results_dir}", + "climo", + "${case_id}") +test_dic = json.load(open(os.path.join("${results_dir}",'{}_{{sub}}_clim_catalogue.json'.format(test_data_dir)))) +template = test_dic['ts'][product]['template'] +filename_template = template.replace('ts',"%(variable)").replace(product,"%(model)") +del(test_dic) + +####################################### +# ROOT PATH FOR OBSERVATIONS +reference_data_set = list('{{ obs_sets }}'.split(",")) +reference_data_path = os.path.join("${results_dir}","climo","${case_id}") +observation_file = os.path.join("${results_dir}",'{}_{{sub}}_clim_catalogue.json'.format(ref_data_dir)) +custom_observations = os.path.abspath(observation_file) +if not os.path.exists(custom_observations): + sys.exit("ERROR: observation climatology file is missing....") + +####################################### +# DIRECTORY AND FILENAME FOR OUTPUTING METRICS RESULTS +metrics_in_single_file = 'n' # 'y' or 'n' +metrics_output_path = os.path.join( + "${results_dir}", + "metrics_results", + "mean_climate", + mip, + exp, + "%(case_id)" +) # All SAME FILE +############################################################ +# DIRECTORY WHERE TO PUT INTERPOLATED MODELS' CLIMATOLOGIES +diagnostics_output_path= os.path.join( + "${results_dir}", + "diagnostic_results", + "mean_climate", + mip, + exp, + "%(case_id)" +) + +########################################### +# Templates for MODEL land/sea mask (sftlf) +# depracated in new version of pcmdi +############################################# +generate_sftlf = {{ generate_sftlf }} +os.path.join("${fixed_dir}","sftlf_%(model).nc") +test_clims_interpolated_output = diagnostics_output_path + +{%- endif %} + +{%- if "variability_mode" in subset %} +############################################################ +#parameter setup specific for mode variability metrics +############################################################ +mip = test_cmip_name.split(".")[0] +exp = test_cmip_name.split(".")[1] +product = test_cmip_name.split(".")[2] + +{% if run_type == "model_vs_obs" %} +modnames = [ test_cmip_name.split(".")[2] ] +{% elif run_type == "model_vs_model" %} +modnames = [ test_cmip_name.split(".")[2], ref_cmip_name.split(".")[2] ] +{%- endif %} + +realm = test_cmip_name.split(".")[3] +realization = realm + +msyear = test_start_yr +meyear = test_end_yr +osyear = ref_start_yr +oeyear = ref_end_yr + +seasons = list('{{ seasons }}'.split(",")) +frequency = '{{ frequency }}' + +#from configuration file +varOBS = '{{vars}}' +varModel = '{{vars}}' +ObsUnitsAdjust = {{ ObsUnitsAdjust }} +ModUnitsAdjust = {{ ModUnitsAdjust }} + +# If True, maskout land region thus consider only over ocean +landmask = {{ landmask }} + +#open dictional file to locate model and reference files +test_dic = json.load(open(os.path.join("${results_dir}",'{}_{{sub}}_mon_catalogue.json'.format(test_data_dir)))) +modpath = test_dic[varModel]['file_path'] +model = test_dic[varModel]['model'] +if model != product: + print("warning: model {} in dataset differ from user setup {}".format(model,product)) + print("warning: use model in datasets to continue....") + modnames = [model] +del (test_dic) + +#setup template for fixed files (e.g. land/sea mask) +modpath_lf = os.path.join("${fixed_dir}","sftlf_%(model).nc") + +#open dictional file to locate reference data +ref_dic = json.load(open(os.path.join("${results_dir}", + '{}_{{sub}}_mon_catalogue.json'.format(ref_data_dir)))) +reference_data_name = ref_dic[varOBS]['model'] +reference_data_path = ref_dic[varOBS]['file_path'] + +#update time for observation if different +ref_syear = str(ref_dic[varOBS]['start_yymm'])[0:4] +ref_eyear = str(ref_dic[varOBS]['end_yymm'])[0:4] +if int(ref_syear) > osyear: + osyear = int(ref_syear) +if int(ref_eyear) < oeyear: + oeyear = int(ref_eyear) +del(ref_dic,ref_syear,ref_eyear) + +####################################### + +# If True, remove Domain Mean of each time step +RmDomainMean = {{ RmDomainMean }} + +# If True, consider EOF with unit variance +EofScaling = {{ EofScaling }} + +# Conduct CBF analysis +CBF = {{ CBF }} + +# Conduct conventional EOF analysis +ConvEOF = {{ ConvEOF }} + +# Generate CMEC compliant json +cmec = {{ cmec }} + +# Update diagnostic file if exist +update_json = {{ update_json }} + +####################################### +results_dir = os.path.join( + "${results_dir}", + "%(output_type)", + "variability_modes", + "%(mip)", + "%(exp)", + "${case_id}", + "%(variability_mode)", + "%(reference_data_name)", +) +{%- endif %} + +{%- if "enso" in subset %} +############################################################ +#parameter setup specific for enso metrics +############################################################ +mip = test_cmip_name.split(".")[0] +exp = test_cmip_name.split(".")[1] + +{% if run_type == "model_vs_obs" %} +modnames = [ test_cmip_name.split(".")[2] ] +{% elif run_type == "model_vs_model" %} +modnames = [ test_cmip_name.split(".")[2], ref_cmip_name.split(".")[2] ] +{%- endif %} + +realm = test_cmip_name.split(".")[3] +realization = realm + +msyear = test_start_yr +meyear = test_end_yr + +osyear = ref_start_yr +oeyear = ref_end_yr + +####################################### +# Model (test) +# setup template for fixed files (e.g. land/sea mask) +modpath_lf = os.path.join("${fixed_dir}","sftlf_%(model).nc") +# construct model template +test_dic = json.load(open(os.path.join("${results_dir}",'{}_{{sub}}_mon_catalogue.json'.format(test_data_dir)))) +vv0 = list(test_dic.keys())[0] +tableId = test_dic[vv0]['tableId'] +modpath = os.path.join( + test_data_dir, + "%(mip).%(exp).%(model).%(realization)."+tableId+".%(variable)." + + '{:04d}{:02d}-{:04d}{:02d}'.format(msyear,1,meyear,12) + + ".nc") +del(test_dic,vv0) + +# OBSERVATIONS +reference_data_path = {} +reference_data_lf_path = {} +#orgnize obs catalog +ref_dic = json.load(open(os.path.join("${results_dir}",'{}_{{sub}}_mon_catalogue.json'.format(ref_data_dir)))) +for var in ref_dic: + refname = ref_dic[var]['model'] + if refname not in reference_data_path.keys(): + reference_data_path[refname] = {} + reference_data_path[refname][var] = {'template': ref_dic[var]['template']} + #land/sea mask + reference_data_lf_path[refname] = os.path.join("${fixed_dir}",'sftlf.{}.nc'.format(refname)) + #update time information(minimum overlap) + ref_syear = str(ref_dic[var]['start_yymm'])[0:4] + ref_eyear = str(ref_dic[var]['end_yymm'])[0:4] + if int(ref_syear) > osyear: + osyear = int(ref_syear) + if int(ref_eyear) < oeyear: + oeyear = int(ref_eyear) + del(refname) +del(ref_dic) + +#document the observation catalogue +obs_cmor = True +obs_cmor_path = ref_data_dir +obs_catalogue = 'obs_info_catalogue.json' +json.dump(reference_data_path, + open(obs_catalogue,"w"), + sort_keys=True, + indent=4, + separators=(",", ": ")) +del(reference_data_path) + +# METRICS COLLECTION (ENSO_perf, ENSO_tel, ENSO_proc) +# will set in main driver +# metricsCollection = ENSO_perf # ENSO_perf, ENSO_tel, ENSO_proc + +# OUTPUT +results_dir = os.path.join( + "${results_dir}", + "%(output_type)", + "enso_metric", + "%(mip)", + "%(exp)", + "${case_id}", + "%(metricsCollection)", +) + +json_name = "%(mip)_%(exp)_%(metricsCollection)_${case_id}_%(model)_%(realization)" + +netcdf_name = json_name + +{%- endif %} + +EOF +{%- endif %} + +################################################################ + +# Run PCMDI Diags +echo +echo ===== RUN PCMDI DIAGS ===== +echo + +# Prepare configuration file +cat > pcmdi.py << EOF +import os +import glob +import json +import re +import sys +import cdms2 +import psutil +import numpy as np +import collections +import subprocess +import time +import pcmdi_metrics +from pcmdi_metrics.utils import StringConstructor +from argparse import RawTextHelpFormatter +from shutil import copyfile +from re import split +from itertools import chain +from subprocess import Popen, PIPE, call + +{%- if "mean_climate" in subset %} +from mean_climate_plot_parser import ( + create_mean_climate_plot_parser, +) +from mean_climate_plot_driver import ( + mean_climate_metrics_plot, +) +{%- endif %} + +def childCount(): + current_process = psutil.Process() + children = current_process.children() + return(len(children)) + +def generate_land_sea_mask(data_file,outpath): + data_dic = json.load(open(data_file)) + for var in data_dic: + model = data_dic[var]['model'] + mpath = data_dic[var]['file_path'] + mpath_lf = os.path.join(outpath,"sftlf.{}.nc".format(model)) + # generate land/sea mask if not exist + if not os.path.exists(mpath_lf): + print("generate land/sea mask file....") + return_code = call(['python','process_sftlf.py',var,model,mpath,mpath_lf],text=False) + else: + return_code = 0 + del(model,mpath,mpath_lf) + del(data_dic) + + return return_code + +{%- if "mean_climate" in subset %} +def calculate_climatology(method,start_yr,end_yr,data_dic,out_dic, + outpath,multiprocessing,num_workers): + + #first check the monthly data dictionary + if not os.path.exists(data_dic): + exit("ERROR: monthly data dictionary file not found...") + else: + data_dic = json.load(open(data_dic)) + + if not os.path.exists(outpath): + os.makedirs(outpath,mode=0o777) + + ##################################### + #calculate annual cycle climatology + ##################################### + clim_dic = collections.OrderedDict() + lstcmd = []; lstcm0 = []; lstcm1 = []; lstcm2 = [] + for var in data_dic.keys(): + cyms = '{:04d}-{:02d}'.format(start_yr,1) + cyme = '{:04d}-{:02d}'.format(end_yr,12) + if int(data_dic[var]['start_yymm']) > (start_yr*100+1): + cyms = '{}-{}'.format(str(data_dic[var]['start_yymm'])[0:4], + str(data_dic[var]['start_yymm'])[4:6]) + if int(data_dic[var]['end_yymm']) < (end_yr*100+12): + cyme = '{}-{}'.format(str(data_dic[var]['end_yymm'])[0:4], + str(data_dic[var]['end_yymm'])[4:6]) + infile = data_dic[var]['file_path'] + if os.path.exists(infile): + if method == "pcmdi": + #reform the output file template + outfile = ".".join(data_dic[var]['template'].split(".")[:-2]) + ".nc" + cmd = (" ".join(["pcmdi_compute_climatologies.py", + "--start", cyms, + "--end", cyme, + "--var", var, + "--infile", infile, + "--outpath", outpath+"/", + "--outfilename", outfile ])) + lstcmd.append(cmd); del(cmd,outfile) + else: + # use nco to process mean climatology + # middle month days from January to February + dofm = [15,46,74,105,135,166,196,227,258,288,319,349] + #create a temporary directory to save temporary files + if not os.path.exists("tmpnco"): + os.mkdir("tmpnco",mode=0o777) + #derive annual cycle climate mean + for imon,mday in enumerate(dofm): + tmpfile = os.path.join('tmpnco',"{}_tmp_{:02d}-clim.nc".format(var,imon+1)) + cmd = (" ".join(['ncra -O -h -F -d', + 'time,{},,12'.format(imon+1), + infile,tmpfile])) + lstcmd.append(cmd) + cm0 = (" ".join(['ncatted -O -h -a', + 'units,time,o,c,"days since 0001-01-01 00:00:0.0"', + tmpfile,tmpfile])) + lstcm0.append(cm0) + cm1 = (" ".join(['ncap2 -O -h -s', + "'time=time*0+{};defdim({},{});time_bnds=make_bounds(time,{},{})'".format( + mday,'"bnds"',2,'\$bnds','"time_bnds"'), + tmpfile,tmpfile])) + lstcm1.append(cm1); del(cmd,cm0,cm1,tmpfile) + #derive seasonal and annual mean + for season in ["AC", "DJF", "JJA", "MAM", "SON", "ANN"]: + period = "{}-{}".format(cyms.replace("-",""),cyme.replace("-","")) + outpre = ".".join(data_dic[var]['template'].split(".")[:-2]) + outfile = os.path.join(outpath,".".join([outpre,"{}.{}.{}.nc".format(period,season,"${case_id}")])) + if season == "AC": + cm2 = (" ".join(["ncrcat -O -v {} -d time,0,".format(var), + os.path.join('tmpnco',"{}_*_*-clim.nc".format(var)), + outfile])) + elif season == "DJF": + cm2 = (" ".join(["ncra -O -h", + os.path.join('tmpnco',"{}_*_12-clim.nc".format(var)), + os.path.join('tmpnco',"{}_*_01-clim.nc".format(var)), + os.path.join('tmpnco',"{}_*_02-clim.nc".format(var)), + outfile])) + elif season == "JJA": + cm2 = (" ".join(["ncra -O -h", + os.path.join('tmpnco',"{}_*_06-clim.nc".format(var)), + os.path.join('tmpnco',"{}_*_07-clim.nc".format(var)), + os.path.join('tmpnco',"{}_*_08-clim.nc".format(var)), + outfile])) + elif season == "MAM": + cm2 = (" ".join(["ncra -O -h", + os.path.join('tmpnco',"{}_*_03-clim.nc".format(var)), + os.path.join('tmpnco',"{}_*_04-clim.nc".format(var)), + os.path.join('tmpnco',"{}_*_05-clim.nc".format(var)), + outfile])) + elif season == "SON": + cm2 = (" ".join(["ncra -O -h", + os.path.join('tmpnco',"{}_*_09-clim.nc".format(var)), + os.path.join('tmpnco',"{}_*_10-clim.nc".format(var)), + os.path.join('tmpnco',"{}_*_11-clim.nc".format(var)), + outfile])) + elif season == "ANN": + cm2 = (" ".join(["ncra -O -h", + os.path.join('tmpnco',"{}_*_*-clim.nc".format(var)), + outfile])) + lstcm2.append(cm2); del(cm2,period,outfile,outpre) + #document climatology info in dictionary file# + period = "{}-{}".format(cyms.replace("-",""),cyme.replace("-","")) + template = ".".join(data_dic[var]['template'].split(".")[:-2]) + \ + ".{}.AC.{}.nc".format(period,"${case_id}") + clim_dic[var] = {data_dic[var]['exp'] : data_dic[var]['model'], + data_dic[var]['model'] : {'template' : template, + 'period' : period, + 'data_path' : outpath}} + #save climatology dictionary + json.dump(clim_dic, + open(out_dic,"w"), + sort_keys=True, + indent=4, + separators=(",", ": ")) + + #finally process the data in parallela + if method == "pcmdi": + print("Number of jobs starting is ", str(len(lstcmd))) + procs = [] + for i,p in enumerate(lstcmd): + print('running %s' % (str(p))) + proc = Popen(p, stdout=PIPE, shell=True) + if multiprocessing == True: + procs.append(proc) + while (childCount() > num_workers): + time.sleep(0.25) + [pp.communicate() for pp in procs] # this will get the exit code + procs = [] + else: + if (i == len(lstcmd)-1): + try: + outs, errs = proc.communicate() + if proc.returncode == 0: + print("stdout = {}; stderr = {}".format(str(outs),str(errs))) + else: + exit("ERROR: subprocess {} failed".format(str(lstcmd[i]))) + except: + break + else: + return_code = proc.communicate() + if return_code != 0: + exit("Failed to run {}".format(str(p))) + elif method == "nco": + lstall = list(chain(lstcmd,lstcm0,lstcm1,lstcm2)) + lensub = [len(lstcmd),len(lstcm0),len(lstcm1),len(lstcm2)] + lensub = np.cumsum(lensub) - 1 + print("Number of jobs starting is ", str(len(lstall))) + procs = [] + for i,p in enumerate(lstall): + print('running %s' % (str(p))) + proc = Popen(p, stdout=PIPE, shell=True) + if multiprocessing == True: + procs.append(proc) + while (childCount() > num_workers): + time.sleep(0.25) + [pp.communicate() for pp in procs] # this will get the exit code + procs = [] + else: + if (i == len(lstall)-1): + try: + outs, errs = proc.communicate() + if proc.returncode == 0: + print("stdout = {}; stderr = {}".format(str(outs),str(errs))) + else: + exit("ERROR: subprocess {} failed".format(str(lstall[i]))) + except: + break + else: + return_code = proc.communicate() + if return_code != 0: + exit("Failed to run {}".format(str(p))) + # clean the temporary files + for tmpfil in glob.glob(os.path.join('tmpnco',"_*_*-clim.nc".format(var))): + if os.path.exists(tmpfil): + os.remove(tmpfil) + + # add a delay to ensure the processing fully done + time.sleep(1) + print("done submitting") + del(lstcmd,lstcm0,lstall,lstcm1,lstcm2,clim_dic,data_dic) + + return + +def calculate_derived_variable(var,data_dic,data_path): + #################################################### + #this function is used to calculate a quantity given + #the data documented in the data_dic passed by user + #derived_variable.json is a file documen the rules to + #calculate the required diagnostic variables + ##################################################### + derive_dic = json.load(open("derived_variable.json")) + vsublist = []; operator = [] + #collect the variable and operation rulse for derivation + for vv in derive_dic[var]: + vsublist.append(vv) + operator.append(derive_dic[var][vv]) + + #now search data file and judge if the derivation is possible + l_derive = True + for i,vv in enumerate(vsublist): + infile = data_dic[vv]['data_path'] + if i == 0: + outfile = infile.replace(vv,var) + if (not os.path.exists(infile)) or (os.path.exists(outfile)): + l_derive = False + + # finally do derivation + if l_derive: + for i,vv in enumerate(derive_dic[var].keys()): + infile = data_dic[vv]['data_path'] + f = cdms2.open(infile) + if i == 0: + d = f(vv) * operator[i] + else: + d = d + f(vv) * operator[i] + f.close() + del(infile) + f = cdms2.open(outfile,'w') + f.write(d) + f.close() + del(d,outfile,f) + outdic = {'template' : outfile.split("/")[-1], + 'data_path' : outfile} + del(derive_dic,vsublist,operator) + + return outdic, outfile + +{%- endif %} + +def main (): + start_yr = int('${Y1}') + end_yr = int('${Y2}') + num_years = end_yr - start_yr + 1 + + num_workers = {{ num_workers }} + multiprocessing = {{multiprocessing}} + + # Model + # Test data directory +{% if run_type == "model_vs_obs" %} + test_data_dir = 'ts' +{% elif run_type == "model_vs_model" %} + test_data_dir = 'ts_test' +{%- endif %} + test_name = '${case}' + test_start_yr = start_yr + test_end_yr = end_yr + test_dir_source='{{ output }}/post/atm/{{ grid }}/cmip_ts/monthly' + test_cmip_name = '{{ cmip_name }}' + + # Ref +{% if run_type == "model_vs_obs" %} + # Obs + reference_dir_source = '{{ obs_ts }}' + ref_data_dir = 'ts_ref' + ref_start_yr = {{ ref_start_yr }} + ref_end_yr = ref_start_yr + num_years - 1 + if (ref_end_yr <= {{ ref_final_yr }}): + ref_end_yr = ref_end_yr + else: + ref_end_yr = {{ ref_final_yr }} +{% elif run_type == "model_vs_model" %} + # Reference + reference_dir_source = '{{ reference_data_path_ts }}' + ref_data_dir = 'ts_ref' + ref_name = '${ref_name}' + short_ref_name = '{{ short_ref_name }}' + ref_start_yr = {{ ref_start_yr }} + ref_end_yr = {{ ref_final_yr }} + ref_cmip_name = '{{ cmip_name_ref }}' + + # Optionally, swap test and reference model + if {{ swap_test_ref }}: + test_data_dir, ref_data_dir = ref_data_dir, test_data_dir + test_name, ref_name = ref_name, test_name + short_test_name, short_ref_name = short_ref_name, short_test_name + ref_cmip_name, test_cmip_name = test_cmip_name, ref_cmip_name +{%- endif %} + + ################################################################################ + # land/sea mask is needed in PCMDI diagnostics, check and generate it here as + # these data are not always available for model or observations + ################################################################################ + # Model + test_dic = os.path.join("${results_dir}",'{}_{{sub}}_mon_catalogue.json'.format(test_data_dir)) + return_code = generate_land_sea_mask(test_dic,"${fixed_dir}") + if return_code != 0: + exit("Failed to generate land/sea mask...") + del(test_dic) + # Reference + ref_dic = os.path.join("${results_dir}",'{}_{{sub}}_mon_catalogue.json'.format(ref_data_dir)) + return_code = generate_land_sea_mask(ref_dic,"${fixed_dir}") + if return_code != 0: + exit("Failed to generate land/sea mask...") + del(ref_dic) + + # Run PCMDI for diagnostics +{%- if "mean_climate" in subset %} + ##################################################################### + # calculate test and reference model climatology + ##################################################################### + print("calculate mean climate diagnostics") + outpath = os.path.join("${results_dir}","climo","${case_id}") + method = '{{climatology_process_method}}' + for key in ["test","ref"]: + if key == "test": + data_dir = test_data_dir + start_yr = test_start_yr + end_yr = test_end_yr + elif key == "ref": + data_dir = ref_data_dir + start_yr = ref_start_yr + end_yr = ref_end_yr + data_dic = os.path.join("${results_dir}",'{}_{{sub}}_mon_catalogue.json'.format(data_dir)) + clim_dic = os.path.join("${results_dir}",'{}_{{sub}}_clim_catalogue.json'.format(data_dir)) + if method in [ "pcmdi", "PCMDI", "default" ]: + #method 1: built in PCMDI package (may have memory issue for highres data) + calculate_climatology("pcmdi",start_yr,end_yr,data_dic,clim_dic,outpath,multiprocessing,num_workers) + elif method in [ "nco", "NCO", "alternate"]: + #method 2: use nco package(default,faster) + calculate_climatology("nco",start_yr,end_yr,data_dic,clim_dic,outpath,multiprocessing,num_workers) + if not os.path.exists(clim_dic): + exist("ERROR: failed to process data climatology....") + del(data_dir,start_yr,end_yr,data_dic,clim_dic) + + ##################################################################### + # call mean_climate_driver.py to process diagnostics + ##################################################################### + #defined regions + regional = '{{ regional }}' + if regional == "y": + default_regions = list('{{ regions }}'.split(",")) + else: + default_regions = ["global", "NHEX", "SHEX", "TROPICS"] + # create command list for mean climate driver + lstcmd = [] + reg_var_dic = {} + for vv in list("{{vars}}".split(",")): + vkys = vv.split("-")[0] + reg_var_dic[vkys] = default_regions + vars = vv + cmd = (" ".join(["mean_climate_driver.py", + "-p", "parameterfile.py", + "--vars", '{}'.format(vars)])) + lstcmd.append(cmd); del(cmd,vars,vkys) + + #create regions for regional mean of each variable + json.dump(reg_var_dic, + open(os.path.join("${results_dir}",'var_region_{{sub}}_catalogue.json'),"w"), + sort_keys=True, + indent=4, + separators=(",", ": ")) + + #finally process the data in parallel + print("Number of jobs starting is ", str(len(lstcmd))) + procs = [] + if len(lstcmd) > 0: + for i,p in enumerate(lstcmd): + print('running %s' % (str(p))) + proc = Popen(p, stdout=PIPE, shell=True) + if multiprocessing == True: + procs.append(proc) + while (childCount() > num_workers): + time.sleep(0.25) + [pp.communicate() for pp in procs] + procs = [] + else: + if (i == len(lstcmd)-1): + try: + outs, errs = proc.communicate() + if proc.returncode == 0: + print("stdout = {}; stderr = {}".format(str(outs),str(errs))) + else: + exit("ERROR: subprocess {} failed".format(str(lstcmd[i]))) + except: + break + else: + return_code = proc.communicate() + if return_code != 0: + exit("Failed to run {}".format(str(p))) + + #set a delay to avoid delay in writing process + time.sleep(1) + print("done submitting") + del(reg_var_dic,regional,lstcmd) + + #generate diagnostics figures + print("--- prepare for mean climate metrics plot ---") + parser = create_mean_climate_plot_parser() + parameter = parser.get_parameter(argparse_vals_only=False) + parameter.regions = default_regions + parameter.run_type = "${run_type}" + parameter.period = "{}-{}".format(test_start_yr,test_end_yr) + parameter.pcmdi_data_set = "{{pcmdi_data_set}}" + parameter.pcmdi_data_path = os.path.join('{{pcmdi_data_path}}',"mean_climate") + parameter.test_data_set = "{}.{}".format(test_cmip_name,"${case_id}") + parameter.test_data_path = os.path.join("${results_dir}","metrics_results","mean_climate") +{% if run_type == "model_vs_obs" %} + parameter.refr_data_set = "" + parameter.refr_period = "" + parameter.refr_data_path = "" +{% elif run_type == "model_vs_model" %} + parameter.refr_data_set = "{}.{}".format(ref_cmip_name,"${case_id}") + parameter.refr_period = "{}-{}".format(ref_start_yr,ref_end_yr) + parameter.refr_data_path = os.path.join("${results_dir}","metrics_results","mean_climate") +{%- endif %} + parameter.output_path = os.path.join("${results_dir}","graphics","mean_climate") + parameter.ftype = '{{ figure_format }}' + parameter.debug = {{ pmp_debug }} + parameter.parcord_show_markers = {{parcord_show_markers}} #False + parameter.add_vertical_line = {{portrait_vertical_line}} #True + + #generate diagnostics figures + print("--- generate mean climate metrics plot ---") + mean_climate_metrics_plot(parameter) + del(parameter) + +{%- endif %} + +{%- if "variability_mode" in subset %} + print("calculate mode variability metrics") +{%- if subset == "variability_mode_atm" %} + modes = list({{ atm_modes }}) +{% elif subset == "variability_mode_cpl" %} + modes = list({{ cpl_modes }}) +{%- endif %} + ##################################################################### + # call variability_modes_driver.py to process diagnostics + ##################################################################### + lstcmd = [] + for variability_mode in modes: + if variability_mode in ["NPO", "NPGO", "PSA1"]: + eofn_obs = "2" + eofn_mod = "2" + elif variability_mode in ["PSA2"]: + eofn_obs = "3" + eofn_mod = "3" + else: + eofn_obs = "1" + eofn_mod = "1" + cmd = (" ".join(['variability_modes_driver.py', + '-p', "parameterfile.py", + '--variability_mode', variability_mode, + '--eofn_mod', eofn_mod, + '--eofn_obs', eofn_obs ])) + lstcmd.append(cmd); del(cmd) + #finally process the data in parallel + print("Number of jobs starting is ", str(len(lstcmd))) + procs = [] + for i,p in enumerate(lstcmd): + print('running %s' % (str(p))) + proc = Popen(p, stdout=PIPE, shell=True) + if multiprocessing == True: + procs.append(proc) + while (childCount() > num_workers): + time.sleep(0.25) + [pp.communicate() for pp in procs] # this will get the exit code + procs = [] + else: + if (i == len(lstcmd)-1): + try: + outs, errs = proc.communicate() + if proc.returncode == 0: + print("stdout = {}; stderr = {}".format(str(outs),str(errs))) + else: + exit("ERROR: subprocess {} failed".format(str(lstcmd[i]))) + except: + break + else: + return_code = proc.communicate() + if return_code != 0: + exit("Failed to run {}".format(str(p))) + #set a delay to avoid delay in writing process + time.sleep(1) + print("done submitting") + del(lstcmd) +{%- endif %} + +{%- if "enso" in subset %} + ##################################################################### + # call enso_driver.py to process diagnostics + ##################################################################### + print("calculate enso metrics") + groups = list({{ groups }}) + lstcmd = [] + for metricsCollection in groups: + cmd = (" ".join(['enso_driver.py', + '-p', "parameterfile.py", + '--metricsCollection',metricsCollection])) + lstcmd.append(cmd); del(cmd) + #finally process the data in parallel + print("Number of jobs starting is ", str(len(lstcmd))) + procs = [] + for i,p in enumerate(lstcmd): + print('running %s' % (str(p))) + proc = Popen(p, stdout=PIPE, shell=True) + procs.append(proc) + while (childCount() > {{num_workers}}): + time.sleep(0.25) + [pp.communicate() for pp in procs] # this will get the exit code + procs = [] + else: + if (i == len(lstcmd)-1): + try: + outs, errs = proc.communicate() + if proc.returncode == 0: + print("stdout = {}; stderr = {}".format(str(outs),str(errs))) + else: + exit("ERROR: subprocess {} failed".format(str(lstcmd[i]))) + except: + break + #set a delay to avoid delay in writing process + time.sleep(1) + print("done submitting") + del(lstcmd,procs) +{%- endif %} + +if __name__ == "__main__": + main() + +EOF + +################################ +# Run diagnostics +#command="srun -n 1 python -u pcmdi.py" +command="python -u pcmdi.py" +# Run diagnostics +time ${command} +if [ $? != 0 ]; then + cd {{ scriptDir }} + echo 'ERROR (10)' > {{ prefix }}.status + exit 9 +fi + +# Copy output to web server +echo +echo ===== COPY FILES TO WEB SERVER ===== +echo + +# Create top-level directory +web_dir=${www}/${case}/pcmdi_diags #/{{ sub }} +mkdir -p ${web_dir} +if [ $? != 0 ]; then + cd {{ scriptDir }} + echo 'ERROR (10)' > {{ prefix }}.status + exit 10 +fi + +{% if machine in ['pm-cpu', 'pm-gpu'] %} +# For NERSC, make sure it is world readable +f=`realpath ${web_dir}` +while [[ $f != "/" ]] +do + owner=`stat --format '%U' $f` + if [ "${owner}" = "${USER}" ]; then + chgrp e3sm $f + chmod go+rx $f + fi + f=$(dirname $f) +done +{% endif %} + +# Copy files +#rsync -a --delete ${results_dir} ${web_dir}/ +rsync -a ${results_dir} ${web_dir}/ +if [ $? != 0 ]; then + cd {{ scriptDir }} + echo 'ERROR (11)' > {{ prefix }}.status + exit 11 +fi + +{% if machine in ['pm-cpu', 'pm-gpu'] %} +# For NERSC, change permissions of new files +pushd ${web_dir}/ +chgrp -R e3sm ${results_dir} +chmod -R go+rX,go-w ${results_dir} +popd +{% endif %} + +{% if machine in ['anvil', 'chrysalis'] %} +# For LCRC, change permissions of new files +pushd ${web_dir}/ +chmod -R go+rX,go-w ${results_dir} +popd +{% endif %} + +# Delete temporary workdir +cd .. +if [[ "${debug,,}" != "true" ]]; then + rm -rf ${workdir} +fi + +# Update status file and exit +{% raw %} +ENDTIME=$(date +%s) +ELAPSEDTIME=$(($ENDTIME - $STARTTIME)) +{% endraw %} +echo ============================================== +echo "Elapsed time: $ELAPSEDTIME seconds" +echo ============================================== +rm -f {{ prefix }}.status +echo 'OK' > {{ prefix }}.status +exit 0 diff --git a/zppy/templates/pcmdi_diags/cmip_variables.json b/zppy/templates/pcmdi_diags/cmip_variables.json new file mode 100755 index 00000000..e5c3336e --- /dev/null +++ b/zppy/templates/pcmdi_diags/cmip_variables.json @@ -0,0 +1,121 @@ +{ + "SImon":[ + "siu", + "siv", + "sitemptop", + "sisnmass", + "simass", + "sisnthick", + "sithick", + "sitimefrac", + "siconc" + ], + "Omon": [ + "areacello", + "fsitherm", + "hfds", + "masso", + "mlotst", + "sfdsi", + "sob", + "soga", + "sos", + "tauuo", + "tauvo", + "thetaoga", + "tob", + "tos", + "tosga", + "volo", + "wfo", + "zos", + "thetaoga", + "hfsifrazil", + "masscello", + "so", + "thetao", + "thkcello", + "uo", + "vo", + "volcello", + "wo", + "zhalfo" + ], + "lnd": [ + "mrsos", + "mrso", + "mrfso", + "mrros", + "mrro", + "prveg", + "evspsblveg", + "evspsblsoi", + "tran", + "tsl", + "lai" + ], + "atm": [ + "hur", + "hus", + "ta", + "ua", + "va", + "wap", + "zg", + "o3", + "pfull", + "phalf", + "tas", + "ts", + "psl", + "ps", + "sfcWind", + "huss", + "pr", + "prc", + "prsn", + "evspsbl", + "tauu", + "tauv", + "hfls", + "clt", + "rlds", + "rlus", + "rsds", + "rsdscs", + "rsus", + "rsuscs", + "hfss", + "cl", + "clw", + "cli", + "clivi", + "clwvi", + "prw", + "rldscs", + "rlut", + "rlutcs", + "rsdt", + "rsut", + "rsutcs", + "rtmt", + "abs550aer", + "od550aer", + "tasmax", + "tasmin", + "clisccp", + "cltisccp", + "albisccp", + "pctisccp", + "clcalipso", + "cltcalipso", + "cllcalipso", + "clmcalipso", + "clhcalipso" + ], + "fx": [ + "areacella", + "sftlf", + "orog" + ] +} diff --git a/zppy/templates/pcmdi_diags/derived_variable.json b/zppy/templates/pcmdi_diags/derived_variable.json new file mode 100755 index 00000000..6ca047ed --- /dev/null +++ b/zppy/templates/pcmdi_diags/derived_variable.json @@ -0,0 +1,26 @@ +{ + "rltcre":{ + "rlutcs" : 1, + "rlut" : -1 + }, + "rstcre":{ + "rsutcs" : 1, + "rsut" : -1 + }, + "netsw":{ + "rsds" : 1, + "rsus" : -1 + }, + "netlw":{ + "rlus" : 1, + "rlds" : -1 + }, + "netflux":{ + "rsds" : 1, + "rsus" : -1, + "rlds" : 1, + "rlus" : -1, + "hfls" : -1, + "hfss" : -1 + } +} diff --git a/zppy/templates/pcmdi_diags/mean_climate_plot_driver.py b/zppy/templates/pcmdi_diags/mean_climate_plot_driver.py new file mode 100755 index 00000000..913621a9 --- /dev/null +++ b/zppy/templates/pcmdi_diags/mean_climate_plot_driver.py @@ -0,0 +1,670 @@ +#!/bin/env python +############################################################################## +# This model is used to generate mean climate diagnostic figures +# Author: Shixuan Zhang (shixuan.zhang@pnnl.gov) +############################################################################# +import os +import shutil + +import numpy as np +import pandas as pd +from mean_climate_plot_parser import ( + fill_plot_var_and_units, + find_metrics_data, + metrics_inquire, + shift_row_to_bottom, +) +from pcmdi_metrics.graphics import ( + Metrics, + normalize_by_median, + parallel_coordinate_plot, + portrait_plot, +) + + +def load_test_model_data(test_file, refr_file, mip, run_type): + # load the data and reorganize if needed + pd.set_option("future.no_silent_downcasting", True) + test_lib = Metrics(test_file) + + # model_vs_model, merge the reference model data into test model + if run_type == "model_vs_model": + refr_lib = Metrics(refr_file) + test_lib = test_lib.merge(refr_lib) + del refr_lib + + # collect and reorgnize test model data for plotting: + test_models = [] + for stat in test_lib.df_dict: + for season in test_lib.df_dict[stat]: + for region in test_lib.df_dict[stat][season]: + df = pd.DataFrame(test_lib.df_dict[stat][season][region]) + for i, model in enumerate(df["model"].tolist()): + model_run = df["model_run"].tolist()[i] + new_name = "{}-{}".format(mip.upper(), model_run.upper()) + idxs = df[df.iloc[:, 2] == model_run].index + df.loc[idxs, "model"] = list( + map( + lambda x: x.replace(model, new_name), + df.loc[idxs, "model"], + ) + ) + if new_name not in test_models: + test_models.append(new_name) + test_lib.df_dict[stat][season][region] = df + del df + return test_models, test_lib + + +def load_cmip_metrics_data(cmip_file): + # collect cmip multi-model ensemble data for comparison + pd.set_option("future.no_silent_downcasting", True) + cmip_lib = Metrics(cmip_file) + cmip_models = [] + highlight_models = [] + for stat in cmip_lib.df_dict: + for season in cmip_lib.df_dict[stat]: + for region in cmip_lib.df_dict[stat][season]: + # now find all E3SM models in cmip6 + df = pd.DataFrame(cmip_lib.df_dict[stat][season][region]) + for model in df["model"].tolist(): + if model not in cmip_models: + cmip_models.append(model) + if ("e3sm" in model.lower()) and (model not in highlight_models): + highlight_models.append(model) + # move highlight_models to the end + for model in highlight_models: + idxs = df[df.iloc[:, 0] == model].index + cmip_models.remove(model) + cmip_models.append(model) + for idx in idxs: + df = shift_row_to_bottom(df, idx) + cmip_lib.df_dict[stat][season][region] = df + del df + return cmip_models, highlight_models, cmip_lib + + +def save_figure_data( + stat, region, season, var_names, var_units, data_dict, template, outdir +): + # construct output file name + fname = ( + template.replace("%(metric)", stat) + .replace("%(region)", region) + .replace("%(season)", season) + ) + outfile = os.path.join(outdir, fname) + outdic = pd.DataFrame(data_dict) + outdic = outdic.drop(columns=["model_run"]) + for var in list(outdic.columns.values[3:]): + if var not in var_names: + print("{} is excluded from the {}".format(var, fname)) + outdic = outdic.drop(columns=[var]) + else: + # replace the variable with the name + units + outdic.columns.values[ + outdic.columns.values.tolist().index(var) + ] = var_units[var_names.index(var)] + + # save data to .csv file + outdic.to_csv(outfile) + del (fname, outfile, outdic) + return + + +def construct_port4sea_axis_lables( + var_names, cmip_models, test_models, highlight_models +): + model_list = cmip_models + test_models + # assign colors for labels of models + lable_colors = [] + for model in model_list: + if model in highlight_models: + lable_colors.append("#5170d7") + elif model in test_models: + lable_colors.append("#FC5A50") + else: + lable_colors.append("#000000") + + if len(model_list) > len(var_names): + xlabels = model_list + ylabels = var_names + landscape = True + else: + xlabels = var_names + ylabels = model_list + landscape = False + del model_list + return xlabels, ylabels, lable_colors, landscape + + +def construct_port4sea_data( + stat, + seasons, + region, + data_dict, + var_names, + var_units, + file_template, + outdir, + landscape, +): + # work array + data_all = dict() + # loop 4 seasons and collect data + for season in seasons: + # save raw metric results as a .csv file for each season + save_figure_data( + stat, + region, + season, + var_names, + var_units, + data_dict[stat][season][region], + file_template, + outdir, + ) + if stat == "cor_xy": + data_nor = data_dict[stat][season][region][var_names].to_numpy() + if landscape: + data_all[season] = data_nor.T + else: + data_all[season] = data_nor + del data_nor + elif stat == "bias_xy": + # calculate the relative bias + data_sea = data_dict[stat][season][region][var_names].to_numpy() + data_rfm = data_dict["mean-obs_xy"][season][region][var_names].to_numpy() + data_msk = np.where(np.abs(data_rfm) == 0.0, np.nan, data_rfm) + data_nor = data_sea * 100.0 / data_msk + if landscape: + data_all[season] = data_nor.T + else: + data_all[season] = data_nor + del (data_sea, data_rfm, data_msk, data_nor) + else: + data_sea = data_dict[stat][season][region][var_names].to_numpy() + if landscape: + data_sea = data_sea.T + data_all[season] = normalize_by_median(data_sea, axis=1) + else: + data_all[season] = normalize_by_median(data_sea, axis=0) + del data_sea + + # data for final plot + data_all_nor = np.stack( + [data_all["djf"], data_all["mam"], data_all["jja"], data_all["son"]] + ) + del data_all + return data_all_nor + + +def port4sea_plot( + stat, + region, + seasons, + data_dict, + var_names, + var_units, + cmip_models, + test_models, + highlight_models, + file_template, + figure_template, + outdir, + add_vertical_line, + data_version=None, + watermark=False, +): + + # process figure + fontsize = 20 + var_names = sorted(var_names) + var_units = sorted(var_units) + + # construct the axis labels and colors + ( + xaxis_labels, + yaxis_labels, + lable_colors, + landscape, + ) = construct_port4sea_axis_lables( + var_names, cmip_models, test_models, highlight_models + ) + + # construct data for plotting + data_all_nor = construct_port4sea_data( + stat, + seasons, + region, + data_dict, + var_names, + var_units, + file_template, + outdir, + landscape, + ) + + if stat == "cor_xy": + cbar_label = "Pattern Corr." + var_range = (-1.0, 1.0) + cmap_bounds = [0.1, 0.2, 0.4, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0] + elif stat == "bias_xy": + cbar_label = "{}, relative (%)".format(stat.upper()) + var_range = (-30.0, 30.0) + cmap_bounds = [-30.0, -20.0, -10.0, -5.0, -1, 0.0, 1.0, 5.0, 10.0, 20.0, 30.0] + else: + cbar_label = "{}, normalized by median".format(stat.upper()) + var_range = (-0.5, 0.5) + cmap_bounds = [-0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.4, 0.5] + + if landscape: + figsize = (40, 18) + legend_box_xy = (1.08, 1.18) + legend_box_size = 4 + legend_lw = 1.5 + shrink = 0.8 + legend_fontsize = fontsize * 0.8 + else: + figsize = (18, 25) + legend_box_xy = (1.25, 1) + legend_box_size = 3 + legend_lw = 1.5 + shrink = 1.0 + legend_fontsize = fontsize * 0.8 + + # Add Watermark/Logo + if watermark: + logo_rect = [0.85, 0.15, 0.07, 0.07] + logo_off = False + else: + logo_rect = [0, 0, 0, 0] + logo_off = True + + # Using Matplotlib-based PMP Visualization Function to Generate Portrait Plot + fig, ax, cbar = portrait_plot( + data_all_nor, + xaxis_labels=xaxis_labels, + yaxis_labels=yaxis_labels, + cbar_label=cbar_label, + cbar_label_fontsize=fontsize * 1.2, + box_as_square=True, + vrange=var_range, + figsize=figsize, + cmap="RdYlBu_r", + cmap_bounds=cmap_bounds, + cbar_kw={"extend": "both", "shrink": shrink}, + missing_color="white", + legend_on=True, + legend_labels=["DJF", "MAM", "JJA", "SON"], + legend_box_xy=legend_box_xy, + legend_box_size=legend_box_size, + legend_lw=legend_lw, + legend_fontsize=legend_fontsize, + logo_rect=logo_rect, + logo_off=logo_off, + ) + + if add_vertical_line: + ax.axvline( + x=len(xaxis_labels) - len(highlight_models) - len(test_models), + color="k", + linewidth=3, + ) + + if landscape: + ax.set_xticklabels(xaxis_labels, rotation=45, va="bottom", ha="left") + ax.set_yticklabels(yaxis_labels, rotation=0, va="center", ha="right") + for xtick, color in zip(ax.get_xticklabels(), lable_colors): + xtick.set_color(color) + ax.yaxis.label.set_color(lable_colors[0]) + else: + ax.set_xticklabels(xaxis_labels, rotation=45, va="bottom", ha="left") + ax.set_yticklabels(yaxis_labels, rotation=0, va="center", ha="right") + ax.xaxis.label.set_color(lable_colors[0]) + for ytick, color in zip(ax.get_yticklabels(), lable_colors): + ytick.set_color(color) + + ax.tick_params(axis="x", labelsize=fontsize) + ax.tick_params(axis="y", labelsize=fontsize) + + cbar.ax.tick_params(labelsize=fontsize) + + # Add title + ax.set_title( + "Model Performance of Seasonal Climatology ({}, {})".format( + stat.upper(), region.upper() + ), + fontsize=fontsize * 1.5, + pad=30, + ) + + # Add Watermark + if watermark: + ax.text( + 0.5, + 0.5, + "E3SM-PCMDI", + transform=ax.transAxes, + fontsize=100, + color="black", + alpha=0.5, + ha="center", + va="center", + rotation=25, + ) + # Add data info + fig.text( + 1.25, + 0.9, + "Data version\n" + data_version, + transform=ax.transAxes, + fontsize=12, + color="black", + alpha=0.6, + ha="left", + va="top", + ) + + # Save figure as an image file + figname = ( + figure_template.replace("%(metric)", stat) + .replace("%(region)", region) + .replace("%(season)", "4season") + ) + figfile = os.path.join(outdir, figname) + fig.savefig(figfile, facecolor="w", bbox_inches="tight") + del ( + data_all_nor, + xaxis_labels, + yaxis_labels, + lable_colors, + ) + + return + + +def paracord_plot( + stat, + region, + season, + data_dict, + var_names, + var_units, + cmip_models, + test_models, + highlight_models, + file_template, + figure_template, + outdir, + identify_all_models, + data_version=None, + watermark=False, +): + + # construct plotting data + var_names = sorted(var_names) + var_units = sorted(var_units) + + # write out the results as a table + save_figure_data( + stat, region, season, var_names, var_units, data_dict, file_template, outdir + ) + + # add ensemble mean + model_data = data_dict[var_names].to_numpy() + + # construct the string for plot + model_list = data_dict[ + "model" + ].to_list() # cmip_models + test_models + ["CMIP6 MME"] + model_list_group2 = highlight_models + test_models + models_to_highlight = test_models + [ + data_dict["model"].to_list()[-1] + ] # ["CMIP6 MME"] + figsize = (40, 12) + fontsize = 20 + legend_ncol = int(7 * figsize[0] / 40.0) + legend_posistion = (0.50, -0.14) + # color map for markers + colormap = "tab20_r" + # color map for highlight lines + xcolors = [ + "#000000", + "#e41a1c", + "#ff7f00", + "#4daf4a", + "#f781bf", + "#a65628", + "#984ea3", + "#999999", + "#377eb8", + "#dede00", + ] + lncolors = xcolors[1 : len(test_models) + 1] + [xcolors[0]] + # Add Watermark/Logo + if watermark: + logo_rect = [0.85, 0.15, 0.07, 0.07] + logo_off = False + else: + logo_rect = [0, 0, 0, 0] + logo_off = True + + xlabel = "Metric" + if "rms" in stat: + ylabel = "RMS Error (" + stat.upper() + ")" + elif "std" in stat: + ylabel = "Standard Deviation (" + stat.upper() + ")" + else: + ylabel = "value (" + stat.upper() + ")" + + if not np.isnan(model_data).all(): + print(model_data.min(), model_data.max()) + title = "Model Performance of {} Climatology ({}, {})".format( + season.upper(), stat.upper(), region.upper() + ) + fig, ax = parallel_coordinate_plot( + model_data, + var_units, + model_list, + model_names2=model_list_group2, + group1_name="CMIP6", + group2_name="E3SM", + models_to_highlight=models_to_highlight, + models_to_highlight_colors=lncolors, + models_to_highlight_labels=models_to_highlight, + identify_all_models=identify_all_models, # hide indiviaul model markers for CMIP6 models + vertical_center="median", + vertical_center_line=True, + title=title, + figsize=figsize, + axes_labelsize=fontsize * 1.1, + title_fontsize=fontsize * 1.1, + yaxes_label=ylabel, + xaxes_label=xlabel, + colormap=colormap, + show_boxplot=False, + show_violin=True, + violin_colors=("lightgrey", "pink"), + legend_ncol=legend_ncol, + legend_bbox_to_anchor=legend_posistion, + legend_fontsize=fontsize * 0.85, + xtick_labelsize=fontsize * 0.95, + ytick_labelsize=fontsize * 0.95, + logo_rect=logo_rect, + logo_off=logo_off, + ) + + # Add Watermark + if watermark: + ax.text( + 0.5, + 0.5, + "E3SM-PCMDI", + transform=ax.transAxes, + fontsize=100, + color="black", + alpha=0.5, + ha="center", + va="center", + rotation=25, + ) + # Add data info + fig.text( + 1.25, + 0.9, + "Data version\n" + data_version, + transform=ax.transAxes, + fontsize=12, + color="black", + alpha=0.6, + ha="left", + va="top", + ) + + # Save figure as an image file + figname = ( + figure_template.replace("%(metric)", stat) + .replace("%(region)", region) + .replace("%(season)", season) + ) + figfile = os.path.join(outdir, figname) + fig.savefig(figfile, facecolor="w", bbox_inches="tight") + + del (model_data, model_list, model_list_group2, models_to_highlight) + + return + + +def mean_climate_metrics_plot(parameter): + # info for test simulation + test_mip = parameter.test_data_set.split(".")[0] + test_exp = parameter.test_data_set.split(".")[1] + test_product = parameter.test_data_set.split(".")[2] + test_case_id = parameter.test_data_set.split(".")[-1] + # output directory + outdir = os.path.join(parameter.output_path, test_mip, test_exp, test_case_id) + + # construct file template to save the figure data in .csv file + file_template = "%(metric)_%(region)_{}_{}_{}_{}_mean_climate_%(season)_{}.csv" + file_template = file_template.format( + parameter.run_type.upper(), + test_mip.upper(), + test_exp.upper(), + test_product.upper(), + parameter.period, + ) + # construct figure template + figure_template = file_template.replace("csv", parameter.ftype) + + # find the metrics data + test_file, refr_file, cmip_file = find_metrics_data(parameter) + + # load cmip metrics data + cmip_models, highlight_models, cmip_lib = load_cmip_metrics_data(cmip_file) + + # load test model metrics data + test_models, test_lib = load_test_model_data( + test_file, refr_file, test_mip, parameter.run_type + ) + # collect overlap sets of variables for plotting: + test_lib, cmip_lib, var_list, var_unit_list = fill_plot_var_and_units( + test_lib, cmip_lib + ) + # search overlap of regions in test and reference + regions = [] + for reg in parameter.regions: + if (reg in test_lib.regions) and (reg in cmip_lib.regions): + regions.append(reg) + + # merge the cmip and model data + merged_lib = cmip_lib.merge(test_lib) + + ################################### + # generate parallel coordinate plot + ################################### + parall_fig_dir = os.path.join(outdir, "paracord_annual") + if os.path.exists(parall_fig_dir): + shutil.rmtree(parall_fig_dir) + os.makedirs(parall_fig_dir) + print("Parallel Coordinate Plots (4 seasons), loop each region and metric....") + # add ensemble mean + for metric in [ + "rms_xyt", + "std-obs_xyt", + "std_xyt", + "rms_y", + "rms_devzm", + "std_xy_devzm", + "std-obs_xy_devzm", + ]: + for region in regions: + for season in ["ann"]: + data_dict = merged_lib.df_dict[metric][season][region] + data_dict.loc["CMIP MMM"] = cmip_lib.df_dict[metric][season][ + region + ].mean(numeric_only=True, skipna=True) + data_dict.at["CMIP MMM", "model"] = "CMIP MMM" + if parameter.parcord_show_markers is not None: + identify_all_models = parameter.parcord_show_markers + else: + identify_all_models = True + paracord_plot( + metric, + region, + season, + data_dict, + var_list, + var_unit_list, + cmip_models, + test_models, + highlight_models, + file_template, + figure_template, + parall_fig_dir, + identify_all_models, + data_version=None, + watermark=False, + ) + del data_dict + + ################################### + # generate portrait plot + ################################### + ptrait_fig_dir = os.path.join(outdir, "portrait_4seasons") + if os.path.exists(ptrait_fig_dir): + shutil.rmtree(ptrait_fig_dir) + os.makedirs(ptrait_fig_dir) + print("Portrait Plots (4 seasons),loop each region and metric....") + ######################################################################### + seasons = ["djf", "mam", "jja", "son"] + data_dict = merged_lib.df_dict + for metric in ["rms_xy", "cor_xy", "bias_xy"]: + for region in regions: + print("working on {} in {} region".format(metrics_inquire(metric), region)) + if parameter.add_vertical_line is not None: + add_vertical_line = parameter.add_vertical_line + else: + add_vertical_line = False + port4sea_plot( + metric, + region, + seasons, + data_dict, + var_list, + var_unit_list, + cmip_models, + test_models, + highlight_models, + file_template, + figure_template, + ptrait_fig_dir, + add_vertical_line, + data_version=None, + watermark=False, + ) + + # release the data space + del (merged_lib, cmip_lib, test_lib, var_unit_list, var_list, regions) + + return diff --git a/zppy/templates/pcmdi_diags/mean_climate_plot_parser.py b/zppy/templates/pcmdi_diags/mean_climate_plot_parser.py new file mode 100755 index 00000000..7c66d895 --- /dev/null +++ b/zppy/templates/pcmdi_diags/mean_climate_plot_parser.py @@ -0,0 +1,373 @@ +#!/usr/bin/env python +import ast +import glob +import os + +import numpy as np +import pandas as pd +from pcmdi_metrics.mean_climate.lib import pmp_parser + + +def create_mean_climate_plot_parser(): + parser = pmp_parser.PMPMetricsParser() + parser.add_argument( + "--test_model", + dest="test_model", + help="Defines target model for the metrics plots", + required=False, + ) + + parser.add_argument( + "--test_data_set", + type=str, + nargs="+", + dest="test_data_set", + help="List of observations or models to test " + + "against the reference_data_set", + required=False, + ) + + parser.add_argument( + "--test_data_path", + dest="test_data_path", + help="Path for the test climitologies", + required=False, + ) + + parser.add_argument( + "--period", dest="period", help="A simulation parameter", required=False + ) + + parser.add_argument( + "--run_type", dest="run_type", help="A post-process parameter", required=False + ) + + parser.add_argument( + "--regions", + type=ast.literal_eval, + dest="regions", + help="Regions on which to run the metrics", + required=False, + ) + + parser.add_argument( + "--pcmdi_data_set", + type=str, + nargs="+", + dest="pcmdi_data_set", + help="PCMDI CMIP dataset that is used as a " + + "CMIP multi-model ensembles against the test_data_set", + required=False, + ) + + parser.add_argument( + "--pcmdi_data_path", + dest="pcmdi_data_path", + help="Path for the PCMDI CMIP mean climate metrics data", + required=False, + ) + + parser.add_argument( + "--refr_model", + dest="refr_model", + help="A simulation parameter", + required=False, + ) + + parser.add_argument( + "--refr_data_set", + type=str, + nargs="+", + dest="refr_data_set", + help="List of reference models to test " + "against the reference_data_set", + required=False, + ) + + parser.add_argument( + "--refr_data_path", + dest="refr_data_path", + help="Path for the reference model climitologies", + required=False, + ) + + parser.add_argument( + "--output_path", + dest="output_path", + help="Path for the metrics plots", + required=False, + ) + + parser.add_argument( + "--parcord_show_markers", + dest="parcord_show_markers", + help="show markers for individual model in parallel coordinate plots", + required=False, + ) + parser.add_argument( + "--add_vertical_line", + dest="add_vertical_line", + help="draw a vertical line to separate test and reference models for portrait plots", + required=False, + ) + return parser + + +def metrics_inquire(name): + # list of metrics name and long-name + metrics = { + "std-obs_xy": "Spatial Standard Deviation (Reference)", + "std_xy": "Spatial Standard Deviation (Model)", + "std-obs_xyt": "Spatial-temporal Standard Deviation (Reference)", + "std_xyt": "Spatial-temporal Standard Deviation (Model)", + "std-obs_xy_devzm": "Standard Deviation of Deviation from Zonal Mean (Reference)", + "mean_xy": "Area Weighted Spatial Mean (Model)", + "mean-obs_xy": "Area Weighted Spatial Mean (Reference)", + "std_xy_devzm": "Standard Deviation of Deviation from Zonal Mean (Model)", + "rms_xyt": "Spatio-Temporal Root Mean Square Error", + "rms_xy": "Spatial Root Mean Square Error", + "rmsc_xy": "Centered Spatial Root Mean Square Error", + "cor_xy": "Spatial Pattern Correlation Coefficient", + "bias_xy": "Mean Bias (Model - Reference)", + "mae_xy": "Mean Absolute Difference (Model - Reference)", + "rms_y": "Root Mean Square Error of Zonal Mean", + "rms_devzm": "Root Mean Square Error of Deviation From Zonal Mean", + } + if name in metrics.keys(): + long_name = metrics[name] + + return long_name + + +def find_latest(pmprdir, mip, exp): + versions = sorted( + [ + r.split("/")[-1] + for r in glob.glob(os.path.join(pmprdir, mip, exp, "v????????")) + ] + ) + latest_version = versions[-1] + return latest_version + + +def shift_row_to_bottom(df, index_to_shift): + idx = [i for i in df.index if i != index_to_shift] + return df.loc[idx + [index_to_shift]] + + +def find_cmip_metric_data(pmprdir, data_set, var): + # cmip data for comparison + mip = data_set.split(".")[0] + exp = data_set.split(".")[1] + case_id = data_set.split(".")[2] + if case_id == "": + case_id = find_latest(pmprdir, mip, exp) + fpath = glob.glob(os.path.join(pmprdir, mip, exp, case_id, "{}.*.json".format(var))) + if len(fpath) < 1 and var == "rtmt": + fpath = glob.glob( + os.path.join(pmprdir, mip, exp, case_id, "{}.*.json".format("rt")) + ) + if len(fpath) > 0 and os.path.exists(fpath[0]): + cmip_list = fpath[0] + return_code = 0 + else: + print("Warning: cmip metrics data not found for {}....".format(var)) + print("Warning: remove {} from the metric list....".format(var)) + cmip_list = None + return_code = -99 + return cmip_list, return_code + + +def select_models(df, selected_models): + # Selected models only + model_names = df["model"].tolist() + for model_name in model_names: + drop_model = True + for keyword in selected_models: + if keyword in model_name: + drop_model = False + break + if drop_model: + df.drop(df.loc[df["model"] == model_name].index, inplace=True) + df.reset_index(drop=True, inplace=True) + + return df + + +def exclude_models(df, excluded_models): + # eclude models + model_names = df["model"].tolist() + for model_name in model_names: + drop_model = False + for keyword in excluded_models: + if keyword in model_name: + drop_model = True + break + if drop_model: + df.drop(df.loc[df["model"] == model_name].index, inplace=True) + df.reset_index(drop=True, inplace=True) + return df + + +def fill_plot_var_and_units(model_lib, cmip_lib): + # we define fixed sets of variables used for final plotting. + units_all = { + "prw": "[kg m$^{-2}$]", + "pr": "[mm d$^{-1}$]", + "prsn": "[mm d$^{-1}$]", + "prc": "[mm d$^{-1}$]", + "hfls": "[W m$^{-2}$]", + "hfss": "[W m$^{-2}$]", + "clivi": "[kg $m^{-2}$]", + "clwvi": "[kg $m^{-2}$]", + "psl": "[Pa]", + "evspsbl": "[kg m$^{-2} s^{-1}$]", + "rlds": "[W m$^{-2}$]", + "rldscs": "[W $m^{-2}$]", + "rtmt": "[W m$^{-2}$]", + "rsdt": "[W m$^{-2}$]", + "rlus": "[W m$^{-2}$]", + "rluscs": "[W m$^{-2}$]", + "rlut": "[W m$^{-2}$]", + "rlutcs": "[W m$^{-2}$]", + "rsds": "[W m$^{-2}$]", + "rsdscs": "[W m$^{-2}$]", + "rstcre": "[W m$^{-2}$]", + "rltcre": "[W m$^{-2}$]", + "rsus": "[W m$^{-2}$]", + "rsuscs": "[W m$^{-2}$]", + "rsut": "[W m$^{-2}$]", + "rsutcs": "[W m$^{-2}$]", + "ts": "[K]", + "tas": "[K]", + "tauu": "[Pa]", + "tauv": "[Pa]", + "sfcWind": "[m s$^{-1}$]", + "zg-500": "[m]", + "ta-200": "[K]", + "ta-850": "[K]", + "ua-200": "[m s$^{-1}$]", + "ua-850": "[m s$^{-1}$]", + "va-200": "[m s$^{-1}$]", + "va-850": "[m s$^{-1}$]", + "uas": "[m s$^{-1}$]", + "vas": "[m s$^{-1}$]", + "tasmin": "[K]", + "tasmax": "[K]", + "clt": "[%]", + } + + # loop variable list and find them in cmip and target models + variable_units = [] + variable_names = [] + for var in units_all.keys(): + # reorgnize cmip data + if var == "rtmt": + if ("rt" in cmip_lib.var_list) and ("rtmt" in model_lib.var_list): + # special case (rt is used in pcmdi datasets, but rtmt is for cmip) + cmip_lib.var_list = list( + map(lambda x: x.replace("rt", "rtmt"), cmip_lib.var_list) + ) + for stat in cmip_lib.df_dict: + for season in cmip_lib.df_dict[stat]: + for region in cmip_lib.df_dict[stat][season]: + cmip_lib.df_dict[stat][season][region][ + "rtmt" + ] = cmip_lib.df_dict[stat][season][region].pop("rt") + + if var in model_lib.var_list and var in cmip_lib.var_list: + varunt = var + "\n" + str(units_all[var]) + indv1 = cmip_lib.var_list.index(var) + indv2 = model_lib.var_list.index(var) + cmip_lib.var_unit_list[indv1] = varunt + model_lib.var_unit_list[indv2] = varunt + variable_units.append(varunt) + variable_names.append(var) + del (indv1, indv2, varunt) + else: + print("Warning: {} is not found in metrics data".format(var)) + print( + "Warning: {} is possibly not included as default in fill_plot_var_and_units()".format( + var + ) + ) + + # sanity check for cmip data + for stat in cmip_lib.df_dict: + for season in cmip_lib.df_dict[stat]: + for region in cmip_lib.df_dict[stat][season]: + df = pd.DataFrame(cmip_lib.df_dict[stat][season][region]) + for i, model in enumerate(df["model"].tolist()): + if model in ["E3SM-1-0", "E3SM-1-1-ECA"]: + idxs = df[df.iloc[:, 0] == model].index + df.loc[idxs, "ta-850"] = np.nan + del idxs + if model in ["CIESM"]: + idxs = df[df.iloc[:, 0] == model].index + df.loc[idxs, "pr"] = np.nan + del idxs + cmip_lib.df_dict[stat][season][region] = df + del df + + return model_lib, cmip_lib, variable_names, variable_units + + +def find_metrics_data(parameter): + pmp_set = parameter.pcmdi_data_set + pmp_path = parameter.pcmdi_data_path + test_set = parameter.test_data_set + test_path = parameter.test_data_path + refr_set = parameter.refr_data_set + refr_path = parameter.refr_data_path + run_type = parameter.run_type + debug = parameter.debug + + test_mip = test_set.split(".")[0] + test_exp = test_set.split(".")[1] + test_case_id = test_set.split(".")[-1] + test_dir = os.path.join(test_path, test_mip, test_exp, test_case_id) + if run_type == "model_vs_model": + refr_mip = refr_set.split(".")[0] + refr_exp = refr_set.split(".")[1] + refr_case_id = refr_set.split(".")[-1] + refr_dir = os.path.join(refr_path, refr_mip, refr_exp, refr_case_id) + + variables = [ + s.split("/")[-1].split("_")[0] + for s in glob.glob(os.path.join(test_dir, "*{}.json".format(test_case_id))) + if os.path.exists(s) + ] + variables = list(set(variables)) + + # find list of metrics data files + test_list = [] + refr_list = [] + cmip_list = [] + + for vv in variables: + ftest = glob.glob( + os.path.join(test_dir, "{}_*_{}.json".format(vv, test_case_id)) + ) + fcmip, rcode = find_cmip_metric_data(pmp_path, pmp_set, vv) + if rcode == 0: + if len(ftest) > 0 and len(fcmip) > 0: + for fx in ftest: + test_list.append(fx) + cmip_list.append(fcmip) + if debug: + print(ftest[0].split("/")[-1], fcmip.split("/")[-1]) + if run_type == "model_vs_model": + frefr = glob.glob( + os.path.join(refr_dir, "{}_*_{}.json".format(vv, refr_case_id)) + ) + if len(frefr) > 0: + for fr in frefr: + refr_list.append(fr) + if debug: + print( + ftest[0].split("/")[-1], + frefr[0].split("/")[-1], + fcmip.split("/")[-1], + ) + del frefr + del (ftest, fcmip) + return test_list, refr_list, cmip_list diff --git a/zppy/templates/pcmdi_diags/observation_to_cmip.py b/zppy/templates/pcmdi_diags/observation_to_cmip.py new file mode 100755 index 00000000..7b6eccee --- /dev/null +++ b/zppy/templates/pcmdi_diags/observation_to_cmip.py @@ -0,0 +1,85 @@ +#! /usr/bin/env python +import glob +import json +import os +import shutil +import subprocess + +# command = shlex.split("bash -c 'source init_env && env'") +# proc = subprocess.Popen(command, stdout = subprocess.PIPE) + +srcdir = "/lcrc/group/e3sm/ac.szhang/acme_scratch/e3sm_project/test_zppy_pmp/zppy" +cmip_var = json.load( + open(os.path.join(srcdir, "zppy/templates/pcmdi_diags", "cmip_var.json")) +) +ref_dic = json.load( + open(os.path.join(srcdir, "zppy/templates/pcmdi_diags", "reference_data.json")) +) + +output_path = ( + "/lcrc/soft/climate/e3sm_diags_data/obs_for_e3sm_diags/time-series/NOAA_20C" +) + + +default_metadata = os.path.join( + srcdir, "zppy/templates/pcmdi_diags/default_metadata.json" +) +tables_path = "/lcrc/group/e3sm/diagnostics/cmip6-cmor-tables/Tables" + +input_path = os.path.join(output_path, "input_data") +if not os.path.exists(input_path): + os.makedirs(input_path) + +raw_data_path = "/lcrc/group/acme/ac.szhang/acme_scratch/data/CVDP_RGD/NOAA_20C" +fpaths = sorted(glob.glob(os.path.join(raw_data_path, "{}*.nc".format("NOAA_20C")))) +for fpath in fpaths: + fname = fpath.split("/")[-1] + fname = fname.replace("-", ".") + fout = "_".join(fname.split(".")[2:]) + fout = os.path.join(input_path, fout.replace("_nc", ".nc")) + print("input: ", fpath) + print("output: ", fout) + if os.path.islink(fout): + os.remove(fout) + os.symlink(fpath, fout) + else: + os.symlink(fpath, fout) + del (fname, fout) +del (fpaths, raw_data_path) + +for key in cmip_var.keys(): + cmip_var_list = ", ".join(cmip_var[key]) + print(cmip_var_list) + subprocess.call( + [ + "e3sm_to_cmip", + "--output-path", + output_path, + "--var-list", + cmip_var_list, + "--input-path", + input_path, + "--user-metadata", + default_metadata, + "--tables-path", + tables_path, + ] + ) + +# move data to target location +opaths = sorted(glob.glob(os.path.join(output_path, "CMIP6/CMIP/*/*/*/*/*/*/*/*/*.nc"))) +for opath in opaths: + outfile = opath.split("/")[-1] + outname = outfile.replace("-", "_").split("_") + fout = "_".join([outname[0], outname[-2], outname[-1]]) + fout = os.path.join(output_path, fout.replace("_nc", ".nc")) + if os.path.exists(opath): + os.rename(opath, fout) + del (outfile, outname, fout) + +# clean up directory +if os.path.exists(os.path.join(output_path, "CMIP6")): + shutil.rmtree(os.path.join(output_path, "CMIP6")) + +if os.path.exists(input_path): + shutil.rmtree(input_path) diff --git a/zppy/templates/pcmdi_diags/plot_mean_climate.py b/zppy/templates/pcmdi_diags/plot_mean_climate.py new file mode 100755 index 00000000..e4abd119 --- /dev/null +++ b/zppy/templates/pcmdi_diags/plot_mean_climate.py @@ -0,0 +1,84 @@ +#!/bin/env python +############################################################################## +# This model is used to generate mean climate diagnostic figures +# Author: Shixuan Zhang (shixuan.zhang@pnnl.gov) +############################################################################# +import os + +from mean_climate_plot_driver import mean_climate_metrics_plot +from mean_climate_plot_parser import create_mean_climate_plot_parser + + +def main( + run_type, + test_data_set, + test_data_dir, + test_period, + refr_data_set, + refr_data_dir, + refr_period, + cmip_data_set, + pcmdi_data_dir, + results_dir, +): + parser = create_mean_climate_plot_parser() + parameter = parser.get_parameter(argparse_vals_only=False) + + parameter.pcmdi_data_set = cmip_data_set + parameter.pcmdi_data_path = pcmdi_data_dir + + parameter.period = test_period + parameter.test_product = test_data_set.split(".")[2] + parameter.test_data_set = test_data_set + parameter.test_data_path = os.path.join(test_data_dir, "mean_climate") + parameter.run_type = run_type + + if parameter.run_type == "model_vs_model": + parameter.refr_data_set = refr_data_set + parameter.refr_period = refr_period + parameter.refr_data_path = os.path.join(refr_data_dir, "mean_climate") + + parameter.output_path = os.path.join(results_dir, "graphics", "mean_climate") + parameter.ftype = "png" + parameter.debug = False + parameter.regions = ["global", "NHEX", "SHEX", "TROPICS"] + parameter.parcord_show_markers = False + parameter.add_vertical_line = True + + mean_climate_metrics_plot(parameter) + + +if __name__ == "__main__": + cmip_data_set = "cmip6.amip.v20241029" + pcmdi_data_dir = ( + "/lcrc/soft/climate/e3sm_diags_data/obs_for_e3sm_diags/pcmdi_data/mean_climate" + ) + results_dir = "/lcrc/group/e3sm/public_html/diagnostic_output/ac.szhang/e3sm-pcmdi/merged_data/model_vs_obs_1985-2014" + run_type = "model_vs_obs" + + test_data_set = "e3sm.amip.v3-LR.all.v20241030" + test_data_dir = "/lcrc/group/e3sm/public_html/diagnostic_output/ac.szhang/e3sm-pcmdi/merged_data/model_vs_obs_1985-2014" + test_period = "1985-2014" + + if run_type == "model_vs_obs": + refr_data_set = "" + refr_data_dir = "" + refr_period = "" + else: + print("need to provide reference data information ...") + refr_data_set = "e3sm.historical.v3-LR.all.v20241030" + refr_data_dir = "/lcrc/group/e3sm/public_html/diagnostic_output/ac.szhang/e3sm-pcmdi/merged_data/model_vs_obs_1985-2014" + refr_period = "1985-2014" + + main( + run_type, + test_data_set, + test_data_dir, + test_period, + refr_data_set, + refr_data_dir, + refr_period, + cmip_data_set, + pcmdi_data_dir, + results_dir, + ) diff --git a/zppy/templates/pcmdi_diags/post_merge_clim_jsons.py b/zppy/templates/pcmdi_diags/post_merge_clim_jsons.py new file mode 100755 index 00000000..8e265a14 --- /dev/null +++ b/zppy/templates/pcmdi_diags/post_merge_clim_jsons.py @@ -0,0 +1,164 @@ +#!/usr/bin/env python +import copy +import glob +import json +import os + +from pcmdi_metrics.utils import StringConstructor +from pcmdi_metrics.variability_mode.lib import dict_merge + + +def main(): + mip = "e3sm" + exp = "amip" + case_id = "v20241030" + period = "1985-2014" + metric_collection = "mean_climate" + run_type = "model_vs_obs" + data_path = "/lcrc/group/e3sm/public_html/diagnostic_output/ac.szhang/e3sm-pcmdi" + obs_selection = "default" + + # target here is to merge all product models at all realizations to one-big file + # product = ['v3.LR'] + # realm = ["0101", "0151", "0201"] + + # template for diagnostic directory tree + # construct the directory for specific mpi, exp and case + pmprdir_template = StringConstructor( + "%(product).%(exp)_%(realization)/pcmdi_diags/%(run_type)_%(period)" + ) + pmprdir = os.path.join( + data_path, + pmprdir_template( + mip=mip, + exp=exp, + case_id=case_id, + product="*", + realization="*", + run_type=run_type, + period=period, + ), + ) + print("pmprdir:", pmprdir) + + # template for metrics directory tree + json_file_dir_template = StringConstructor( + "metrics_results/%(metric_collection)/%(mip)/%(exp)/%(case_id)" + ) + json_file_dir = os.path.join( + pmprdir, + json_file_dir_template( + metric_collection=metric_collection, + mip=mip, + exp=exp, + case_id=case_id, + ), + ) + print("json_file_dir:", json_file_dir) + + # template for output directory tree + out_file_dir_template = StringConstructor( + "%(run_type)_%(period)/%(metric_collection)/%(mip)/%(exp)/%(case_id)" + ) + out_file_dir = os.path.join( + data_path, + "merged_data", + out_file_dir_template( + metric_collection=metric_collection, + mip=mip, + exp=exp, + case_id=case_id, + run_type=run_type, + period=period, + ), + ) + print("out_file_dir:", out_file_dir) + variables = [ + s.split("/")[-1] + for s in glob.glob( + os.path.join( + json_file_dir, + "*", + ) + ) + if os.path.isdir(s) + ] + variables = list(set(variables)) + print("variables:", variables) + + for var in variables: + # json merge + # try: + if 1: + merge_json( + mip, exp, case_id, var, obs_selection, json_file_dir, out_file_dir + ) + """ + except Exception as err: + print("ERROR: ", mip, exp, var, err) + pass + """ + + +def merge_json(mip, exp, case_id, var, obs, json_file_dir, out_file_dir): + print("json_file_dir:", json_file_dir) + json_file_template = StringConstructor( + "%(var)_%(model)_%(realization)_*_%(obs)_%(case_id).json" + ) + # Search for individual JSONs + json_files = sorted( + glob.glob( + os.path.join( + json_file_dir, + var, + json_file_template( + var=var, + model="*", + realization="*", + obs=obs, + case_id=case_id, + ), + ) + ) + ) + + print("json_files:", json_files) + + # Remove diveDown JSONs and previously generated merged JSONs if included + json_files_revised = copy.copy(json_files) + for j, json_file in enumerate(json_files): + filename_component = json_file.split("/")[-1].split(".")[0].split("_") + if "allModels" in filename_component: + json_files_revised.remove(json_file) + elif "allRuns" in filename_component: + json_files_revised.remove(json_file) + + # Load individual JSON and merge to one big dictionary + for j, json_file in enumerate(json_files_revised): + print(j, json_file) + f = open(json_file) + dict_tmp = json.loads(f.read()) + if j == 0: + dict_final = dict_tmp.copy() + else: + dict_merge(dict_final, dict_tmp) + f.close() + + # Dump final dictionary to JSON + if not os.path.exists(out_file_dir): + os.makedirs(out_file_dir) + + final_json_filename = StringConstructor("%(var)_%(mip)_%(exp)_%(case_id).json")( + var=var, mip=mip, exp=exp, case_id=case_id + ) + final_json_file = os.path.join(out_file_dir, final_json_filename) + if os.path.exists(final_json_file): + # previously generated merged JSONs if included + os.remove(final_json_file) + + with open(final_json_file, "w") as fp: + json.dump(dict_final, fp, sort_keys=True, indent=4) + + +if __name__ == "__main__": + main() diff --git a/zppy/templates/pcmdi_diags/process_sftlf.py b/zppy/templates/pcmdi_diags/process_sftlf.py new file mode 100755 index 00000000..032625b1 --- /dev/null +++ b/zppy/templates/pcmdi_diags/process_sftlf.py @@ -0,0 +1,61 @@ +#!/bin/env python +################################################################## +# This script attemts to generate land/sea mask for a given input +################################################################## +import datetime +import os +import sys + +import cdms2 as cdm +import cdutil +import numpy as np + +if len(sys.argv) > 4: + modvar = sys.argv[1] + modname = sys.argv[2] + modpath = sys.argv[3] + modpath_lf = sys.argv[4] +else: + print("ERROR: must specify {modname},{modpath},{outpath} info") + exit() + +# Set netcdf file criterion - turned on from default 0s +cdm.setCompressionWarnings(0) # Suppress warnings +cdm.setNetcdfShuffleFlag(0) +cdm.setNetcdfDeflateFlag(1) +cdm.setNetcdfDeflateLevelFlag(9) +cdm.setAutoBounds(1) + +cdm.setNetcdfDeflateLevelFlag(9) +cdm.setAutoBounds(1) +f_h = cdm.open(modpath) +var = f_h(modvar)[0, ...] +if var.ndim == 2: + landMask = cdutil.generateLandSeaMask(var) + # Deal with land values + landMask[np.greater(landMask, 1e-15)] = 100 + # Rename + landMask = cdm.createVariable( + landMask, id="sftlf", axes=var.getAxisList(), typecode="float32" + ) + landMask.associated_files = modpath + landMask.long_name = "Land Area Fraction" + landMask.standard_name = "land_area_fraction" + landMask.units = "%" + landMask.setMissing(1.0e20) + landMask.id = "sftlf" # Rename + + # Write variables to file + print("output sftlf:", modpath_lf) + if os.path.isfile(modpath_lf): + os.remove(modpath_lf) + fOut = cdm.open(modpath_lf, "w") + # Use function to write standard global atts + fOut.Conventions = "CF-1.0" + fOut.history = "File processed: " + datetime.datetime.now().strftime("%Y%m%d") + fOut.pcmdi_metrics_version = "0.1-alpha" + fOut.pcmdi_metrics_comment = "PCMDI metrics package" + fOut.write(landMask.astype("float32")) + fOut.close() + f_h.close() + del (f_h, landMask, fOut, var) diff --git a/zppy/templates/pcmdi_diags/reference_alias.json b/zppy/templates/pcmdi_diags/reference_alias.json new file mode 100755 index 00000000..a23f6349 --- /dev/null +++ b/zppy/templates/pcmdi_diags/reference_alias.json @@ -0,0 +1,340 @@ +{ + "rlds" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rldscs" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rlus" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rsds" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rsdscs" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + + "rsus" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rsuscs": { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rstcre" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rltcre" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rlut" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rlutcs" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rsdt" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rsut" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rsutcs" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rtmt" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "pr" : { + "default" : "GPCP_v2.3", + "alternate" : "GPCP_v2.2", + "alternate1" : "GPCP_1DD", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "prc" : { + "default" : "ERA5", + "alternate" : "NOAA-20C" + }, + "prsn" : { + "default" : "ERA5", + "alternate" : "NOAA-20C" + }, + "prw" : { + "default" : "ERA5", + "alternate" : "NOAA-20C", + "alternate1" : "MERRA2", + "alternate2" : "ERA-Interim", + "alternate3" : "NOAA-20C" + }, + "psl" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "ps" : { + "default" : "ERA5", + "alternate " : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "huss" : { + "default" : "MERRA2", + "alternate" : "NOAA-20C", + "alternate1" : "ERA5", + "alternate2" : "ERA-Interim" + }, + "ta" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "ua" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "va" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "hur" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "wap" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "zg" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "o3" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "hus" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "uas" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "vas" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "tauu" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C", + "alternate3" : "COREv2-Flux" + }, + "taux" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C", + "alternate3" : "COREv2-Flux" + }, + "tauv" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C", + "alternate3" : "COREv2-Flux" + }, + "tauy" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C", + "alternate3" : "COREv2-Flux" + }, + "tas" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "ts" : { + "default" : "ERA5", + "alternate" : "NOAA-20C", + "alternate1" : "HadISST2" + }, + "sst" : { + "default" : "ERA5", + "alternate" : "NOAA-20C", + "alternate1" : "HadISST2" + }, + "sfcWind" : { + "default" : "NOAA-20C", + "alternate" : "ERA5", + "alternate1" : "MERRA2", + "alternate2" : "ERA-Interim" + }, + "hfls" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C", + "alternate3" : "OAFlux" + }, + "hfss" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C", + "alternate3" : "OAFlux" + }, + "evspsbl" : { + "default" : "ERA5", + "alternate" : "NOAA-20C" + }, + "clt" : { + "default" : "ERA5", + "alternate3" : "NOAA-20C" + }, + "clwvi" : { + "default" : "ERA5", + "alternate" : "NOAA-20C" + }, + "clivi" : { + "default" : "ERA5", + "alternate" : "NOAA-20C" + }, + "tasmin" : { + "default" : "MERRA2" + }, + "tasmax" : { + "default" : "MERRA2" + }, + "sic" : { + "default" : "HadSST2" + }, + "tos" : { + "default" : "HadSST2" + }, + "zos" : { + "default" : "AVISO", + "alternate" : "HadISST" + }, + "sos" : { + "default" : "Aquarius", + "alternate" : "HadISST" + } +} diff --git a/zppy/templates/pcmdi_diags/regions_specs.json b/zppy/templates/pcmdi_diags/regions_specs.json new file mode 100755 index 00000000..811eb1e9 --- /dev/null +++ b/zppy/templates/pcmdi_diags/regions_specs.json @@ -0,0 +1,263 @@ +{ + "global": { + "domain": { "latitude":[-90.0, 90.0]} + }, + "NH": { + "domain": { "latitude":[0.0, 90.0]} + }, + "SH": { + "domain": { "latitude":[-90.0, 0]} + }, + "NHEX": { + "domain": { "latitude":[30.0, 90.0]} + }, + "SHEX": { + "domain": { "latitude":[-90.0, -30.0]} + }, + "TROPICS": { + "domain": { "latitude":[-30.0, 30.0]} + }, + "90S50S": { + "domain": { "latitude":[-90.0, -50.0]} + }, + "50S20S": { + "domain": { "latitude":[-50.0, -20.0]} + }, + "20S20N": { + "domain": { "latitude":[-20.0, 20.0]} + }, + "20N50N": { + "domain": { "latitude":[20.0, 50.0]} + }, + "50N90N": { + "domain": { "latitude":[50.0, 90.0]} + }, + "ocean_NH": { + "value": 0.0, + "domain": { "latitude":[0.0, 90.0]} + }, + "ocean_SH": { + "value": 0.0, + "domain": { "latitude":[-90.0, 0.0]} + }, + "land_NH": { + "value": 100, + "domain": { "latitude":[0.0, 90.0]} + }, + "land_SH": { + "value": 100, + "domain": { "latitude":[-90.0, 0.0]} + }, + "land_NHEX": { + "value": 100, + "domain": { "latitude":[30.0, 90.0]} + }, + "land_SHEX": { + "value": 100, + "domain": { "latitude":[-90.0, -30.0]} + }, + "land_TROPICS": { + "value": 100, + "domain": { "latitude":[-30.0, 30.0]} + }, + "land": { + "value": 100 + }, + "ocean_NHEX": { + "value": 0, + "domain": { "latitude":[30.0, 90.0]} + }, + "ocean_SHEX": { + "value": 0, + "domain": { "latitude":[-90.0, -30.0]} + }, + "ocean_TROPICS": { + "value": 0, + "domain": { "latitude":[30.0, 30.0]} + }, + "ocean": { + "value": 0 + }, + "ocean_50S50N": { + "value": 0.0, + "domain": { "latitude":[-50.0, 50.0]} + }, + "ocean_50S20S": { + "value": 0.0, + "domain": { "latitude":[-50.0, -20.0]} + }, + "ocean_20S20N": { + "value": 0.0, + "domain": { "latitude":[-20.0, 20.0]} + }, + "ocean_20N50N": { + "value": 0.0, + "domain": { "latitude":[20.0, 50.0]} + }, + "ocean_50N90N": { + "value": 0.0, + "domain": { "latitude":[50.0, 90.0]} + }, + "ocean_90S50S": { + "value": 0.0, + "domain": { "latitude":[-90.0, -50.0]} + }, + "NAM": { + "domain": { "latitude":[20.0, 90], + "longitude":[-180, 180]} + }, + "NAO": { + "domain": { "latitude":[20.0, 80], + "longitude":[-90, 40]} + }, + "SAM": { + "domain": { "latitude":[-20.0, -90], + "longitude":[0, 360]} + }, + "PSA1": { + "domain": { "latitude":[-20.0, -90], + "longitude":[0, 360]} + }, + "PSA2": { + "domain": { "latitude":[-20.0, -90], + "longitude":[0, 360]} + }, + "PNA": { + "domain": { "latitude":[20.0, 85], + "longitude":[120, 240]} + }, + "PDO": { + "domain": { "latitude":[20.0, 70], + "longitude":[110, 260]} + }, + "AMO": { + "domain": { "latitude":[0.0, 70], + "longitude":[-80, 0]} + }, + "AllMW": { + "domain": { "latitude":[-40.0, 45.0], + "longitude":[0.0, 360.0]} + }, + "AllM": { + "domain": { "latitude":[-45.0, 45.0], + "longitude":[0.0, 360.0]} + }, + "NAMM": { + "domain": { "latitude":[0.0, 45.0], + "longitude":[210.0, 310.0]} + }, + "SAMM": { + "domain": { "latitude":[-45.0, 0.0], + "longitude":[240.0, 330.0]} + }, + "NAFM": { + "domain": { "latitude":[0.0, 45.0], + "longitude":[310.0, 60.0]} + }, + "SAFM": { + "domain": { "latitude":[-45.0, 0.0], + "longitude":[0.0, 90.0]} + }, + "ASM": { + "domain": { "latitude":[0.0, 45.0], + "longitude":[60.0, 180.0]} + }, + "AUSM": { + "domain": { "latitude":[-45.0, 0.0], + "longitude":[90.0, 160.0]} + }, + "AIR": { + "domain": { "latitude":[7.0, 25.0], + "longitude":[65.0, 85.0]} + }, + "AUS": { + "domain": { "latitude":[-20.0, -10.0], + "longitude":[120.0, 150.0]} + }, + "Sahel": { + "domain": { "latitude":[13.0, 18.0], + "longitude":[-10.0, 10.0]} + }, + "GoG": { + "domain": { "latitude":[0.0, 5.0], + "longitude":[-10.0, 10.0]} + }, + "NAmo": { + "domain": { "latitude":[20.0, 37.0], + "longitude":[-112.0, -103.0]} + }, + "SAmo": { + "domain": { "latitude":[-20.0, 2.5], + "longitude":[-65.0, -40.0]} + }, + "Nino34": { + "value": 0.0, + "domain": { "latitude":[-5.0, 5.0], + "longitude":[190.0, 240.0]} + }, + "Nino3": { + "value": 0.0, + "domain": { "latitude":[-5.0, 5.0], + "longitude":[210.0, 270.0]} + }, + "Nino4": { + "value": 0.0, + "domain": { "latitude":[-5.0, 5.0], + "longitude":[160.0, 210.0]} + }, + "ONI": { + "value": 0.0, + "domain": { "latitude":[-5.0, 5.0], + "longitude":[190.0, 240.0]} + }, + "Nino12": { + "value": 0.0, + "domain": { "latitude":[-10.0, 0.0], + "longitude":[270.0, 280.0]} + }, + "AMMS": { + "value": 0.0, + "domain": { "latitude":[-15.0, -5.0], + "longitude":[-20.0, 10.0]} + }, + "AMMN": { + "value": 0.0, + "domain": { "latitude":[5.0, 15.0], + "longitude":[-50.0, -20.0]} + }, + "ATL3": { + "value": 0.0, + "domain": { "latitude":[-3.0, 3.0], + "longitude":[-20.0, 0.0]} + }, + "TSA": { + "value": 0.0, + "domain": { "latitude":[-20.0, 0.0], + "longitude":[-30.0, 10.0]} + }, + "TNA": { + "value": 0.0, + "domain": { "latitude":[5.5, 23.5], + "longitude":[302.5, 345.0]} + }, + "TIO": { + "value": 0.0, + "domain": { "latitude":[-15.0, 15.0], + "longitude":[40.0, 115.0]} + }, + "IODE": { + "value": 0.0, + "domain": { "latitude":[-10.0, 10.0], + "longitude":[50.0, 70.0]} + }, + "IODW": { + "value": 0.0, + "domain": { "latitude":[-10.0, 0.0], + "longitude":[90.0, 110.0]} + }, + "SOCN": { + "value": 0.0, + "domain": { "latitude":[-70.0, -50.0], + "longitude":[0.0, 360.0]} + } +} diff --git a/zppy/templates/ts.bash b/zppy/templates/ts.bash old mode 100644 new mode 100755 index 30ac3a3b..81baba5f --- a/zppy/templates/ts.bash +++ b/zppy/templates/ts.bash @@ -147,19 +147,39 @@ EOF cp -s $dest/*_{{ '%04d' % (yr_start) }}??_{{ '%04d' % (yr_end) }}??.nc $input_dir dest_cmip={{ output }}/post/{{ component }}/{{ grid }}/cmip_ts/{{ frequency }} mkdir -p ${dest_cmip} + {{ e3sm_to_cmip_environment_commands }} + + {% if input_files.split(".")[0] == 'cam' or input_files.split(".")[0] == 'eam' -%} + #add code to do vertical interpolation variables at model levels before e3sm_to_cmip + IFS=',' read -ra mlvars <<< "{{ interp_vars }}" + for var in "${mlvars[@]}" + do + for file in ${input_dir}/${var}_{{ '%04d' % (yr_start) }}??_{{ '%04d' % (yr_end) }}??.nc + do + if [ -f ${file} ]; then + #ncks --rgr xtr_mth=mss_val --vrt_fl='{{cmip_plevdata}}' ${file} ${file}.plev + ncremap -p mpi --vrt_ntp=log --vrt_xtr=mss_val --vrt_out='{{cmip_plevdata}}' ${file} ${file}.plev + #overwrite the model level data + mv ${file}.plev ${file} + fi + done + done + {% endif -%} + + #call e3sm_to_cmip srun -N 1 e3sm_to_cmip \ --output-path \ ${dest_cmip}/${tmp_dir} \ {% if input_files.split(".")[0] == 'clm2' or input_files.split(".")[0] == 'elm' -%} --var-list \ - 'mrsos, mrso, mrfso, mrros, mrro, prveg, evspsblveg, evspsblsoi, tran, tsl, lai, cLitter, cProduct, cSoilFast, cSoilMedium, cSoilSlow, fFire, fHarvest, cVeg, nbp, gpp, ra, rh' \ + 'snd, mrsos, mrso, mrfso, mrros, mrro, prveg, evspsblveg, evspsblsoi, tran, tsl, lai, cLitter, cProduct, cSoilFast, cSoilMedium, cSoilSlow, fFire, fHarvest, cVeg, nbp, gpp, ra, rh' \ --realm \ lnd \ {% endif -%} {% if input_files.split(".")[0] == 'cam' or input_files.split(".")[0] == 'eam' -%} --var-list \ - 'pr, tas, rsds, rlds, rsus' \ + 'ua, va, ta, wa, zg, hur, pr, prc, prsn, ts, tas, prw, psl, sfcWind, tasmax, tasmin, tauu, tauv, rtmt, rsdt, rsds, rsdscs, rlds, rldscs, rsus, rsuscs, rsut, rsutcs, rlus, rlut, rlutcs, clivi, clwvi, clt, evspsbl, hfls, hfss, huss' \ --realm \ atm \ {% endif -%}