diff --git a/.gitignore b/.gitignore
index 8c9d282..7cfce85 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,6 @@
.ipynb_checkpoints
.DS_Store
*.swp
+
+# WIP folders
+scripts/ouranos-crcm5-cmip6/
diff --git a/CHANGELOG b/CHANGELOG
index 9110b1b..b6bc69b 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,5 +1,20 @@
Changelog
=========
+[v0.5.0] - March 5th, 2024
+# Improvements
+ * the `DATASETS` file now describes all the datasets available through the script
+ * new parallelization schemes are introduced using models, scenarios,
+   and ensemble members
+ * the `assets` directory is now better organized, separating the common NCL
+   and Bash scripts needed
+ * the scripts' style is updated (not completely) to be more compatible
+   with Google's shell scripting style guide
+ * documentation has been updated
+# Datasets
+ * Ouranos ESPO-G6-R2 CMIP6 script added (~9TB)
+ * NASA NEX-GDDP-CMIP6 script added (~37TB)
+ * Alberta Government's CMIP6 script added (~0.1TB)
+
[v0.4.1] - September 21st, 2023
# Fixed
* minor bug fixes
diff --git a/DATASETS b/DATASETS
new file mode 100644
index 0000000..f0522a2
--- /dev/null
+++ b/DATASETS
@@ -0,0 +1,80 @@
+|--------------------------------|-----------|---------------------------|
+| DATASET NAME | keyword | DOI |
+|--------------------------------|-----------|---------------------------|
+|1. NCAR-GWF WRF CONUS I | conus_i | 10.1007/s00382-016-3327-9 |
+|2. NCAR-GWF WRF CONUS II | conus_ii | 10.5065/49SN-8E08 |
+|3. ECMWF ERA5 | era5 | 10.24381/cds.adbb2d47 |
+|4. ECCC RDRSv2.1 | rdrs | 10.5194/hess-25-4917-2021 |
+|5. CCRN CanRCM4-WFDEI-GEM-CaPA | canrcm4_g | 10.5194/essd-12-629-2020 |
+|6. WFDEI-GEM-CaPA | wfdei_g | 10.20383/101.0111 |
+|7. ORNL Daymet | daymet | 10.3334/ORNLDAAC/2129 |
+|8. Alberta Government           | ab-gov    | 10.5194/hess-23-5151-2019 |
+| 8.1. BCC-CSM2-MR | | ditto |
+| 8.2. CNRM-CM6-1 | | ditto |
+| 8.3. EC-Earth3-Veg | | ditto |
+| 8.4. GFDL-CM4 | | ditto |
+| 8.5. GFDL-ESM4 | | ditto |
+| 8.6. IPSL-CM6A-LR | | ditto |
+| 8.7. MRI-ESM2-0 | | ditto |
+| 8.8. Hybrid-observation | | ditto |
+|9. Ouranos ESPO-G6-R2           |espo-g6-r2 |10.1038/s41597-023-02855-z |
+| 9.1. AS-RCEC | | ditto |
+| 9.2. BCC | | ditto |
+| 9.3. CAS | | ditto |
+| 9.4. CCCma | | ditto |
+| 9.5. CMCC | | ditto |
+| 9.6. CNRM-CERFACS | | ditto |
+| 9.7. CSIRO | | ditto |
+| 9.8. CSIRO-ARCCSS | | ditto |
+| 9.9. DKRZ | | ditto |
+| 9.10. EC-Earth-Consortium      |           | ditto                     |
+| 9.11. INM | | ditto |
+| 9.12. IPSL                     |           | ditto                     |
+| 9.13. MIROC | | ditto |
+| 9.14. MOHC | | ditto |
+| 9.15. MPI-M | | ditto |
+| 9.16. MRI | | ditto |
+| 9.17. NCC | | ditto |
+| 9.18. NIMS-KMA | | ditto |
+| 9.19. NOAA-GFDL | | ditto |
+| 9.20. NUIST | | ditto |
+|10. Ouranos MRCC5-CMIP6 |crcm5-cmip6| TBD |
+| 10.1. CanESM5 | | TBD |
+| 10.2. MPI-ESM1-2-LR | | TBD |
+|11. NASA NEX-GDDP-CMIP6         | gddp-nex  |10.1038/s41597-022-01393-4 |
+| 11.1. ACCESS-CM2               |           | ditto                     |
+| 11.2. ACCESS-ESM1-5            |           | ditto                     |
+| 11.3. BCC-CSM2-MR              |           | ditto                     |
+| 11.4. CanESM5                  |           | ditto                     |
+| 11.5. CESM2                    |           | ditto                     |
+| 11.6. CESM2-WACCM              |           | ditto                     |
+| 11.7. CMCC-CM2-SR5             |           | ditto                     |
+| 11.8. CMCC-ESM2                |           | ditto                     |
+| 11.9. CNRM-CM6-1               |           | ditto                     |
+| 11.10. CNRM-ESM2-1             |           | ditto                     |
+| 11.11. EC-Earth3               |           | ditto                     |
+| 11.12. EC-Earth3-Veg-LR        |           | ditto                     |
+| 11.13. FGOALS-g3               |           | ditto                     |
+| 11.14. GFDL-CM4                |           | ditto                     |
+| 11.15. GFDL-CM4_gr2            |           | ditto                     |
+| 11.16. GFDL-ESM4               |           | ditto                     |
+| 11.17. GISS-E2-1-G             |           | ditto                     |
+| 11.18. HadGEM3-GC31-LL         |           | ditto                     |
+| 11.19. HadGEM3-GC31-MM         |           | ditto                     |
+| 11.20. IITM-ESM                |           | ditto                     |
+| 11.21. INM-CM4-8               |           | ditto                     |
+| 11.22. INM-CM5-0               |           | ditto                     |
+| 11.23. IPSL-CM6A-LR            |           | ditto                     |
+| 11.24. KACE-1-0-G              |           | ditto                     |
+| 11.25. KIOST-ESM               |           | ditto                     |
+| 11.26. MIROC6                  |           | ditto                     |
+| 11.27. MIROC-ES2L              |           | ditto                     |
+| 11.28. MPI-ESM1-2-HR           |           | ditto                     |
+| 11.29. MPI-ESM1-2-LR           |           | ditto                     |
+| 11.30. MRI-ESM2-0              |           | ditto                     |
+| 11.31. NESM3                   |           | ditto                     |
+| 11.32. NorESM2-LM              |           | ditto                     |
+| 11.33. NorESM2-MM              |           | ditto                     |
+| 11.34. TaiESM1                 |           | ditto                     |
+| 11.35. UKESM1-0-LL             |           | ditto                     |
+|--------------------------------|-----------|---------------------------|
diff --git a/README.md b/README.md
index 8b9ad3c..55c148c 100644
--- a/README.md
+++ b/README.md
@@ -4,51 +4,56 @@ This repository contains scripts to process meteorological datasets in NetCDF fi
```console
Usage:
extract-dataset [options...]
+
Script options:
- -d, --dataset Meteorological forcing dataset of interest
- -i, --dataset-dir=DIR The source path of the dataset file(s)
- -v, --variable=var1[,var2[...]] Variables to process
- -o, --output-dir=DIR Writes processed files to DIR
- -s, --start-date=DATE The start date of the data
- -e, --end-date=DATE The end date of the data
- -l, --lat-lims=REAL,REAL Latitude's upper and lower bounds
- -n, --lon-lims=REAL,REAL Longitude's upper and lower bounds
- -a, --shape-file=PATH Path to the ESRI shapefile; optional
- -m, --ensemble=ens1,[ens2[...]] Ensemble members to process; optional
- Leave empty to extract all ensemble members
- -j, --submit-job Submit the data extraction process as a job
- on the SLURM system; optional
- -k, --no-chunk No parallelization, recommended for small domains
- -p, --prefix=STR Prefix prepended to the output files
- -b, --parsable Parsable SLURM message mainly used
- for chained job submissions
- -c, --cache=DIR Path of the cache directory; optional
- -E, --email=user@example.com E-mail user when job starts, ends, or
- fails; optional
- -u, --account Digital Research Alliance of Canada's sponsor's
- account name; optional, defaults to 'rpp-kshook'
- -V, --version Show version
- -h, --help Show this screen and exit
+ -d, --dataset Meteorological forcing dataset of interest
+ -i, --dataset-dir=DIR The source path of the dataset file(s)
+ -v, --variable=var1[,var2[...]] Variables to process
+ -o, --output-dir=DIR Writes processed files to DIR
+ -s, --start-date=DATE The start date of the data
+ -e, --end-date=DATE The end date of the data
+ -l, --lat-lims=REAL,REAL Latitude's upper and lower bounds;
+ optional; within the [-90, +90] limits
+ -n, --lon-lims=REAL,REAL Longitude's upper and lower bounds;
+ optional; within the [-180, +180] limits
+ -a, --shape-file=PATH Path to the ESRI shapefile; optional
+ -m, --ensemble=ens1,[ens2,[...]] Ensemble members to process; optional
+ Leave empty to extract all ensemble members
+ -M, --model=model1,[model2,[...]] Models that are part of a dataset,
+ only applicable to climate datasets, optional
+ -S, --scenario=scn1,[scn2,[...]] Climate scenarios to process, only applicable
+ to climate datasets, optional
+ -j, --submit-job Submit the data extraction process as a job
+ on the SLURM system; optional
+ -k, --no-chunk No parallelization, recommended for small domains
+ -p, --prefix=STR Prefix prepended to the output files
+ -b, --parsable Parsable SLURM message mainly used
+ for chained job submissions
+ -c, --cache=DIR Path of the cache directory; optional
+ -E, --email=user@example.com E-mail user when job starts, ends, or
+ fails; optional
+  -u, --account                       Digital Research Alliance of Canada sponsor's
+ account name; optional, defaults to 'rpp-kshook'
+ -L, --list-datasets List all the available datasets and the
+ corresponding keywords for '--dataset' option
+ -V, --version Show version
+ -h, --help Show this screen and exit
```
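+
+For instance, the new `--list-datasets` option prints the contents of the
+repository's `DATASETS` file, showing every available dataset and its
+corresponding `--dataset` keyword:
+```console
+foo@bar:~$ ./extract-dataset.sh --list-datasets
+```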
# Available Datasets
-|# |Dataset |Time Scale |DOI |Description |
-|--|--------------------------|--------------------------------|-------------------------|--------------------------------------|
-|1 |WRF-CONUS I (control) |Hourly (Oct 2000 - Dec 2013) |10.1007/s00382-016-3327-9|[link](./scripts/conus_i) |
-|2 |WRF-CONUS II (control)[^1]|Hourly (Jan 1995 - Dec 2015) |10.5065/49SN-8E08 |[link](./scripts/conus_ii) |
-|3 |ERA5[^2] |Hourly (Jan 1950 - Dec 2020) |10.24381/cds.adbb2d47 and [link](https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-single-levels-preliminary-back-extension?tab=overview)|[link](./scripts/era5)|
-|4 |RDRS v2.1 |Hourly (Jan 1980 - Dec 2018) |10.5194/hess-25-4917-2021|[link](./scripts/rdrs) |
-|5 |CanRCM4-WFDEI-GEM-CaPA |3-Hourly (Jan 1951 - Dec 2100) |10.5194/essd-12-629-2020 |[link](./scripts/canrcm4_wfdei_gem_capa)|
-|6 |WFDEI-GEM-CaPA |3-Hoursly (Jan 1979 - Dec 2016) |10.20383/101.0111 |[link](./scripts/wfdei_gem_capa) |
-|7 |Daymet |Daily (Jan 1980 - Dec 2022)[^3] |10.3334/ORNLDAAC/2129 |[link](./scripts/daymet) |
-|8 |BCC-CSM2-MR |Daily (Jan 1950 - Dec 2100)[^4] |*TBD* |[link](./scripts/bcc-csm2-mr) |
-|9 |CNRM-CM6-1 |Daily (Jan 1950 - Dec 2100)[^4] |*TBD* |[link](./scripts/cnrm-cm6-1) |
-|10|EC-Earth3-Veg |Daily (Jan 1950 - Dec 2100)[^4] |*TBD* |[link](./scripts/ec-earth3-veg) |
-|11|GDFL-CM4 |Daily (Jan 1950 - Dec 2100)[^4] |*TBD* |[link](./scripts/gdfl-cm4) |
-|12|GDFL-ESM4 |Daily (Jan 1950 - Dec 2100)[^4] |*TBD* |[link](./scripts/gdfl-esm4) |
-|13|IPSL-CM6A-LR |Daily (Jan 1950 - Dec 2100)[^4] |*TBD* |[link](./scripts/ipsl-cm6a-lr) |
-|14|MRI-ESM2-0 |Daily (Jan 1950 - Dec 2100)[^4] |*TBD* |[link](./scripts/mri-esm2-0) |
-|15|Hybrid Observation(AB Gov)|Daily (Jan 1950 - Dec 2019)[^4] |10.5194/hess-23-5151-2019|[link](./scripts/hybrid_obs) |
+|# |Dataset |Time Period |DOI |Description |
+|--|---------------------------|--------------------------------|--------------------------|-------------------------------------|
+|1 |GWF-NCAR WRF-CONUS I |Hourly (Oct 2000 - Dec 2013) |10.1007/s00382-016-3327-9 |[link](./scripts/gwf-ncar-conus_i) |
+|2 |GWF-NCAR WRF-CONUS II[^1] |Hourly (Jan 1995 - Dec 2015) |10.5065/49SN-8E08 |[link](./scripts/gwf-ncar-conus_ii) |
+|3 |ECMWF ERA5[^2] |Hourly (Jan 1950 - Dec 2020) |10.24381/cds.adbb2d47 and [link](https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-single-levels-preliminary-back-extension?tab=overview)|[link](./scripts/ecmwf-era5)|
+|4 |ECCC RDRSv2.1 |Hourly (Jan 1980 - Dec 2018) |10.5194/hess-25-4917-2021 |[link](./scripts/eccc-rdrs) |
+|5 |CCRN CanRCM4-WFDEI-GEM-CaPA|3-Hourly (Jan 1951 - Dec 2100) |10.5194/essd-12-629-2020 |[link](./scripts/ccrn-canrcm4_wfdei_gem_capa)|
+|6 |CCRN WFDEI-GEM-CaPA |3-Hourly (Jan 1979 - Dec 2016) |10.20383/101.0111 |[link](./scripts/ccrn-wfdei_gem_capa)|
+|7 |ORNL Daymet |Daily (Jan 1980 - Dec 2022)[^3] |10.3334/ORNLDAAC/2129 |[link](./scripts/ornl-daymet) |
+|8 |Alberta Gov Climate Dataset|Daily (Jan 1950 - Dec 2100)     |10.5194/hess-23-5151-2019 |[link](./scripts/ab-gov)             |
+|9 |Ouranos ESPO-G6-R2 |Daily (Jan 1950 - Dec 2100) |10.1038/s41597-023-02855-z|[link](./scripts/ouranos-espo-g6-r2) |
+|10|Ouranos MRCC5-CMIP6        |Hourly (Jan 1950 - Dec 2100)    |TBD[^4]                   |link                                 |
+|11|NASA NEX-GDDP-CMIP6 |Daily (Jan 1950 - Dec 2100) |10.1038/s41597-022-01393-4|[link](./scripts/nasa-nex-gddp-cmip6)|
[^1]: For access to the files on Graham cluster, please contact [Stephen O'Hearn](mailto:sdo124@mail.usask.ca).
[^2]: ERA5 data from 1950-1979 are based on [ERA5 preliminary extenion](https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-single-levels-preliminary-back-extension?tab=overview) and 1979 onwards are based on [ERA5 1979-present](https://doi.org/10.24381/cds.adbb2d47).
@@ -56,21 +61,22 @@ Script options:
[^4]: Data is not publicly available yet. DOI is to be determined once the relevant paper is published.
# General Example
-As an example, follow the code block below. Please remember that you MUST have access to Graham cluster with Digital Research Alliance of Canada (DRA) and have access to `CONUS I` model outputs. Also, remember to generate a [Personal Access Token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token) with GitHub in advance. Enter the following codes in your Graham shell as a test case:
+As an example, follow the code block below. Please remember that you MUST have access to the Digital Research Alliance of Canada (DRA) clusters (specifically `Graham`) and to the `RDRSv2.1` dataset. Also, remember to generate a [Personal Access Token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token) with GitHub in advance. Enter the following commands in your Graham shell as a test case:
```console
foo@bar:~$ git clone https://github.com/kasra-keshavarz/datatool # clone the repository
foo@bar:~$ cd ./datatool/ # move to the repository's directory
foo@bar:~$ ./extract-dataset.sh -h # view the usage message
-foo@bar:~$ ./extract-dataset.sh --dataset=CONUS1 \
- --dataset-dir="/project/rpp-kshook/Model_Output/WRF/CONUS/CTRL" \
- --output-dir="$HOME/scratch/conus_i_output/" \
- --start-date="2001-01-01 00:00:00" \
- --end-date="2001-12-31 23:00:00" \
- --lat-lims=49,51 \
- --lon-lims=-117,-115 \
- --variable=T2,PREC_ACC_NC,Q2,ACSWDNB,ACLWDNB,U10,V10,PSFC \
- --prefix="conus_i";
+foo@bar:~$ ./extract-dataset.sh \
+ --dataset="rdrs" \
+ --dataset-dir="/project/rpp-kshook/Climate_Forcing_Data/meteorological-data/rdrsv2.1" \
+ --output-dir="$HOME/scratch/rdrs_outputs/" \
+ --start-date="2001-01-01 00:00:00" \
+ --end-date="2001-12-31 23:00:00" \
+ --lat-lims=49,51 \
+ --lon-lims=-117,-115 \
+ --variable="RDRS_v2.1_A_PR0_SFC,RDRS_v2.1_P_HU_09944" \
+ --prefix="testing_";
```
See the [examples](./examples) directory for real-world scripts for each meteorological dataset included in this repository.
@@ -80,10 +86,16 @@ only in cases where jobs are submitted to clusters' schedulers. If
processing is not submitted as a job, then the logs are printed on screen.
# New Datasets
-If you are considering any new dataset to be added to the data repository, and subsequently the associated scripts added here, you can open a new ticket on the **Issues** tab of the current repository. Or, you can make a [Pull Request](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request) on this repository with your own script.
+If you are considering any new dataset to be added to the data
+repository, and subsequently the associated scripts added here,
+you can open a new ticket on the **Issues** tab of the current
+repository. Or, you can make a
+[Pull Request](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request)
+on this repository with your own script.
# Support
-Please open a new ticket on the **Issues** tab of the current repository in case of any issues.
+Please open a new ticket on the **Issues** tab of this repository for
+support.
# License
Meteorological Data Processing Workflow - datatool
diff --git a/VERSION b/VERSION
index 7532512..8f0916f 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.4.2-dev
+0.5.0
diff --git a/assets/bash_scripts/extract_subdir_level.sh b/assets/bash_scripts/extract_subdir_level.sh
new file mode 100755
index 0000000..83a4cc9
--- /dev/null
+++ b/assets/bash_scripts/extract_subdir_level.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+# Input comma-separated string
+root_path="$1"
+input_string="$2"
+
+# Split the input string by comma
+IFS=',' read -ra directories <<< "$input_string"
+
+# Initialize an empty string to store results
+result_string=""
+
+# Iterate over each directory
+for dir in "${directories[@]}"; do
+ # Find subdirectories
+  IFS=' ' read -ra subdirs <<< "$(find "$root_path/$dir" -mindepth 1 -maxdepth 1 -type d -printf "%f ")"
+
+ # Prepend each subdirectory with its original value from input_string
+  for subdir in "${subdirs[@]}"; do
+ result_string+="$dir/${subdir##*/},"
+ done
+done
+
+# Remove the trailing comma, if any
+result_string=${result_string%,}
+
+echo "$result_string"
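+
+# Example (hypothetical layout): given /data/CCCma/CanESM5 and
+# /data/MIROC/MIROC6 on disk,
+#   ./extract_subdir_level.sh "/data" "CCCma,MIROC"
+# prints: CCCma/CanESM5,MIROC/MIROC6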
+
diff --git a/assets/coord_XLAT_XLONG_conus_i.nc b/assets/nc_coords/coord_XLAT_XLONG_conus_i.nc
similarity index 100%
rename from assets/coord_XLAT_XLONG_conus_i.nc
rename to assets/nc_coords/coord_XLAT_XLONG_conus_i.nc
diff --git a/assets/coord_XLAT_XLONG_conus_ii.nc b/assets/nc_coords/coord_XLAT_XLONG_conus_ii.nc
similarity index 100%
rename from assets/coord_XLAT_XLONG_conus_ii.nc
rename to assets/nc_coords/coord_XLAT_XLONG_conus_ii.nc
diff --git a/assets/coord_closest_daymet_idx.ncl b/assets/ncl_scripts/coord_closest_daymet_idx.ncl
similarity index 100%
rename from assets/coord_closest_daymet_idx.ncl
rename to assets/ncl_scripts/coord_closest_daymet_idx.ncl
diff --git a/assets/coord_daymet_idx.ncl b/assets/ncl_scripts/coord_daymet_idx.ncl
similarity index 100%
rename from assets/coord_daymet_idx.ncl
rename to assets/ncl_scripts/coord_daymet_idx.ncl
diff --git a/assets/coord_wrf_idx.ncl b/assets/ncl_scripts/coord_wrf_idx.ncl
similarity index 100%
rename from assets/coord_wrf_idx.ncl
rename to assets/ncl_scripts/coord_wrf_idx.ncl
diff --git a/examples/espo_example_ssrb_1950_2100.sh b/examples/espo_example_ssrb_1950_2100.sh
new file mode 100755
index 0000000..d05e070
--- /dev/null
+++ b/examples/espo_example_ssrb_1950_2100.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Meteorological Data Processing Workflow
+# Copyright (C) 2022, University of Saskatchewan
+# Copyright (C) 2023-2024, University of Calgary
+#
+# This file is part of Meteorological Data Processing Workflow
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+# This is a simple example to extract Ouranos ESPO-G6-R2 data for the
+# South Saskatchewan River Basin (SSRB) approximate extents
+# from Jan 1950 to Dec 2100.
+
+# As is mentioned on the main webpage of the repository, it is
+# recommended to submit annual jobs for this dataset.
+
+# Always call the script in the root directory of the repository
+cd ..
+echo "The current directory is: $(pwd)"
+
+./extract-dataset.sh \
+ --dataset="espo-g6-r2" \
+  --dataset-dir="/project/rrg-mclark/data/meteorological-data/espo-g6-r2/ESPO-G6-R2v1.0.0" \
+ --variable="pr,tasmax,tasmin" \
+ --output-dir="/project/rrg-mclark/AON/share/ESPO-G6-R2-SMM" \
+ --start-date="1950-01-01" \
+ --end-date="2100-12-31" \
+  --model="AS-RCEC,BCC,CAS,CCCma,CMCC,CNRM-CERFACS,CSIRO,CSIRO-ARCCSS,DKRZ,EC-Earth-Consortium,INM,IPSL,MIROC,MOHC,MPI-M,MRI,NCC,NIMS-KMA,NOAA-GFDL,NUIST" \
+ --lat-lims=49,54 \
+ --lon-lims=-120,-98 \
+ --ensemble="r1i1p1f1,r1i1p1f2" \
+ --scenario="ssp245,ssp370,ssp585" \
+ --prefix="SRB_" \
+ --cache='$SLURM_TMPDIR' \
+ --email="example@company.ca" \
+ --submit-job;
+
diff --git a/extract-dataset.sh b/extract-dataset.sh
index ce2540a..b3b39e7 100755
--- a/extract-dataset.sh
+++ b/extract-dataset.sh
@@ -1,6 +1,6 @@
#!/bin/bash
# Meteorological Data Processing Workflow
-# Copyright (C) 2022, University of Saskatchewan
+# Copyright (C) 2022-2023, University of Saskatchewan
# Copyright (C) 2023-2024, University of Calgary
#
# This file is part of Meteorological Data Processing Workflow
@@ -35,74 +35,78 @@
# ==============
# Help functions
# ==============
-short_usage () {
- echo "usage: $(basename $0) [-jh] [-i DIR] [-d DATASET] [-co DIR] [-se DATE] [-ln REAL,REAL] [-p STR]" >&1;
+function short_usage () {
+  echo "usage: $(basename $0) [-jh] [-i DIR] [-d DATASET] [-co DIR] [-se DATE] [-ln REAL,REAL] [-p STR] [-MmS STR[,...]]
+
+Try \`$(basename $0) --help\` for more options." >&1;
}
-version () {
+function version () {
echo "$(basename $0): version $(cat $(dirname $0)/VERSION)";
exit 0;
}
-
-usage () {
+function usage () {
echo "Meteorological Data Processing Script - version $(cat $(dirname $0)/VERSION)
Usage:
$(basename $0) [options...]
Script options:
- -d, --dataset Meteorological forcing dataset of interest
- -i, --dataset-dir=DIR The source path of the dataset file(s)
- -v, --variable=var1[,var2[...]] Variables to process
- -o, --output-dir=DIR Writes processed files to DIR
- -s, --start-date=DATE The start date of the data
- -e, --end-date=DATE The end date of the data
- -l, --lat-lims=REAL,REAL Latitude's upper and lower bounds
- -n, --lon-lims=REAL,REAL Longitude's upper and lower bounds
- -a, --shape-file=PATH Path to the ESRI shapefile; optional
- -m, --ensemble=ens1,[ens2[...]] Ensemble members to process; optional
- Leave empty to extract all ensemble members
- -j, --submit-job Submit the data extraction process as a job
- on the SLURM system; optional
- -k, --no-chunk No parallelization, recommended for small domains
- -p, --prefix=STR Prefix prepended to the output files
- -b, --parsable Parsable SLURM message mainly used
- for chained job submissions
- -c, --cache=DIR Path of the cache directory; optional
- -E, --email=user@example.com E-mail user when job starts, ends, or
- fails; optional
- -u, --account Digital Research Alliance of Canada's sponsor's
- account name; optional, defaults to 'rpp-kshook'
- -V, --version Show version
- -h, --help Show this screen and exit
+ -d, --dataset Meteorological forcing dataset of interest
+ -i, --dataset-dir=DIR The source path of the dataset file(s)
+ -v, --variable=var1[,var2[...]] Variables to process
+ -o, --output-dir=DIR Writes processed files to DIR
+ -s, --start-date=DATE The start date of the data
+ -e, --end-date=DATE The end date of the data
+ -l, --lat-lims=REAL,REAL Latitude's upper and lower bounds
+ optional; within the [-90, +90] limits
+ -n, --lon-lims=REAL,REAL Longitude's upper and lower bounds
+ optional; within the [-180, +180] limits
+ -a, --shape-file=PATH Path to the ESRI shapefile; optional
+ -m, --ensemble=ens1,[ens2,[...]] Ensemble members to process; optional
+ Leave empty to extract all ensemble members
+ -M, --model=model1,[model2,[...]] Models that are part of a dataset,
+ only applicable to climate datasets, optional
+ -S, --scenario=scn1,[scn2,[...]] Climate scenarios to process, only applicable
+ to climate datasets, optional
+ -j, --submit-job Submit the data extraction process as a job
+ on the SLURM system; optional
+ -k, --no-chunk No parallelization, recommended for small domains
+ -p, --prefix=STR Prefix prepended to the output files
+ -b, --parsable Parsable SLURM message mainly used
+ for chained job submissions
+ -c, --cache=DIR Path of the cache directory; optional
+ -E, --email=user@example.com E-mail user when job starts, ends, or
+ fails; optional
+  -u, --account=ACCOUNT               Digital Research Alliance of Canada sponsor's
+ account name; optional, defaults to 'rpp-kshook'
+ -L, --list-datasets List all the available datasets and the
+ corresponding keywords for '--dataset' option
+ -V, --version Show version
+ -h, --help Show this screen and exit
+For bug reports, questions, discussions open an issue
+at https://github.com/kasra-keshavarz/datatool/issues" >&1;
-Currently, the following meteorological datasets are
-available for processing:
+ exit 0;
+}
- 1. NCAR-GWF WRF CONUS I (DOI: 10.1007/s00382-016-3327-9)
- 2. NCAR-GWF WRF CONUS II (DOI: 10.5065/49SN-8E08)
- 3. ECMWF ERA5 (DOI: 10.24381/cds.adbb2d47)
- 4. ECCC RDRSv2.1 (DOI: 10.5194/hess-25-4917-2021)
- 5. CCRN CanRCM4-WFDEI-GEM-CaPA (DOI: 10.5194/essd-12-629-2020)
- 6. WFDEI-GEM-CaPA (DOI: 10.20383/101.0111)
- 7. ORNL Daymet (DOI: 10.3334/ORNLDAAC/2129)
- 8. BCC-CSM2-MR (DOI: TBD)
- 9. CNRM-CM6-1 (DOI: TBD)
- 10. EC-Earth3-Veg (DOI: TBD)
- 11. GFDL-CM4 (DOI: TBD)
- 12. GFDL-ESM4 (DOI: TBD)
- 13. IPSL-CM6A-LR (DOI: TBD)
- 14. MRI-ESM2-0 (DOI: TBD)
- 15. Hybrid-observation (DOI: 10.5194/hess-23-5151-2019)
+function list_datasets () {
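+  # print the repository's DATASETS file, wrapping each line in blue
+  # ANSI escape codes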
+echo "Meteorological Data Processing Script - version $(cat $(dirname $0)/VERSION)
-For bug reports, questions, discussions open an issue
-at https://github.com/kasra-keshavarz/datatool/issues" >&1;
+Currently, the following meteorological datasets are
+available for processing:
+$(cat $(dirname $0)/DATASETS | sed 's/^\(.*\)$/\o033[34m\1\o033[0m/')" >&1;
exit 0;
}
+# useful log date format function
+logDate () { echo "($(date +"%Y-%m-%d %H:%M:%S")) "; }
+
+# useful maximum function
+max () { printf "%s\n" "${@:2}" | sort "$1" | tail -n1; }
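+# e.g., "max -g 3 1 7 5" sorts numerically (-g) and prints 7; used below to
+# floor array lengths at 1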
# =====================
# Necessary Assumptions
@@ -113,12 +117,17 @@ alias date='TZ=UTC date'
# expand aliases for the one stated above
shopt -s expand_aliases
+# necessary local paths for the program
+scriptPath="$(dirname $0)/scripts" # scripts' path
+datatoolPath="$(dirname $0)" # datatool's path
+extract_submodel="${datatoolPath}/assets/bash_scripts/extract_subdir_level.sh" # script path
+
# =======================
# Parsing input arguments
# =======================
# argument parsing using getopt - WORKS ONLY ON LINUX BY DEFAULT
-parsedArguments=$(getopt -a -n extract-dataset -o jhVbE:d:i:v:o:s:e:t:l:n:p:c:m:ka:u: --long submit-job,help,version,parsable,email:,dataset:,dataset-dir:,variable:,output-dir:,start-date:,end-date:,time-scale:,lat-lims:,lon-lims:,prefix:,cache:,ensemble:,no-chunk,shape-file:,account: -- "$@")
+parsedArguments=$(getopt -a -n extract-dataset -o jhVbLE:d:i:v:o:s:e:t:l:n:p:c:m:M:S:ka:u: --long submit-job,help,version,parsable,list-datasets,email:,dataset:,dataset-dir:,variable:,output-dir:,start-date:,end-date:,time-scale:,lat-lims:,lon-lims:,prefix:,cache:,ensemble:,model:,scenario:,no-chunk,shape-file:,account: -- "$@")
validArguments=$?
# check if there is no valid options
if [ "$validArguments" != "0" ]; then
@@ -138,9 +147,10 @@ while :
do
case "$1" in
-h | --help) usage ; shift ;; # optional
- -V | --version) version ; shift ;; # optional
+ -V | --version) version ; shift ;; # optional
+ -L | --list-datasets) list_datasets ; shift ;; # optional
-j | --submit-job) jobSubmission=true ; shift ;; # optional
- -E | --email) email="$2" ; shift 2 ;; # optional
+ -E | --email) email="$2" ; shift 2 ;; # optional
-i | --dataset-dir) datasetDir="$2" ; shift 2 ;; # required
-d | --dataset) dataset="$2" ; shift 2 ;; # required
-v | --variable) variables="$2" ; shift 2 ;; # required
@@ -151,10 +161,12 @@ do
-l | --lat-lims) latLims="$2" ; shift 2 ;; # required
-n | --lon-lims) lonLims="$2" ; shift 2 ;; # required
-m | --ensemble) ensemble="$2" ; shift 2 ;; # optional
+ -M | --model) model="$2" ; shift 2 ;; # optional
+ -S | --scenario) scenario="$2" ; shift 2 ;; # optional
-k | --no-chunk) parallel=false ; shift ;; # optional
- -p | --prefix) prefixStr="$2" ; shift 2 ;; # required
+ -p | --prefix) prefixStr="$2" ; shift 2 ;; # required
-b | --parsable) parsable=true ; shift ;; # optional
- -c | --cache) cache="$2" ; shift 2 ;; # optional
+ -c | --cache) cache="$2" ; shift 2 ;; # optional
-u | --account) account="$2" ; shift 2 ;; # optional
-a | --shape-file) shapefile="$2" ; shift 2 ;; # optional
@@ -205,7 +217,7 @@ else
fi
# if account is not provided, use `rpp-kshook` as default
-if [[ -z $account ]]; then
+if [[ -z $account ]] && [[ $jobSubmission == "true" ]]; then
account="rpp-kshook"
if [[ -z $parsable ]]; then
echo "$(basename $0): WARNING! --account not provided, using \`rpp-kshook\` by default."
@@ -215,18 +227,23 @@ fi
# if shapefile is provided extract the extents from it
if [[ -n $shapefile ]]; then
# load GDAL module
- module -q load gdal;
+ module -q load gcc/9.3.0 gdal/3.4.3;
# extract the shapefile extent
IFS=' ' read -ra shapefileExtents <<< "$(ogrinfo -so -al "$shapefile" | sed 's/[),(]//g' | grep Extent)"
# transform the extents in case they are not in EPSG:4326
- IFS=':' read -ra sourceProj4 <<< "$(gdalsrsinfo $shapefile | grep -e "PROJ.4")" # source Proj4 value
- # transform limits and assing to variables
+  IFS=':' read -ra sourceProj4 <<< "$(gdalsrsinfo $shapefile | grep -e "PROJ.4")" # source Proj4 value
+ # Assuming EPSG:4326 if no definition of the CRS is provided
+ if [[ ${#sourceProj4[@]} -eq 0 ]]; then
+    echo "$(basename $0): WARNING! Assuming EPSG:4326 for --shape-file as no CRS definition was found"
+ sourceProj4=('PROJ4.J' '+proj=longlat +datum=WGS84 +no_defs')
+ fi
+ # transform limits and assign to variables
IFS=' ' read -ra leftBottomLims <<< $(echo "${shapefileExtents[@]:1:2}" | gdaltransform -s_srs "${sourceProj4[1]}" -t_srs EPSG:4326 -output_xy)
IFS=' ' read -ra rightTopLims <<< $(echo "${shapefileExtents[@]:4:5}" | gdaltransform -s_srs "${sourceProj4[1]}" -t_srs EPSG:4326 -output_xy)
# define $latLims and $lonLims from $shapefileExtents
lonLims="${leftBottomLims[0]},${rightTopLims[0]}"
latLims="${leftBottomLims[1]},${rightTopLims[1]}"
- module -q unload gdal;
+ module -q unload gdal/3.4.3;
fi
# check mandatory arguments whether provided
@@ -268,7 +285,7 @@ dateFormat="%Y-%m-%d %H:%M:%S"
# Globals:
# startDate: start date of the
# subsetting process
-# parallel: true by default, false if
+# parallel: true by default, false if
# --no-chunk is activated
# startDateArr: array of chunked
# start dates
@@ -279,7 +296,6 @@ dateFormat="%Y-%m-%d %H:%M:%S"
# endDate: end date of the process
# dateFormat: default date format
# for manipulations
-#
#
# Arguments:
# 1: -> tStep: string of time-step
@@ -290,7 +306,7 @@ dateFormat="%Y-%m-%d %H:%M:%S"
# will be filled for each chunk of
# date for further processing
#######################################
-chunk_dates () {
+function chunk_dates () {
# local variables
local toDate="$startDate"
local tStep="$1"
@@ -325,65 +341,36 @@ chunk_dates () {
fi
}
-#######################################
-# Chunking ensemble members in array
-# elements
-#
-# Arguments:
-# 1: -> esnemble: comma-separated
-# values of ensemble members
-#
-# Outputs:
-# Global ensembleArr array containing
-# individual members names or an
-# empty array if '--ensemble'
-# argument was not applicable
-#######################################
-chunk_ensemble () {
- # local variables
- local value="$1"
-
- # make global 'ensembleArr' array
- IFS=',' read -ra ensembleArr <<< "$(echo "$value")"
-
- # check to see if the '--ensemble'
- # argument was applicable
- if [[ "${#ensembleArr[@]}" -gt 0 ]]; then
- :
- else
- # make an empty array for datasets that
- # do not have any ensemble members
- ensembleArr=("")
- fi
-}
-
# ======================
# Necessary preparations
# ======================
# put necessary arguments in an array - just for legibility
declare -A funcArgs=([jobSubmission]="$jobSubmission" \
- [datasetDir]="$datasetDir" \
- [variables]="$variables" \
- [outputDir]="$outputDir" \
- [timeScale]="$timeScale" \
- [startDate]="$startDate" \
- [endDate]="$endDate" \
- [latLims]="$latLims" \
- [lonLims]="$lonLims" \
- [prefixStr]="$prefixStr" \
- [cache]="$cache" \
- [ensemble]="$ensemble" \
- );
+ [datasetDir]="$datasetDir" \
+ [variables]="$variables" \
+ [outputDir]="$outputDir" \
+ [timeScale]="$timeScale" \
+ [startDate]="$startDate" \
+ [endDate]="$endDate" \
+ [latLims]="$latLims" \
+ [lonLims]="$lonLims" \
+ [prefixStr]="$prefixStr" \
+ [cache]="$cache" \
+ [ensemble]="$ensemble" \
+ [model]="$model" \
+ [scenario]="$scenario"
+ );
# ========================
# Data processing function
# ========================
-call_processing_func () {
+function call_processing_func () {
# input arguments as local variables
- scriptFile="$1" # script local path
+ local scriptFile="$1" # script local path
local chunkTStep="$2" # chunking time-frame periods
+ local submodelFlag="$3" # flag for submodels' existence
# local variables
local scriptName=$(basename $scriptFile | cut -d '.' -f 1) # script/dataset name
@@ -393,6 +380,12 @@ call_processing_func () {
# make the $logDir if haven't been created yet
mkdir -p $logDir
+ # if dataset contains sub-models, extract them
+ if [[ $submodelFlag == 1 ]]; then
+ model=$($extract_submodel "$datasetDir" "$model")
+ funcArgs[model]=$model
+ fi
+
# typical script to run for all sub-modules
local script=$(cat <<- EOF
bash ${scriptFile} \
@@ -406,23 +399,52 @@ call_processing_func () {
--lon-lims="${funcArgs[lonLims]}" \
--prefix="${funcArgs[prefixStr]}" \
--cache="${funcArgs[cache]}" \
- --ensemble="${funcArgs[ensemble]}"
+ --ensemble="${funcArgs[ensemble]}" \
+ --scenario="${funcArgs[scenario]}" \
+ --model="${funcArgs[model]}"
EOF
)
# evaluate the script file using the arguments provided
if [[ "${funcArgs[jobSubmission]}" == true ]]; then
- # chunk time-frame and ensembles
+ # ==========================================
+ # Chunk time-frame and other relevant arrays
+ # ==========================================
+ # chunk dates
chunk_dates "$chunkTStep"
- chunk_ensemble "$ensemble" # 'ensemble' is a global variable
-
- # length of total number of tasks and indices
- taskLen=$(( ${#startDateArr[@]} * ${#ensembleArr[@]} ))
- jobArrLen=$(( $taskLen - 1 ))
- # parallel run
+ # chunking ensemble members
+ IFS=',' read -ra ensembleArr <<< $ensemble
+ # chunking models
+ IFS=',' read -ra modelArr <<< $model
+ # chunking scenarios
+ IFS=',' read -ra scenarioArr <<< $scenario
+
+ # ===========================
+ # Building job array iterator
+ # ===========================
+ let "ensembleLen = $(max -g ${#ensembleArr[@]} 1)"
+ let "modelLen = $(max -g ${#modelArr[@]} 1)"
+ let "scenarioLen = $(max -g ${#scenarioArr[@]} 1)"
+ let "dateLen = $(max -g ${#startDateArr[@]} 1)"
+
+ let "dateIter = $ensembleLen * $modelLen * $scenarioLen"
+ let "ensembleIter = $modelLen * $scenarioLen"
+ let "modelIter = $scenarioLen"
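+  # worked example (hypothetical counts): with dateLen=2, ensembleLen=1,
+  # modelLen=3, and scenarioLen=2, dateIter=6, ensembleIter=6, modelIter=2;
+  # SLURM task ID 7 then maps to idxDate=(7/6)%2=1, idxMember=(7/6)%1=0,
+  # idxModel=(7/2)%3=0, idxScenario=7%2=1 (see the indexing in the job
+  # script below)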
+
+ # ==============================
+ # Length of processing job array
+ # ==============================
+
+ # length of total number of tasks and indices
+ let "taskLen = $dateLen * $ensembleLen * $modelLen * $scenarioLen"
+ let "jobArrLen = $taskLen - 1"
+
+ # ============
+ # Parallel run
+ # ============
# FIXME: This needs to be moved into a template scheduler
- # document
+ # document, and various schedulers need to be supported
sbatch <<- EOF
#!/bin/bash
#SBATCH --array=0-$jobArrLen
@@ -441,22 +463,35 @@ call_processing_func () {
$(declare -p startDateArr)
$(declare -p endDateArr)
$(declare -p ensembleArr)
+ $(declare -p modelArr)
+ $(declare -p scenarioArr)
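+	# the declare -p statements above re-create the chunk arrays verbatim
+	# inside this generated job script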
- idxDate="\$(( \${SLURM_ARRAY_TASK_ID} % \${#startDateArr[@]} ))"
- idxMember="\$(( \${SLURM_ARRAY_TASK_ID} / \${#startDateArr[@]} ))"
+ idxDate="\$(( (\${SLURM_ARRAY_TASK_ID} / ${dateIter}) % ${dateLen} ))"
+ idxMember="\$(( (\${SLURM_ARRAY_TASK_ID} / ${ensembleIter}) % ${ensembleLen} ))"
+ idxModel="\$(( (\${SLURM_ARRAY_TASK_ID} / ${modelIter}) % ${modelLen} ))"
+ idxScenario="\$(( \${SLURM_ARRAY_TASK_ID} % ${scenarioLen} ))"
- tBegin="\${startDateArr[\${idxDate}]}"
- tEnd="\${endDateArr[\${idxDate}]}"
- member="\${ensembleArr[\${idxMember}]}"
+ tBegin="\${startDateArr[\$idxDate]}"
+ tEnd="\${endDateArr[\$idxDate]}"
+ memberChosen="\${ensembleArr[\$idxMember]}"
+ modelChosen="\${modelArr[\$idxModel]}"
+ scenarioChosen="\${scenarioArr[\$idxScenario]}"
- echo "${scriptName}.sh: #\${SLURM_ARRAY_TASK_ID} chunk submitted."
- echo "${scriptName}.sh: Chunk start date is \$tBegin"
- echo "${scriptName}.sh: Chunk end date is \$tEnd"
- if [[ -n \${member} ]]; then
- echo "${scriptName}.sh: Ensemble member is \$member"
+ echo "$(logDate)$(basename $0): Calling ${scriptName}.sh..."
+ echo "$(logDate)$(basename $0): #\${SLURM_ARRAY_TASK_ID} chunk submitted."
+ echo "$(logDate)$(basename $0): Chunk start date is \$tBegin"
+ echo "$(logDate)$(basename $0): Chunk end date is \$tEnd"
+ if [[ -n \${modelChosen} ]]; then
+ echo "$(logDate)$(basename $0): Model is \${modelChosen}"
+ fi
+ if [[ -n \${scenarioChosen} ]]; then
+ echo "$(logDate)$(basename $0): Scenario is \${scenarioChosen}"
+ fi
+ if [[ -n \${memberChosen} ]]; then
+ echo "$(logDate)$(basename $0): Ensemble member is \${memberChosen}"
fi
- srun ${script} --start-date="\$tBegin" --end-date="\$tEnd" --cache="${cache}/cache-\${SLURM_ARRAY_JOB_ID}-\${SLURM_ARRAY_TASK_ID}" --ensemble="\${member}"
+ srun ${script} --start-date="\$tBegin" --end-date="\$tEnd" --cache="${cache}/cache-\${SLURM_ARRAY_JOB_ID}-\${SLURM_ARRAY_TASK_ID}" --ensemble="\${memberChosen}" --model="\${modelChosen}" --scenario="\${scenarioChosen}"
EOF
if [[ -z $parsable ]]; then
@@ -476,84 +511,81 @@ call_processing_func () {
# FIXME: This list needs to become part of a configuration
# file in future releases
-
-scriptPath="$(dirname $0)/scripts"
+# $scriptPath is defined at the top
case "${dataset,,}" in
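+# note: ${dataset,,} lower-cases the user-provided keyword, making the
+# pattern matching below case-insensitive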
+
+ # ============
+ # WRF products
+ # ============
+
# NCAR-GWF CONUSI
"conus1" | "conusi" | "conus_1" | "conus_i" | "conus 1" | "conus i" | "conus-1" | "conus-i")
- call_processing_func "$scriptPath/conus_i/conus_i.sh" "3months"
+ call_processing_func "$scriptPath/gwf-ncar-conus_i/conus_i.sh" "3months"
;;
# NCAR-GWF CONUSII
"conus2" | "conusii" | "conus_2" | "conus_ii" | "conus 2" | "conus ii" | "conus-2" | "conus-ii")
- call_processing_func "$scriptPath/conus_ii/conus_ii.sh" "1month"
+ call_processing_func "$scriptPath/gwf-ncar-conus_ii/conus_ii.sh" "1month"
;;
+ # ==========
+ # Reanalysis
+ # ==========
+
# ECMWF ERA5
"era_5" | "era5" | "era-5" | "era 5")
- call_processing_func "$scriptPath/era5/era5_simplified.sh" "2years"
+ call_processing_func "$scriptPath/ecmwf-era5/era5_simplified.sh" "2years"
;;
- # ECCC RDRS
+ # ECCC RDRS
"rdrs" | "rdrsv2.1")
- call_processing_func "$scriptPath/rdrs/rdrs.sh" "6months"
+ call_processing_func "$scriptPath/eccc-rdrs/rdrs.sh" "6months"
;;
- # CanRCM4-WFDEI-GEM-CaPA
- "canrcm4-wfdei-gem-capa" | "canrcm4_wfdei_gem_capa")
- call_processing_func "$scriptPath/canrcm4_wfdei_gem_capa/canrcm4_wfdei_gem_capa.sh" "5years"
- ;;
-
- # WFDEI-GEM-CaPA
- "wfdei-gem-capa" | "wfdei_gem_capa" | "wfdei-gem_capa" | "wfdei_gem-capa")
- call_processing_func "$scriptPath/wfdei_gem_capa/wfdei_gem_capa.sh" "5years"
- ;;
+ # ====================
+ # Observation datasets
+ # ====================
# Daymet dataset
"daymet" | "Daymet" )
- call_processing_func "$scriptPath/daymet/daymet.sh" "5years"
+ call_processing_func "$scriptPath/ornl-daymet/daymet.sh" "5years"
;;
- # BCC-CSM2-MR
- "bcc" | "bcc_csm2_mr" | "bcc-csm2-mr" )
- call_processing_func "$scriptPath/bcc_csm2_mr/bcc_csm2_mr.sh" "50years"
- ;;
+ # ================
+ # Climate datasets
+ # ================
- # CNRM_CM6_1
- "cnrm" | "cnrm_cm6_1" | "cnrm-cm6-1" )
- call_processing_func "$scriptPath/cnrm_cm6_1/cnrm_cm6_1.sh" "50years"
+ # ESPO-G6-R2 dataset
+ "espo" | "espo-g6-r2" | "espo_g6_r2" | "espo_g6-r2" | "espo-g6_r2" )
+ call_processing_func "$scriptPath/ouranos-espo-g6-r2/espo-g6-r2.sh" "151years" "1"
;;
- # EC_EARTH3_VEG
- "ec" | "ec_earth3_veg" | "ec-earth3-veg" )
- call_processing_func "$scriptPath/ec_earth3_veg/ec_earth3_veg.sh" "50years"
+ # Ouranos-MRCC5-CMIP6 dataset
+ "crcm5-cmip6" | "mrcc5-cmip6" | "crcm5" | "mrcc5" )
+ call_processing_func "$scriptPath/ouranos-crcm5-cmip6/crcm5-cmip6.sh" "1years"
;;
- # GFDL_CM4
- "gfdl_cm4" | "gfdl-cm4" )
- call_processing_func "$scriptPath/gfdl_cm4/gfdl_cm4.sh" "50years"
- ;;
+ # Alberta Government Downscaled Climate Dataset - CMIP6
+ "alberta" | "ab-gov" | "ab" | "ab_gov" | "abgov" )
+ call_processing_func "$scriptPath/ab-gov/ab-gov.sh" "151years" "0"
+ ;;
- # GDFL_ESM4
- "gfdl_esm4" | "gfdl-esm4" )
- call_processing_func "$scriptPath/gfdl_esm4/gfdl_esm4.sh" "50years"
- ;;
+ # NASA GDDP-NEX-CMIP6
+ "gddp" | "nex" | "gddp-nex" | "nex-gddp" | "gddp-nex-cmip6" | "nex-gddp-cmip6")
+ call_processing_func "$scriptPath/nasa-nex-gddp-cmip6/nex-gddp-cmip6.sh" "100years" "0"
+ ;;
- # IPSL_CM6A_LR
- "ipsl" | "ipsl_cm6a_lr" | "ipsl-cm6a-lr" )
- call_processing_func "$scriptPath/ipsl_cm6a_lr/ipsl_cm6a_lr.sh" "50years"
+ # CanRCM4-WFDEI-GEM-CaPA
+ "canrcm4_g" | "canrcm4-wfdei-gem-capa" | "canrcm4_wfdei_gem_capa")
+ call_processing_func "$scriptPath/ccrn-canrcm4_wfdei_gem_capa/canrcm4_wfdei_gem_capa.sh" "5years"
;;
-
- # MRI_ESM2_0
- "mri" | "mri-esm2-0" | "mri_esm2_0" )
- call_processing_func "$scriptPath/mri_esm2_0/mri_esm2_0.sh" "50years"
+
+ # WFDEI-GEM-CaPA
+ "wfdei_g" | "wfdei-gem-capa" | "wfdei_gem_capa" | "wfdei-gem_capa" | "wfdei_gem-capa")
+ call_processing_func "$scriptPath/ccrn-wfdei_gem_capa/wfdei_gem_capa.sh" "5years"
;;
- # Hybrid Observation Dataset
- "hybrid" | "hybrid-obs" | "hybrid_obs" | "hybrid_observation" | "hybrid-observation" )
- call_processing_func "$scriptPath/hybrid_obs/hybrid_obs.sh" "50years"
- ;;
# dataset not included above
*)
diff --git a/scripts/ab-gov/README.md b/scripts/ab-gov/README.md
new file mode 100644
index 0000000..84f8225
--- /dev/null
+++ b/scripts/ab-gov/README.md
@@ -0,0 +1,129 @@
+# Alberta Government Climate Dataset (`ab-gov`)
+In this file, the details of the dataset are explained.
+
+## Location of Dataset Files
+The `ab-gov` dataset is located under the following directories, accessible from the Digital Research Alliance of Canada (DRA) Graham cluster:
+```console
+/project/rpp-kshook/Climate_Forcing_Data/meteorological-data/ab-gov # rpp-kshook allocation
+/project/rrg-mclark/data/meteorological-data/ab-gov # rrg-mclark allocation
+```
+
+and the structure of the dataset yearly files (containing daily time-steps) is as follows:
+```console
+/project/rpp-kshook/Climate_Forcing_Data/meteorological-data/ab-gov
+├── BCC-CSM2-MR
+│ ├── Downscaled_BCC-CSM2-MR_MBCDS_historical_pr_tmn_tmx_1950.nc
+│ ├── .
+│ ├── .
+│ ├── .
+│ ├── Downscaled_BCC-CSM2-MR_MBCDS_historical_pr_tmn_tmx_2014.nc
+│ ├── Downscaled_BCC-CSM2-MR_MBCDS_ssp126_pr_tmn_tmx_2015.nc
+│ ├── .
+│ ├── .
+│ ├── .
+│ ├── Downscaled_BCC-CSM2-MR_MBCDS_ssp126_pr_tmn_tmx_2100.nc
+│ ├── Downscaled_BCC-CSM2-MR_MBCDS_ssp370_pr_tmn_tmx_2015.nc
+│ ├── .
+│ ├── .
+│ ├── .
+│ ├── .
+│   └── Downscaled_BCC-CSM2-MR_MBCDS_ssp370_pr_tmn_tmx_2100.nc
+.
+.
+.
+├── %{model}
+│ ├── Downscaled_%{model}_MBCDS_historical_pr_tmn_tmx_1950.nc
+│ ├── .
+│ ├── .
+│ ├── Downscaled_%{model}_MBCDS_historical_pr_tmn_tmx_2014.nc
+│ ├── Downscaled_%{model}_MBCDS_ssp%%%_pr_tmn_tmx_2015.nc
+│ ├── .
+│ ├── .
+│ ├── Downscaled_%{model}_MBCDS_%{scenario}_pr_tmn_tmx_%{year}.nc
+│ ├── .
+│ ├── .
+│ └── Downscaled_%{model}_MBCDS_%{scenario}_pr_tmn_tmx_2100.nc
+.
+.
+.
+└── Hybrid-observation
+ ├── Hybrid_Daily_BCABSK_US_pr_1950.nc
+ ├── .
+ ├── .
+ ├── .
+ ├── Hybrid_Daily_BCABSK_US_%{var}_%{year}.nc
+ ├── .
+ ├── .
+ ├── .
+ └── Hybrid_Daily_BCABSK_US_tmin_2019.nc
+```
+
+## `ab-gov` Climate Models
+This dataset offers outputs of various climate models. The table below
+summarizes the models and the relevant keywords that can be used with the
+main `datatool` script:
+
+|# |Model (keyword for `--model`) |Scenarios (keyword for `--scenario`) |
+|---|------------------------------|-------------------------------------|
+|1 |`BCC-CSM2-MR` |`historical`, `ssp126`, `ssp370` |
+|2 |`CNRM-CM6-1` |`historical`, `ssp126`, `ssp585` |
+|3 |`EC-Earth3-Veg` |`historical`, `ssp126`, `ssp370` |
+|4 |`GFDL-CM4` |`historical`, `ssp245` |
+|5 |`GFDL-ESM4` |`historical`, `ssp585` |
+|6 |`IPSL-CM6A-LR` |`historical`, `ssp126`, `ssp370` |
+|7 |`MRI-ESM2-0` |`historical`, `ssp370`, `ssp585` |
+|8 |`Hybrid-observation` |no keyword necessary |
+
+
+## Coordinate Variables, Spatial and Temporal Extents, and Time-stamps
+
+### Coordinate Variables
+The coordinate variables of the `ab-gov` climate dataset files are `lon` and `lat` representing the longitude and latitude points, respectively.
+
+### Temporal Extents and Time-stamps
+The time-stamps are already included in the original files. The dataset offers
+**daily** time-series of climate variables. The following table
+describes the temporal extent for each scenario included in this dataset:
+|# |Scenarios (keyword for `--scenario`) |Temporal extent |
+|---|-------------------------------------|----------------------------|
+|1 |`historical` |`1950-01-02` to `2015-01-01`|
+|2 |`ssp126` |`2015-01-02` to `2101-01-01`|
+|3 |`ssp245` |`2015-01-02` to `2101-01-01`|
+|4 |`ssp370` |`2015-01-02` to `2101-01-01`|
+|5 |`ssp585` |`2015-01-02` to `2101-01-01`|
+
+> [!Note]
+> Values of the `Temporal extent` column are the limits for `--start-date`
+> and `--end-date` options with the main `datatool` script.
+
+> [!Note]
+> The `Hybrid-observation` model does not accept any `--scenario` values;
+> however, it covers climate data from `1950-01-01` to `2020-01-01`.
+
+
+## Dataset Variables
+The NetCDF files of the dataset contain 3 variables. You may see a list of variables by using the `ncdump -h` command on one of the files:
+```console
+foo@bar:~$ module load gcc/9.3.0
+foo@bar:~$ module load cdo/2.0.4
+foo@bar:~$ ncdump -h /project/rpp-kshook/Climate_Forcing_Data/meteorological-data/ab-gov/BCC-CSM2-MR/Downscaled_BCC-CSM2-MR_MBCDS_ssp126_pr_tmn_tmx_2015.nc
+```
+
+## Spatial Extent
+The `ab-gov` dataset covers the entire Canadian province of Alberta (AB), in addition to northern parts of British Columbia (BC), western parts of Saskatchewan (SK), and northern parts of the American State of Montana (MT).
+
+## Short Description on `ab-gov` Climate Dataset Variables
+This dataset only offers three climate variables: 1) daily precipitation
+time-series (surface level), 2) daily minimum temperature time-series
+(@1.5m, near-surface level), and 3) daily maximum temperature time-series
+(@1.5m, near-surface level). Since the dataset's frequency is daily and
+it only offers precipitation and temperature values, it could potentially
+be used for forcing conceptual hydrological models that only need daily
+time-series of these variables.
+
+The table below summarizes the variables offered by this dataset:
+|Variable Name |Variable (keyword for `--variable`)|Unit |IPCC Abbreviation|Comments |
+|----------------------|-----------------------------------|------|-----------------|----------------------|
+|maximum temperature |`tmax` |°C |tasmax | |
+|minimum temperature |`tmin` |°C |tasmin | |
+|precipitation         |`pr`                               |mm/day|pr               |                      |
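+
+As a minimal sketch (the extents, dates, and output locations below are
+illustrative assumptions, not prescriptions), an `ab-gov` extraction could
+look like the following:
+```console
+foo@bar:~$ ./extract-dataset.sh \
+             --dataset="ab-gov" \
+             --dataset-dir="/project/rrg-mclark/data/meteorological-data/ab-gov" \
+             --variable="pr,tmax,tmin" \
+             --model="BCC-CSM2-MR" \
+             --scenario="historical,ssp370" \
+             --start-date="1950-01-02" \
+             --end-date="2100-12-31" \
+             --lat-lims=49,54 \
+             --lon-lims=-120,-110 \
+             --output-dir="$HOME/scratch/ab-gov-outputs/" \
+             --prefix="abgov_";
+```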
diff --git a/scripts/ab-gov/ab-gov.sh b/scripts/ab-gov/ab-gov.sh
new file mode 100755
index 0000000..1158921
--- /dev/null
+++ b/scripts/ab-gov/ab-gov.sh
@@ -0,0 +1,511 @@
+#!/bin/bash
+# Meteorological Data Processing Workflow
+# Copyright (C) 2022-2023, University of Saskatchewan
+# Copyright (C) 2023-2024, University of Calgary
+#
+# This file is part of Meteorological Data Processing Workflow
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+# =========================
+# Credits and contributions
+# =========================
+# 1. Parts of the code are taken from
+# https://www.shellscript.sh/tips/getopt/index.html
+
+
+# ================
+# General comments
+# ================
+# * All variables are camelCased for distinguishing from function names;
+# * function names are all in lower_case with words separated by underscores for legibility;
+# * shell style is based on Google Open Source Projects'
+# Style Guide: https://google.github.io/styleguide/shellguide.html
+
+
+# ===============
+# Usage Functions
+# ===============
+short_usage() {
+ echo "usage: $(basename $0) [-cio DIR] [-v VARS] [-se DATE] [-t CHAR] [-ln REAL,REAL] [-p STR] [-MmS STR[,...]]"
+}
+
+
+# argument parsing using getopt - WORKS ONLY ON LINUX BY DEFAULT
+parsedArguments=$(getopt -a -n ab-gov -o i:v:o:s:e:t:l:n:p:c:m:S:M: --long dataset-dir:,variables:,output-dir:,start-date:,end-date:,time-scale:,lat-lims:,lon-lims:,prefix:,cache:,ensemble:,scenario:,model: -- "$@")
+validArguments=$?
+if [ "$validArguments" != "0" ]; then
+ short_usage;
+ exit 1;
+fi
+
+# check if no options were passed
+if [ $# -eq 0 ]; then
+ echo "ERROR $(basename $0): arguments missing";
+ exit 1;
+fi
+
+# check long and short options passed
+eval set -- "$parsedArguments"
+while :
+do
+ case "$1" in
+ -i | --dataset-dir) datasetDir="$2" ; shift 2 ;; # required
+ -v | --variables) variables="$2" ; shift 2 ;; # required
+ -o | --output-dir) outputDir="$2" ; shift 2 ;; # required
+ -s | --start-date) startDate="$2" ; shift 2 ;; # required
+ -e | --end-date) endDate="$2" ; shift 2 ;; # required
+ -t | --time-scale) timeScale="$2" ; shift 2 ;; # redundant - added for compatibility
+ -l | --lat-lims) latLims="$2" ; shift 2 ;; # required
+ -n | --lon-lims) lonLims="$2" ; shift 2 ;; # required
+ -p | --prefix) prefix="$2" ; shift 2 ;; # optional
+ -c | --cache) cache="$2" ; shift 2 ;; # required
+ -m | --ensemble) ensemble="$2" ; shift 2 ;; # redundant - added for compatibility
+ -S | --scenario) scenario="$2" ; shift 2 ;; # required
+ -M | --model) model="$2" ; shift 2 ;; # required
+
+ # -- means the end of the arguments; drop this, and break out of the while loop
+ --) shift; break ;;
+
+ # in case of invalid option
+ *)
+ echo "ERROR $(basename $0): invalid option '$1'";
+ short_usage; exit 1 ;;
+ esac
+done
+
+# useful log date format function
+logDate () { echo "($(date +"%Y-%m-%d %H:%M:%S")) "; }
+
+# check if $model is given
+if [[ -z $model ]]; then
+ echo "$(logDate)$(basename $0): ERROR! \`--model\` value(s) required"
+ exit 1;
+fi
+
+# check if $scenario is not given
+if [[ ! "${model,,}" == *"hybrid"* ]] && \
+ [[ -z $scenario ]]; then
+ echo "$(logDate)$(basename $0): ERROR! \`--scenario\` value(s) required"
+ echo "$(logDate)$(basename $0): WARNING! \`--scenario\` not required for \`Hybrid-observation\` model"
+ exit 1;
+fi
+
+# check if the prefix is not set
+if [[ -z $prefix ]]; then
+ prefix="data_"
+fi
+
+
+# =====================
+# Necessary assumptions
+# =====================
+# TZ to be set to UTC to avoid invalid dates due to Daylight Saving
+alias date='TZ=UTC date'
+
+# expand aliases for the one stated above
+shopt -s expand_aliases
+
+
+# ==========================
+# Necessary global variables
+# ==========================
+latDim="lat"
+lonDim="lon"
+timeDim="time"
+
+
+# ===================
+# Necessary functions
+# ===================
+# Modules below available on Digital Research Alliance of Canada's Graham HPC
+## core modules
+function load_core_modules () {
+ module -q load cdo/2.0.4
+ module -q load nco/5.0.6
+}
+function unload_core_modules () {
+  # WARNING: DO NOT USE IF YOU ARE NOT SURE HOW TO USE IT
+ module -q unload cdo/2.0.4
+ module -q unload nco/5.0.6
+}
+## ncl modules
+function load_ncl_module () {
+ module -q load ncl/6.6.2
+}
+function unload_ncl_module () {
+ module -q unload ncl/6.6.2
+}
+
+# loading core modules for the script
+load_core_modules
+
+
+# =================
+# Useful one-liners
+# =================
+#calculate Unix EPOCH time in seconds from 1970-01-01 00:00:00
+unix_epoch () { date --date="$@" +"%s"; }
+
+#check whether the input is float or integer
+check_real () { if [[ "$1" == *'.'* ]]; then echo 'float'; else echo 'int'; fi; }
+
+#convert to float if the number is 'int'
+to_float () { if [[ $(check_real $1) == 'int' ]]; then printf "%.1f" "$1"; echo; else printf "%.5f" "$1"; echo; fi; }
+
+#join array element by the specified delimiter
+join_by () { local IFS="$1"; shift; echo "$*"; }
+
+#to_float the latLims and lonLims, real numbers delimited by ','
+lims_to_float () { IFS=',' read -ra l <<< $@; f_arr=(); for i in "${l[@]}"; do f_arr+=($(to_float $i)); done; echo $(join_by , "${f_arr[@]}"); }
+
+# log date format
+log_date () { echo "($(date +"%Y-%m-%d %H:%M:%S")) "; }
+
+#offset lims
+offset () { float="$1"; offset="$2"; printf "%.1f," $(echo "$float + $offset" | bc) | sed 's/,$//'; }
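+
+# e.g., "to_float 49" prints 49.0 and "lims_to_float 49,51" prints 49.0,51.0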
+
+
+# ===============
+# Data processing
+# ===============
+# display info
+echo "$(log_date)$(basename $0): processing Alberta Government Climate dataset..."
+
+# array of scenarios
+IFS=',' read -ra scenarioArr <<< "$scenario"
+# array of models
+IFS=',' read -ra modelArr <<< "$model"
+# array of variables
+IFS=',' read -ra variableArr <<< "$variables"
+# there are no "ensemble" members defined for this dataset
+
+# since the dataset's grid-cell system is Gaussian, make sure to apply
+# to_float() to the $latLims and $lonLims values
+latLims="$(lims_to_float "$latLims")"
+lonLims="$(lims_to_float "$lonLims")"
+
+# since longitudes are within the [-180, +180] range, no change is
+# necessary
+
+# since Hybrid-observation has no scenario, add a hidden scenario for
+# later usage down the file
+if [[ "${model,,}" == *"hybrid"* ]]; then
+ scenarioArr+=('HiddenScenario')
+fi
+
+
+# ================
+# Necessary checks
+# ================
+
+# check if the dates are within datasets date range
+# define $startYear and $endYear
+startYear=$(date --date "$startDate" +"%Y")
+endYear=$(date --date "$endDate" +"%Y")
+
+# taking care of various possible scenarios for $startDate and $endDate
+# $scenario and $model
+## #1 if startYear is before 2015, and historical is NOT selected as a
+## scenario, issue a WARNING and add historical to $scenarioArr
+if [[ "$startYear" -lt 2015 ]] && \
+   [[ ! "${model,,}" == *"hybrid"* ]] && \
+   [[ "${scenarioArr[*]}" != *"historical"* ]]; then
+ # issue a warning and add historical to the scenarios
+  echo "$(logDate)$(basename $0): WARNING! Dates preceding 2015 belong to the \`historical\` scenario"
+ echo "$(logDate)$(basename $0): WARNING! \`historical\` is added to \`--scenario\` list"
+ scenarioArr+=("historical")
+fi
+
+## #2 if endYear is beyond 2014, and SSP scenarios are NOT
+## selected, issue an ERROR and terminate with exitcode 1
+if [[ "$endYear" -gt 2014 ]] && \
+   [[ ! "${model,,}" == *"hybrid"* ]] && \
+   [[ "${scenarioArr[*]}" != *"ssp"* ]]; then
+  echo "$(logDate)$(basename $0): ERROR! Dates after 2014 belong to \`ssp\` scenarios"
+ echo "$(logDate)$(basename $0): ERROR! Choose the appropriate date range and try again"
+ exit 1;
+fi
+
+## #3 if "Hybrid-observations" is needed, SSP scenarios are not applicable
+## and $startYear and $endYear must be between 1950-2019
+if [[ "${model,,}" == *"hybrid"* ]] && \
+ [[ "${scenario,,}" == *"ssp"* ]]; then
+ echo "$(logDate)$(basename $0): WARNING! \`Hybrid-observations\` does not have SSP scenarios"
+fi
+if [[ "${model,,}" == *"hybrid"* ]]; then
+ if [[ "$startYear" -lt "1950" ]] || \
+ [[ "$endYear" -gt "2019" ]]; then
+ echo "$(logDate)$(basename $0): WARNING! \`Hybrid-observations\` date range is only from 1950 until 2019"
+ fi
+fi
+
+## #4 the "historical" scenario's date range is from 1950 until 2014
+if [[ "${scenario,,}" == *"historical"* ]]; then
+ if [[ "$startYear" -lt "1950" ]] || \
+ [[ "$endYear" -gt "2014" ]]; then
+ echo "$(logDate)$(basename $0): WARNING! \`historical\` scenario's date range is only from 1950 until 2014"
+ fi
+fi
+
+## #5 warn if the requested "ssp*" date range falls before 2015 or beyond 2100
+if [[ "${scenario,,}" == *"ssp"* ]]; then
+ if [[ "$startYear" -lt "2015" ]]; then
+    echo "$(logDate)$(basename $0): WARNING! \`ssp*\` scenarios' start date is 2015-01-02"
+ elif [[ "$endYear" -gt "2100" ]]; then
+ echo "$(logDate)$(basename $0): WARNING! \`ssp*\` scenario's end date is 2100-12-31"
+ echo "$(logDate)$(basename $0): WARNING! \`--end-date\` is set to 2100-12-31"
+ fi
+fi
+
+
+# ============================================
+# Build date arrays for time-series extraction
+# ============================================
+# file date intervals in years - dataset's default
+interval=1
+
+fileDateFormat="%Y"
+actualDateFormat="%Y-%m-%d"
+
+# define needed variables
+let "difference = $endYear - $startYear"
+let "steps = $difference / $interval"
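+# e.g., startDate=1995-06-01 and endDate=1997-03-31 give steps=2, so the
+# loop below visits the 1995, 1996, and 1997 yearly files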
+
+# build $startDateFileArr, $endDateFileArr
+startDateFileArr=()
+endDateFileArr=()
+actualStartDateArr=()
+actualEndDateArr=()
+
+# range of jumps
+range=$(seq 0 $steps)
+
+# filling the arrays
+for iter in $range; do
+ # jumps every $interval years
+ let "jumps = $iter * $interval"
+
+ # current date after necessary jumps
+ let "toDate = $jumps + $startYear"
+
+ # extract start and end values for files
+ startValue="$(date --date "${toDate}0101" +"${fileDateFormat}")"
+ endValue="$(date --date "${toDate}0101 +${interval}years -1days" +"${fileDateFormat}")"
+
+ # double-check end-date
+ if [[ "$endValue" -gt 2100 ]]; then
+ endValue="2100" # irregular last date for dataset files
+ fi
+
+ # extract start and end values for actual dates
+ actualStartValue="$(date --date "${toDate}0102" +"${actualDateFormat}")"
+ actualEndValue="$(date --date "${toDate}0101 +${interval}years" +"${actualDateFormat}")"
+
+ # fill up relevant arrays
+ startDateFileArr+=("${startValue}")
+ endDateFileArr+=("${endValue}")
+
+ actualStartDateArr+=("${actualStartValue}")
+ actualEndDateArr+=("${actualEndValue}")
+done
+
+# build actualStartArr array for temporal subsetting
+actualStartDateArr[0]="$(date --date "${startDate}" +"${actualDateFormat}")"
+
+# and similarly, the actualEndArr array
+lastIndex=$(( "${#actualEndDateArr[@]}" - 1 ))
+actualEndDateArr[${lastIndex}]="$(date --date "${endDate}" +"${actualDateFormat}")"
+
+
+# =====================
+# Extract dataset files
+# =====================
+# Typical directory structure of the dataset is:
+# ${datasetDir}/${model}/
+# and each ${model} directory contains files in the following nomenclature:
+# Downscaled_${model}_MBCDS_${scenario}_pr_tmn_tmx_%Y.nc
+# with the %Y year value indicating the starting year of data inside the
+# file
+#
+# The nomenclature for the "hybrid-observations" $model is different:
+# Hybrid_Daily_BCABSK_US_${var}_%Y.nc
+#
+# The date range of each $model and scenario is as follows:
+# * all models except "Hybrid-observations":
+# * historical: 1950-2014
+# * ssp126: 2015-2100
+#    * ssp245: 2015-2100
+# * ssp370: 2015-2100
+# * ssp585: 2015-2100
+# * "Hybrid-observations" model: 1950-2019 (no scenarios)
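+#
+# For instance (a hypothetical request, shown for illustration only),
+# extracting the "BCC-CSM2-MR" model under the "ssp126" scenario for the
+# year 2015 would read the source file:
+#   ${datasetDir}/BCC-CSM2-MR/Downscaled_BCC-CSM2-MR_MBCDS_ssp126_pr_tmn_tmx_2015.nc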
+
+# create dataset directories in $cache and $outputDir
+echo "$(logDate)$(basename $0): creating output directory under $outputDir"
+mkdir -p "$outputDir"
+echo "$(logDate)$(basename $0): creating cache directory under $cache"
+mkdir -p "$cache"
+
+# iterate over models/submodels
+for model in "${modelArr[@]}"; do
+  # extract the model name (the part preceding any '/' submodel separator)
+  modelName=$(echo "$model" | cut -d '/' -f 1)
+
+  # flag to print the model-level verbose message only once
+ modelVerboseFlag=1
+
+ # iterate over scenarios, e.g., ssp126, ssp245, ssp370, ssp585
+ for scenario in "${scenarioArr[@]}"; do
+
+    # flag to print the scenario-level verbose message only once
+ scenarioVerboseFlag=1
+
+    # FIXME: the check needs to consider the various spellings of
+    # "hybrid-observations", as it is a long name and users are likely
+    # to make typos
+ pathTemplate="${modelName}/"
+ if [[ -e "${datasetDir}/${pathTemplate}" ]]; then
+ mkdir -p "${cache}/${pathTemplate}"
+ mkdir -p "${outputDir}/${pathTemplate}"
+ else
+ echo "$(logDate)$(basename $0): ERROR! '${model}' model does not exist."
+ break;
+ fi
+
+ # iterate over date range of interest using index
+ for idx in "${!startDateFileArr[@]}"; do
+
+ # dates for files
+ fileStartDate="${startDateFileArr[$idx]}"
+ fileEndDate="${endDateFileArr[$idx]}"
+ # dates for subsetting
+ actualStartDate="${actualStartDateArr[$idx]}"
+ actualEndDate="${actualEndDateArr[$idx]}"
+ # dates for ncks slabs
+ actualStartDateFormatted="$(date --date "${actualStartDate}" +'%Y-%m-%d')"
+ actualEndDateFormatted="$(date --date "${actualEndDate}" +'%Y-%m-%d')"
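+      # e.g., for a 1951 chunk the hyperslab below would read
+      # -d "time","1951-01-02","1952-01-01" (assuming the dataset's time
+      # dimension is named "time"; shown for illustration only)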
+
+      # variable list used in output file names
+      variableNames=$(echo "$variables" | tr ',' '_')
+
+ # if 'Hybrid_observation' is selected, it will be treated
+ # differently
+ case "${modelName,,}" in
+ "hybrid_observation" | "hybrid-observation" | "hybrid" | "hybrid-obs" | "hybrid_obs" )
+        # hybrid files are only processed under the internal
+        # 'HiddenScenario' placeholder
+ if [[ ${scenario} == "HiddenScenario" ]]; then
+
+ # if $fileStartDate is beyond 2019, break the loop
+ if [[ $fileStartDate -gt "2019" ]]; then
+ break # break $scenario for loop
+ fi
+ pathTemplate="Hybrid-observation/"
+          for var in "${variableArr[@]}"; do
+ # source and destination file names
+ src="Hybrid_Daily_BCABSK_US_${var}_${fileStartDate}.nc"
+ dst="Hybrid_Daily_BCABSK_US_${variableNames}_${fileStartDate}.nc"
+
+ # verbose message
+ if [[ -n $modelVerboseFlag ]]; then
+ echo "$(logDate)$(basename $0): processing '${model}' files"
+ unset modelVerboseFlag
+ fi
+
+ # spatial subsetting
+          until ncks -A -v "${var}" \
+            -d "$latDim","${latLims}" \
+            -d "$lonDim","${lonLims}" \
+            -d "$timeDim","${actualStartDateFormatted}","${actualEndDateFormatted}" \
+            "${datasetDir}/${pathTemplate}/${src}" \
+            "${cache}/${pathTemplate}/${dst}"; do
+ echo "$(logDate)$(basename $0): Process killed: restarting process" >&2
+ sleep 10;
+ done # until ncks
+
+ # copy the results
+          cp -r "${cache}/${pathTemplate}/${dst}" \
+            "${outputDir}/${pathTemplate}/${prefix}${dst}";
+
+ done # for $variableArr
+ else
+ # see if SSP scenario exists for the $model
+          sspFile=$(find "${datasetDir}/${pathTemplate}" -type f -name "*${scenario}*.nc" | head -n 1)
+ if [[ -z $sspFile ]] &&
+ [[ -n $scenarioVerboseFlag ]]; then
+ echo "$(logDate)$(basename $0): ERROR! '${model}.${scenario}' does not exist"
+ unset scenarioVerboseFlag
+ fi
+ fi
+ ;;
+
+ # all other models
+ *)
+        if [[ "${scenario}" == *"HiddenScenario"* ]]; then
+          continue 2; # skip the internal placeholder scenario for non-hybrid models
+        fi
+ # define file for further operation
+ src="Downscaled_${modelName}_MBCDS_${scenario}_pr_tmn_tmx_${fileStartDate}.nc"
+ dst="Downscaled_${modelName}_MBCDS_${scenario}_${variableNames}_${fileStartDate}.nc"
+
+        # if "historical" is the selected scenario and $fileStartDate is
+        # beyond 2014, break the loop
+        if [[ ${scenario,,} == *"historical"* ]] && \
+           [[ ${fileStartDate} -gt "2014" ]]; then
+ break
+ fi
+
+ # see if SSP scenario exists for the $model
+        sspFile=$(find "${datasetDir}/${pathTemplate}" -type f -name "*${scenario}*.nc" | head -n 1)
+ if [[ -z $sspFile ]]; then
+ echo "$(logDate)$(basename $0): ERROR! '${model}.${scenario}' does not exist"
+ break
+ fi
+
+ # verbose message
+ if [[ -n $scenarioVerboseFlag ]]; then
+ echo "$(logDate)$(basename $0): processing '${model}.${scenario}' files"
+ unset scenarioVerboseFlag
+ fi
+
+ # spatial subsetting
+ until ncks -A -v "${variables}" \
+ -d "$latDim","${latLims}" \
+ -d "$lonDim","${lonLims}" \
+ -d "$timeDim","${actualStartDateFormatted}","${actualEndDateFormatted}" \
+          "${datasetDir}/${pathTemplate}/${src}" \
+          "${cache}/${pathTemplate}/${dst}"; do
+ echo "$(logDate)$(basename $0): Process killed: restarting process" >&2
+ sleep 10;
+ done # until ncks
+
+ # copy the results
+        cp -r "${cache}/${pathTemplate}/${dst}" \
+          "${outputDir}/${pathTemplate}/${prefix}${dst}";
+
+ ;;
+ esac
+
+ done # for $startDateArr
+ done # for $scenarioArr
+done # for $modelArr
+
+# wait for everything to finish - just in case
+sleep 10
+
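+# NOTE: syncing an empty directory with `--delete` is a common trick that
+# removes directories holding many files faster than a plain `rm -r`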
+mkdir -p "$HOME/empty_dir"
+echo "$(logDate)$(basename $0): deleting temporary files from $cache"
+rsync -aP --delete "$HOME/empty_dir/" "$cache"
+rm -r "$cache"
+rmdir "$HOME/empty_dir" # clean up the helper directory
+echo "$(logDate)$(basename $0): temporary files from $cache are removed"
+echo "$(logDate)$(basename $0): results are produced under $outputDir"
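+
+# A hypothetical invocation of this script (the script name, paths, and
+# values below are placeholders; the flags follow the common interface of
+# the other scripts in this repository):
+#   ./ab-gov.sh --dataset-dir="/path/to/ab-gov" \
+#     --variables="pr,tmn,tmx" \
+#     --output-dir="$HOME/ab-gov-outputs" \
+#     --start-date="2015-01-01" --end-date="2020-12-31" \
+#     --lat-lims=49,54 --lon-lims=-120,-110 \
+#     --model="BCC-CSM2-MR" --scenario="ssp370" \
+#     --cache="$HOME/.cache" --prefix="abgov_"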
+
+
diff --git a/scripts/bcc_csm2_mr/README.md b/scripts/bcc_csm2_mr/README.md
deleted file mode 100644
index e69de29..0000000
diff --git a/scripts/bcc_csm2_mr/bcc_csm2_mr.sh b/scripts/bcc_csm2_mr/bcc_csm2_mr.sh
deleted file mode 100755
index 3fdf38a..0000000
--- a/scripts/bcc_csm2_mr/bcc_csm2_mr.sh
+++ /dev/null
@@ -1,197 +0,0 @@
-#!/bin/bash
-# Meteorological Data Processing Workflow
-# Copyright (C) 2022, University of Saskatchewan
-# Copyright (C) 2023, University of Calgary
-#
-# This file is part of Meteorological Data Processing Workflow
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see .
-
-# =========================
-# Credits and contributions
-# =========================
-# 1. Parts of the code are taken from https://www.shellscript.sh/tips/getopt/index.html
-
-
-# ================
-# General comments
-# ================
-# * All variables are camelCased for distinguishing from function names;
-# * function names are all in lower_case with words seperated by underscore for legibility;
-# * shell style is based on Google Open Source Projects'
-# Style Guide: https://google.github.io/styleguide/shellguide.html
-
-
-# ===============
-# Usage Functions
-# ===============
-short_usage() {
- echo "usage: $(basename $0) [-cio DIR] [-v VARS] [-se DATE] [-t CHAR] [-ln REAL,REAL] [-p STR]"
-}
-
-
-# argument parsing using getopt - WORKS ONLY ON LINUX BY DEFAULT
-parsedArguments=$(getopt -a -n bcc-cm2-mr -o i:v:o:s:e:t:l:n:p:c:m: --long dataset-dir:,variables:,output-dir:,start-date:,end-date:,time-scale:,lat-lims:,lon-lims:,prefix:,cache:,ensemble: -- "$@")
-validArguments=$?
-if [ "$validArguments" != "0" ]; then
- short_usage;
- exit 1;
-fi
-
-# check if no options were passed
-if [ $# -eq 0 ]; then
- echo "ERROR $(basename $0): arguments missing";
- exit 1;
-fi
-
-# check long and short options passed
-eval set -- "$parsedArguments"
-while :
-do
- case "$1" in
- -i | --dataset-dir) datasetDir="$2" ; shift 2 ;; # required
- -v | --variables) variables="$2" ; shift 2 ;; # required
- -o | --output-dir) outputDir="$2" ; shift 2 ;; # required
- -s | --start-date) startDate="$2" ; shift 2 ;; # required
- -e | --end-date) endDate="$2" ; shift 2 ;; # required
- -t | --time-scale) timeScale="$2" ; shift 2 ;; # redundant - added for compatibility
- -l | --lat-lims) latLims="$2" ; shift 2 ;; # required
- -n | --lon-lims) lonLims="$2" ; shift 2 ;; # required
- -p | --prefix) prefix="$2" ; shift 2 ;; # optional
- -c | --cache) cache="$2" ; shift 2 ;; # redundant - added for compatibility
- -m | --ensemble) ensemble="$2" ; shift 2 ;; # required
-
- # -- means the end of the arguments; drop this, and break out of the while loop
- --) shift; break ;;
-
- # in case of invalid option
- *)
- echo "ERROR $(basename $0): invalid option '$1'";
- short_usage; exit 1 ;;
- esac
-done
-
-# check the prefix of not set
-if [[ -z $prefix ]]; then
- prefix="data"
-fi
-
-
-# =====================
-# Necessary Assumptions
-# =====================
-# TZ to be set to UTC to avoid invalid dates due to Daylight Saving
-alias date='TZ=UTC date'
-
-# expand aliases for the one stated above
-shopt -s expand_aliases
-
-
-# ==========================
-# Necessary Global Variables
-# ==========================
-format="%Y-%m-%dT%H:%M:%S" # date format
-filePrefix="Downscaled_BCC-CSM2-MR_MBCDS" # source dataset files' suffix constant
-fileSuffix="pr_tmn_tmx" # suffix before the date format
-
-latVar="lat"
-lonVar="lon"
-timeVar="time"
-
-# ===================
-# Necessary Functions
-# ===================
-# Modules below available on Compute Canada (CC) Graham Cluster Server
-load_core_modules () {
- module -q load cdo/2.0.4
- module -q load nco/5.0.6
-}
-load_core_modules
-
-
-#######################################
-# useful one-liners
-#######################################
-#calcualte Unix EPOCH time in seconds from 1970-01-01 00:00:00
-unix_epoch () { date --date="$@" +"%s"; }
-
-#check whether the input is float or real
-check_real () { if [[ "$1" == *'.'* ]]; then echo 'float'; else echo 'int'; fi; }
-
-#convert to float if the number is 'int'
-to_float () { if [[ $(check_real $1) == 'int' ]]; then printf "%.1f" "$1"; echo; else printf "%.5f" "$1"; echo; fi; }
-
-#join array element by the specified delimiter
-join_by () { local IFS="$1"; shift; echo "$*"; }
-
-#to_float the latLims and lonLims, real numbers delimited by ','
-lims_to_float () { IFS=',' read -ra l <<< $@; f_arr=(); for i in "${l[@]}"; do f_arr+=($(to_float $i)); done; echo $(join_by , "${f_arr[@]}"); }
-
-# log date format
-log_date () { echo "($(date +"%Y-%m-%d %H:%M:%S")) "; }
-
-
-# ===============
-# Data Processing
-# ===============
-# display info
-echo "$(log_date)$(basename $0): processing BCC-CSM2-MR dataset..."
-
-# make the output directory
-echo "$(log_date)$(basename $0): creating output directory under $outputDir"
-mkdir -p "$outputDir"
-
-# make array of ensemble members
-if [[ -n "$ensemble" ]]; then
- IFS=',' read -ra ensembleArr <<< "$(echo "$ensemble")" # comma separated input
-else
- # if nothing has been entred, throw an error and exit
- echo "$(log_date)$(basename $0): ERROR! --ensemble argument does not" \
- "have valid value(s)"
- # exit the script
- exit 1;
-fi
-
-# define necessary dates
-startYear=$(date --date="$startDate" +"%Y") # start year
-endYear=$(date --date="$endDate" +"%Y") # end year
-yearsRange=$(seq $startYear $endYear)
-
-# make variable string for output file creation
-IFS=',' read -ra variablesArr <<< "$(echo "$variables")" # array for vars
-varStr=$(join_by "_" "${variablesArr[@]}")
-
-for member in "${ensembleArr[@]}"; do
- # creating yearly directory
- echo "$(log_date)$(basename $0): processing member $member"
-
- # loop over years
- for yr in $yearsRange; do
- # extract variables and spatially and temporally subset
- ncks -O \
- -d "$latVar",$(lims_to_float "$latLims") \
- -d "$lonVar",$(lims_to_float "$lonLims") \
- -v "$variables" \
- "$datasetDir/${filePrefix}_${member}_${fileSuffix}_${yr}.nc" \
- "$outputDir/${prefix}${filePrefix}_${member}_${varStr}_${yr}.nc"
- done
-
- # wait to assure the `for` loop is finished
- wait
-
-done
-
-# printing final prompt
-echo "$(log_date)$(basename $0): results are produced under $outputDir."
-
diff --git a/scripts/canrcm4_wfdei_gem_capa/LICENSE b/scripts/ccrn-canrcm4_wfdei_gem_capa/LICENSE
similarity index 100%
rename from scripts/canrcm4_wfdei_gem_capa/LICENSE
rename to scripts/ccrn-canrcm4_wfdei_gem_capa/LICENSE
diff --git a/scripts/canrcm4_wfdei_gem_capa/README.md b/scripts/ccrn-canrcm4_wfdei_gem_capa/README.md
similarity index 100%
rename from scripts/canrcm4_wfdei_gem_capa/README.md
rename to scripts/ccrn-canrcm4_wfdei_gem_capa/README.md
diff --git a/scripts/canrcm4_wfdei_gem_capa/canrcm4_wfdei_gem_capa.sh b/scripts/ccrn-canrcm4_wfdei_gem_capa/canrcm4_wfdei_gem_capa.sh
similarity index 84%
rename from scripts/canrcm4_wfdei_gem_capa/canrcm4_wfdei_gem_capa.sh
rename to scripts/ccrn-canrcm4_wfdei_gem_capa/canrcm4_wfdei_gem_capa.sh
index e377a79..ace5534 100755
--- a/scripts/canrcm4_wfdei_gem_capa/canrcm4_wfdei_gem_capa.sh
+++ b/scripts/ccrn-canrcm4_wfdei_gem_capa/canrcm4_wfdei_gem_capa.sh
@@ -1,6 +1,7 @@
#!/bin/bash
# Meteorological Data Processing Workflow
-# Copyright (C) 2022, University of Saskatchewan
+# Copyright (C) 2022-2023, University of Saskatchewan
+# Copyright (C) 2023-2024, University of Calgary
#
# This file is part of Meteorological Data Processing Workflow
#
@@ -41,7 +42,7 @@ short_usage() {
# argument parsing using getopt - WORKS ONLY ON LINUX BY DEFAULT
-parsedArguments=$(getopt -a -n extract-dataset -o i:v:o:s:e:t:l:n:p:c:m: --long dataset-dir:,variables:,output-dir:,start-date:,end-date:,time-scale:,lat-lims:,lon-lims:,prefix:,cache:,ensemble: -- "$@")
+parsedArguments=$(getopt -a -n canrcm4-wfdei-gem-capa -o i:v:o:s:e:t:l:n:p:c:m:S:M: --long dataset-dir:,variables:,output-dir:,start-date:,end-date:,time-scale:,lat-lims:,lon-lims:,prefix:,cache:,ensemble:,scenario:,model: -- "$@")
validArguments=$?
if [ "$validArguments" != "0" ]; then
short_usage;
@@ -67,9 +68,11 @@ do
-t | --time-scale) timeScale="$2" ; shift 2 ;; # redundant - added for compatibility
-l | --lat-lims) latLims="$2" ; shift 2 ;; # required
-n | --lon-lims) lonLims="$2" ; shift 2 ;; # required
- -p | --prefix) prefix="$2" ; shift 2 ;; # optional
- -c | --cache) cache="$2" ; shift 2 ;; # required
+ -p | --prefix) prefix="$2" ; shift 2 ;; # optional
+ -c | --cache) cache="$2" ; shift 2 ;; # required
-m | --ensemble) ensemble="$2" ; shift 2 ;; # optional
+ -S | --scenario) scenario="$2" ; shift 2 ;; # redundant - added for compatibility
+ -M | --model) model="$2" ; shift 2 ;; # redundant - added for compatibility
# -- means the end of the arguments; drop this, and break out of the while loop
--) shift; break ;;
@@ -88,17 +91,24 @@ else
IFS=' ' read -ra ensembleArr <<< $(echo $(cd $datasetDir && ls -d */ | cut -d '/' -f 1))
fi
+# raise error when $model or $scenario is provided
+if [[ -n "$scenario" ]] || \
+ [[ -n "$model" ]]; then
+ echo "ERROR $(basename $0): redundant argument provided";
+ exit 1;
+fi
+
# make array of variable names
IFS=',' read -ra variablesArr <<< "$(echo "$variables")"
# check the prefix of not set
if [[ -z $prefix ]]; then
- prefix="data"
+ prefix="data_"
fi
# =====================
-# Necessary Assumptions
+# Necessary assumptions
# =====================
# TZ to be set to UTC to avoid invalid dates due to Daylight Saving
alias date='TZ=UTC date'
@@ -108,7 +118,7 @@ shopt -s expand_aliases
# ==========================
-# Necessary Global Variables
+# Necessary global variables
# ==========================
format="%Y-%m-%dT%H:%M:%S" # date format
fileStruct="z1_1951-2100.Feb29.nc4" # source dataset files' suffix constant
@@ -117,20 +127,21 @@ latVar="lat"
lonVar="lon"
timeVar="time"
+
# ===================
-# Necessary Functions
+# Necessary functions
# ===================
# Modules below available on Compute Canada (CC) Graham Cluster Server
-load_core_modules () {
+function load_core_modules () {
module -q load cdo/2.0.4
module -q load nco/5.0.6
}
load_core_modules
-#######################################
-# useful one-liners
-#######################################
+# =================
+# Useful one-liners
+# =================
#calcualte Unix EPOCH time in seconds from 1970-01-01 00:00:00
unix_epoch () { date --date="$@" +"%s"; }
@@ -148,7 +159,7 @@ lims_to_float () { IFS=',' read -ra l <<< $@; f_arr=(); for i in "${l[@]}"; do f
# ===============
-# Data Processing
+# Data processing
# ===============
# display info
echo "$(basename $0): processing CCRN CanRCM4-WFDEI-GEM_CaPA..."
diff --git a/scripts/wfdei_gem_capa/README.md b/scripts/ccrn-wfdei_gem_capa/README.md
similarity index 100%
rename from scripts/wfdei_gem_capa/README.md
rename to scripts/ccrn-wfdei_gem_capa/README.md
diff --git a/scripts/wfdei_gem_capa/wfdei_gem_capa.sh b/scripts/ccrn-wfdei_gem_capa/wfdei_gem_capa.sh
similarity index 82%
rename from scripts/wfdei_gem_capa/wfdei_gem_capa.sh
rename to scripts/ccrn-wfdei_gem_capa/wfdei_gem_capa.sh
index 642397e..d9a8406 100755
--- a/scripts/wfdei_gem_capa/wfdei_gem_capa.sh
+++ b/scripts/ccrn-wfdei_gem_capa/wfdei_gem_capa.sh
@@ -1,6 +1,7 @@
#!/bin/bash
# Meteorological Data Processing Workflow
-# Copyright (C) 2022, University of Saskatchewan
+# Copyright (C) 2022-2023, University of Saskatchewan
+# Copyright (C) 2023-2024, University of Calgary
#
# This file is part of Meteorological Data Processing Workflow
#
@@ -20,7 +21,8 @@
# =========================
# Credits and contributions
# =========================
-# 1. Parts of the code are taken from https://www.shellscript.sh/tips/getopt/index.html
+# 1. Parts of the code are taken from
+# https://www.shellscript.sh/tips/getopt/index.html
# ================
@@ -33,7 +35,7 @@
# ===============
-# Usage Functions
+# Usage functions
# ===============
short_usage() {
echo "usage: $(basename $0) [-cio DIR] [-v VARS] [-se DATE] [-t CHAR] [-ln REAL,REAL] [-p STR]"
@@ -41,7 +43,7 @@ short_usage() {
# argument parsing using getopt - WORKS ONLY ON LINUX BY DEFAULT
-parsedArguments=$(getopt -a -n wfdei_gem_capa -o i:v:o:s:e:t:l:n:p:c:m: --long dataset-dir:,variables:,output-dir:,start-date:,end-date:,time-scale:,lat-lims:,lon-lims:,prefix:,cache:,ensemble: -- "$@")
+parsedArguments=$(getopt -a -n wfdei_gem_capa -o i:v:o:s:e:t:l:n:p:c:m:S:M: --long dataset-dir:,variables:,output-dir:,start-date:,end-date:,time-scale:,lat-lims:,lon-lims:,prefix:,cache:,ensemble:,scenario:,model: -- "$@")
validArguments=$?
if [ "$validArguments" != "0" ]; then
short_usage;
@@ -67,9 +69,11 @@ do
-t | --time-scale) timeScale="$2" ; shift 2 ;; # redundant - added for compatibility
-l | --lat-lims) latLims="$2" ; shift 2 ;; # required
-n | --lon-lims) lonLims="$2" ; shift 2 ;; # required
- -p | --prefix) prefix="$2" ; shift 2 ;; # optional
- -c | --cache) cache="$2" ; shift 2 ;; # required
+ -p | --prefix) prefix="$2" ; shift 2 ;; # optional
+ -c | --cache) cache="$2" ; shift 2 ;; # required
-m | --ensemble) ensemble="$2" ; shift 2 ;; # redundant - added for compatibility
+ -S | --scenario) scenario="$2" ; shift 2 ;; # redundant - added for compatibility
+ -M | --model) model="$2" ; shift 2 ;; # redundant - added for compatibility
# -- means the end of the arguments; drop this, and break out of the while loop
--) shift; break ;;
@@ -81,10 +85,12 @@ do
esac
done
-# raise error in case --ensemble argument are provided
-if [[ -n "$ensemble" ]]; then
- echo "$(basename $0): ERROR! invalid option '--ensemble'"
- exit 1
+# raise error when any redundant argument is provided
+if [[ -n "$ensemble" ]] || \
+ [[ -n "$scenario" ]] || \
+ [[ -n "$model" ]]; then
+ echo "ERROR $(basename $0): redundant argument provided";
+ exit 1;
fi
# make array of variable names
@@ -92,12 +98,12 @@ IFS=',' read -ra variablesArr <<< "$(echo "$variables")"
# check the prefix of not set
if [[ -z $prefix ]]; then
- prefix="data"
+ prefix="data_"
fi
# =====================
-# Necessary Assumptions
+# Necessary assumptions
# =====================
# TZ to be set to UTC to avoid invalid dates due to Daylight Saving
alias date='TZ=UTC date'
@@ -107,7 +113,7 @@ shopt -s expand_aliases
# ==========================
-# Necessary Global Variables
+# Necessary global variables
# ==========================
# the structure of file names is as follows: "%var__WFDEI_GEM_1979_2016.Feb29.nc"
format="%Y-%m-%dT%H:%M:%S" # date format
@@ -119,19 +125,19 @@ timeVar="time"
# ===================
-# Necessary Functions
+# Necessary functions
# ===================
# Modules below available on Compute Canada (CC) Graham Cluster Server
-load_core_modules () {
+function load_core_modules () {
module -q load cdo/2.0.4
module -q load nco/5.0.6
}
load_core_modules
-#######################################
-# useful one-liners
-#######################################
+# =================
+# Useful one-liners
+# =================
#calcualte Unix EPOCH time in seconds from 1970-01-01 00:00:00
unix_epoch () { date --date="$@" +"%s"; }
@@ -149,7 +155,7 @@ lims_to_float () { IFS=',' read -ra l <<< $@; f_arr=(); for i in "${l[@]}"; do f
# ===============
-# Data Processing
+# Data processing
# ===============
# display info
echo "$(basename $0): processing CCRN WFDEI-GEM_CaPA..."
diff --git a/scripts/cnrm_cm6_1/cnrm_cm6_1.sh b/scripts/cnrm_cm6_1/cnrm_cm6_1.sh
deleted file mode 100755
index 1cc4454..0000000
--- a/scripts/cnrm_cm6_1/cnrm_cm6_1.sh
+++ /dev/null
@@ -1,197 +0,0 @@
-#!/bin/bash
-# Meteorological Data Processing Workflow
-# Copyright (C) 2022, University of Saskatchewan
-# Copyright (C) 2023, University of Calgary
-#
-# This file is part of Meteorological Data Processing Workflow
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see .
-
-# =========================
-# Credits and contributions
-# =========================
-# 1. Parts of the code are taken from https://www.shellscript.sh/tips/getopt/index.html
-
-
-# ================
-# General comments
-# ================
-# * All variables are camelCased for distinguishing from function names;
-# * function names are all in lower_case with words seperated by underscore for legibility;
-# * shell style is based on Google Open Source Projects'
-# Style Guide: https://google.github.io/styleguide/shellguide.html
-
-
-# ===============
-# Usage Functions
-# ===============
-short_usage() {
- echo "usage: $(basename $0) [-cio DIR] [-v VARS] [-se DATE] [-t CHAR] [-ln REAL,REAL] [-p STR]"
-}
-
-
-# argument parsing using getopt - WORKS ONLY ON LINUX BY DEFAULT
-parsedArguments=$(getopt -a -n cnrm_cm6_1 -o i:v:o:s:e:t:l:n:p:c:m: --long dataset-dir:,variables:,output-dir:,start-date:,end-date:,time-scale:,lat-lims:,lon-lims:,prefix:,cache:,ensemble: -- "$@")
-validArguments=$?
-if [ "$validArguments" != "0" ]; then
- short_usage;
- exit 1;
-fi
-
-# check if no options were passed
-if [ $# -eq 0 ]; then
- echo "ERROR $(basename $0): arguments missing";
- exit 1;
-fi
-
-# check long and short options passed
-eval set -- "$parsedArguments"
-while :
-do
- case "$1" in
- -i | --dataset-dir) datasetDir="$2" ; shift 2 ;; # required
- -v | --variables) variables="$2" ; shift 2 ;; # required
- -o | --output-dir) outputDir="$2" ; shift 2 ;; # required
- -s | --start-date) startDate="$2" ; shift 2 ;; # required
- -e | --end-date) endDate="$2" ; shift 2 ;; # required
- -t | --time-scale) timeScale="$2" ; shift 2 ;; # redundant - added for compatibility
- -l | --lat-lims) latLims="$2" ; shift 2 ;; # required
- -n | --lon-lims) lonLims="$2" ; shift 2 ;; # required
- -p | --prefix) prefix="$2" ; shift 2 ;; # optional
- -c | --cache) cache="$2" ; shift 2 ;; # redundant - added for compatibility
- -m | --ensemble) ensemble="$2" ; shift 2 ;; # required
-
- # -- means the end of the arguments; drop this, and break out of the while loop
- --) shift; break ;;
-
- # in case of invalid option
- *)
- echo "ERROR $(basename $0): invalid option '$1'";
- short_usage; exit 1 ;;
- esac
-done
-
-# check the prefix of not set
-if [[ -z $prefix ]]; then
- prefix="data"
-fi
-
-
-# =====================
-# Necessary Assumptions
-# =====================
-# TZ to be set to UTC to avoid invalid dates due to Daylight Saving
-alias date='TZ=UTC date'
-
-# expand aliases for the one stated above
-shopt -s expand_aliases
-
-
-# ==========================
-# Necessary Global Variables
-# ==========================
-format="%Y-%m-%dT%H:%M:%S" # date format
-filePrefix="Downscaled_CNRM-CM6-1_MBCDS" # source dataset files' suffix constant
-fileSuffix="pr_tmn_tmx" # suffix before the date format
-
-latVar="lat"
-lonVar="lon"
-timeVar="time"
-
-# ===================
-# Necessary Functions
-# ===================
-# Modules below available on Compute Canada (CC) Graham Cluster Server
-load_core_modules () {
- module -q load cdo/2.0.4
- module -q load nco/5.0.6
-}
-load_core_modules
-
-
-#######################################
-# useful one-liners
-#######################################
-#calcualte Unix EPOCH time in seconds from 1970-01-01 00:00:00
-unix_epoch () { date --date="$@" +"%s"; }
-
-#check whether the input is float or real
-check_real () { if [[ "$1" == *'.'* ]]; then echo 'float'; else echo 'int'; fi; }
-
-#convert to float if the number is 'int'
-to_float () { if [[ $(check_real $1) == 'int' ]]; then printf "%.1f" "$1"; echo; else printf "%.5f" "$1"; echo; fi; }
-
-#join array element by the specified delimiter
-join_by () { local IFS="$1"; shift; echo "$*"; }
-
-#to_float the latLims and lonLims, real numbers delimited by ','
-lims_to_float () { IFS=',' read -ra l <<< $@; f_arr=(); for i in "${l[@]}"; do f_arr+=($(to_float $i)); done; echo $(join_by , "${f_arr[@]}"); }
-
-# log date format
-log_date () { echo "($(date +"%Y-%m-%d %H:%M:%S")) "; }
-
-
-# ===============
-# Data Processing
-# ===============
-# display info
-echo "$(log_date)$(basename $0): processing CNRM-CM6-1 dataset..."
-
-# make the output directory
-echo "$(log_date)$(basename $0): creating output directory under $outputDir"
-mkdir -p "$outputDir"
-
-# make array of ensemble members
-if [[ -n "$ensemble" ]]; then
- IFS=',' read -ra ensembleArr <<< "$(echo "$ensemble")" # comma separated input
-else
- # if nothing has been entred, throw an error and exit
- echo "$(log_date)$(basename $0): ERROR! --ensemble argument does not" \
- "have valid value(s)"
- # exit the script
- exit 1;
-fi
-
-# define necessary dates
-startYear=$(date --date="$startDate" +"%Y") # start year
-endYear=$(date --date="$endDate" +"%Y") # end year
-yearsRange=$(seq $startYear $endYear)
-
-# make variable string for output file creation
-IFS=',' read -ra variablesArr <<< "$(echo "$variables")" # array for vars
-varStr=$(join_by "_" "${variablesArr[@]}")
-
-for member in "${ensembleArr[@]}"; do
- # creating yearly directory
- echo "$(log_date)$(basename $0): processing member $member"
-
- # loop over years
- for yr in $yearsRange; do
- # extract variables and spatially and temporally subset
- ncks -O \
- -d "$latVar",$(lims_to_float "$latLims") \
- -d "$lonVar",$(lims_to_float "$lonLims") \
- -v "$variables" \
- "$datasetDir/${filePrefix}_${member}_${fileSuffix}_${yr}.nc" \
- "$outputDir/${prefix}${filePrefix}_${member}_${varStr}_${yr}.nc"
- done
-
- # wait to assure the `for` loop is finished
- wait
-
-done
-
-# printing final prompt
-echo "$(log_date)$(basename $0): results are produced under $outputDir."
-
diff --git a/scripts/ec_earth3_veg/ec_earth3_veg.sh b/scripts/ec_earth3_veg/ec_earth3_veg.sh
deleted file mode 100755
index f74b9d6..0000000
--- a/scripts/ec_earth3_veg/ec_earth3_veg.sh
+++ /dev/null
@@ -1,197 +0,0 @@
-#!/bin/bash
-# Meteorological Data Processing Workflow
-# Copyright (C) 2022, University of Saskatchewan
-# Copyright (C) 2023, University of Calgary
-#
-# This file is part of Meteorological Data Processing Workflow
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see .
-
-# =========================
-# Credits and contributions
-# =========================
-# 1. Parts of the code are taken from https://www.shellscript.sh/tips/getopt/index.html
-
-
-# ================
-# General comments
-# ================
-# * All variables are camelCased for distinguishing from function names;
-# * function names are all in lower_case with words seperated by underscore for legibility;
-# * shell style is based on Google Open Source Projects'
-# Style Guide: https://google.github.io/styleguide/shellguide.html
-
-
-# ===============
-# Usage Functions
-# ===============
-short_usage() {
- echo "usage: $(basename $0) [-cio DIR] [-v VARS] [-se DATE] [-t CHAR] [-ln REAL,REAL] [-p STR]"
-}
-
-
-# argument parsing using getopt - WORKS ONLY ON LINUX BY DEFAULT
-parsedArguments=$(getopt -a -n ec_earth3_veg -o i:v:o:s:e:t:l:n:p:c:m: --long dataset-dir:,variables:,output-dir:,start-date:,end-date:,time-scale:,lat-lims:,lon-lims:,prefix:,cache:,ensemble: -- "$@")
-validArguments=$?
-if [ "$validArguments" != "0" ]; then
- short_usage;
- exit 1;
-fi
-
-# check if no options were passed
-if [ $# -eq 0 ]; then
- echo "ERROR $(basename $0): arguments missing";
- exit 1;
-fi
-
-# check long and short options passed
-eval set -- "$parsedArguments"
-while :
-do
- case "$1" in
- -i | --dataset-dir) datasetDir="$2" ; shift 2 ;; # required
- -v | --variables) variables="$2" ; shift 2 ;; # required
- -o | --output-dir) outputDir="$2" ; shift 2 ;; # required
- -s | --start-date) startDate="$2" ; shift 2 ;; # required
- -e | --end-date) endDate="$2" ; shift 2 ;; # required
- -t | --time-scale) timeScale="$2" ; shift 2 ;; # redundant - added for compatibility
- -l | --lat-lims) latLims="$2" ; shift 2 ;; # required
- -n | --lon-lims) lonLims="$2" ; shift 2 ;; # required
- -p | --prefix) prefix="$2" ; shift 2 ;; # optional
- -c | --cache) cache="$2" ; shift 2 ;; # redundant - added for compatibility
- -m | --ensemble) ensemble="$2" ; shift 2 ;; # required
-
- # -- means the end of the arguments; drop this, and break out of the while loop
- --) shift; break ;;
-
- # in case of invalid option
- *)
- echo "ERROR $(basename $0): invalid option '$1'";
- short_usage; exit 1 ;;
- esac
-done
-
-# check the prefix of not set
-if [[ -z $prefix ]]; then
- prefix="data"
-fi
-
-
-# =====================
-# Necessary Assumptions
-# =====================
-# TZ to be set to UTC to avoid invalid dates due to Daylight Saving
-alias date='TZ=UTC date'
-
-# expand aliases for the one stated above
-shopt -s expand_aliases
-
-
-# ==========================
-# Necessary Global Variables
-# ==========================
-format="%Y-%m-%dT%H:%M:%S" # date format
-filePrefix="Downscaled_EC-Earth3-Veg_MBCDS" # source dataset files' suffix constant
-fileSuffix="pr_tmn_tmx" # suffix before the date format
-
-latVar="lat"
-lonVar="lon"
-timeVar="time"
-
-# ===================
-# Necessary Functions
-# ===================
-# Modules below available on Compute Canada (CC) Graham Cluster Server
-load_core_modules () {
- module -q load cdo/2.0.4
- module -q load nco/5.0.6
-}
-load_core_modules
-
-
-#######################################
-# useful one-liners
-#######################################
-#calcualte Unix EPOCH time in seconds from 1970-01-01 00:00:00
-unix_epoch () { date --date="$@" +"%s"; }
-
-#check whether the input is float or real
-check_real () { if [[ "$1" == *'.'* ]]; then echo 'float'; else echo 'int'; fi; }
-
-#convert to float if the number is 'int'
-to_float () { if [[ $(check_real $1) == 'int' ]]; then printf "%.1f" "$1"; echo; else printf "%.5f" "$1"; echo; fi; }
-
-#join array element by the specified delimiter
-join_by () { local IFS="$1"; shift; echo "$*"; }
-
-#to_float the latLims and lonLims, real numbers delimited by ','
-lims_to_float () { IFS=',' read -ra l <<< $@; f_arr=(); for i in "${l[@]}"; do f_arr+=($(to_float $i)); done; echo $(join_by , "${f_arr[@]}"); }
-
-# log date format
-log_date () { echo "($(date +"%Y-%m-%d %H:%M:%S")) "; }
-
-
-# ===============
-# Data Processing
-# ===============
-# display info
-echo "$(log_date)$(basename $0): processing EC-Earth3-Veg dataset..."
-
-# make the output directory
-echo "$(log_date)$(basename $0): creating output directory under $outputDir"
-mkdir -p "$outputDir"
-
-# make array of ensemble members
-if [[ -n "$ensemble" ]]; then
- IFS=',' read -ra ensembleArr <<< "$(echo "$ensemble")" # comma separated input
-else
- # if nothing has been entred, throw an error and exit
- echo "$(log_date)$(basename $0): ERROR! --ensemble argument does not" \
- "have valid value(s)"
- # exit the script
- exit 1;
-fi
-
-# define necessary dates
-startYear=$(date --date="$startDate" +"%Y") # start year
-endYear=$(date --date="$endDate" +"%Y") # end year
-yearsRange=$(seq $startYear $endYear)
-
-# make variable string for output file creation
-IFS=',' read -ra variablesArr <<< "$(echo "$variables")" # array for vars
-varStr=$(join_by "_" "${variablesArr[@]}")
-
-for member in "${ensembleArr[@]}"; do
- # creating yearly directory
- echo "$(log_date)$(basename $0): processing member $member"
-
- # loop over years
- for yr in $yearsRange; do
- # extract variables and spatially and temporally subset
- ncks -O \
- -d "$latVar",$(lims_to_float "$latLims") \
- -d "$lonVar",$(lims_to_float "$lonLims") \
- -v "$variables" \
- "$datasetDir/${filePrefix}_${member}_${fileSuffix}_${yr}.nc" \
- "$outputDir/${prefix}${filePrefix}_${member}_${varStr}_${yr}.nc"
- done
-
- # wait to assure the `for` loop is finished
- wait
-
-done
-
-# printing final prompt
-echo "$(log_date)$(basename $0): results are produced under $outputDir."
-
diff --git a/scripts/conus_i/LICENSE b/scripts/eccc-rdrs/LICENSE
similarity index 100%
rename from scripts/conus_i/LICENSE
rename to scripts/eccc-rdrs/LICENSE
diff --git a/scripts/rdrs/README.md b/scripts/eccc-rdrs/README.md
similarity index 51%
rename from scripts/rdrs/README.md
rename to scripts/eccc-rdrs/README.md
index 9c8adda..cf65495 100644
--- a/scripts/rdrs/README.md
+++ b/scripts/eccc-rdrs/README.md
@@ -4,7 +4,7 @@ In this file, the details of the dataset is explained.
## Location of Dataset Files
The `RDRS` v2.1 dataset is located under the following directory accessible from Compute Canada (CC) Graham Cluster:
```console
-/project/rpp-kshook/Climate_Forcing_Data/meteorological-data/RDRSv2.1 # rpp-kshook allocation
+/project/rpp-kshook/Climate_Forcing_Data/meteorological-data/rdrsv2.1 # rpp-kshook allocation
/project/rrg-mclark/data/meteorological-data/rdrsv2.1 # rrg-mclark allocation
```
and the structure of the dataset hourly files is as following:
@@ -62,13 +62,18 @@ The spatial extent of the `RDRS` v2.1 is on latitutes from `+5.75` to `+64.75` a
The time-steps are hourly covering from `January 1980` to `December 2018`.
## Short Description on `RDRS` v2.1 Variables
-In most hydrological modelling applications, usually 7 variables are needed detailed as following: 1) specific humidity at 1.5 (or 2) meters, 2) surface pressure, 3) air temperature at 1.5 (or 2) meters, 4) wind speed at 10 meters, 5) precipitation, 6) downward short wave radiation, and 7) downward long wave radiation. These variables are available through `RDRS` v2.1 dataset and their details are described in the table below:
-|Variable Name |RDRSv2.1 Variable |Unit |IPCC abbreviation|Comments |
-|----------------------|-------------------|-----|-----------------|----------------------|
-|surface pressure |RDRS_v2.1_P_P0_SFC |mb |ps | |
-|specific humidity@1.5m|RDRS_v2.1_P_HU_1.5m|1 |huss | |
-|air tempreature @1.5m |RDRS_v2.1_P_TT_1.5m|C |tas | |
-|wind speed @10m |RDRS_v2.1_P_UVC_10m|kts |wspd |WIND=SQRT(U102+V102)|
-|precipitation |RDRS_v2.1_A_PR0_SFC|m/hr | |CaPA outputs |
-|short wave radiation |RDRS_v2.1_P_FB_SFC |W m-2|rsds |Downward solar flux |
-|long wave radiation |RDRS_v2.1_P_FI_SFC |W m-2|rlds |Downward infrared flux|
+In most hydrological modelling applications, seven variables are typically needed: 1) specific humidity (@1.5-2.0m or @40m), 2) surface pressure, 3) air temperature (@1.5-2.0m or @40m), 4) wind speed (@10m or @40m), 5) precipitation (surface level), 6) downward short-wave radiation (surface level), and 7) downward long-wave radiation (surface level). These variables are available through the `RDRS` v2.1 dataset and are detailed in the table below:
+|Variable Name |RDRSv2.1 Variable |Unit |IPCC abbreviation|Comments |
+|----------------------|---------------------|-----|-----------------|----------------------|
+|surface pressure |RDRS_v2.1_P_P0_SFC |mb |ps | |
+|specific humidity@1.5m|RDRS_v2.1_P_HU_1.5m |1 |huss | |
+|air temperature @1.5m |RDRS_v2.1_P_TT_1.5m  |°C   |tas              |                      |
+|wind speed @10m       |RDRS_v2.1_P_UVC_10m  |kts  |wspd             |WIND=SQRT(U10^2+V10^2)|
+|precipitation |RDRS_v2.1_A_PR0_SFC |m/hr | |CaPA outputs |
+|short wave radiation |RDRS_v2.1_P_FB_SFC |W m-2|rsds |Downward solar flux |
+|long wave radiation |RDRS_v2.1_P_FI_SFC |W m-2|rlds |Downward infrared flux|
+|specific humidity @40m|RDRS_v2.1_P_HU_09944 |1 |huss | |
+|air temperature @40m |RDRS_v2.1_P_TT_09944 |°C |tas | |
+|wind speed @40m |RDRS_v2.1_P_UVC_09944|kts |wspd | |
+
+Please visit the dataset's [official website](https://github.com/julemai/CaSPAr/wiki/Available-products) for the most up-to-date information.
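+
+As a minimal, illustrative sketch (assuming the `rdrs.sh` interface shown in this repository; the paths, limits, and dates below are placeholders), the seven common variables could be extracted for a region and period with:
+```console
+./rdrs.sh --dataset-dir="/project/rrg-mclark/data/meteorological-data/rdrsv2.1" \
+  --variable="RDRS_v2.1_P_P0_SFC,RDRS_v2.1_P_HU_1.5m,RDRS_v2.1_P_TT_1.5m,RDRS_v2.1_P_UVC_10m,RDRS_v2.1_A_PR0_SFC,RDRS_v2.1_P_FB_SFC,RDRS_v2.1_P_FI_SFC" \
+  --output-dir="$HOME/rdrs-outputs" \
+  --start-date="1980-01-01" --end-date="1980-12-31" \
+  --lat-lims=49,54 --lon-lims=-120,-110 \
+  --cache="$HOME/.cache" --prefix="rdrs_"
+```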
diff --git a/scripts/rdrs/rdrs.sh b/scripts/eccc-rdrs/rdrs.sh
similarity index 85%
rename from scripts/rdrs/rdrs.sh
rename to scripts/eccc-rdrs/rdrs.sh
index a8ec96b..3cf239a 100755
--- a/scripts/rdrs/rdrs.sh
+++ b/scripts/eccc-rdrs/rdrs.sh
@@ -1,6 +1,7 @@
#!/bin/bash
# Meteorological Data Processing Workflow
-# Copyright (C) 2022, University of Saskatchewan
+# Copyright (C) 2022-2023, University of Saskatchewan
+# Copyright (C) 2023-2024, University of Calgary
#
# This file is part of Meteorological Data Processing Workflow
#
@@ -20,7 +21,8 @@
# =========================
# Credits and contributions
# =========================
-# 1. Parts of the code are taken from https://www.shellscript.sh/tips/getopt/index.html
+# 1. Parts of the code are taken from
+# https://www.shellscript.sh/tips/getopt/index.html
# ================
@@ -33,7 +35,7 @@
# ===============
-# Usage Functions
+# Usage functions
# ===============
short_usage() {
echo "usage: $(basename $0) [-cio DIR] [-v VARS] [-se DATE] [-t CHAR] [-ln REAL,REAL] [-p STR]"
@@ -41,7 +43,7 @@ short_usage() {
# argument parsing using getopt - WORKS ONLY ON LINUX BY DEFAULT
-parsedArguments=$(getopt -a -n extract-dataset -o i:v:o:s:e:t:l:n:p:c:m: --long dataset-dir:,variable:,output-dir:,start-date:,end-date:,time-scale:,lat-lims:,lon-lims:,prefix:,cache:,ensemble: -- "$@")
+parsedArguments=$(getopt -a -n rdrs -o i:v:o:s:e:t:l:n:p:c:m:S:M: --long dataset-dir:,variable:,output-dir:,start-date:,end-date:,time-scale:,lat-lims:,lon-lims:,prefix:,cache:,ensemble:,scenario:,model: -- "$@")
validArguments=$?
if [ "$validArguments" != "0" ]; then
short_usage;
@@ -68,8 +70,10 @@ do
-l | --lat-lims) latLims="$2" ; shift 2 ;; # required
-n | --lon-lims) lonLims="$2" ; shift 2 ;; # required
-p | --prefix) prefix="$2" ; shift 2 ;; # optional
- -c | --cache) cache="$2" ; shift 2 ;; # required
+ -c | --cache) cache="$2" ; shift 2 ;; # required
-m | --ensemble) ensemble="$2" ; shift 2 ;; # redundant - added for compatibility
+ -S | --scenario) scenario="$2" ; shift 2 ;; # redundant - added for compatibility
+ -M | --model) model="$2" ; shift 2 ;; # redundant - added for compatibility
# -- means the end of the arguments; drop this, and break out of the while loop
--) shift; break ;;
@@ -82,19 +86,21 @@ do
done
# check if $ensemble is provided
-if [[ -n "$ensemble" ]]; then
- echo "ERROR $(basename $0): redundant argument (ensemble) provided";
+if [[ -n "$ensemble" ]] || \
+ [[ -n "$scenario" ]] || \
+ [[ -n "$model" ]]; then
+ echo "ERROR $(basename $0): redundant argument provided";
exit 1;
fi
# check the prefix of not set
if [[ -z $prefix ]]; then
- prefix="data"
+ prefix="data_"
fi
# =====================
-# Necessary Assumptions
+# Necessary assumptions
# =====================
# TZ to be set to UTC to avoid invalid dates due to Daylight Saving
alias date='TZ=UTC date'
@@ -104,7 +110,7 @@ shopt -s expand_aliases
# ==========================
-# Necessary Global Variables
+# Necessary global variables
# ==========================
# the structure of file names is as follows: "YYYYMMDD12.nc"
rdrsFormat="%Y%m%d" # rdrs file date format
@@ -114,20 +120,22 @@ fileStruct="" # source dataset files' prefix constant
latVar="rlat"
lonVar="rlon"
+
# ===================
-# Necessary Functions
+# Necessary functions
# ===================
# Modules below available on Compute Canada (CC) Graham Cluster Server
-load_core_modules () {
- module -q load cdo/2.0.4
- module -q load nco/5.0.6
+function load_core_modules () {
+ module -q load gcc/9.3.0
+ module -q load cdo/2.0.4
+ module -q load nco/5.0.6
}
load_core_modules
-#######################################
-# useful one-liners
-#######################################
+# =================
+# Useful one-liners
+# =================
#calcualte Unix EPOCH time in seconds from 1970-01-01 00:00:00
unix_epoch () { date --date="$@" +"%s"; }
@@ -145,7 +153,7 @@ lims_to_float () { IFS=',' read -ra l <<< $@; f_arr=(); for i in "${l[@]}"; do f
# ===============
-# Data Processing
+# Data processing
# ===============
# display info
echo "$(basename $0): processing ECCC RDRSv2.1..."
diff --git a/scripts/era5/ERA5_downloads.zip b/scripts/ecmwf-era5/ERA5_downloads.zip
similarity index 100%
rename from scripts/era5/ERA5_downloads.zip
rename to scripts/ecmwf-era5/ERA5_downloads.zip
diff --git a/scripts/conus_ii/LICENSE b/scripts/ecmwf-era5/LICENSE
similarity index 100%
rename from scripts/conus_ii/LICENSE
rename to scripts/ecmwf-era5/LICENSE
diff --git a/scripts/era5/README.md b/scripts/ecmwf-era5/README.md
similarity index 100%
rename from scripts/era5/README.md
rename to scripts/ecmwf-era5/README.md
diff --git a/scripts/era5/era5.sh b/scripts/ecmwf-era5/era5.sh
similarity index 92%
rename from scripts/era5/era5.sh
rename to scripts/ecmwf-era5/era5.sh
index 8111fa6..c173383 100755
--- a/scripts/era5/era5.sh
+++ b/scripts/ecmwf-era5/era5.sh
@@ -1,10 +1,9 @@
#!/bin/bash
-# Global Water Futures (GWF) Meteorological Data Processing Workflow
-# Copyright (C) 2022, Global Water Futures (GWF), University of Saskatchewan
+# Meteorological Data Processing Workflow
+# Copyright (C) 2022-2023, University of Saskatchewan
+# Copyright (C) 2023-2024, University of Calgary
#
-# This file is part of GWF Meteorological Data Processing Workflow
-#
-# For more information see: https://gwf.usask.ca/
+# This file is part of Meteorological Data Processing Workflow
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -23,7 +22,7 @@
# Credits and contributions
# =========================
# 1. Parts of the code are taken from https://www.shellscript.sh/tips/getopt/index.html
-# 2. Dr. Gouqiang Tang provided the downloaded ERA5 dataset files
+# 2. Drs. Guoqiang Tang and Wouter Knoben provided the downloaded ERA5 dataset files
# ================
@@ -44,7 +43,7 @@ short_usage() {
# argument parsing using getopt - WORKS ONLY ON LINUX BY DEFAULT
-parsedArguments=$(getopt -a -n extract-dataset -o i:v:o:s:e:t:l:n:p:c:m: --long dataset-dir:,variables:,output-dir:,start-date:,end-date:,time-scale:,lat-lims:,lon-lims:,prefix:,cache:,ensemble: -- "$@")
+parsedArguments=$(getopt -a -n era5 -o i:v:o:s:e:t:l:n:p:c:m:S:M: --long dataset-dir:,variables:,output-dir:,start-date:,end-date:,time-scale:,lat-lims:,lon-lims:,prefix:,cache:,ensemble:,scenario:,model: -- "$@")
validArguments=$?
if [ "$validArguments" != "0" ]; then
short_usage;
@@ -70,8 +69,8 @@ do
-t | --time-scale) timeScale="$2" ; shift 2 ;; # required
-l | --lat-lims) latLims="$2" ; shift 2 ;; # required
-n | --lon-lims) lonLims="$2" ; shift 2 ;; # required
- -p | --prefix) prefix="$2" ; shift 2 ;; # optional
- -c | --cache) cache="$2" ; shift 2 ;; # required
+ -p | --prefix) prefix="$2" ; shift 2 ;; # optional
+ -c | --cache) cache="$2" ; shift 2 ;; # required
-m | --ensemble) ensemble="$2" ; shift 2 ;; # redundant - added for compatibility
# -- means the end of the arguments; drop this, and break out of the while loop
@@ -85,18 +84,21 @@ do
done
# check if $ensemble is provided
-if [[ -n "$ensemble" ]]; then
- echo "ERROR $(basename $0): redundant argument (ensemble) provided";
+if [[ -n "$ensemble" ]] || \
+ [[ -n "$scenario" ]] || \
+ [[ -n "$model" ]]; then
+ echo "ERROR $(basename $0): redundant argument provided";
exit 1;
fi
# check the prefix of not set
if [[ -z $prefix ]]; then
- prefix="data"
+ prefix="data_"
fi
+
# ==========================
-# Necessary Global Variables
+# Necessary global variables
# ==========================
# the structure of file names is as follows: "ERA5_merged_YYYYMM.nc"
era5Format="%Y%m" # era5 file date format
@@ -106,16 +108,19 @@ fileStruct="ERA5_merged" # source dataset files' prefix constant
# ===================
-# Necessary Functions
+# Necessary functions
# ===================
# Modules below available on Compute Canada (CC) Graham Cluster Server
-module load cdo/2.0.4
-module load nco/5.0.6
+function load_core_modules () {
+ module load cdo/2.0.4
+ module load nco/5.0.6
+}
+load_core_modules
-#######################################
-# useful one-liners
-#######################################
+# =================
+# Useful one-liners
+# =================
#calcualte Unix EPOCH time in seconds from 1970-01-01 00:00:00
unix_epoch () { date --date="$@" +"%s"; }
@@ -156,7 +161,7 @@ lims_to_float () { IFS=',' read -ra l <<< $@; f_arr=(); for i in "${l[@]}"; do f
# c) fileNameYear
# d) fileNameMonth
#######################################
-extract_filename_info () {
+function extract_filename_info () {
# define local variable for input argument
local fPath="$1" # format: "/path/to/file/ERA5_merged_YYYYMM.nc"
@@ -191,7 +196,7 @@ extract_filename_info () {
# produces the following variables:
# 5) dateRangeArr
#######################################
-date_range () {
+function date_range () {
local start=$1 # start date
local end=$2 # end date
local fmt=$3 # format of the ouput dates
@@ -240,7 +245,7 @@ date_range () {
# prints the end of the time-frame
# at the last time-step to the stdout
#######################################
-time_frame_end () {
+function time_frame_end () {
local dateStr=$1 # date string
local timeFrame=$2 # time-frame
local timeStep=$3 # time-step
@@ -296,7 +301,7 @@ time_frame_end () {
# on the time-steps (argument #9)
#
#######################################
-split_ts () {
+function split_ts () {
# assign local variables
local start=$1 # start date
local end=$2 # end date
@@ -348,7 +353,7 @@ split_ts () {
# endPoint: end point of the time
# frame
#######################################
-define_time_points () {
+function define_time_points () {
local fDate=$1
diff --git a/scripts/era5/era5_simplified.sh b/scripts/ecmwf-era5/era5_simplified.sh
similarity index 84%
rename from scripts/era5/era5_simplified.sh
rename to scripts/ecmwf-era5/era5_simplified.sh
index b0bf729..9200ea6 100755
--- a/scripts/era5/era5_simplified.sh
+++ b/scripts/ecmwf-era5/era5_simplified.sh
@@ -1,6 +1,7 @@
#!/bin/bash
# Meteorological Data Processing Workflow
-# Copyright (C) 2022, University of Saskatchewan
+# Copyright (C) 2022-2023, University of Saskatchewan
+# Copyright (C) 2023-2024, University of Calgary
#
# This file is part of Meteorological Data Processing Workflow
#
@@ -20,8 +21,10 @@
# =========================
# Credits and contributions
# =========================
-# 1. Parts of the code are taken from https://www.shellscript.sh/tips/getopt/index.html
-# 2. Dr. Quoqiang Tang downloaded, pre-processed, and produced relevant associated scripts.
+# 1. Parts of the code are taken from
+# https://www.shellscript.sh/tips/getopt/index.html
+# 2. Drs. Guoqiang Tang and Wouter Knoben downloaded, pre-processed, and
+# produced relevant associated scripts.
# ================
@@ -42,7 +45,7 @@ short_usage() {
# argument parsing using getopt - WORKS ONLY ON LINUX BY DEFAULT
-parsedArguments=$(getopt -a -n extract-dataset -o i:v:o:s:e:t:l:n:p:c:m: --long dataset-dir:,variables:,output-dir:,start-date:,end-date:,time-scale:,lat-lims:,lon-lims:,prefix:,cache:,ensemble: -- "$@")
+parsedArguments=$(getopt -a -n era5 -o i:v:o:s:e:t:l:n:p:c:m:S:M: --long dataset-dir:,variables:,output-dir:,start-date:,end-date:,time-scale:,lat-lims:,lon-lims:,prefix:,cache:,ensemble:,scenario:,model: -- "$@")
validArguments=$?
if [ "$validArguments" != "0" ]; then
short_usage;
@@ -68,9 +71,11 @@ do
-t | --time-scale) timeScale="$2" ; shift 2 ;; # redundant - added for compatibility
-l | --lat-lims) latLims="$2" ; shift 2 ;; # required
-n | --lon-lims) lonLims="$2" ; shift 2 ;; # required
- -p | --prefix) prefix="$2" ; shift 2 ;; # optional
- -c | --cache) cache="$2" ; shift 2 ;; # required
+ -p | --prefix) prefix="$2" ; shift 2 ;; # optional
+ -c | --cache) cache="$2" ; shift 2 ;; # required
-m | --ensemble) ensemble="$2" ; shift 2 ;; # redundant - added for compatibility
+ -S | --scenario) scenario="$2" ; shift 2 ;; # redundant - added for compatibility
+ -M | --model) model="$2" ; shift 2 ;; # redundant - added for compatibility
# -- means the end of the arguments; drop this, and break out of the while loop
--) shift; break ;;
@@ -90,12 +95,12 @@ fi
# check the prefix of not set
if [[ -z $prefix ]]; then
- prefix="data"
+ prefix="data_"
fi
# =====================
-# Necessary Assumptions
+# Necessary assumptions
# =====================
# TZ to be set to UTC to avoid invalid dates due to Daylight Saving
alias date='TZ=UTC date'
@@ -105,7 +110,7 @@ shopt -s expand_aliases
# ==========================
-# Necessary Global Variables
+# Necessary global variables
# ==========================
# the structure of file names is as follows: "ERA5_merged_YYYYMM.nc"
era5Format="%Y%m" # era5 file date format
@@ -114,20 +119,21 @@ fileStruct="ERA5_merged" # source dataset files' prefix constant
latVar="latitude"
lonVar="longitude"
+
# ===================
-# Necessary Functions
+# Necessary functions
# ===================
# Modules below available on Compute Canada (CC) Graham Cluster Server
-load_core_modules () {
+function load_core_modules () {
module -q load cdo/2.0.4
module -q load nco/5.0.6
}
load_core_modules
-#######################################
-# useful one-liners
-#######################################
+# =================
+# Useful one-liners
+# =================
#calcualte Unix EPOCH time in seconds from 1970-01-01 00:00:00
unix_epoch () { date --date="$@" +"%s"; }
@@ -145,7 +151,7 @@ lims_to_float () { IFS=',' read -ra l <<< $@; f_arr=(); for i in "${l[@]}"; do f
# ===============
-# Data Processing
+# Data processing
# ===============
# display info
echo "$(basename $0): processing ECMWF ERA5..."
diff --git a/scripts/gfdl_cm4/gfdl_cm4.sh b/scripts/gfdl_cm4/gfdl_cm4.sh
deleted file mode 100755
index c058faa..0000000
--- a/scripts/gfdl_cm4/gfdl_cm4.sh
+++ /dev/null
@@ -1,197 +0,0 @@
-#!/bin/bash
-# Meteorological Data Processing Workflow
-# Copyright (C) 2022, University of Saskatchewan
-# Copyright (C) 2023, University of Calgary
-#
-# This file is part of Meteorological Data Processing Workflow
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see .
-
-# =========================
-# Credits and contributions
-# =========================
-# 1. Parts of the code are taken from https://www.shellscript.sh/tips/getopt/index.html
-
-
-# ================
-# General comments
-# ================
-# * All variables are camelCased for distinguishing from function names;
-# * function names are all in lower_case with words seperated by underscore for legibility;
-# * shell style is based on Google Open Source Projects'
-# Style Guide: https://google.github.io/styleguide/shellguide.html
-
-
-# ===============
-# Usage Functions
-# ===============
-short_usage() {
- echo "usage: $(basename $0) [-cio DIR] [-v VARS] [-se DATE] [-t CHAR] [-ln REAL,REAL] [-p STR]"
-}
-
-
-# argument parsing using getopt - WORKS ONLY ON LINUX BY DEFAULT
-parsedArguments=$(getopt -a -n gdfl_cm4 -o i:v:o:s:e:t:l:n:p:c:m: --long dataset-dir:,variables:,output-dir:,start-date:,end-date:,time-scale:,lat-lims:,lon-lims:,prefix:,cache:,ensemble: -- "$@")
-validArguments=$?
-if [ "$validArguments" != "0" ]; then
- short_usage;
- exit 1;
-fi
-
-# check if no options were passed
-if [ $# -eq 0 ]; then
- echo "ERROR $(basename $0): arguments missing";
- exit 1;
-fi
-
-# check long and short options passed
-eval set -- "$parsedArguments"
-while :
-do
- case "$1" in
- -i | --dataset-dir) datasetDir="$2" ; shift 2 ;; # required
- -v | --variables) variables="$2" ; shift 2 ;; # required
- -o | --output-dir) outputDir="$2" ; shift 2 ;; # required
- -s | --start-date) startDate="$2" ; shift 2 ;; # required
- -e | --end-date) endDate="$2" ; shift 2 ;; # required
- -t | --time-scale) timeScale="$2" ; shift 2 ;; # redundant - added for compatibility
- -l | --lat-lims) latLims="$2" ; shift 2 ;; # required
- -n | --lon-lims) lonLims="$2" ; shift 2 ;; # required
- -p | --prefix) prefix="$2" ; shift 2 ;; # optional
- -c | --cache) cache="$2" ; shift 2 ;; # redundant - added for compatibility
- -m | --ensemble) ensemble="$2" ; shift 2 ;; # required
-
- # -- means the end of the arguments; drop this, and break out of the while loop
- --) shift; break ;;
-
- # in case of invalid option
- *)
- echo "ERROR $(basename $0): invalid option '$1'";
- short_usage; exit 1 ;;
- esac
-done
-
-# check the prefix of not set
-if [[ -z $prefix ]]; then
- prefix="data"
-fi
-
-
-# =====================
-# Necessary Assumptions
-# =====================
-# TZ to be set to UTC to avoid invalid dates due to Daylight Saving
-alias date='TZ=UTC date'
-
-# expand aliases for the one stated above
-shopt -s expand_aliases
-
-
-# ==========================
-# Necessary Global Variables
-# ==========================
-format="%Y-%m-%dT%H:%M:%S" # date format
-filePrefix="Downscaled_GFDL-CM4_MBCDS" # source dataset files' suffix constant
-fileSuffix="pr_tmn_tmx" # suffix before the date format
-
-latVar="lat"
-lonVar="lon"
-timeVar="time"
-
-# ===================
-# Necessary Functions
-# ===================
-# Modules below available on Compute Canada (CC) Graham Cluster Server
-load_core_modules () {
- module -q load cdo/2.0.4
- module -q load nco/5.0.6
-}
-load_core_modules
-
-
-#######################################
-# useful one-liners
-#######################################
-#calcualte Unix EPOCH time in seconds from 1970-01-01 00:00:00
-unix_epoch () { date --date="$@" +"%s"; }
-
-#check whether the input is float or real
-check_real () { if [[ "$1" == *'.'* ]]; then echo 'float'; else echo 'int'; fi; }
-
-#convert to float if the number is 'int'
-to_float () { if [[ $(check_real $1) == 'int' ]]; then printf "%.1f" "$1"; echo; else printf "%.5f" "$1"; echo; fi; }
-
-#join array element by the specified delimiter
-join_by () { local IFS="$1"; shift; echo "$*"; }
-
-#to_float the latLims and lonLims, real numbers delimited by ','
-lims_to_float () { IFS=',' read -ra l <<< $@; f_arr=(); for i in "${l[@]}"; do f_arr+=($(to_float $i)); done; echo $(join_by , "${f_arr[@]}"); }
-
-# log date format
-log_date () { echo "($(date +"%Y-%m-%d %H:%M:%S")) "; }
-
-
-# ===============
-# Data Processing
-# ===============
-# display info
-echo "$(log_date)$(basename $0): processing GDFL-CM4 dataset..."
-
-# make the output directory
-echo "$(log_date)$(basename $0): creating output directory under $outputDir"
-mkdir -p "$outputDir"
-
-# make array of ensemble members
-if [[ -n "$ensemble" ]]; then
- IFS=',' read -ra ensembleArr <<< "$(echo "$ensemble")" # comma separated input
-else
- # if nothing has been entred, throw an error and exit
- echo "$(log_date)$(basename $0): ERROR! --ensemble argument does not" \
- "have valid value(s)"
- # exit the script
- exit 1;
-fi
-
-# define necessary dates
-startYear=$(date --date="$startDate" +"%Y") # start year
-endYear=$(date --date="$endDate" +"%Y") # end year
-yearsRange=$(seq $startYear $endYear)
-
-# make variable string for output file creation
-IFS=',' read -ra variablesArr <<< "$(echo "$variables")" # array for vars
-varStr=$(join_by "_" "${variablesArr[@]}")
-
-for member in "${ensembleArr[@]}"; do
- # creating yearly directory
- echo "$(log_date)$(basename $0): processing member $member"
-
- # loop over years
- for yr in $yearsRange; do
- # extract variables and spatially and temporally subset
- ncks -O \
- -d "$latVar",$(lims_to_float "$latLims") \
- -d "$lonVar",$(lims_to_float "$lonLims") \
- -v "$variables" \
- "$datasetDir/${filePrefix}_${member}_${fileSuffix}_${yr}.nc" \
- "$outputDir/${prefix}${filePrefix}_${member}_${varStr}_${yr}.nc"
- done
-
- # wait to assure the `for` loop is finished
- wait
-
-done
-
-# printing final prompt
-echo "$(log_date)$(basename $0): results are produced under $outputDir."
-
diff --git a/scripts/gfdl_esm4/gfdl_esm4.sh b/scripts/gfdl_esm4/gfdl_esm4.sh
deleted file mode 100755
index 53c9c58..0000000
--- a/scripts/gfdl_esm4/gfdl_esm4.sh
+++ /dev/null
@@ -1,197 +0,0 @@
-#!/bin/bash
-# Meteorological Data Processing Workflow
-# Copyright (C) 2022, University of Saskatchewan
-# Copyright (C) 2023, University of Calgary
-#
-# This file is part of Meteorological Data Processing Workflow
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see .
-
-# =========================
-# Credits and contributions
-# =========================
-# 1. Parts of the code are taken from https://www.shellscript.sh/tips/getopt/index.html
-
-
-# ================
-# General comments
-# ================
-# * All variables are camelCased for distinguishing from function names;
-# * function names are all in lower_case with words seperated by underscore for legibility;
-# * shell style is based on Google Open Source Projects'
-# Style Guide: https://google.github.io/styleguide/shellguide.html
-
-
-# ===============
-# Usage Functions
-# ===============
-short_usage() {
- echo "usage: $(basename $0) [-cio DIR] [-v VARS] [-se DATE] [-t CHAR] [-ln REAL,REAL] [-p STR]"
-}
-
-
-# argument parsing using getopt - WORKS ONLY ON LINUX BY DEFAULT
-parsedArguments=$(getopt -a -n gdfl_esm4 -o i:v:o:s:e:t:l:n:p:c:m: --long dataset-dir:,variables:,output-dir:,start-date:,end-date:,time-scale:,lat-lims:,lon-lims:,prefix:,cache:,ensemble: -- "$@")
-validArguments=$?
-if [ "$validArguments" != "0" ]; then
- short_usage;
- exit 1;
-fi
-
-# check if no options were passed
-if [ $# -eq 0 ]; then
- echo "ERROR $(basename $0): arguments missing";
- exit 1;
-fi
-
-# check long and short options passed
-eval set -- "$parsedArguments"
-while :
-do
- case "$1" in
- -i | --dataset-dir) datasetDir="$2" ; shift 2 ;; # required
- -v | --variables) variables="$2" ; shift 2 ;; # required
- -o | --output-dir) outputDir="$2" ; shift 2 ;; # required
- -s | --start-date) startDate="$2" ; shift 2 ;; # required
- -e | --end-date) endDate="$2" ; shift 2 ;; # required
- -t | --time-scale) timeScale="$2" ; shift 2 ;; # redundant - added for compatibility
- -l | --lat-lims) latLims="$2" ; shift 2 ;; # required
- -n | --lon-lims) lonLims="$2" ; shift 2 ;; # required
- -p | --prefix) prefix="$2" ; shift 2 ;; # optional
- -c | --cache) cache="$2" ; shift 2 ;; # redundant - added for compatibility
- -m | --ensemble) ensemble="$2" ; shift 2 ;; # required
-
- # -- means the end of the arguments; drop this, and break out of the while loop
- --) shift; break ;;
-
- # in case of invalid option
- *)
- echo "ERROR $(basename $0): invalid option '$1'";
- short_usage; exit 1 ;;
- esac
-done
-
-# check the prefix of not set
-if [[ -z $prefix ]]; then
- prefix="data"
-fi
-
-
-# =====================
-# Necessary Assumptions
-# =====================
-# TZ to be set to UTC to avoid invalid dates due to Daylight Saving
-alias date='TZ=UTC date'
-
-# expand aliases for the one stated above
-shopt -s expand_aliases
-
-
-# ==========================
-# Necessary Global Variables
-# ==========================
-format="%Y-%m-%dT%H:%M:%S" # date format
-filePrefix="Downscaled_GFDL-ESM4_MBCDS" # source dataset files' suffix constant
-fileSuffix="pr_tmn_tmx" # suffix before the date format
-
-latVar="lat"
-lonVar="lon"
-timeVar="time"
-
-# ===================
-# Necessary Functions
-# ===================
-# Modules below available on Compute Canada (CC) Graham Cluster Server
-load_core_modules () {
- module -q load cdo/2.0.4
- module -q load nco/5.0.6
-}
-load_core_modules
-
-
-#######################################
-# useful one-liners
-#######################################
-#calcualte Unix EPOCH time in seconds from 1970-01-01 00:00:00
-unix_epoch () { date --date="$@" +"%s"; }
-
-#check whether the input is float or real
-check_real () { if [[ "$1" == *'.'* ]]; then echo 'float'; else echo 'int'; fi; }
-
-#convert to float if the number is 'int'
-to_float () { if [[ $(check_real $1) == 'int' ]]; then printf "%.1f" "$1"; echo; else printf "%.5f" "$1"; echo; fi; }
-
-#join array element by the specified delimiter
-join_by () { local IFS="$1"; shift; echo "$*"; }
-
-#to_float the latLims and lonLims, real numbers delimited by ','
-lims_to_float () { IFS=',' read -ra l <<< $@; f_arr=(); for i in "${l[@]}"; do f_arr+=($(to_float $i)); done; echo $(join_by , "${f_arr[@]}"); }
-
-# log date format
-log_date () { echo "($(date +"%Y-%m-%d %H:%M:%S")) "; }
-
-
-# ===============
-# Data Processing
-# ===============
-# display info
-echo "$(log_date)$(basename $0): processing GDFL-ESM4 dataset..."
-
-# make the output directory
-echo "$(log_date)$(basename $0): creating output directory under $outputDir"
-mkdir -p "$outputDir"
-
-# make array of ensemble members
-if [[ -n "$ensemble" ]]; then
- IFS=',' read -ra ensembleArr <<< "$(echo "$ensemble")" # comma separated input
-else
- # if nothing has been entred, throw an error and exit
- echo "$(log_date)$(basename $0): ERROR! --ensemble argument does not" \
- "have valid value(s)"
- # exit the script
- exit 1;
-fi
-
-# define necessary dates
-startYear=$(date --date="$startDate" +"%Y") # start year
-endYear=$(date --date="$endDate" +"%Y") # end year
-yearsRange=$(seq $startYear $endYear)
-
-# make variable string for output file creation
-IFS=',' read -ra variablesArr <<< "$(echo "$variables")" # array for vars
-varStr=$(join_by "_" "${variablesArr[@]}")
-
-for member in "${ensembleArr[@]}"; do
- # creating yearly directory
- echo "$(log_date)$(basename $0): processing member $member"
-
- # loop over years
- for yr in $yearsRange; do
- # extract variables and spatially and temporally subset
- ncks -O \
- -d "$latVar",$(lims_to_float "$latLims") \
- -d "$lonVar",$(lims_to_float "$lonLims") \
- -v "$variables" \
- "$datasetDir/${filePrefix}_${member}_${fileSuffix}_${yr}.nc" \
- "$outputDir/${prefix}${filePrefix}_${member}_${varStr}_${yr}.nc"
- done
-
- # wait to assure the `for` loop is finished
- wait
-
-done
-
-# printing final prompt
-echo "$(log_date)$(basename $0): results are produced under $outputDir."
-
diff --git a/scripts/daymet/LICENSE b/scripts/gwf-ncar-conus_i/LICENSE
similarity index 100%
rename from scripts/daymet/LICENSE
rename to scripts/gwf-ncar-conus_i/LICENSE
diff --git a/scripts/conus_i/README.md b/scripts/gwf-ncar-conus_i/README.md
similarity index 100%
rename from scripts/conus_i/README.md
rename to scripts/gwf-ncar-conus_i/README.md
diff --git a/scripts/conus_i/conus_i.sh b/scripts/gwf-ncar-conus_i/conus_i.sh
similarity index 92%
rename from scripts/conus_i/conus_i.sh
rename to scripts/gwf-ncar-conus_i/conus_i.sh
index e78d400..db90686 100755
--- a/scripts/conus_i/conus_i.sh
+++ b/scripts/gwf-ncar-conus_i/conus_i.sh
@@ -1,6 +1,7 @@
#!/bin/bash
# Meteorological Data Processing Workflow
-# Copyright (C) 2022, University of Saskatchewan
+# Copyright (C) 2022-2023, University of Saskatchewan
+# Copyright (C) 2023-2024, University of Calgary
#
# This file is part of Meteorological Data Processing Workflow
#
@@ -20,11 +21,13 @@
# =========================
# Credits and contributions
# =========================
-# 1. Parts of the code are taken from https://www.shellscript.sh/tips/getopt/index.html
+# 1. Parts of the code are taken from
+# https://www.shellscript.sh/tips/getopt/index.html
# 2. Dr. Zhenhua Li provided scripts to extract and process CONUSI datasets
# 3. Dr. Shervan Gharari produced the netCDF file containing XLAT and XLONG
# coordinate variables put under /assets/coord_XLAT_XLONG_conus_i.nc.
-# 4. Sorting workflow is taken from the following link; https://stackoverflow.com/a/11789688/5188208
+# 4. Sorting workflow is taken from the following link:
+# https://stackoverflow.com/a/11789688/5188208
# ================
# General comments
@@ -36,14 +39,14 @@
# ===============
-# Usage Functions
+# Usage functions
# ===============
short_usage() {
echo "usage: $(basename $0) [-io DIR] [-v VARS] [-se DATE] [-t CHAR] [-ln REAL,REAL]"
}
# argument parsing using getopt - WORKS ONLY ON LINUX BY DEFAULT
-parsedArguments=$(getopt -a -n extract-dataset -o i:v:o:s:e:t:l:n:c:p:m: --long dataset-dir:,variables:,output-dir:,start-date:,end-date:,time-scale:,lat-lims:,lon-lims:,cache:,prefix:,ensemble: -- "$@")
+parsedArguments=$(getopt -a -n conus-i -o i:v:o:s:e:t:l:n:c:p:m:S:M: --long dataset-dir:,variables:,output-dir:,start-date:,end-date:,time-scale:,lat-lims:,lon-lims:,cache:,prefix:,ensemble:,scenario:,model: -- "$@")
validArguments=$?
if [ "$validArguments" != "0" ]; then
short_usage;
@@ -72,6 +75,8 @@ do
-c | --cache) cacheDir="$2" ; shift 2 ;; # required
-p | --prefix) prefix="$2" ; shift 2 ;; # required
-m | --ensemble) ensemble="$2" ; shift 2 ;; # redundant - added for compatibility
+ -S | --scenario) scenario="$2" ; shift 2 ;; # redundant - added for compatibility
+ -M | --model) model="$2" ; shift 2 ;; # redundant - added for compatibility
# -- means the end of the arguments; drop this, and break out of the while loop
--) shift; break ;;
@@ -84,28 +89,31 @@ do
done
# check if $ensemble is provided
-if [[ -n "$ensemble" ]]; then
- echo "ERROR $(basename $0): redundant argument (ensemble) provided";
+if [[ -n "$ensemble" ]] || \
+ [[ -n "$scenario" ]] || \
+ [[ -n "$model" ]]; then
+ echo "ERROR $(basename $0): redundant argument provided";
exit 1;
fi
# =====================
-# Necessary Assumptions
+# Necessary assumptions
# =====================
# hard-coding the address of the co-ordinate NetCDF files
# containing XLAT and XLONG variables each having dimensions
# of "south_north" and "west_east".
+datatoolPath="$(dirname $0)/../../" # datatool's path
coordMainFile="/project/rpp-kshook/Model_Output/WRF/CONUS/coord.nc"
-coordEssFile="$(dirname $0)/../../assets/coord_XLAT_XLONG_conus_i.nc"
+coordEssFile="${datatoolPath}/assets/nc_coords/coord_XLAT_XLONG_conus_i.nc"
latVar="south_north"
lonVar="west_east"
# The structure of file names is as follows: "wrf2d_d01_YYYY-MM-DD_HH:MM:SS" (no file extension)
format="%Y-%m-%d_%H:%M:%S"
fileStruct="wrf2d_d01"
-coordIdxScript="$(dirname $0)/../../assets/coord_wrf_idx.ncl"
+coordIdxScript="${datatoolPath}/assets/ncl_scripts/coord_wrf_idx.ncl"
# TZ to be set to UTC to avoid invalid dates due to Daylight Saving
alias date='TZ=UTC date'
@@ -113,19 +121,20 @@ alias date='TZ=UTC date'
# expand aliases for the one stated above
shopt -s expand_aliases
+
# ===================
-# Necessary Functions
+# Necessary functions
# ===================
# Modules below available on Compute Canada (CC) Graham Cluster Server
-load_core_modules () {
+function load_core_modules () {
module -q load cdo/2.0.4;
module -q load nco/5.0.6;
}
load_core_modules # load necessary modules
-#######################################
-# useful one-liners
-#######################################
+# =================
+# Useful one-liners
+# =================
#calcualte Unix EPOCH time in seconds from 1970-01-01 00:00:00
unix_epoch () { date --date="$@" +"%s"; }
@@ -165,7 +174,7 @@ lims_to_float () { IFS=',' read -ra l <<< $@; f_arr=(); for i in "${l[@]}"; do f
# 2: -> fDate: date of the forcing
# 3: -> fTime: time of the forcing
#######################################
-generate_netcdf () {
+function generate_netcdf () {
# defining local variables
local fName="$1" # raw file name string
@@ -216,7 +225,7 @@ generate_netcdf () {
# e) fileNameDay
# f) fileNameTime
#######################################
-extract_file_info () {
+function extract_file_info () {
# define local variable for input argument
local fPath="$1" # format: "/path/to/file/wrf2d_d01_YYYY-MM-DD_HH:MM:SS"
@@ -252,7 +261,7 @@ extract_file_info () {
# 3: the position within the matching
# string split by '-'
#######################################
-date_match_idx () {
+function date_match_idx () {
# defining local variables
local str="$1" # string to be matched
@@ -294,7 +303,7 @@ date_match_idx () {
# produces $fName_cat.nc under $fDir
# out of all elements of $filesArr
#######################################
-concat_files () {
+function concat_files () {
# defining local variables
local fName="$1" # output file name
local fTempDir="$2" # temporary directory
@@ -338,7 +347,7 @@ concat_files () {
# 4) uniqueMonthsArr
# 5) unqiueDatesArr
#######################################
-populate_date_arrays () {
+function populate_date_arrays () {
# defining empty arrays
datesArr=();
monthsArr=();
@@ -359,7 +368,7 @@ populate_date_arrays () {
# ===============
-# Data Processing
+# Data processing
# ===============
# display info
echo "$(basename $0): processing NCAR-GWF CONUSI..."
diff --git a/scripts/era5/LICENSE b/scripts/gwf-ncar-conus_ii/LICENSE
similarity index 100%
rename from scripts/era5/LICENSE
rename to scripts/gwf-ncar-conus_ii/LICENSE
diff --git a/scripts/conus_ii/README.md b/scripts/gwf-ncar-conus_ii/README.md
similarity index 100%
rename from scripts/conus_ii/README.md
rename to scripts/gwf-ncar-conus_ii/README.md
diff --git a/scripts/conus_ii/conus_ii.sh b/scripts/gwf-ncar-conus_ii/conus_ii.sh
similarity index 92%
rename from scripts/conus_ii/conus_ii.sh
rename to scripts/gwf-ncar-conus_ii/conus_ii.sh
index 1581cfe..dcfcf84 100755
--- a/scripts/conus_ii/conus_ii.sh
+++ b/scripts/gwf-ncar-conus_ii/conus_ii.sh
@@ -1,6 +1,7 @@
#!/bin/bash
# Meteorological Data Processing Workflow
-# Copyright (C) 2022, University of Saskatchewan
+# Copyright (C) 2022-2023, University of Saskatchewan
+# Copyright (C) 2023-2024, University of Calgary
#
# This file is part of Meteorological Data Processing Workflow
#
@@ -20,7 +21,8 @@
# =========================
# Credits and contributions
# =========================
-# 1. Parts of the code are taken from https://www.shellscript.sh/tips/getopt/index.html
+# 1. Parts of the code are taken from
+# https://www.shellscript.sh/tips/getopt/index.html
# 2. Dr. Zhenhua Li provided scripts to extract and process CONUSII datasets
# 3. Dr. Shervan Gharari produced the netCDF file containing XLAT and XLONG
# coordinate variables put under /assets/coord_XLAT_XLONG_conus_i.nc.
@@ -35,14 +37,14 @@
# ===============
-# Usage Functions
+# Usage functions
# ===============
short_usage() {
echo "usage: $(basename $0) [-io DIR] [-v VARS] [-se DATE] [-t CHAR] [-ln REAL,REAL]"
}
# argument parsing using getopt - WORKS ONLY ON LINUX BY DEFAULT
-parsedArguments=$(getopt -a -n extract-dataset -o i:v:o:s:e:t:l:n:c:p:m: --long dataset-dir:,variables:,output-dir:,start-date:,end-date:,time-scale:,lat-lims:,lon-lims:,cache:,prefix:,ensemble: -- "$@")
+parsedArguments=$(getopt -a -n conus-ii -o i:v:o:s:e:t:l:n:c:p:m:S:M: --long dataset-dir:,variables:,output-dir:,start-date:,end-date:,time-scale:,lat-lims:,lon-lims:,cache:,prefix:,ensemble:,scenario:,model: -- "$@")
validArguments=$?
if [ "$validArguments" != "0" ]; then
short_usage;
@@ -71,6 +73,8 @@ do
-c | --cache) cacheDir="$2" ; shift 2 ;; # required
-p | --prefix) prefix="$2" ; shift 2 ;; # required
-m | --ensemble) ensemble="$2" ; shift 2 ;; # redundant - added for compatibility
+ -S | --scenario) scenario="$2" ; shift 2 ;; # redundant - added for compatibility
+ -M | --model) model="$2" ; shift 2 ;; # redundant - added for compatibility
# -- means the end of the arguments; drop this, and break out of the while loop
--) shift; break ;;
@@ -83,20 +87,24 @@ do
done
# check if $ensemble is provided
-if [[ -n "$ensemble" ]]; then
- echo "ERROR $(basename $0): redundant argument (ensemble) provided";
+if [[ -n "$ensemble" ]] || \
+ [[ -n "$scenario" ]] || \
+ [[ -n "$model" ]]; then
+ echo "ERROR $(basename $0): redundant argument provided";
exit 1;
fi
+
# =====================
-# Necessary Assumptions
+# Necessary assumptions
# =====================
# hard-coding the address of the co-ordinate NetCDF files
# containing XLAT and XLONG variables each having dimensions
# of "south_north" and "west_east".
+datatoolPath="$(dirname $0)/../../" # datatool's path
coordMainFile="/project/rpp-kshook/Model_Output/wrf-conus/CONUSII/hist/wrf04km_coord.nc"
-coordEssFile="$(dirname $0)/../../assets/coord_XLAT_XLONG_conus_ii.nc"
+coordEssFile="${datatoolPath}/assets/nc_coords/coord_XLAT_XLONG_conus_ii.nc"
latVar="south_north"
lonVar="west_east"
@@ -105,7 +113,7 @@ format="%Y-%m-%d_%H:%M:%S"
tarFormat="%Y%m%d"
fileStruct="wrf2d_d01"
tarFileStruct="wrf2d_conusii"
-coordIdxScript="$(dirname $0)/../../assets/coord_wrf_idx.ncl"
+coordIdxScript="${datatoolPath}/assets/ncl_scripts/coord_wrf_idx.ncl"
# TZ to be set to UTC to avoid invalid dates due to Daylight Saving
alias date='TZ=UTC date'
@@ -115,18 +123,18 @@ shopt -s expand_aliases
# ===================
-# Necessary Functions
+# Necessary functions
# ===================
# Modules below available on Compute Canada (CC) Graham Cluster Server
-load_core_modules () {
+function load_core_modules () {
module -q load cdo/2.0.4;
module -q load nco/5.0.6;
}
load_core_modules # load necessary modules
-#######################################
-# useful one-liners
-#######################################
+# =================
+# Useful one-liners
+# =================
#calcualte Unix EPOCH time in seconds from 1970-01-01 00:00:00
unix_epoch () { date --date="$@" +"%s"; }
@@ -166,7 +174,7 @@ lims_to_float () { IFS=',' read -ra l <<< $@; f_arr=(); for i in "${l[@]}"; do f
# 2: -> fDate: date of the forcing
# 3: -> fTime: time of the forcing
#######################################
-generate_netcdf () {
+function generate_netcdf () {
# defining local variables
local fName="$1" # raw file name string
@@ -217,7 +225,7 @@ generate_netcdf () {
# e) fileNameDay
# f) fileNameTime
#######################################
-extract_file_info () {
+function extract_file_info () {
# define local variable for input argument
local fPath="$1" # format: "/path/to/file/wrf2d_d01_YYYY-MM-DD_HH:MM:SS"
@@ -253,7 +261,7 @@ extract_file_info () {
# 3: the position within the matching
# string split by '-'
#######################################
-date_match_idx () {
+function date_match_idx () {
# defining local variables
local str="$1" # string to be matched
@@ -295,7 +303,7 @@ date_match_idx () {
# produces $fName_cat.nc under $fDir
# out of all elements of $filesArr
#######################################
-concat_files () {
+function concat_files () {
# defining local variables
local fName="$1" # output file name
local fTempDir="$2" # temporary directory
@@ -339,7 +347,7 @@ concat_files () {
# 4) uniqueMonthsArr
# 5) unqiueDatesArr
#######################################
-populate_date_arrays () {
+function populate_date_arrays () {
# defining empty arrays
datesArr=();
monthsArr=();
@@ -360,7 +368,7 @@ populate_date_arrays () {
# ===============
-# Data Processing
+# Data processing
# ===============
# display info
echo "$(basename $0): processing NCAR-GWF CONUSII..."
diff --git a/scripts/hybrid_obs/README.md b/scripts/hybrid_obs/README.md
deleted file mode 100644
index f16e2ad..0000000
--- a/scripts/hybrid_obs/README.md
+++ /dev/null
@@ -1,59 +0,0 @@
-# Alberta Government `Hybrid Observation` Dataset
-In this file, the details of the dataset is explained.
-
-## Location of Dataset Files
-The downscaled `Hybrid observation` dataset is located under the following directory accessible from Digital Research Alliance of Canada (DRA) Graham Cluster:
-```
-/project/rpp-kshook/Climate_Forcing_Data/meteorological-data/hybrid_obs
-```
-and the structure of the dataset hourly files is as following:
-```console
-/project/rpp-kshook/CompHydCore/climateForcingData/ERA5/ERA5_for_SUMMA/2_merged_data
-├── Hybrid_Daily_BCABSK_US_pr_1950.nc
-├── .
-├── .
-├── .
-├── Hybrid_Daily_BCABSK_US_pr_2019.nc
-├── Hybrid_Daily_BCABSK_US_tmax_1950.nc
-├── .
-├── .
-├── .
-├── Hybrid_Daily_BCABSK_US_%var_%yr.nc
-├── .
-├── .
-├── .
-├── Hybrid_Daily_BCABSK_US_tmax_2019.nc
-├── Hybrid_Daily_BCABSK_US_tmin_1950.nc
-├── .
-├── .
-├── .
-└── Hybrid_Daily_BCABSK_US_tmin_2019.nc
-```
-
-## Coordinate Variables and Time-stamps
-
-### Coordinate Variables
-The coordinate variables of the `Hybrid Observation` datasets are `lon` and `lat` representing the longitude and latitude points, respectively.
-### Time-stamps
-The time-stamps are included in the original files. The data is avilable on a daily time-scale.
-
-## Dataset Variables
-The NetCDF files of the dataset contain 1 variable. You may see a list of variables by browsing the files.
-
-## Spatial Extent
-The spatial extent of the `Hybrid Observation` is on latitutes from `+45.95` to `60.25` and longitudes from `-128.05` to `-106.05`. The resolution is 0.1 degrees.
-
-## Temporal Extent
-The time-steps are daily covering from January 1950 to December 2019.
-
-## Short Description on `ERA5` Variables
-|Variable Name |ERA5 Variable |Unit |Comments |
-|---------------------|-------------------|-------|--------------------|
-|precipitation |pr |mm/day | |
-|maximum temperature |tmax |degC | |
-|minimum temperature |tmin |degC | |
-
-For a complete description of the dataset, see [here](https://doi.org/10.5194/hess-23-5151-2019).
-
-## Downloading Original `Hybrid Observation` Data
-The data can be requested to download from 'hyung.eum AT gov.ab.ca'.
diff --git a/scripts/hybrid_obs/hybrid_obs.sh b/scripts/hybrid_obs/hybrid_obs.sh
deleted file mode 100755
index 1b33ee5..0000000
--- a/scripts/hybrid_obs/hybrid_obs.sh
+++ /dev/null
@@ -1,198 +0,0 @@
-#!/bin/bash
-# Meteorological Data Processing Workflow
-# Copyright (C) 2022, University of Saskatchewan
-# Copyright (C) 2023, University of Calgary
-#
-# This file is part of Meteorological Data Processing Workflow
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see .
-
-# =========================
-# Credits and contributions
-# =========================
-# 1. Parts of the code are taken from https://www.shellscript.sh/tips/getopt/index.html
-
-
-# ================
-# General comments
-# ================
-# * All variables are camelCased for distinguishing from function names;
-# * function names are all in lower_case with words seperated by underscore for legibility;
-# * shell style is based on Google Open Source Projects'
-# Style Guide: https://google.github.io/styleguide/shellguide.html
-
-
-# ===============
-# Usage Functions
-# ===============
-short_usage() {
- echo "usage: $(basename $0) [-cio DIR] [-v VARS] [-se DATE] [-t CHAR] [-ln REAL,REAL] [-p STR]"
-}
-
-
-# argument parsing using getopt - WORKS ONLY ON LINUX BY DEFAULT
-parsedArguments=$(getopt -a -n hybrid_obs -o i:v:o:s:e:t:l:n:p:c:m: --long dataset-dir:,variables:,output-dir:,start-date:,end-date:,time-scale:,lat-lims:,lon-lims:,prefix:,cache:,ensemble: -- "$@")
-validArguments=$?
-if [ "$validArguments" != "0" ]; then
- short_usage;
- exit 1;
-fi
-
-# check if no options were passed
-if [ $# -eq 0 ]; then
- echo "ERROR $(basename $0): arguments missing";
- exit 1;
-fi
-
-# check long and short options passed
-eval set -- "$parsedArguments"
-while :
-do
- case "$1" in
- -i | --dataset-dir) datasetDir="$2" ; shift 2 ;; # required
- -v | --variables) variables="$2" ; shift 2 ;; # required
- -o | --output-dir) outputDir="$2" ; shift 2 ;; # required
- -s | --start-date) startDate="$2" ; shift 2 ;; # required
- -e | --end-date) endDate="$2" ; shift 2 ;; # required
- -t | --time-scale) timeScale="$2" ; shift 2 ;; # redundant - added for compatibility
- -l | --lat-lims) latLims="$2" ; shift 2 ;; # required
- -n | --lon-lims) lonLims="$2" ; shift 2 ;; # required
- -p | --prefix) prefix="$2" ; shift 2 ;; # optional
- -c | --cache) cache="$2" ; shift 2 ;; # redundant - added for compatibility
- -m | --ensemble) ensemble="$2" ; shift 2 ;; # redundant - added for compatibility
-
- # -- means the end of the arguments; drop this, and break out of the while loop
- --) shift; break ;;
-
- # in case of invalid option
- *)
- echo "ERROR $(basename $0): invalid option '$1'";
- short_usage; exit 1 ;;
- esac
-done
-
-# check if ensemble is given
-if [[ -n $ensemble ]]; then
- echo "$(basename $0): ERROR! This dataset dos not have any ensemble" \
- "members."
-fi
-
-# check the prefix of not set
-if [[ -z $prefix ]]; then
- prefix="data"
-fi
-
-
-# =====================
-# Necessary Assumptions
-# =====================
-# TZ to be set to UTC to avoid invalid dates due to Daylight Saving
-alias date='TZ=UTC date'
-
-# expand aliases for the one stated above
-shopt -s expand_aliases
-
-
-# ==========================
-# Necessary Global Variables
-# ==========================
-format="%Y-%m-%dT%H:%M:%S" # date format
-filePrefix="Hybrid_Daily_BCABSK_US" # source dataset files' suffix constant
-
-latVar="lat"
-lonVar="lon"
-timeVar="time"
-
-# ===================
-# Necessary Functions
-# ===================
-# Modules below available on Compute Canada (CC) Graham Cluster Server
-load_core_modules () {
- module -q load cdo/2.0.4
- module -q load nco/5.0.6
-}
-load_core_modules
-
-
-#######################################
-# useful one-liners
-#######################################
-#calcualte Unix EPOCH time in seconds from 1970-01-01 00:00:00
-unix_epoch () { date --date="$@" +"%s"; }
-
-#check whether the input is float or real
-check_real () { if [[ "$1" == *'.'* ]]; then echo 'float'; else echo 'int'; fi; }
-
-#convert to float if the number is 'int'
-to_float () { if [[ $(check_real $1) == 'int' ]]; then printf "%.1f" "$1"; echo; else printf "%.5f" "$1"; echo; fi; }
-
-#join array element by the specified delimiter
-join_by () { local IFS="$1"; shift; echo "$*"; }
-
-#to_float the latLims and lonLims, real numbers delimited by ','
-lims_to_float () { IFS=',' read -ra l <<< $@; f_arr=(); for i in "${l[@]}"; do f_arr+=($(to_float $i)); done; echo $(join_by , "${f_arr[@]}"); }
-
-# log date format
-log_date () { echo "($(date +"%Y-%m-%d %H:%M:%S")) "; }
-
-
-# ===============
-# Data Processing
-# ===============
-# display info
-echo "$(log_date)$(basename $0): processing hybrid_obs dataset..."
-
-# make the output directory
-echo "$(log_date)$(basename $0): creating output directory under $outputDir"
-mkdir -p "$outputDir"
-
-# make array of ensemble members
-if [[ -n "$ensemble" ]]; then
- IFS=',' read -ra ensembleArr <<< "$(echo "$ensemble")" # comma separated input
-else
- :
-fi
-
-# define necessary dates
-startYear=$(date --date="$startDate" +"%Y") # start year
-endYear=$(date --date="$endDate" +"%Y") # end year
-yearsRange=$(seq $startYear $endYear)
-
-# make variableArr of the comma-separated values of variables
-IFS=',' read -ra variablesArr <<< "$(echo "$variables")"
-
-# loop over variables
-for var in "${variablesArr[@]}"; do
- # loop over years
- for yr in $yearsRange; do
- # extract variables and spatially and temporally subset
- ncks -O \
- -d "$latVar",$(lims_to_float "$latLims") \
- -d "$lonVar",$(lims_to_float "$lonLims") \
- -v "$var" \
- "$datasetDir/${filePrefix}_${var}_${yr}.nc" \
- "$outputDir/${prefix}${filePrefix}_${var}_${yr}.nc"
- done
-
- # wait to assure the background processes in `for` loop is finished
- wait
-
-done
-
-# wait to assure the background processes in `for` loop is finished
-wait
-
-# printing final prompt
-echo "$(log_date)$(basename $0): results are produced under $outputDir."
-
diff --git a/scripts/ipsl_cm6a_lr/ipsl_cm6a_lr.sh b/scripts/ipsl_cm6a_lr/ipsl_cm6a_lr.sh
deleted file mode 100755
index 23e82c7..0000000
--- a/scripts/ipsl_cm6a_lr/ipsl_cm6a_lr.sh
+++ /dev/null
@@ -1,197 +0,0 @@
-#!/bin/bash
-# Meteorological Data Processing Workflow
-# Copyright (C) 2022, University of Saskatchewan
-# Copyright (C) 2023, University of Calgary
-#
-# This file is part of Meteorological Data Processing Workflow
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see .
-
-# =========================
-# Credits and contributions
-# =========================
-# 1. Parts of the code are taken from https://www.shellscript.sh/tips/getopt/index.html
-
-
-# ================
-# General comments
-# ================
-# * All variables are camelCased for distinguishing from function names;
-# * function names are all in lower_case with words seperated by underscore for legibility;
-# * shell style is based on Google Open Source Projects'
-# Style Guide: https://google.github.io/styleguide/shellguide.html
-
-
-# ===============
-# Usage Functions
-# ===============
-short_usage() {
- echo "usage: $(basename $0) [-cio DIR] [-v VARS] [-se DATE] [-t CHAR] [-ln REAL,REAL] [-p STR]"
-}
-
-
-# argument parsing using getopt - WORKS ONLY ON LINUX BY DEFAULT
-parsedArguments=$(getopt -a -n ipsl_cm6a_lr -o i:v:o:s:e:t:l:n:p:c:m: --long dataset-dir:,variables:,output-dir:,start-date:,end-date:,time-scale:,lat-lims:,lon-lims:,prefix:,cache:,ensemble: -- "$@")
-validArguments=$?
-if [ "$validArguments" != "0" ]; then
- short_usage;
- exit 1;
-fi
-
-# check if no options were passed
-if [ $# -eq 0 ]; then
- echo "ERROR $(basename $0): arguments missing";
- exit 1;
-fi
-
-# check long and short options passed
-eval set -- "$parsedArguments"
-while :
-do
- case "$1" in
- -i | --dataset-dir) datasetDir="$2" ; shift 2 ;; # required
- -v | --variables) variables="$2" ; shift 2 ;; # required
- -o | --output-dir) outputDir="$2" ; shift 2 ;; # required
- -s | --start-date) startDate="$2" ; shift 2 ;; # required
- -e | --end-date) endDate="$2" ; shift 2 ;; # required
- -t | --time-scale) timeScale="$2" ; shift 2 ;; # redundant - added for compatibility
- -l | --lat-lims) latLims="$2" ; shift 2 ;; # required
- -n | --lon-lims) lonLims="$2" ; shift 2 ;; # required
- -p | --prefix) prefix="$2" ; shift 2 ;; # optional
- -c | --cache) cache="$2" ; shift 2 ;; # redundant - added for compatibility
- -m | --ensemble) ensemble="$2" ; shift 2 ;; # required
-
- # -- means the end of the arguments; drop this, and break out of the while loop
- --) shift; break ;;
-
- # in case of invalid option
- *)
- echo "ERROR $(basename $0): invalid option '$1'";
- short_usage; exit 1 ;;
- esac
-done
-
-# check the prefix of not set
-if [[ -z $prefix ]]; then
- prefix="data"
-fi
-
-
-# =====================
-# Necessary Assumptions
-# =====================
-# TZ to be set to UTC to avoid invalid dates due to Daylight Saving
-alias date='TZ=UTC date'
-
-# expand aliases for the one stated above
-shopt -s expand_aliases
-
-
-# ==========================
-# Necessary Global Variables
-# ==========================
-format="%Y-%m-%dT%H:%M:%S" # date format
-filePrefix="Downscaled_IPSL-CM6A-LR_MBCDS" # source dataset files' suffix constant
-fileSuffix="pr_tmn_tmx" # suffix before the date format
-
-latVar="lat"
-lonVar="lon"
-timeVar="time"
-
-# ===================
-# Necessary Functions
-# ===================
-# Modules below available on Compute Canada (CC) Graham Cluster Server
-load_core_modules () {
- module -q load cdo/2.0.4
- module -q load nco/5.0.6
-}
-load_core_modules
-
-
-#######################################
-# useful one-liners
-#######################################
-#calcualte Unix EPOCH time in seconds from 1970-01-01 00:00:00
-unix_epoch () { date --date="$@" +"%s"; }
-
-#check whether the input is float or real
-check_real () { if [[ "$1" == *'.'* ]]; then echo 'float'; else echo 'int'; fi; }
-
-#convert to float if the number is 'int'
-to_float () { if [[ $(check_real $1) == 'int' ]]; then printf "%.1f" "$1"; echo; else printf "%.5f" "$1"; echo; fi; }
-
-#join array element by the specified delimiter
-join_by () { local IFS="$1"; shift; echo "$*"; }
-
-#to_float the latLims and lonLims, real numbers delimited by ','
-lims_to_float () { IFS=',' read -ra l <<< $@; f_arr=(); for i in "${l[@]}"; do f_arr+=($(to_float $i)); done; echo $(join_by , "${f_arr[@]}"); }
-
-# log date format
-log_date () { echo "($(date +"%Y-%m-%d %H:%M:%S")) "; }
-
-
-# ===============
-# Data Processing
-# ===============
-# display info
-echo "$(log_date)$(basename $0): processing IPSL-CM6A-LR dataset..."
-
-# make the output directory
-echo "$(log_date)$(basename $0): creating output directory under $outputDir"
-mkdir -p "$outputDir"
-
-# make array of ensemble members
-if [[ -n "$ensemble" ]]; then
- IFS=',' read -ra ensembleArr <<< "$(echo "$ensemble")" # comma separated input
-else
- # if nothing has been entred, throw an error and exit
- echo "$(log_date)$(basename $0): ERROR! --ensemble argument does not" \
- "have valid value(s)"
- # exit the script
- exit 1;
-fi
-
-# define necessary dates
-startYear=$(date --date="$startDate" +"%Y") # start year
-endYear=$(date --date="$endDate" +"%Y") # end year
-yearsRange=$(seq $startYear $endYear)
-
-# make variable string for output file creation
-IFS=',' read -ra variablesArr <<< "$(echo "$variables")" # array for vars
-varStr=$(join_by "_" "${variablesArr[@]}")
-
-for member in "${ensembleArr[@]}"; do
- # creating yearly directory
- echo "$(log_date)$(basename $0): processing member $member"
-
- # loop over years
- for yr in $yearsRange; do
- # extract variables and spatially and temporally subset
- ncks -O \
- -d "$latVar",$(lims_to_float "$latLims") \
- -d "$lonVar",$(lims_to_float "$lonLims") \
- -v "$variables" \
- "$datasetDir/${filePrefix}_${member}_${fileSuffix}_${yr}.nc" \
- "$outputDir/${prefix}${filePrefix}_${member}_${varStr}_${yr}.nc"
- done
-
- # wait to assure the `for` loop is finished
- wait
-
-done
-
-# printing final prompt
-echo "$(log_date)$(basename $0): results are produced under $outputDir."
-
diff --git a/scripts/mri_esm2_0/mri_esm2_0.sh b/scripts/mri_esm2_0/mri_esm2_0.sh
deleted file mode 100755
index 968b288..0000000
--- a/scripts/mri_esm2_0/mri_esm2_0.sh
+++ /dev/null
@@ -1,197 +0,0 @@
-#!/bin/bash
-# Meteorological Data Processing Workflow
-# Copyright (C) 2022, University of Saskatchewan
-# Copyright (C) 2023, University of Calgary
-#
-# This file is part of Meteorological Data Processing Workflow
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see .
-
-# =========================
-# Credits and contributions
-# =========================
-# 1. Parts of the code are taken from https://www.shellscript.sh/tips/getopt/index.html
-
-
-# ================
-# General comments
-# ================
-# * All variables are camelCased for distinguishing from function names;
-# * function names are all in lower_case with words seperated by underscore for legibility;
-# * shell style is based on Google Open Source Projects'
-# Style Guide: https://google.github.io/styleguide/shellguide.html
-
-
-# ===============
-# Usage Functions
-# ===============
-short_usage() {
- echo "usage: $(basename $0) [-cio DIR] [-v VARS] [-se DATE] [-t CHAR] [-ln REAL,REAL] [-p STR]"
-}
-
-
-# argument parsing using getopt - WORKS ONLY ON LINUX BY DEFAULT
-parsedArguments=$(getopt -a -n mri_esm2_0 -o i:v:o:s:e:t:l:n:p:c:m: --long dataset-dir:,variables:,output-dir:,start-date:,end-date:,time-scale:,lat-lims:,lon-lims:,prefix:,cache:,ensemble: -- "$@")
-validArguments=$?
-if [ "$validArguments" != "0" ]; then
- short_usage;
- exit 1;
-fi
-
-# check if no options were passed
-if [ $# -eq 0 ]; then
- echo "ERROR $(basename $0): arguments missing";
- exit 1;
-fi
-
-# check long and short options passed
-eval set -- "$parsedArguments"
-while :
-do
- case "$1" in
- -i | --dataset-dir) datasetDir="$2" ; shift 2 ;; # required
- -v | --variables) variables="$2" ; shift 2 ;; # required
- -o | --output-dir) outputDir="$2" ; shift 2 ;; # required
- -s | --start-date) startDate="$2" ; shift 2 ;; # required
- -e | --end-date) endDate="$2" ; shift 2 ;; # required
- -t | --time-scale) timeScale="$2" ; shift 2 ;; # redundant - added for compatibility
- -l | --lat-lims) latLims="$2" ; shift 2 ;; # required
- -n | --lon-lims) lonLims="$2" ; shift 2 ;; # required
- -p | --prefix) prefix="$2" ; shift 2 ;; # optional
- -c | --cache) cache="$2" ; shift 2 ;; # redundant - added for compatibility
- -m | --ensemble) ensemble="$2" ; shift 2 ;; # required
-
- # -- means the end of the arguments; drop this, and break out of the while loop
- --) shift; break ;;
-
- # in case of invalid option
- *)
- echo "ERROR $(basename $0): invalid option '$1'";
- short_usage; exit 1 ;;
- esac
-done
-
-# check the prefix of not set
-if [[ -z $prefix ]]; then
- prefix="data"
-fi
-
-
-# =====================
-# Necessary Assumptions
-# =====================
-# TZ to be set to UTC to avoid invalid dates due to Daylight Saving
-alias date='TZ=UTC date'
-
-# expand aliases for the one stated above
-shopt -s expand_aliases
-
-
-# ==========================
-# Necessary Global Variables
-# ==========================
-format="%Y-%m-%dT%H:%M:%S" # date format
-filePrefix="Downscaled_MRI-ESM2-0_MBCDS" # source dataset files' suffix constant
-fileSuffix="pr_tmn_tmx" # suffix before the date format
-
-latVar="lat"
-lonVar="lon"
-timeVar="time"
-
-# ===================
-# Necessary Functions
-# ===================
-# Modules below available on Compute Canada (CC) Graham Cluster Server
-load_core_modules () {
- module -q load cdo/2.0.4
- module -q load nco/5.0.6
-}
-load_core_modules
-
-
-#######################################
-# useful one-liners
-#######################################
-#calcualte Unix EPOCH time in seconds from 1970-01-01 00:00:00
-unix_epoch () { date --date="$@" +"%s"; }
-
-#check whether the input is float or real
-check_real () { if [[ "$1" == *'.'* ]]; then echo 'float'; else echo 'int'; fi; }
-
-#convert to float if the number is 'int'
-to_float () { if [[ $(check_real $1) == 'int' ]]; then printf "%.1f" "$1"; echo; else printf "%.5f" "$1"; echo; fi; }
-
-#join array element by the specified delimiter
-join_by () { local IFS="$1"; shift; echo "$*"; }
-
-#to_float the latLims and lonLims, real numbers delimited by ','
-lims_to_float () { IFS=',' read -ra l <<< $@; f_arr=(); for i in "${l[@]}"; do f_arr+=($(to_float $i)); done; echo $(join_by , "${f_arr[@]}"); }
-
-# log date format
-log_date () { echo "($(date +"%Y-%m-%d %H:%M:%S")) "; }
-
-
-# ===============
-# Data Processing
-# ===============
-# display info
-echo "$(log_date)$(basename $0): processing MRI-ESM2-0 dataset..."
-
-# make the output directory
-echo "$(log_date)$(basename $0): creating output directory under $outputDir"
-mkdir -p "$outputDir"
-
-# make array of ensemble members
-if [[ -n "$ensemble" ]]; then
- IFS=',' read -ra ensembleArr <<< "$(echo "$ensemble")" # comma separated input
-else
- # if nothing has been entred, throw an error and exit
- echo "$(log_date)$(basename $0): ERROR! --ensemble argument does not" \
- "have valid value(s)"
- # exit the script
- exit 1;
-fi
-
-# define necessary dates
-startYear=$(date --date="$startDate" +"%Y") # start year
-endYear=$(date --date="$endDate" +"%Y") # end year
-yearsRange=$(seq $startYear $endYear)
-
-# make variable string for output file creation
-IFS=',' read -ra variablesArr <<< "$(echo "$variables")" # array for vars
-varStr=$(join_by "_" "${variablesArr[@]}")
-
-for member in "${ensembleArr[@]}"; do
- # creating yearly directory
- echo "$(log_date)$(basename $0): processing member $member"
-
- # loop over years
- for yr in $yearsRange; do
- # extract variables and spatially and temporally subset
- ncks -O \
- -d "$latVar",$(lims_to_float "$latLims") \
- -d "$lonVar",$(lims_to_float "$lonLims") \
- -v "$variables" \
- "$datasetDir/${filePrefix}_${member}_${fileSuffix}_${yr}.nc" \
- "$outputDir/${prefix}${filePrefix}_${member}_${varStr}_${yr}.nc"
- done
-
- # wait to assure the `for` loop is finished
- wait
-
-done
-
-# printing final prompt
-echo "$(log_date)$(basename $0): results are produced under $outputDir."
-
diff --git a/scripts/nasa-nex-gddp-cmip6/README.md b/scripts/nasa-nex-gddp-cmip6/README.md
new file mode 100644
index 0000000..dfc691a
--- /dev/null
+++ b/scripts/nasa-nex-gddp-cmip6/README.md
@@ -0,0 +1,305 @@
+# NASA NEX-GDDP-CMIP6 Climate Dataset (`nex-gddp-cmip6`)
+In this file, the details of the dataset are explained.
+
+## Location of Dataset Files
+The `nex-gddp-cmip6` dataset is located under the following directory, accessible from the Compute Canada (CC) Graham Cluster:
+```console
+/project/rrg-mclark/data/meteorological-data/nasa-nex-gddp-cmip6/NEX-GDDP-CMIP6 # rrg-mclark allocation
+```
+
+and the structure of the dataset's yearly files (containing daily time-steps) is as follows:
+```console
+/project/rrg-mclark/data/meteorological-data/nasa-nex-gddp-cmip6/NEX-GDDP-CMIP6/
+├── ACCESS-CM2
+│ ├── historical
+│ │ └── r1i1p1f1
+│ │ ├── hurs
+│ │ │ ├── hurs_day_ACCESS-CM2_historical_r1i1p1f1_gn_1950.nc
+| | | ├── hurs_day_ACCESS-CM2_historical_r1i1p1f1_gn_1950_v1.1.nc
+│ │ │ ├── hurs_day_ACCESS-CM2_historical_r1i1p1f1_gn_1951.nc
+│ │ │ ├── hurs_day_ACCESS-CM2_historical_r1i1p1f1_gn_1951_v1.1.nc
+│ │ │ ├── .
+│ │ │ ├── .
+│ │ │ ├── .
+│ | │ ├── hurs_day_ACCESS-CM2_historical_r1i1p1f1_gn_2014.nc
+│ | │ └── hurs_day_ACCESS-CM2_historical_r1i1p1f1_gn_2014_v1.1.nc
+│ | ├── huss
+│ | | ├── huss_day_ACCESS-CM2_historical_r1i1p1f1_gn_1950.nc
+│ | | ├── huss_day_ACCESS-CM2_historical_r1i1p1f1_gn_1951.nc
+| | | ├── huss_day_ACCESS-CM2_historical_r1i1p1f1_gn_1952.nc
+| | | ├── .
+| | | ├── .
+| | | ├── .
+| | | ├── huss_day_ACCESS-CM2_historical_r1i1p1f1_gn_2013.nc
+| | | └── huss_day_ACCESS-CM2_historical_r1i1p1f1_gn_2014.nc
+| | ├── pr
+│ | | ├── pr_day_ACCESS-CM2_historical_r1i1p1f1_gn_1950.nc
+│ | | ├── pr_day_ACCESS-CM2_historical_r1i1p1f1_gn_1951.nc
+| | | ├── pr_day_ACCESS-CM2_historical_r1i1p1f1_gn_1952.nc
+| | | ├── .
+| | | ├── .
+| | | ├── .
+| | | ├── pr_day_ACCESS-CM2_historical_r1i1p1f1_gn_2013.nc
+| | | └── pr_day_ACCESS-CM2_historical_r1i1p1f1_gn_2014.nc
+| | ├── rlds
+│ | | ├── rlds_day_ACCESS-CM2_historical_r1i1p1f1_gn_1950.nc
+│ | | ├── rlds_day_ACCESS-CM2_historical_r1i1p1f1_gn_1951.nc
+| | | ├── rlds_day_ACCESS-CM2_historical_r1i1p1f1_gn_1952.nc
+| | | ├── .
+| | | ├── .
+| | | ├── .
+| | | ├── rlds_day_ACCESS-CM2_historical_r1i1p1f1_gn_2013.nc
+| | | └── rlds_day_ACCESS-CM2_historical_r1i1p1f1_gn_2014.nc
+| | ├── rsds
+│ | | ├── rsds_day_ACCESS-CM2_historical_r1i1p1f1_gn_1950.nc
+│ | | ├── rsds_day_ACCESS-CM2_historical_r1i1p1f1_gn_1951.nc
+| | | ├── rsds_day_ACCESS-CM2_historical_r1i1p1f1_gn_1952.nc
+| | | ├── .
+| | | ├── .
+| | | ├── .
+| | | ├── rsds_day_ACCESS-CM2_historical_r1i1p1f1_gn_2013.nc
+| | | └── rsds_day_ACCESS-CM2_historical_r1i1p1f1_gn_2014.nc
+| | ├── tas
+│ | | ├── tas_day_ACCESS-CM2_historical_r1i1p1f1_gn_1950.nc
+│ | | ├── tas_day_ACCESS-CM2_historical_r1i1p1f1_gn_1951.nc
+| | | ├── tas_day_ACCESS-CM2_historical_r1i1p1f1_gn_1952.nc
+| | | ├── .
+| | | ├── .
+| | | ├── .
+| | | ├── tas_day_ACCESS-CM2_historical_r1i1p1f1_gn_2013.nc
+| | | └── tas_day_ACCESS-CM2_historical_r1i1p1f1_gn_2014.nc
+| | ├── tasmax
+│ | | ├── tasmax_day_ACCESS-CM2_historical_r1i1p1f1_gn_1950.nc
+│ | | ├── tasmax_day_ACCESS-CM2_historical_r1i1p1f1_gn_1951.nc
+| | | ├── tasmax_day_ACCESS-CM2_historical_r1i1p1f1_gn_1952.nc
+| | | ├── .
+| | | ├── .
+| | | ├── .
+| | | ├── tasmax_day_ACCESS-CM2_historical_r1i1p1f1_gn_2013.nc
+| | | └── tasmax_day_ACCESS-CM2_historical_r1i1p1f1_gn_2014.nc
+| | └── tasmin
+│ | ├── tasmin_day_ACCESS-CM2_historical_r1i1p1f1_gn_1950.nc
+│ | ├── tasmin_day_ACCESS-CM2_historical_r1i1p1f1_gn_1951.nc
+| | ├── tasmin_day_ACCESS-CM2_historical_r1i1p1f1_gn_1952.nc
+| | ├── .
+| | ├── .
+| | ├── .
+| | ├── tasmin_day_ACCESS-CM2_historical_r1i1p1f1_gn_2013.nc
+| | └── tasmin_day_ACCESS-CM2_historical_r1i1p1f1_gn_2014.nc
+│ ├── ssp126
+│ | └── r1i1p1f1
+│ | ├── hurs
+│ | | ├── hurs_day_ACCESS-CM2_ssp126_r1i1p1f1_gn_2015.nc
+│ | | ├── hurs_day_ACCESS-CM2_ssp126_r1i1p1f1_gn_2015_v1.1.nc
+│ | | ├── .
+│ | | ├── .
+│ | | ├── .
+│ | | ├── hurs_day_ACCESS-CM2_ssp126_r1i1p1f1_gn_2100.nc
+│ | | └── hurs_day_ACCESS-CM2_ssp126_r1i1p1f1_gn_2100_v1.1.nc
+| | .
+| | .
+| | .
+| | └── tasmin
+| | ├── tasmin_day_ACCESS-CM2_ssp126_r1i1p1f1_gn_2015.nc
+| | ├── tasmin_day_ACCESS-CM2_ssp126_r1i1p1f1_gn_2016.nc
+| | ├── .
+| | ├── .
+| | ├── .
+| | └── tasmin_day_ACCESS-CM2_ssp126_r1i1p1f1_gn_2100.nc
+| .
+| .
+| .
+| ├── %{scenario}
+| ├── %{ensemble}
+| . . ├── %{var}
+| . . . ├── %{var}_day_ACCESS-CM2_%{scenario}_%{ensemble}_gn_%{year}%{version}.nc
+| . . . .
+| . . . .
+| . . . .
+| └── ssp585
+│ └── r1i1p1f1
+│ ├── hurs
+│ | ├── hurs_day_ACCESS-CM2_ssp126_r1i1p1f1_gn_2015.nc
+│ | ├── .
+│ | ├── .
+│ | ├── .
+│ | └── hurs_day_ACCESS-CM2_ssp126_r1i1p1f1_gn_2100.nc
+| .
+| .
+| .
+| └── tasmin
+| ├── tasmin_day_ACCESS-CM2_ssp126_r1i1p1f1_gn_2015.nc
+| ├── .
+| ├── .
+| ├── .
+| └── tasmin_day_ACCESS-CM2_ssp126_r1i1p1f1_gn_2100.nc
+.
+. .
+. .
+├── %{model}
+. ├── %{scenario}
+. . └── %{ensemble}
+. . ├── %{var}
+. . . ├── %{var}_day_%{model}_%{scenario}_%{ensemble}_gn_%{year}%{version}.nc
+. . . .
+. . . .
+. . . .
+└── UKESM1-0-LL
+ ├── historical
+ | └── r1i1p1f2
+ | ├── hurs
+ | | ├── hurs_day_UKESM1-0-LL_historical_r1i1p1f2_gn_1950.nc
+ | | ├── hurs_day_UKESM1-0-LL_historical_r1i1p1f2_gn_1950_v1.1.nc
+ | | ├── .
+ | | ├── .
+ | | ├── .
+ | | ├── hurs_day_UKESM1-0-LL_historical_r1i1p1f2_gn_2014.nc
+ | | └── hurs_day_UKESM1-0-LL_historical_r1i1p1f2_gn_2014_v1.1.nc
+ | . .
+ | . .
+ | . .
+ | └── tasmin
+ | .
+ | .
+ | └── tasmin_day_UKESM1-0-LL_historical_r1i1p1f2_gn_2014.nc
+ .
+ .
+ .
+ └── ssp585
+ └── r1i1p1f2
+ ├── hurs
+ | ├── hurs_day_UKESM1-0-LL_ssp585_r1i1p1f2_gn_2015.nc
+ | ├── .
+ | ├── .
+ | ├── .
+ | └── hurs_day_UKESM1-0-LL_ssp585_r1i1p1f2_gn_2100.nc
+ .
+ .
+ .
+ └── tasmin
+ ├── tasmin_day_UKESM1-0-LL_ssp585_r1i1p1f2_gn_2015.nc
+ .
+ .
+ .
+ └── tasmin_day_UKESM1-0-LL_ssp585_r1i1p1f2_gn_2100.nc
+```
+
+> [!important]
+> Not all models have the same number of scenarios, ensemble members, and
+> variables. Each model needs to be investigated individually.
+
+> [!caution]
+> Currently, `datatool` is NOT capable of identifying various versions of
+> dataset files. In this dataset, as can be observed above, files for `v1.1`
+> (those with `_v1.1` in their file names) are ignored.
+> This will be addressed in future versions.
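+
+A quick, illustrative way to see how many files this caveat affects is to
+count the `v1.1` files directly (this check is not part of `datatool`):
+```console
+foo@bar:~$ find /project/rrg-mclark/data/meteorological-data/nasa-nex-gddp-cmip6/NEX-GDDP-CMIP6 -name "*_v1.1.nc" | wc -l
+```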
+
+
+## `nex-gddp-cmip6` Climate Models
+This dataset offers downscaled outputs of various climate models. The table
+below summarizes the models and the relevant keywords that can be used with
+the main `datatool` script; a brief usage sketch follows the table:
+
+|# |Model (keyword for `--model`) |Scenarios (keyword for `--scenario`) |
+|---| -------------------------------|----------------------------------------------------|
+|1 |`ACCESS-CM2` |`historical`, `ssp126`, `ssp245`, `ssp370`, `ssp585`|
+|2 |`ACCESS-ESM1-5` |`historical`, `ssp126`, `ssp245`, `ssp370`, `ssp585`|
+|3 |`BCC-CSM2-MR` |`historical`, `ssp126`, `ssp245`, `ssp370`, `ssp585`|
+|4 |`CanESM5` |`historical`, `ssp126`, `ssp245`, `ssp370`, `ssp585`|
+|5 |`CESM2` |`historical`, `ssp126`, `ssp245`, `ssp370`, `ssp585`|
+|6 |`CESM2-WACCM` |`historical`, `ssp245`, `ssp585` |
+|7 |`CMCC-CM2-SR5` |`historical`, `ssp126`, `ssp245`, `ssp370`, `ssp585`|
+|8 |`CMCC-ESM2` |`historical`, `ssp126`, `ssp245`, `ssp370`, `ssp585`|
+|9 |`CNRM-CM6-1` |`historical`, `ssp126`, `ssp245`, `ssp370`, `ssp585`|
+|10 |`CNRM-ESM2-1` |`historical`, `ssp126`, `ssp245`, `ssp370`, `ssp585`|
+|11 |`EC-Earth3` |`historical`, `ssp126`, `ssp245`, `ssp370`, `ssp585`|
+|12 |`EC-Earth3-Veg-LR` |`historical`, `ssp126`, `ssp245`, `ssp370`, `ssp585`|
+|13 |`FGOALS-g3` |`historical`, `ssp126`, `ssp245`, `ssp370`, `ssp585`|
+|14 |`GFDL-CM4` |`historical`, `ssp245`, `ssp585` |
+|15 |`GFDL-CM4_gr2` |`historical`, `ssp245`, `ssp585` |
+|16 |`GFDL-ESM4` |`historical`, `ssp126`, `ssp245`, `ssp370`, `ssp585`|
+|17 |`GISS-E2-1-G` |`historical`, `ssp126`, `ssp245`, `ssp370`, `ssp585`|
+|18 |`HadGEM3-GC31-LL` |`historical`, `ssp126`, `ssp245`, `ssp370`, `ssp585`|
+|19 |`HadGEM3-GC31-MM` |`historical`, `ssp126`, `ssp245`, `ssp585` |
+|20 |`IITM-ESM` |`historical`, `ssp126`, `ssp585` |
+|21 |`INM-CM4-8` |`historical`, `ssp126`, `ssp245`, `ssp370`, `ssp585`|
+|22 |`INM-CM5-0` |`historical`, `ssp126`, `ssp245`, `ssp370`, `ssp585`|
+|23 |`IPSL-CM6A-LR` |`historical`, `ssp126`, `ssp245`, `ssp370`, `ssp585`|
+|24 |`KACE-1-0-G` |`historical`, `ssp126`, `ssp245`, `ssp370`, `ssp585`|
+|25 |`KIOST-ESM` |`historical`, `ssp126`, `ssp245`, `ssp585` |
+|26 |`MIROC6` |`historical`, `ssp126`, `ssp245`, `ssp370`, `ssp585`|
+|27 |`MIROC-ES2L` |`historical`, `ssp126`, `ssp245`, `ssp370`, `ssp585`|
+|28 |`MPI-ESM1-2-HR` |`historical`, `ssp126`, `ssp245`, `ssp370`, `ssp585`|
+|29 |`MPI-ESM1-2-LR` |`historical`, `ssp126`, `ssp245`, `ssp370`, `ssp585`|
+|30 |`MRI-ESM2-0` |`historical`, `ssp126`, `ssp245`, `ssp370`, `ssp585`|
+|31 |`NESM3` |`historical`, `ssp126`, `ssp245`, `ssp585` |
+|32 |`NorESM2-LM` |`historical`, `ssp126`, `ssp245`, `ssp370`, `ssp585`|
+|33 |`NorESM2-MM` |`historical`, `ssp126`, `ssp245`, `ssp370`, `ssp585`|
+|34 |`TaiESM1` |`historical`, `ssp126`, `ssp245`, `ssp370`, `ssp585`|
+|35 |`UKESM1-0-LL` |`historical`, `ssp126`, `ssp245`, `ssp370`, `ssp585`|
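+
+As a concrete sketch, the `nex-gddp-cmip6` script added in this changeset can
+be called directly with a model/scenario pair from the table above. The path
+matches the Graham location described earlier; the subsetting values are
+illustrative only (longitudes are assumed here to follow the dataset files'
+0-360 convention):
+```console
+foo@bar:~$ ./scripts/nasa-nex-gddp-cmip6/nex-gddp-cmip6.sh \
+  --dataset-dir="/project/rrg-mclark/data/meteorological-data/nasa-nex-gddp-cmip6/NEX-GDDP-CMIP6" \
+  --model="ACCESS-CM2" \
+  --scenario="ssp245" \
+  --variable="pr,tas" \
+  --start-date="2015-01-01" \
+  --end-date="2020-12-31" \
+  --lat-lims=49,54 \
+  --lon-lims=245,255 \
+  --prefix="nexgddp_" \
+  --output-dir="$HOME/nex-gddp-subset/" \
+  --cache="$HOME/.cache/nex-gddp/"
+```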
+
+
+## Coordinate Variables, Spatial and Temporal Extents, and Time-stamps
+
+### Coordinate Variables
+The coordinate variables of the `nex-gddp-cmip6` climate dataset files are `lon` and `lat`, representing the longitude and latitude points, respectively.
+
+### Temporal Extents and Time-stamps
+The time-stamps are already included in the original files. The dataset offers
+**daily** time-series of climate variables. The following table
+describes the temporal extent of the scenarios included in this dataset:
+|# |Scenarios (keyword for `--scenario`) |Temporal extent |
+|---|-------------------------------------|----------------------------|
+|1 |`historical` |`1950-01-01` to `2014-12-31`|
+|2 |`ssp126` |`2015-01-01` to `2100-12-31`|
+|3 |`ssp245` |`2015-01-01` to `2100-12-31`|
+|4 |`ssp370` |`2015-01-01` to `2100-12-31`|
+|5 |`ssp585` |`2015-01-01` to `2100-12-31`|
+
+> [!Note]
+> Values of the `Temporal extent` column are the limits for the `--start-date`
+> and `--end-date` options of the main `datatool` script.
+
+
+## Dataset Variables
+The NetCDF files of the dataset contain various variables. You may see a list of variables by browsing the dataset's directory:
+```console
+foo@bar:~$ ls /project/rrg-mclark/data/meteorological-data/nasa-nex-gddp-cmip6/NEX-GDDP-CMIP6/ACCESS-CM2/ssp126/r1i1p1f1/
+hurs huss pr rlds rsds sfcWind tas tasmax tasmin
+```
+
+## Spatial Extent
+The spatial extent of the `nex-gddp-cmip6` dataset is global, on a regular 0.25-degree grid.
+
+## Short Description on `nex-gddp-cmip6` Climate Dataset Variables
+This dataset offers 9 climate variables: 1) precipitation, 2) mean
+air temperature, 3) daily maximum temperature, 4) daily minimum
+temperature, 5) specific humidity, 6) relative humidity, 7) shortwave
+radiation, 8) longwave radiation, and 9) wind speed.
+
+Since the dataset offers daily time-series, including daily
+precipitation and air temperature, it can potentially
+be used to force conceptual hydrological models that only
+need daily time-series of these variables.
+
+Furthermore, with common disaggregation methods from the
+literature, one can generate sub-daily time-series of each variable and
+use them to force physically based models that may need more
+climate variables as their forcing data.
+
+The table below summarizes the variables offered by this dataset (a quick way to verify them on disk follows the table):
+
+|Variable Name |Variable (keyword for `--variable`)|Unit |IPCC Abbreviation|Comments |
+|-----------------------|-----------------------------------|------|-----------------|----------------------|
+|maximum temperature@2m |`tasmax` |K |tasmax |near-surface 2m level |
+|minimum temperature@2m |`tasmin` |K |tasmin |near-surface 2m level |
+|precipitation          |`pr`                               |mm/day|pr               |surface level         |
+|relative humidity |`hurs` |% |hurs |near-surface level |
+|specific humidity |`huss` |1 |huss |near-surface 2m level |
+|longwave radiation |`rlds` |W m-2 |rlds |surface level |
+|shortwave radiation |`rsds` |W m-2 |rsds |surface level |
+|wind speed@10m |`sfcWind` |m s-1 | |near-surface 10m level|
+|mean air temperature@2m|`tas` |K |tas |near-surface 2m level |
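+
+A quick way to verify these units and attributes on disk is to dump a file's
+header with the standard netCDF tools; the path below is constructed from the
+directory tree shown earlier:
+```console
+foo@bar:~$ ncdump -h /project/rrg-mclark/data/meteorological-data/nasa-nex-gddp-cmip6/NEX-GDDP-CMIP6/ACCESS-CM2/historical/r1i1p1f1/pr/pr_day_ACCESS-CM2_historical_r1i1p1f1_gn_1950.nc
+```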
+
+For the most up-to-date information please visit [NASA's NEX-GDDP-CMIP6
+project website](https://www.nccs.nasa.gov/services/data-collections/land-based-products/nex-gddp-cmip6).
diff --git a/scripts/nasa-nex-gddp-cmip6/nex-gddp-cmip6.sh b/scripts/nasa-nex-gddp-cmip6/nex-gddp-cmip6.sh
new file mode 100755
index 0000000..98977fc
--- /dev/null
+++ b/scripts/nasa-nex-gddp-cmip6/nex-gddp-cmip6.sh
@@ -0,0 +1,421 @@
+#!/bin/bash
+# Meteorological Data Processing Workflow
+# Copyright (C) 2024, University of Calgary
+#
+# This file is part of Meteorological Data Processing Workflow
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+# =========================
+# Credits and contributions
+# =========================
+# 1. Parts of the code are taken from https://www.shellscript.sh/tips/getopt/index.html
+
+
+# ================
+# General comments
+# ================
+# * All variables are camelCased for distinguishing from function names;
+# * function names are all in lower_case with words separated by underscores for legibility;
+# * shell style is based on Google Open Source Projects'
+# Style Guide: https://google.github.io/styleguide/shellguide.html
+
+
+# ===============
+# Usage Functions
+# ===============
+short_usage() {
+ echo "usage: $(basename $0) [-cio DIR] [-v VARS] [-se DATE] [-t CHAR] [-ln REAL,REAL] [-p STR] [-MmS STR[,...]]"
+}
+
+
+# argument parsing using getopt - WORKS ONLY ON LINUX BY DEFAULT
+parsedArguments=$(getopt -a -n extract-dataset -o i:v:o:s:e:t:l:n:p:c:m:S:M: --long dataset-dir:,variable:,output-dir:,start-date:,end-date:,time-scale:,lat-lims:,lon-lims:,prefix:,cache:,ensemble:,scenario:,model: -- "$@")
+validArguments=$?
+if [ "$validArguments" != "0" ]; then
+ short_usage;
+ exit 1;
+fi
+
+# check if no options were passed
+if [ $# -eq 0 ]; then
+ echo "ERROR $(basename $0): arguments missing";
+ exit 1;
+fi
+
+# check long and short options passed
+eval set -- "$parsedArguments"
+while :
+do
+ case "$1" in
+ -i | --dataset-dir) datasetDir="$2" ; shift 2 ;; # required
+ -v | --variable) variables="$2" ; shift 2 ;; # required
+ -o | --output-dir) outputDir="$2" ; shift 2 ;; # required
+ -s | --start-date) startDate="$2" ; shift 2 ;; # required
+ -e | --end-date) endDate="$2" ; shift 2 ;; # required
+ -t | --time-scale) timeScale="$2" ; shift 2 ;; # redundant - added for compatibility
+ -l | --lat-lims) latLims="$2" ; shift 2 ;; # required
+ -n | --lon-lims) lonLims="$2" ; shift 2 ;; # required
+ -p | --prefix) prefix="$2" ; shift 2 ;; # optional
+ -c | --cache) cache="$2" ; shift 2 ;; # required
+    -m | --ensemble)     ensemble="$2"        ; shift 2 ;; # required
+ -S | --scenario) scenario="$2" ; shift 2 ;; # required
+ -M | --model) model="$2" ; shift 2 ;; # required
+
+ # -- means the end of the arguments; drop this, and break out of the while loop
+ --) shift; break ;;
+
+ # in case of invalid option
+ *)
+ echo "ERROR $(basename $0): invalid option '$1'";
+ short_usage; exit 1 ;;
+ esac
+done
+
+# set a default value for $prefix if not given
+if [[ -z $prefix ]]; then
+ prefix="data"
+fi
+
+# check if $model is provided
+if [[ -z $model ]]; then
+ echo "ERROR $(basename $0): --model value(s) missing"
+ exit 1;
+fi
+
+# useful log date format function
+logDate () { echo "($(date +"%Y-%m-%d %H:%M:%S")) "; }
+
+# define $startYear and $endYear to check if the dates are within the
+# dataset's date range
+startYear=$(date --date "$startDate" +"%Y")
+endYear=$(date --date "$endDate" +"%Y")
+
+
+# =====================
+# Necessary Assumptions
+# =====================
+# TZ to be set to UTC to avoid invalid dates due to Daylight Saving
+alias date='TZ=UTC date'
+
+# expand aliases for the one stated above
+shopt -s expand_aliases
+
+
+# ==========================
+# Necessary Global Variables
+# ==========================
+latDim="lat"
+lonDim="lon"
+timeDim="time"
+resolution="0.25"
+
+
+# ===================
+# Necessary Functions
+# ===================
+# Modules below available on Digital Research Alliance of Canada's Graham HPC
+## core modules
+function load_core_modules () {
+ module -q load gcc/9.3.0
+ module -q load cdo/2.0.4
+ module -q load nco/5.0.6
+}
+function unload_core_modules () {
+  # WARNING: DO NOT USE IF YOU ARE NOT SURE HOW TO USE IT
+ module -q unload cdo/2.0.4
+ module -q unload nco/5.0.6
+}
+## ncl modules
+function load_ncl_module () {
+ module -q load gcc/9.3.0
+ module -q load ncl/6.6.2
+}
+function unload_ncl_module () {
+ module -q unload ncl/6.6.2
+}
+
+# loading core modules for the script
+load_core_modules
+
+
+# =================
+# Useful one-liners
+# =================
+#calculate Unix EPOCH time in seconds since 1970-01-01 00:00:00
+unix_epoch () { date --date="$@" +"%s"; }
+
+#check whether the input is a float or an int
+check_real () { if [[ "$1" == *'.'* ]]; then echo 'float'; else echo 'int'; fi; }
+
+#convert to float if the number is 'int'
+to_float () { if [[ $(check_real $1) == 'int' ]]; then printf "%.1f" "$1"; echo; else printf "%.5f" "$1"; echo; fi; }
+
+#join array element by the specified delimiter
+join_by () { local IFS="$1"; shift; echo "$*"; }
+
+#to_float the latLims and lonLims, real numbers delimited by ','
+lims_to_float () { IFS=',' read -ra l <<< $@; f_arr=(); for i in "${l[@]}"; do f_arr+=($(to_float $i)); done; echo $(join_by , "${f_arr[@]}"); }
+
+#offset lims
+offset () { float="$1"; offset="$2"; printf "%.1f," $(echo "$float + $offset" | bc) | sed 's/,$//'; }
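+
+# illustrative outputs of the one-liners above:
+#   check_real "46.5"        -> float
+#   to_float "45"            -> 45.0
+#   join_by , 45.0 46.50000  -> 45.0,46.50000
+#   lims_to_float "45,46.5"  -> 45.0,46.50000
+#   offset "10.0" 180        -> 190.0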
+
+
+# ================
+# Useful functions
+# ================
+#######################################
+# expand the upper and lower limits of
+# $latLims by the resolution value of
+# the dataset - only for the sake of an
+# abundance of caution
+#
+# Arguments:
+# 1. lims -> spatial extents in a
+# comma-delimited form
+# 2. resolution -> resolution of the
+# dataset for
+# expansion
+#
+# Globals:
+# 1. globalLims -> expanded Lims
+#######################################
+function expand_lims () {
+ # local variables
+ local lims="$1"
+ local res="$2"
+ local limArr
+
+ # expansion
+ IFS=',' read -ra limArr <<< $lims
+ limArr[0]=$(echo "${limArr[0]} - $res" | bc)
+ limArr[1]=$(echo "${limArr[1]} + $res" | bc)
+ echo "$(join_by , ${limArr[@]})"
+}
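+
+# e.g., expand_lims "45.0,46.0" 0.25 yields "44.75,46.25"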
+
+
+# ===============
+# Data Processing
+# ===============
+# create $modelArr array from input comma-delimited values
+IFS=',' read -ra modelArr <<< $(echo $model)
+# create $scenarioArr array from input comma-delimited values
+IFS=',' read -ra scenarioArr <<< $(echo $scenario)
+# create $ensembleArr array from input comma-delimited values
+IFS=',' read -ra ensembleArr <<< $(echo $ensemble)
+# create $variableArr array from input comma-delimited values
+IFS=',' read -ra variableArr <<< $(echo $variables)
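+
+# e.g., --model="ACCESS-CM2,CanESM5" yields modelArr=(ACCESS-CM2 CanESM5)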
+
+# taking care of various possible scenarios for $startDate and $endDate
+## #1 if startYear is before 2015, and historical is NOT selected as a
+## scenario, issue a WARNING and add historical to $scenarioArr
+if [[ "$startYear" -lt 2015 ]] && \
+ [[ "${scenarioArr[*]}" == "historical" ]]; then
+ # issue a warning and add historical to the scenarios
+ echo "$(logDate)$(basename $0): WARNING! Dates preceeding 2015 belongs to \`hisotrical\` scenario"
+ echo "$(logDate)$(basename $0): WARNING! \`historical\` is added to \`--scenario\` list"
+ scenarioArr+=("historical")
+fi
+
+## #2 if endYear is beyond 2014, and SSP scenarios are NOT
+## selected, issue an ERROR and terminate with exitcode 1
+if [[ "$endYear" -gt 2014 ]] && \
+ [[ "${scenarioArr[*]}" == "ssp" ]]; then # `ssp` is treated as *ssp*
+ echo "$(logDate)$(basename $0): ERROR! Dates past 2015 belong to \`ssp\` scenarios"
+ echo "$(logDate)$(basename $0): ERROR! Choose the appropriate date range and try again"
+ exit 1;
+fi
+
+# display info
+echo "$(logDate)$(basename $0): processing NASA NEX-GDDP-CMIP6..."
+
+# since the dataset's grid cell system is Gaussian, make sure to apply
+# to_float() to the $latLims and $lonLims values
+latLims="$(lims_to_float "$latLims")"
+lonLims="$(lims_to_float "$lonLims")"
+
+# since longitudes are within the [0, 360] range in this dataset, offset
+# negative input $lonLims values by +360 to match
+IFS=',' read -ra lims <<< $lonLims
+f_arr=()
+for lim in "${lims[@]}"; do
+  if [[ $(echo "$lim < 0" | bc -l) -eq 1 ]]; then
+    f_arr+=($(offset "$lim" 360))
+ else
+ f_arr+=($lim)
+ fi
+done
+lonLims="$(join_by , ${f_arr[@]})"
+
+# expand the upper and lower limits of latLims by the resolution value
+latLims=$(expand_lims $latLims $resolution)
+lonLims=$(expand_lims $lonLims $resolution)
+
+
+# ============================================
+# Build date arrays for time-series extraction
+# ============================================
+# file date intervals in years - dataset's default
+interval=1
+
+fileDateFormat="%Y"
+actualDateFormat="%Y-%m-%d"
+
+# define needed variables
+let "difference = $endYear - $startYear"
+let "steps = $difference / $interval"
+
+# build $startDateFileArr, $endDateFileArr
+startDateFileArr=()
+endDateFileArr=()
+actualStartDateArr=()
+actualEndDateArr=()
+
+# range of jumps
+range=$(seq 0 $steps)
+
+# filling the arrays
+for iter in $range; do
+ # jumps every $interval years
+ let "jumps = $iter * $interval"
+
+ # current date after necessary jumps
+ let "toDate = $jumps + $startYear"
+
+ # extract start and end values
+ startValue="$(date --date "${toDate}0101" +"${fileDateFormat}")"
+ endValue="$(date --date "${toDate}0101 +${interval}years -1days" +"${fileDateFormat}")"
+
+ # double-check end-date
+ if [[ "$endValue" -gt 2100 ]]; then
+ endValue="2100" # irregular last date for dataset files
+ fi
+
+ # extract start and end values for actual dates
+ actualStartValue="$(date --date "${toDate}0101" +"${actualDateFormat}")"
+ actualEndValue="$(date --date "${toDate}0101 +${interval}years -1days" +"${actualDateFormat}")"
+
+ # fill up relevant arrays
+ startDateFileArr+=("${startValue}")
+ endDateFileArr+=("${endValue}")
+
+ actualStartDateArr+=("${actualStartValue}")
+ actualEndDateArr+=("${actualEndValue}")
+done
+
+# build actualStartArr array for temporal subsetting
+actualStartDateArr[0]="$(date --date "${startDate}" +"${actualDateFormat}")"
+
+# and similarly, the actualEndArr array
+lastIndex=$(( "${#actualEndDateArr[@]}" - 1 ))
+actualEndDateArr[${lastIndex}]="$(date --date "${endDate}" +"${actualDateFormat}")"
+
+
+# =====================
+# Extract dataset files
+# =====================
+# Typical directory structure of the dataset is:
+# ${datasetDir}/${model}/${scenario}/${ensemble}/${var}/
+# and each ${var} directory contains files in the following nomenclature:
+# ${var}_day_${model}_${scenario}_${ensemble}_gn_%Y.nc
+# with the %Y year value indicating the starting year of data inside the
+# file
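+# (e.g., pr_day_ACCESS-CM2_ssp126_r1i1p1f1_gn_2015.nc)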
+
+# create dataset directories in $cache and $outputDir
+echo "$(logDate)$(basename $0): creating output directory under $outputDir"
+mkdir -p "$outputDir"
+echo "$(logDate)$(basename $0): creating cache directory under $cache"
+mkdir -p "$cache"
+
+# iterate over models
+for model in "${modelArr[@]}"; do
+  # extract model name (this dataset has no separate submodel level)
+  modelName=$(echo $model | cut -d '/' -f 1)
+
+ # iterate over scenarios, e.g., ssp126, ssp245, ssp370, ssp585
+ for scenario in "${scenarioArr[@]}"; do
+
+    # iterate over ensemble members, e.g., r1i1p1f1
+ for ensemble in "${ensembleArr[@]}"; do
+
+ pathTemplate="${modelName}/${scenario}/${ensemble}/"
+ if [[ -e "${datasetDir}/${pathTemplate}" ]]; then
+ echo "$(logDate)$(basename $0): processing ${model}.${scenario}.${ensemble} files"
+ mkdir -p "${cache}/${pathTemplate}"
+ mkdir -p "${outputDir}/${pathTemplate}"
+ else
+        echo "$(logDate)$(basename $0): ERROR! ${model}.${scenario}.${ensemble} does not exist." >&2
+ break 1;
+ fi
+
+ # iterate over date range of interest using index
+ for idx in "${!startDateFileArr[@]}"; do
+
+ # dates for files
+ fileStartDate="${startDateFileArr[$idx]}"
+ fileEndDate="${endDateFileArr[$idx]}"
+ # dates for subsetting
+ actualStartDate="${actualStartDateArr[$idx]}"
+ actualEndDate="${actualEndDateArr[$idx]}"
+ # dates for ncks slabs
+ actualStartDateFormatted="$(date --date "${actualStartDate}" +'%Y-%m-%d')"
+ actualEndDateFormatted="$(date --date "${actualEndDate}" +'%Y-%m-%d')"
+
+ # iterate over dataset variables of interest
+ for var in "${variableArr[@]}"; do
+
+ # define file for further operation
+ src="${var}_day_${modelName}_${scenario}_${ensemble}_gn_${fileStartDate}.nc"
+ dst="day_${modelName}_${scenario}_${ensemble}_gn_${fileStartDate}.nc"
+
+ # subsetting variable, spatial extents, and temporal extents
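+          # the until-loop below retries the command upon transient
+          # failures (e.g., HPC filesystem or I/O hiccups)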
+ until ncks -A -v ${var} \
+ -d "$latDim","${latLims}" \
+ -d "$lonDim","${lonLims}" \
+ -d "$timeDim","${actualStartDateFormatted}","${actualEndDateFormatted}" \
+ ${datasetDir}/${pathTemplate}/${var}/${src} \
+ ${cache}/${pathTemplate}/${dst}; do
+            echo "$(logDate)$(basename $0): Process killed: restarting process" >&2
+ sleep 10;
+ done # until ncks
+
+          # offset the $lonDim values of each NetCDF file so the
+          # longitude range falls within [-180, +180]
+          until ncap2 -O -s "where(${lonDim}>180) ${lonDim}=${lonDim}-360" \
+ ${cache}/${pathTemplate}/${dst} \
+ ${cache}/${pathTemplate}/${dst}; do
+            echo "$(logDate)$(basename $0): Process killed: restarting process" >&2
+ sleep 10;
+ done # until ncap2
+
+ # copy the results
+ cp -r ${cache}/${pathTemplate}/${dst} \
+ ${outputDir}/${pathTemplate}/${prefix}${dst};
+
+ done # for $variableArr
+ done # for $startDateArr
+ done # for $ensembleArr
+ done # for $scenarioArr
+done # for $modelArr
+
+# wait for everything to finish - just in case
+sleep 10
+
+mkdir "$HOME/empty_dir"
+echo "$(logDate)$(basename $0): deleting temporary files from $cache"
+rsync -aP --delete "$HOME/empty_dir/" "$cache"
+rm -r "$cache"
+echo "$(logDate)$(basename $0): temporary files from $cache are removed"
+echo "$(logDate)$(basename $0): results are produced under $outputDir"
+
diff --git a/scripts/rdrs/LICENSE b/scripts/ornl-daymet/LICENSE
similarity index 100%
rename from scripts/rdrs/LICENSE
rename to scripts/ornl-daymet/LICENSE
diff --git a/scripts/daymet/README.md b/scripts/ornl-daymet/README.md
similarity index 91%
rename from scripts/daymet/README.md
rename to scripts/ornl-daymet/README.md
index 0f8edee..1f4daa7 100644
--- a/scripts/daymet/README.md
+++ b/scripts/ornl-daymet/README.md
@@ -1,7 +1,8 @@
# `Daymet` dataset
In this file, the details of the dataset is explained.
-:warning: the dataset files are divided between three different spatial domains: 1) North America (na), 2) Peurto Rico (pr), and 3) Hawaii (hi). For the moment, only the `na` domain is considered in `datatool`.
+> [!WARNING]
+> The dataset files are divided among three different spatial domains: 1) North America (na), 2) Puerto Rico (pr), and 3) Hawaii (hi). For the moment, only the `na` domain is considered in `datatool`.
## Location of Dataset Files
The global `Daymet` dataset is located under the following directory accessible from Digital Research Alliance of Canada (DRA) Graham cluster:
@@ -62,7 +63,8 @@ The spatial resolutaion of `Daymet` gridded data is 1 $km$. The model files are
|2 |`pr` | `+16.85`° to `+19.93`° | `-67.97`° to `-64.13`° |
|3 |`hi` | `+17.96`° to `+23.51`° | `-160.30`° to `-154.78`° |
-:warning: As mentioned previously, only the `na` domain is considered in the subsetting process of `datatool`.
+> [!WARNING]
+> As mentioned previously, only the `na` domain is considered in the subsetting process of `datatool`.
## Temporal Extent
The time-steps are daily and the temporal extent for each domain is listed in the following table:
@@ -74,7 +76,8 @@ The time-steps are daily and the temporal extent for each domain is listed in th
Also, "[t]he Daymet calendar is based on a standard calendar year. All Daymet years have 1 - 365 days, including leap years. For leap years, the Daymet database includes leap day. Values for December 31 are discarded from leap years to maintain a 365-day year."
-:warning: As mentioned previously, only the `na` domain is considered in the subsetting process of `datatool`.
+> [!WARNING]
+> As mentioned previously, only the `na` domain is considered in the subsetting process of `datatool`.
## Short Description on `Daymet` Variables
The variables currently available through the `Daymet` dataset and their details are described in the table below, taken from the [source](https://daymet.ornl.gov/overview):
diff --git a/scripts/daymet/daymet.sh b/scripts/ornl-daymet/daymet.sh
similarity index 90%
rename from scripts/daymet/daymet.sh
rename to scripts/ornl-daymet/daymet.sh
index 12d569c..962869e 100755
--- a/scripts/daymet/daymet.sh
+++ b/scripts/ornl-daymet/daymet.sh
@@ -1,7 +1,7 @@
#!/bin/bash
# Meteorological Data Processing Workflow
-# Copyright (C) 2022, University of Saskatchewan
-# Copyright (C) 2023, University of Calgary
+# Copyright (C) 2022-2023, University of Saskatchewan
+# Copyright (C) 2023-2024, University of Calgary
#
# This file is part of Meteorological Data Processing Workflow
#
@@ -42,7 +42,7 @@ short_usage() {
# argument parsing using getopt - WORKS ONLY ON LINUX BY DEFAULT
-parsedArguments=$(getopt -a -n daymet -o i:v:o:s:e:t:l:n:p:c:m: --long dataset-dir:,variable:,output-dir:,start-date:,end-date:,time-scale:,lat-lims:,lon-lims:,prefix:,cache:,ensemble: -- "$@")
+parsedArguments=$(getopt -a -n daymet -o i:v:o:s:e:t:l:n:p:c:m:S:M: --long dataset-dir:,variable:,output-dir:,start-date:,end-date:,time-scale:,lat-lims:,lon-lims:,prefix:,cache:,ensemble:,scenario:,model: -- "$@")
validArguments=$?
if [ "$validArguments" != "0" ]; then
short_usage;
@@ -71,6 +71,8 @@ do
-p | --prefix) prefix="$2" ; shift 2 ;; # optional
-c | --cache) cache="$2" ; shift 2 ;; # required
-m | --ensemble) ensemble="$2" ; shift 2 ;; # redundant - added for compatibility
+ -S | --scenario) scenario="$2" ; shift 2 ;; # redundant - added for compatibility
+ -M | --model) model="$2" ; shift 2 ;; # redundant - added for compatibility
# -- means the end of the arguments; drop this, and break out of the while loop
--) shift; break ;;
@@ -90,12 +92,12 @@ fi
# check the prefix of not set
if [[ -z $prefix ]]; then
- prefix="data"
+ prefix="data_"
fi
# =====================
-# Necessary Assumptions
+# Necessary assumptions
# =====================
# TZ to be set to UTC to avoid invalid dates due to Daylight Saving
alias date='TZ=UTC date'
@@ -107,7 +109,7 @@ shopt -s expand_aliases
mkdir -p $cache
# ==========================
-# Necessary Global Variables
+# Necessary global variables
# ==========================
# the structure of file names is as follows: "YYYYMMDD12.nc"
daymetDateFormat="%Y" # Daymet dataset date format
@@ -124,29 +126,33 @@ lonVar="lon" # longitude variable
latDim="y" # latitude dimension
lonDim="x" # longitude dimension
-# spatial extraction script address
-coordIdxScript="$(dirname $0)/../../assets/coord_daymet_idx.ncl"
-coordClosestIdxScript="$(dirname $0)/../../assets/coord_closest_daymet_idx.ncl"
+# paths
+datatoolPath="$(dirname $0)/../../" # datatool's path
+# daymet index scripts works on RDRSv2.1 grids as well
+# and ESPO-G6-R2 has similar grid system as RDRSv2.1
+coordIdxScript="$datatoolPath/assets/ncl_scripts/coord_daymet_idx.ncl"
+coordClosestIdxScript="$datatoolPath/assets/ncl_scripts/coord_closest_daymet_idx.ncl"
+
# ===================
-# Necessary Functions
+# Necessary functions
# ===================
# Modules below available on Compute Canada (CC) Graham Cluster Server
## core modules
-load_core_modules () {
+function load_core_modules () {
module -q load cdo/2.0.4
module -q load nco/5.0.6
}
-unload_core_modules () {
+function unload_core_modules () {
# WARNING: DO NOT USE IF YOU ARE NOT SURE HOW TO URE IT
module -q unload cdo/2.0.4
module -q unload nco/5.0.6
}
## ncl modules
-load_ncl_module () {
+function load_ncl_module () {
module -q load ncl/6.6.2
}
-unload_ncl_module () {
+function unload_ncl_module () {
module -q unload ncl/6.6.2
}
@@ -154,9 +160,9 @@ unload_ncl_module () {
load_core_modules
-#######################################
-# useful one-liners
-#######################################
+# =================
+# Useful one-liners
+# =================
# log date format
log_date () { echo "($(date +"%Y-%m-%d %H:%M:%S")) "; }
@@ -197,7 +203,7 @@ delim_max () { IFS=', ' read -r -a l <<< "$@"; printf "%s\n" "${l[@]}" | sort -n
# 2: -> SecondNum: second int/float
# 3: -> operator: comparison operator
#######################################
-bc_compare () {
+function bc_compare () {
# local variables
local firstNum=$1
local secondNum=$2
@@ -218,7 +224,7 @@ bc_compare () {
# listing files
# 3: -> nth: nth file to return
#######################################
-nth_file () {
+function nth_file () {
# local variables
local parentDir=$1
local wildcard=$2
diff --git a/scripts/ouranos-espo-g6-r2/README.md b/scripts/ouranos-espo-g6-r2/README.md
new file mode 100644
index 0000000..013fa9a
--- /dev/null
+++ b/scripts/ouranos-espo-g6-r2/README.md
@@ -0,0 +1,164 @@
+# Ouranos `ESPO-G6-R2 v1.0.0` dataset
+In this file, the details of the dataset are explained.
+
+## Location of Dataset Files
+The `ESPO-G6-R2 v1.0.0` dataset is located under the following directory accessible from Compute Canada (CC) Graham Cluster:
+```console
+/project/rpp-kshook/Climate_Forcing_Data/meteorological-data/ouranos-espo-g6-r2 # rpp-kshook allocation
+/project/rrg-mclark/data/meteorological-data/ouranos-espo-g6-r2 # rrg-mclark allocation
+```
+
+and the structure of the dataset's daily files is as follows:
+```console
+/project/rrg-mclark/data/meteorological-data/ouranos-espo-g6-r2
+├── AS-RCEC
+│ └── TaiESM1
+│ ├── ssp245
+| | └── r1i1p1f1
+│ | └── day
+| | ├── pr
+│ | | ├── pr_day_ESPO-G6-R2_v1.0.0_CMIP6_ScenarioMIP_NAM_AS-RCEC_TaiESM1_ssp245_r1i1p1f1_19500101-19531231.nc
+| | | ├── pr_day_ESPO-G6-R2_v1.0.0_CMIP6_ScenarioMIP_NAM_AS-RCEC_TaiESM1_ssp245_r1i1p1f1_19540101-19571231.nc
+| | | ├── .
+| | | ├── .
+| | | ├── .
+| | | ├── pr_day_ESPO-G6-R2_v1.0.0_CMIP6_ScenarioMIP_NAM_AS-RCEC_TaiESM1_ssp245_r1i1p1f1_20940101-20971231.nc
+| | | └── pr_day_ESPO-G6-R2_v1.0.0_CMIP6_ScenarioMIP_NAM_AS-RCEC_TaiESM1_ssp245_r1i1p1f1_20980101-21001231.nc
+│ | ├── tasmax
+| | | ├── tasmax_day_ESPO-G6-R2_v1.0.0_CMIP6_ScenarioMIP_NAM_AS-RCEC_TaiESM1_ssp245_r1i1p1f1_19500101-19531231.nc
+| | | ├── tasmax_day_ESPO-G6-R2_v1.0.0_CMIP6_ScenarioMIP_NAM_AS-RCEC_TaiESM1_ssp245_r1i1p1f1_19540101-19571231.nc
+| | | ├── .
+| | | ├── .
+| | | ├── .
+| | | ├── tasmax_day_ESPO-G6-R2_v1.0.0_CMIP6_ScenarioMIP_NAM_AS-RCEC_TaiESM1_ssp245_r1i1p1f1_20940101-20971231.nc
+| | | └── tasmax_day_ESPO-G6-R2_v1.0.0_CMIP6_ScenarioMIP_NAM_AS-RCEC_TaiESM1_ssp245_r1i1p1f1_20980101-21001231.nc
+| | └── tasmin
+| | ├── tasmin_day_ESPO-G6-R2_v1.0.0_CMIP6_ScenarioMIP_NAM_AS-RCEC_TaiESM1_ssp245_r1i1p1f1_19500101-19531231.nc
+| | ├── tasmin_day_ESPO-G6-R2_v1.0.0_CMIP6_ScenarioMIP_NAM_AS-RCEC_TaiESM1_ssp245_r1i1p1f1_19540101-19571231.nc
+| | ├── .
+| | ├── .
+| | ├── .
+| | ├── tasmin_day_ESPO-G6-R2_v1.0.0_CMIP6_ScenarioMIP_NAM_AS-RCEC_TaiESM1_ssp245_r1i1p1f1_20940101-20971231.nc
+| | └── tasmin_day_ESPO-G6-R2_v1.0.0_CMIP6_ScenarioMIP_NAM_AS-RCEC_TaiESM1_ssp245_r1i1p1f1_20980101-21001231.nc
+│ └── ssp370
+| └── r1i1p1f1
+│ └── day
+| ├── pr
+│ | ├── .
+│ | ├── .
+│ | └── .
+| ├── tasmax
+│ | ├── .
+│ | ├── .
+│ | └── .
+| └── tasmin
+│ ├── .
+│ ├── .
+│ └── .
+│
+.
+.
+.
+├── %{model}
+| ├── %{submodel} # no need for explicit declaration in the scripts
+| | ├── %{scenario}
+| | | └── %{ensemble}
+| | | └── day
+| | | ├── %{var}
+| | | | ├── %{var}_day_ESPO-G6-R2_v1.0.0_CMIP6_ScenarioMIP_NAM_%{model}_%{submodel}_%{scenario}_%{ensemble}_%{year}0101_%{year+3}1231.nc
+| | | | ├── .
+| | | | ├── .
+| | | | ├── .
+| | | | └── %{var}_day_ESPO-G6-R2_v1.0.0_CMIP6_ScenarioMIP_NAM_%{model}_%{submodel}_%{scenario}_%{ensemble}_%{year}0101_%{year+2}1231.nc
+. . . .
+. . . .
+. . . .
+└── NUIST
+ └── NESM3
+ ├── ssp245
+ | └── r1i1p1f1
+ | └── day
+ | ├── pr
+ | | ├── pr_day_ESPO-G6-R2_v1.0.0_CMIP6_ScenarioMIP_NAM_NUIST_NESM3_ssp245_r1i1p1f1_19500101-19531231.nc
+ | | ├── pr_day_ESPO-G6-R2_v1.0.0_CMIP6_ScenarioMIP_NAM_NUIST_NESM3_ssp245_r1i1p1f1_19540101-19571231.nc
+ | | ├── .
+ | | ├── .
+ | | ├── .
+ | | ├── pr_day_ESPO-G6-R2_v1.0.0_CMIP6_ScenarioMIP_NAM_NUIST_NESM3_ssp245_r1i1p1f1_20940101-20971231.nc
+ | | └── pr_day_ESPO-G6-R2_v1.0.0_CMIP6_ScenarioMIP_NAM_NUIST_NESM3_ssp245_r1i1p1f1_20980101-210031231.nc
+ | ├── tasmax
+ | | ├── tasmax_day_ESPO-G6-R2_v1.0.0_CMIP6_ScenarioMIP_NAM_NUIST_NESM3_ssp245_r1i1p1f1_19500101-19531231.nc
+ | | ├── tasmax_day_ESPO-G6-R2_v1.0.0_CMIP6_ScenarioMIP_NAM_NUIST_NESM3_ssp245_r1i1p1f1_19540101-19571231.nc
+ | | ├── .
+ | | ├── .
+ | | ├── .
+ | | ├── tasmax_day_ESPO-G6-R2_v1.0.0_CMIP6_ScenarioMIP_NAM_NUIST_NESM3_ssp245_r1i1p1f1_20940101-20971231.nc
+ | | └── tasmax_day_ESPO-G6-R2_v1.0.0_CMIP6_ScenarioMIP_NAM_NUIST_NESM3_ssp245_r1i1p1f1_20980101-21001231.nc
+ | └── tasmin
+ | ├── tasmin_day_ESPO-G6-R2_v1.0.0_CMIP6_ScenarioMIP_NAM_NUIST_NESM3_ssp245_r1i1p1f1_19500101-19531231.nc
+ | ├── tasmin_day_ESPO-G6-R2_v1.0.0_CMIP6_ScenarioMIP_NAM_NUIST_NESM3_ssp245_r1i1p1f1_19540101-19571231.nc
+ | ├── .
+ | ├── .
+ | ├── .
+ | ├── tasmin_day_ESPO-G6-R2_v1.0.0_CMIP6_ScenarioMIP_NAM_NUIST_NESM3_ssp245_r1i1p1f1_20940101-20971231.nc
+ | └── tasmin_day_ESPO-G6-R2_v1.0.0_CMIP6_ScenarioMIP_NAM_NUIST_NESM3_ssp245_r1i1p1f1_20980101-21001231.nc
+ └── ssp370
+ └── r1i1p1f1
+ └── day
+ ├── pr
+ | ├── .
+ | ├── .
+ | └── .
+ ├── tasmax
+ | ├── .
+ | ├── .
+ | └── .
+ └── tasmin
+ ├── .
+ ├── .
+ └── .
+```
+
+## Coordinate Variables and Time-stamps
+
+### Coordinate Variables
+The coordinate variables of the `ESPO-G6-R2 v1.0.0` simulations are `rlon` and `rlat` representing the longitude and latitude points, respectively.
+### Time-stamps
+The time-stamps are included in the original files.
+
+## Dataset Variables
+The NetCDF files of the dataset contain one variable per file. You may see a list of variables by browsing the dataset files:
+```console
+foo@bar:~$ ls /project/rrg-mclark/data/meteorological-data/ouranos-espo-g6-r2/ESPO-G6-R2v1.0.0/AS-RCEC/TaiESM1/ssp245/r1i1p1f1/day
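+pr  tasmax  tasmin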
+```
+
+## Spatial Extent
+The spatial extent of the `ESPO-G6-R2 v1.0.0` dataset spans latitudes from `+5.75`° to `+83.98`° and longitudes from `-179.9925`° to `179.9728`°, covering North America. The resolution is 0.09 degrees (~10 km).
+
+## Temporal Extent
+The time-stamps are already included in the original files. The dataset
+offers **daily** time-series of climate variables. The following table
+describes the temporal extent of the scenarios included in this dataset:
+|# |Scenarios (keyword for `--scenario`) |Temporal extent |
+|---|-------------------------------------|----------------------------|
+|1  |`ssp245`                             |`1950-01-01` to `2100-12-31`|
+|2  |`ssp370`                             |`1950-01-01` to `2100-12-31`|
+|3  |`ssp585`                             |`1950-01-01` to `2100-12-31`|
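+
+> [!Note]
+> Values of the `Temporal extent` column are the limits for `--start-date`
+> and `--end-date` options with the main `datatool` script.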
+
+
+## Short Description on `ESPO-G6-R2 v1.0.0` Variables
+This dataset only offers three climate variables: 1) daily precipitation
+time-series (surface level), 2) daily minimum temperature time-series
+(@2m, near-surface level), and 3) daily maximum temperature time-series
+(@2m, near-surface level). Since this dataset is daily and only offers
+precipitation and temperature values, it can potentially be used to
+force conceptual hydrological models that only need daily time-series of
+these variables.
+
+The table below summarizes the variables offered by this dataset:
+|Variable Name |Variable (keyword for `--variable`)|Unit |IPCC Abbreviation|Comments |
+|----------------------|-----------------------------------|----------|-----------------|----------------------|
+|maximum temperature |`tasmax` |K |tasmax |near-surface 2m height|
+|minimum temperature |`tasmin` |K |tasmin |near-surface 2m height|
+|precipitation         |`pr`                               |kg m-2 s-1|pr               |surface level         |
+
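+As an example, a subset of this dataset may be extracted with the main
+`datatool` script. Below is a minimal sketch, assuming the main caller
+is `extract-dataset.sh` at the root of the repository and the dataset
+keyword is `espo-g6-r2`; note that `--model` takes the
+`%{model}/%{submodel}` form for this dataset:
+```console
+foo@bar:~$ ./extract-dataset.sh \
+  --dataset="espo-g6-r2" \
+  --dataset-dir="/project/rrg-mclark/data/meteorological-data/ouranos-espo-g6-r2" \
+  --output-dir="$HOME/espo-g6-r2-subset" \
+  --start-date="1950-01-01" \
+  --end-date="1960-12-31" \
+  --lat-lims=49,51 \
+  --lon-lims=-117,-115 \
+  --variable="pr,tasmax,tasmin" \
+  --model="AS-RCEC/TaiESM1" \
+  --scenario="ssp245" \
+  --ensemble="r1i1p1f1" \
+  --cache="$HOME/.cache/espo-g6-r2" \
+  --prefix="subset_"
+```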
diff --git a/scripts/ouranos-espo-g6-r2/espo-g6-r2.sh b/scripts/ouranos-espo-g6-r2/espo-g6-r2.sh
new file mode 100755
index 0000000..bfdcd7f
--- /dev/null
+++ b/scripts/ouranos-espo-g6-r2/espo-g6-r2.sh
@@ -0,0 +1,424 @@
+#!/bin/bash
+# Meteorological Data Processing Workflow
+# Copyright (C) 2024, University of Calgary
+#
+# This file is part of Meteorological Data Processing Workflow
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+# =========================
+# Credits and contributions
+# =========================
+# 1. Parts of the code are taken from https://www.shellscript.sh/tips/getopt/index.html
+
+
+# ================
+# General comments
+# ================
+# * All variables are camelCased for distinguishing from function names;
+# * function names are all in lower_case with words separated by underscores for legibility;
+# * shell style is based on Google Open Source Projects'
+# Style Guide: https://google.github.io/styleguide/shellguide.html
+
+
+# ===============
+# Usage functions
+# ===============
+short_usage() {
+ echo "usage: $(basename $0) [-cio DIR] [-v VARS] [-se DATE] [-t CHAR] [-ln REAL,REAL] [-p STR] [-MmS STR[,...]]"
+}
+
+
+# argument parsing using getopt - WORKS ONLY ON LINUX BY DEFAULT
+parsedArguments=$(getopt -a -n extract-dataset -o i:v:o:s:e:t:l:n:p:c:m:S:M: --long dataset-dir:,variable:,output-dir:,start-date:,end-date:,time-scale:,lat-lims:,lon-lims:,prefix:,cache:,ensemble:,scenario:,model: -- "$@")
+validArguments=$?
+if [ "$validArguments" != "0" ]; then
+ short_usage;
+ exit 1;
+fi
+
+# check if no options were passed
+if [ $# -eq 0 ]; then
+ echo "ERROR $(basename $0): arguments missing";
+ exit 1;
+fi
+
+# check long and short options passed
+eval set -- "$parsedArguments"
+while :
+do
+ case "$1" in
+ -i | --dataset-dir) datasetDir="$2" ; shift 2 ;; # required
+ -v | --variable) variables="$2" ; shift 2 ;; # required
+ -o | --output-dir) outputDir="$2" ; shift 2 ;; # required
+ -s | --start-date) startDate="$2" ; shift 2 ;; # required
+ -e | --end-date) endDate="$2" ; shift 2 ;; # required
+ -t | --time-scale) timeScale="$2" ; shift 2 ;; # redundant - added for compatibility
+ -l | --lat-lims) latLims="$2" ; shift 2 ;; # required
+ -n | --lon-lims) lonLims="$2" ; shift 2 ;; # required
+ -p | --prefix) prefix="$2" ; shift 2 ;; # optional
+ -c | --cache) cache="$2" ; shift 2 ;; # required
+ -m | --ensemble) ensemble="$2" ; shift 2 ;; # required
+ -S | --scenario) scenario="$2" ; shift 2 ;; # required
+ -M | --model) model="$2" ; shift 2 ;; # required
+
+ # -- means the end of the arguments; drop this, and break out of the while loop
+ --) shift; break ;;
+
+ # in case of invalid option
+ *)
+ echo "ERROR $(basename $0): invalid option '$1'";
+ short_usage; exit 1 ;;
+ esac
+done
+
+# set a default value for $prefix if not given
+if [[ -z $prefix ]]; then
+ prefix="data_"
+fi
+
+# useful log date format function
+logDate () { echo "($(date +"%Y-%m-%d %H:%M:%S")) "; }
+
+
+# ================
+# Necessary checks
+# ================
+
+# check if the dates are within datasets date range
+# define $startYear and $endYear
+startYear=$(date --date "$startDate" +"%Y")
+endYear=$(date --date "$endDate" +"%Y")
+
+# if $startYear is before 1950 raise a "WARNING" and set startDate
+if [[ $startYear -lt 1950 ]]; then
+ echo "$(logDate)$(basename $0): WARNING! The date range of the dataset is between 1950-01-01 and 2100-12-31"
+ echo "$(logDate)$(basename $0): WARNING! \`start-date\` is set to 1950-01-01 00:00:00"
+ startDate="1950-01-01"
+ startYear="1950"
+fi
+
+# if $endYear is beyond 2100 raise a "WARNING" and set endDate
+if [[ $endYear -gt 2100 ]]; then
+ echo "$(logDate)$(basename $0): WARNING! The date range of the dataset is between 1950-01-01 and 2100-12-31"
+ echo "$(logDate)$(basename $0): WARNING! \`end-date\` is set to 2100-12-31 00:00:00"
+ endDate="2100-12-31"
+ endYear="2100"
+fi
+
+# check if $model, $ensemble, and $scenario are provided
+if [[ -z $model ]] || \
+ [[ -z $ensemble ]] || \
+ [[ -z $scenario ]]; then
+ echo "$(logDate)$(basename $0): ERROR! \`--model\`, \`--ensemble\`, and \`--scenario\` values are required"
+ exit 1;
+fi
+
+
+# =====================
+# Necessary assumptions
+# =====================
+# TZ to be set to UTC to avoid invalid dates due to Daylight Saving
+alias date='TZ=UTC date'
+
+# expand aliases for the one stated above
+shopt -s expand_aliases
+
+# paths
+datatoolPath="$(dirname $0)/../../" # datatool's path
+# daymet index scripts works on RDRSv2.1 grids as well
+# and ESPO-G6-R2 has similar grid system as RDRSv2.1
+coordIdxScript="$datatoolPath/assets/ncl_scripts/coord_daymet_idx.ncl"
+coordClosestIdxScript="$datatoolPath/assets/ncl_scripts/coord_closest_daymet_idx.ncl"
+
+
+# ==========================
+# Necessary global variables
+# ==========================
+latDim="rlat"
+lonDim="rlon"
+timeDim="time"
+
+
+# ===================
+# Necessary functions
+# ===================
+# Modules below available on Digital Research Alliance of Canada's Graham HPC
+## core modules
+function load_core_modules () {
+ module -q load gcc/9.3.0
+ module -q load cdo/2.0.4
+ module -q load nco/5.0.6
+}
+function unload_core_modules () {
+  # WARNING: DO NOT USE IF YOU ARE NOT SURE HOW TO USE IT
+ module -q unload cdo/2.0.4
+ module -q unload nco/5.0.6
+}
+## ncl modules
+function load_ncl_module () {
+ module -q load gcc/9.3.0
+ module -q load ncl/6.6.2
+}
+function unload_ncl_module () {
+ module -q unload ncl/6.6.2
+}
+
+# loading core modules for the script
+load_core_modules
+
+
+# =================
+# Useful one-liners
+# =================
+#calculate Unix EPOCH time in seconds since 1970-01-01 00:00:00
+unix_epoch () { date --date="$@" +"%s"; }
+
+#check whether the input is a float or an int
+check_real () { if [[ "$1" == *'.'* ]]; then echo 'float'; else echo 'int'; fi; }
+
+#convert to float if the number is 'int'
+to_float () { if [[ $(check_real $1) == 'int' ]]; then printf "%.1f" "$1"; echo; else printf "%.5f" "$1"; echo; fi; }
+
+#join array element by the specified delimiter
+join_by () { local IFS="$1"; shift; echo "$*"; }
+
+#to_float the latLims and lonLims, real numbers delimited by ','
+lims_to_float () { IFS=',' read -ra l <<< $@; f_arr=(); for i in "${l[@]}"; do f_arr+=($(to_float $i)); done; echo $(join_by , "${f_arr[@]}"); }
+
+
+# ===============
+# Data processing
+# ===============
+# display info
+echo "$(logDate)$(basename $0): processing Ouranos ESPO-G6-R2..."
+
+# create $modelArr array from input comma-delimited values
+IFS=',' read -ra modelArr <<< $(echo $model)
+# create $scenarioArr array from input comma-delimited values
+IFS=',' read -ra scenarioArr <<< $(echo $scenario)
+# create $ensembleArr array from input comma-delimited values
+IFS=',' read -ra ensembleArr <<< $(echo $ensemble)
+# create $variableArr array from input comma-delimited values
+IFS=',' read -ra variableArr <<< $(echo $variables)
+
+
+# ======================
+# Extract domain extents
+# ======================
+
+# parse the upper and lower bounds of a given spatial limit
+minLat=$(echo $latLims | cut -d ',' -f 1)
+maxLat=$(echo $latLims | cut -d ',' -f 2)
+minLon=$(echo $lonLims | cut -d ',' -f 1)
+maxLon=$(echo $lonLims | cut -d ',' -f 2)
+
+# unload and load necessary modules
+unload_core_modules
+load_ncl_module
+# choose a sample file as all files share the same grid
+domainFile=$(find ${datasetDir} -type f -name "*.nc" | head -n 1)
+
+# extract the associated indices corresponding to $latLims and $lonLims
+coordIdx="$(ncl -nQ 'coord_file='\"$domainFile\" 'minlat='"$minLat" 'maxlat='"$maxLat" 'minlon='"$minLon" 'maxlon='"$maxLon" "$coordIdxScript")"
+
+# if the spatial index is out-of-bound, i.e., 'ERROR' is returned
+if [[ "${coordIdx}" == "ERROR" ]]; then
+ # extract the closest index values
+ coordIdx="$(ncl -nQ 'coord_file='\"$domainFile\" 'minlat='"$minLat" 'maxlat='"$maxLat" 'minlon='"$minLon" 'maxlon='"$maxLon" "$coordClosestIdxScript")"
+fi
+
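+# NOTE: $coordIdx is assumed to take the form
+# "lonStartIdx,lonEndIdx latStartIdx,latEndIdx", given the field
+# extraction below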
+# parse the output index for latitude and longitude
+lonLimsIdx+="$(echo $coordIdx | cut -d ' ' -f 1)"
+latLimsIdx+="$(echo $coordIdx | cut -d ' ' -f 2)"
+
+# reload necessary modules
+unload_ncl_module
+load_core_modules
+
+# ============================================
+# Build date arrays for time-series extraction
+# ============================================
+# file date intervals in years - dataset's default
+interval=4
+
+startFormat="%Y0101"
+endFormat="%Y1231" # will be redefined later depending on the $modelName
+
+actualFormat='%Y%m%d'
+
+# define needed variables
+let "difference = $endYear - $startYear"
+let "steps = $difference / $interval"
+
+# build $startDateFileArr, $endDateFileArr
+startDateFileArr=()
+endDateFileArr=()
+
+# range of jumps
+range=$(seq 0 $steps)
+
+# filling the arrays
+for iter in $range; do
+ # jumps every $interval years
+ let "jumps = $iter * $interval"
+
+ # current date after necessary jumps
+ let "toDate = $jumps + $startYear"
+
+ # extract start and end values
+ startValue="$(date --date "${toDate}0101" +"${startFormat}")"
+ endValue="$(date --date "${toDate}0101 +${interval}years -1days" +"${endFormat}")"
+
+ # check if endValue is beyond 2100
+ endValueYear="$(date --date "${endValue}" +"%Y")"
+ # double-check end-date
+ if [[ "$endValueYear" -gt 2100 ]]; then
+ endValue="21001231" # irregular last date for dataset files
+ fi
+
+ # fill up relevant arrays
+ startDateFileArr+=("${startValue}")
+ endDateFileArr+=("${endValue}")
+
+done
+
+# build actualStartArr array for temporal subsetting
+actualStartDateArr=("${startDateFileArr[@]}")
+actualStartDateArr[0]="$(date --date "${startDate}" +"${actualFormat}")"
+
+# and similarly, the actualEndArr array
+actualEndDateArr=("${endDateFileArr[@]}")
+lastIndex=$(( "${#actualEndDateArr[@]}" - 1 ))
+actualEndDateArr[${lastIndex}]="$(date --date "${endDate}" +"${actualFormat}")"
+
+
+# =====================
+# Extract dataset files
+# =====================
+# Typical directory structure of the dataset is:
+# ${datasetDir}/${model}/%submodel/${scenario}/${ensemble}/day/${var}/
+# and each ${var} directory contains files in the following nomenclature:
+# ${var}_day_ESPO-G6-R2_v1.0.0_CMIP6_ScenarioMIP_NAM_${model}_%submodel_${scenario}_${ensemble}_%yyyymmdd-%yyyymmdd.nc
+# with the former date value indicating the starting date of data inside
+# the file, and the latter indicating the ending date of data
+#
+# NOTE: %submodel must be determined in the upstream caller
+#
+
+# create dataset directories in $cache and $outputDir
+echo "$(logDate)$(basename $0): creating output directory under $outputDir"
+mkdir -p "$outputDir"
+echo "$(logDate)$(basename $0): creating cache directory under $cache"
+mkdir -p "$cache"
+
+# iterate over models/submodels
+for model in "${modelArr[@]}"; do
+ # extract model and submodel names
+ modelName=$(echo $model | cut -d '/' -f 1)
+ submodelName=$(echo $model | cut -d '/' -f 2)
+
+ # iterate over scenarios, e.g., ssp245, ssp370, ssp585
+ for scenario in "${scenarioArr[@]}"; do
+
+    # iterate over ensemble members, e.g., r1i1p1f1
+ for ensemble in "${ensembleArr[@]}"; do
+
+ pathTemplate="${modelName}/${submodelName}/${scenario}/${ensemble}/day/"
+ if [[ -e "${datasetDir}/${pathTemplate}" ]]; then
+ echo "$(logDate)$(basename $0): processing ${model}.${scenario}.${ensemble} files"
+ mkdir -p "${cache}/${pathTemplate}"
+ mkdir -p "${outputDir}/${pathTemplate}"
+ else
+ echo "$(logDate)$(basename $0): ERROR! ${model}.${scenario}.${ensemble} does not exist."
+ break 1;
+ fi
+
+ # iterate over date range of interest using index
+ for idx in "${!startDateFileArr[@]}"; do
+
+ # dates for files
+ fileStartDate="${startDateFileArr[$idx]}"
+ fileEndDate="${endDateFileArr[$idx]}"
+ # dates for subsetting
+ actualStartDate="${actualStartDateArr[$idx]}"
+ actualEndDate="${actualEndDateArr[$idx]}"
+ # dates for ncks slabs
+ actualStartDateFormatted="$(date --date $actualStartDate +'%Y-%m-%d')"
+ actualEndDateFormatted="$(date --date $actualEndDate +'%Y-%m-%d')"
+
+ # destination NetCDF file
+ dst="day_ESPO-G6-R2_v1.0.0_CMIP6_ScenarioMIP_${modelName}_${submodelName}_${scenario}_${ensemble}_${actualStartDate}-${actualEndDate}.nc"
+
+ # address inconsistencies with NetCDF file end-date values in the
+ # dataset
+ if [[ "$modelName" == "NIMS-KMA" ]] || \
+ [[ "$modelName" == "MOHC" ]]; then
+ fileEndDate=$(date --date "${fileEndDate}" +"%Y1230")
+ fi
+
+ # iterate over dataset variables of interest
+ for var in "${variableArr[@]}"; do
+
+ # define file for further operation
+ # address inconsistencies with NetCDF file name in the dataset
+ if [[ "$modelName" == "DKRZ" ]]; then
+ src="${var}_day_ESPO-G6-R2_v1.0.0_CMIP6_ScenarioMIP_NAM_MPI-M_${submodelName}_${scenario}_${ensemble}_${fileStartDate}-${fileEndDate}.nc"
+ else
+ src="${var}_day_ESPO-G6-R2_v1.0.0_CMIP6_ScenarioMIP_NAM_${modelName}_${submodelName}_${scenario}_${ensemble}_${fileStartDate}-${fileEndDate}.nc"
+ fi
+
+ # spatial subsetting
+ until ncks -A -v ${var} \
+ -d "$latDim","${latLimsIdx}" \
+ -d "$lonDim","${lonLimsIdx}" \
+ -d "$timeDim","${actualStartDateFormatted}","${actualEndDateFormatted}" \
+ ${datasetDir}/${pathTemplate}/${var}/${src} \
+ ${cache}/${pathTemplate}/${dst}; do
+ echo "$(logDate)$(basename $0): Process killed: restarting process in 10 sec" >&2
+ echo "NCKS failed" >&2
+ sleep 10;
+ done # until ncks
+ done # for $variableArr
+
+ # change lon values so the extents are from ~-180 to 0
+ # this is solely for easymore compatibility
+ until ncap2 -O -s "where(lon>0) lon=lon-360" \
+ "${cache}/${pathTemplate}/${dst}" \
+ "${outputDir}/${pathTemplate}/${prefix}${dst}"; do
+ echo "$(logDate)$(basename $0): Process killed: restarting process in 10 sec" >&2
+ echo "NCAP2 failed" >&2
+ sleep 10;
+ done # until ncap2
+
+ done # for $startDateArr
+ done # for $ensembleArr
+ done # for $scenarioArr
+done # for $modelArr
+
+# wait for everything to finish - just in case
+sleep 10
+
+mkdir "$HOME/empty_dir"
+echo "$(logDate)$(basename $0): deleting temporary files from $cache"
+rsync -aP --delete "$HOME/empty_dir/" "$cache"
+rm -r "$cache"
+echo "$(logDate)$(basename $0): temporary files from $cache are removed"
+echo "$(logDate)$(basename $0): results are produced under $outputDir"
+