diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 75c9a072..3a959a91 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -71,11 +71,6 @@ jobs:
create-args: >-
mamba
python=${{ matrix.python-version }}
- - name: Downgrade intake-esm (for xscen) and xclim
- if: matrix.python-version == '3.9'
- run: |
- micromamba install -y intake-esm=2023.11.10
- micromamba install -y xclim=0.47.0
- name: Conda and Mamba versions
run: |
mamba --version
diff --git a/CHANGES.rst b/CHANGES.rst
index 95904a74..6d5f811a 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -4,7 +4,7 @@ Changelog
v0.4.0 (unreleased)
-------------------
-Contributors to this version: Trevor James Smith (:user:`Zeitsperre`), Thomas-Charles Fortier Filion (:user:`TC-FF`).
+Contributors to this version: Trevor James Smith (:user:`Zeitsperre`), Thomas-Charles Fortier Filion (:user:`TC-FF`), Gabriel Rondeau-Genesse (:user:`RondeauG`).
New features and enhancements
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -15,10 +15,13 @@ New features and enhancements
* Added new functions to `xhydro.frequency_analysis.local` to calculate plotting positions and to prepare plots. (:pull:`87`).
* `xscen` now supports Python3.12. (:pull:`99`).
* `xscen` now supports `pandas` >= 2.2.0, `xarray` >= 2023.11.0, and `xclim` >= 0.47.0. (:pull:`99`).
+* Added `xh.cc.sampled_indicators` to compute future indicators using a perturbation approach and random sampling. (:pull:`54`).
Breaking changes
^^^^^^^^^^^^^^^^
* Added `pooch` as an installation dependency. (:pull:`62`).
+* `xhydro` now requires `xarray`>=2023.11.0, `xclim`>=0.48.2, `xscen`>=0.8.3, and, indirectly, `pandas`>=2.2.0. The main breaking change is in how yearly frequencies are named ('YS-' instead of 'AS-'). (:pull:`54`).
+* Functions that output a dict with keys as xrfreq (namely, ``xh.indicators.compute_indicators``) will now use the new nomenclature for their keys (e.g. "YS-JAN" instead of "AS-JAN"). (:pull:`54`).
Internal changes
^^^^^^^^^^^^^^^^
diff --git a/docs/index.rst b/docs/index.rst
index 20624f18..5d5aa16d 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -10,6 +10,7 @@ Welcome to xHydro's documentation!
installation
usage
notebooks/local_frequency_analysis
+ notebooks/climate_change
planification
apidoc/modules
contributing
diff --git a/docs/notebooks/climate_change.ipynb b/docs/notebooks/climate_change.ipynb
new file mode 100644
index 00000000..ad347289
--- /dev/null
+++ b/docs/notebooks/climate_change.ipynb
@@ -0,0 +1,640 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "0948f5b8-7d86-4db4-8b3c-b041280fd1fc",
+ "metadata": {},
+ "source": [
+ "# Climate change analysis of hydrological data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "97f36911-c803-4a43-a467-b3845e57be73",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Imports\n",
+ "from zipfile import ZipFile\n",
+ "from pathlib import Path\n",
+ "\n",
+ "import pooch\n",
+ "\n",
+ "import numpy as np\n",
+ "import xarray as xr\n",
+ "import hvplot.xarray\n",
+ "import xclim\n",
+ "\n",
+ "import xhydro as xh\n",
+ "\n",
+ "# Hide INFO-level logs\n",
+ "import logging\n",
+ "logger = logging.getLogger()\n",
+ "logger.setLevel(logging.CRITICAL)\n",
+ "\n",
+ "# This notebook will use data from the 2022 edition of the Hydrological Atlas of Southern Quebec, which can be accessed from the xhydro-testdata repository. \n",
+ "# They cover 2 stations: ABIT00057 and ABIT00058\n",
+ "GITHUB_URL = \"https://github.com/hydrologie/xhydro-testdata\"\n",
+ "BRANCH_OR_COMMIT_HASH = \"main\"\n",
+ "\n",
+ "# Streamflow file (1 file - Hydrotel driven by BCC-CSM-1.1(m))\n",
+ "streamflow_file = pooch.retrieve(\n",
+ " url=f\"{GITHUB_URL}/raw/{BRANCH_OR_COMMIT_HASH}/data/cc_indicators/streamflow_BCC-CSM1.1-m_rcp45.nc\",\n",
+ " known_hash=\"md5:0ac83a4ee9dceecda68ac1ee542f50de\",\n",
+ ")\n",
+ "\n",
+ "# Reference QMOYAN (6 platforms)\n",
+ "ref_zip = pooch.retrieve(\n",
+ " url=f\"{GITHUB_URL}/raw/{BRANCH_OR_COMMIT_HASH}/data/cc_indicators/reference.zip\",\n",
+ " known_hash=\"md5:192544f3a081375a81d423e08038d32a\",\n",
+ ")\n",
+ "directory_to_extract_to = Path(ref_zip).parent\n",
+ "with ZipFile(ref_zip, 'r') as ziploc:\n",
+ " ziploc.extractall(directory_to_extract_to)\n",
+ " files = ziploc.namelist()\n",
+ "reference_files = [directory_to_extract_to / f for f in files]\n",
+ "\n",
+ "# QMOYAN deltas (63 simulations x 6 platforms)\n",
+ "deltas_zip = pooch.retrieve(\n",
+ " url=f\"{GITHUB_URL}/raw/{BRANCH_OR_COMMIT_HASH}/data/cc_indicators/deltas.zip\",\n",
+ " known_hash=\"md5:ce6371e073e5324f9ade385c1c03e7eb\",\n",
+ ")\n",
+ "directory_to_extract_to = Path(deltas_zip).parent\n",
+ "with ZipFile(deltas_zip, 'r') as ziploc:\n",
+ " ziploc.extractall(directory_to_extract_to)\n",
+ " files = ziploc.namelist()\n",
+ "deltas_files = [directory_to_extract_to / f for f in files]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "50753dc9-2008-4a4e-b14b-86f3ca1c72fb",
+ "metadata": {},
+ "source": [
+ "While there is a huge variety of analyses that could be done to assess the impacts of climate change on hydrology, this notebook will go through some of the most common steps:\n",
+ "\n",
+ "- Computing a list of relevant indicators over climatological periods\n",
+ "- Computing future deltas\n",
+ "- Computing ensemble statistics to assess future changes\n",
+ "\n",
+ "
INFO\n",
+ "\n",
+ "Multiple functions in `xh.indicators` and `xh.cc` have been leveraged from the `xscen` library and made accessible to `xhydro` users. For more information on these function, it is recommended to look at:\n",
+ "\n",
+ "- [compute_indicators](https://xscen.readthedocs.io/en/latest/notebooks/2_getting_started.html#Computing-indicators)\n",
+ "- [climatological_op](https://xscen.readthedocs.io/en/latest/notebooks/2_getting_started.html#Climatological-operations)\n",
+ "- [compute_deltas](https://xscen.readthedocs.io/en/latest/notebooks/2_getting_started.html#Computing-deltas)\n",
+ "- [ensemble_statistics](https://xscen.readthedocs.io/en/latest/notebooks/2_getting_started.html#Ensemble-statistics)\n",
+ "\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d4483793-b16c-4d87-8229-76ff62543881",
+ "metadata": {},
+ "source": [
+ "## Computing hydrological indicators over a given time period"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "50701659-5987-49c0-a2c9-8afd13cb2f9d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# The file used as an example is a daily timeseries of streamflow data generated from the Hydrotel hydrological model \n",
+ "# driven by bias-adjusted data from the BCC-CSM-1.1(m) climatological model (RCP4.5), from 1950 to 2100.\n",
+ "# For this example, the dataset covers only 2 stations.\n",
+ "ds = xr.open_dataset(streamflow_file)\n",
+ "ds.streamflow.hvplot(x='time', grid=True, widget_location='bottom', groupby='station')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cde2bb56-582e-4016-af1d-49634dbb8df6",
+ "metadata": {},
+ "source": [
+ "Hydrological indicators can be separated in two broad categories: \n",
+ "\n",
+ "- Frequential indicators, such as the maximum 20-year flow (*Qmax20*) or the minimum 2-year 7-day averaged flow in summer (*Q7min2_summer*). Computing these is already covered in the [Local Frequency Analysis notebook](local_frequency_analysis.ipynb) notebook.\n",
+ "- Non frequencial indicators, such as the average yearly flow.\n",
+ "\n",
+ "Since frequential indicators have already been covered in another example, this notebook will instead look at the methodology that would be used to compute non frequential indicators using `xhydro.indicators.compute_indicators`. The inputs of that function are:\n",
+ "\n",
+ "- *ds*: the Dataset.\n",
+ "- *indicators*: a list of indicators to compute, or the path to a YAML file containing those.\n",
+ "- *periods* (optional): either [start, end] or list of [start, end] of continuous periods over which to compute the indicators.\n",
+ "\n",
+ " INFO\n",
+ "\n",
+ "Custom indicators are built by following the YAML formatting required by `xclim`. More information is available [in the xclim documentation](https://xclim.readthedocs.io/en/latest/api.html#yaml-file-structure).\n",
+ "\n",
+ "The list of Yaml IDs is available [here](https://xclim.readthedocs.io/en/stable/indicators.html).\n",
+ "\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4e5f45c5-bc91-4149-a903-04c59592695b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# We'll define 2 indicators to compute by using dictionaries.\n",
+ "#\n",
+ "# We minimally need to define three things under `data`:\n",
+ "# 1. 'base': A base indicator for the computation, identified through its Yaml ID (here, 'stats').\n",
+ "# 2. 'parameters': Specific parameters to use instead of the defaults.\n",
+ "# - This potentially includes a 'indexer' parameter to focus on particular periods of the year.\n",
+ "# 3. 'input': The name of the input variable. The key here must be the variable name used by xclim (here, 'da').\n",
+ "#\n",
+ "# The 'identifier' is the label that will be given by 'xclim' to the new indicator. The 'module' can be anything.\n",
+ "\n",
+ "indicators = [\n",
+ " # 1st indicator: Mean annual flow\n",
+ " xclim.core.indicator.Indicator.from_dict(\n",
+ " data={\"base\": \"stats\",\n",
+ " \"input\": {\"da\": \"streamflow\"}, \n",
+ " \"parameters\": {\"op\": \"mean\"}},\n",
+ " identifier=\"QMOYAN\",\n",
+ " module=\"hydro\",\n",
+ " ),\n",
+ " \n",
+ " # 2nd indicator: Mean summer-fall flow\n",
+ " xclim.core.indicator.Indicator.from_dict(\n",
+ " data={\"base\": \"stats\",\n",
+ " \"input\": {\"da\": \"streamflow\"}, \n",
+ " \"parameters\": {\"op\": \"mean\", \n",
+ " \"indexer\": {\"month\": [6, 7, 8, 9, 10, 11]}}}, # The indexer is used to restrict available data to the relevant months only\n",
+ " identifier=\"QMOYEA\",\n",
+ " module=\"hydro\",\n",
+ " )\n",
+ "]\n",
+ "\n",
+ "# Call compute_indicators\n",
+ "dict_indicators = xh.indicators.compute_indicators(ds, indicators=indicators)\n",
+ "\n",
+ "dict_indicators"
+ ]
+ },
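+  {
+   "cell_type": "markdown",
+   "id": "3f2a9c1e-5b47-4d8a-9c21-7e5f0a1b2c3d",
+   "metadata": {},
+   "source": [
+    "Alternatively, `compute_indicators` also accepts the path to a YAML file. The cell below is a minimal sketch of that workflow: the file name and the YAML content are illustrative only, and the file must follow the module structure described in the `xclim` documentation linked above."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8c4d2e6f-1a3b-4c5d-8e7f-9a0b1c2d3e4f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# A sketch of the YAML-based alternative. The file name and content are illustrative;\n",
+    "# see the xclim documentation for the full YAML module specification.\n",
+    "yaml_content = \"\"\"\n",
+    "indicators:\n",
+    "  QMOYAN:\n",
+    "    base: stats\n",
+    "    input:\n",
+    "      da: streamflow\n",
+    "    parameters:\n",
+    "      op: mean\n",
+    "\"\"\"\n",
+    "yaml_file = Path(\"indicators.yml\")\n",
+    "yaml_file.write_text(yaml_content)\n",
+    "\n",
+    "# This should be equivalent to the dictionary-based definition of QMOYAN above.\n",
+    "xh.indicators.compute_indicators(ds, indicators=str(yaml_file))"
+   ]
+  },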
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b8645c1d-eb99-442b-b90b-fead5297c252",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dict_indicators[\"YS-JAN\"].QMOYAN.hvplot(x='time', grid=True, widget_location='bottom', groupby='station')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0e003a1d-c08b-4906-a4ca-027cce56ee48",
+ "metadata": {},
+ "source": [
+ "Since indicators could be output at varying frequencies, `compute_indicators` will return a dictionary where the keys are the output frequencies. In this example, we only have one key: `AS-JAN` (annual data starting in January). The keys follow the `pandas` nomenclature.\n",
+ "\n",
+ "The next step is to obtain averages over climatological periods. The `xh.cc.climatological_op` function can be called for this purpose. The inputs of that function are:\n",
+ " \n",
+ "- *ds*: Dataset to use for the computation.\n",
+ "- *op*: Operation to perform over time. While other operations are technically possible, the following are recommended and tested: ['max', 'mean', 'median', 'min', 'std', 'sum', 'var', 'linregress'].\n",
+ "- *window* (optional): Number of years to use for the rolling operation. If None, all the available data will be used.\n",
+ "- *min_periods* (optional): For the rolling operation, minimum number of years required for a value to be computed.\n",
+ "- *stride*: Stride (in years) at which to provide an output from the rolling window operation.\n",
+ "- *periods* (optional): Either [start, end] or list of [start, end] of continuous periods to be considered.\n",
+ "- *rename_variables*: If True, '_clim_{op}' will be added to variable names.\n",
+ "- *horizons_as_dim*: If True, the output will have 'horizon' and the frequency as 'month', 'season' or 'year' as dimensions and coordinates."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b0f8a8f5-add4-4c75-b44b-7b6b78ccead6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Define the periods using a list of lists\n",
+ "periods = [[1981, 2010], [2011, 2040], [2041, 2070], [2071, 2100]]\n",
+ "min_periods = 29 # This is an example of a model where the data stops in 2099, so we can use 'min_periods' to still obtain a value for the last period\n",
+ "\n",
+ "# Call climatological_op. Here we don't need 'time' anymore, so we can use horizons_as_dim=True\n",
+ "ds_avg = xh.cc.climatological_op(dict_indicators[\"YS-JAN\"], op=\"mean\", periods=periods, min_periods=min_periods, horizons_as_dim=True, rename_variables=False).drop_vars([\"time\"])\n",
+ "ds_avg"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "74c51289-31c1-4791-b07d-ab1fcab3b6ae",
+ "metadata": {},
+ "source": [
+ "Computing deltas is then as easy as calling `xh.cc.compute_deltas`. The inputs of that function are:\n",
+ " \n",
+ "- *ds*: Dataset to use for the computation.\n",
+ "- *reference_horizon*: Either a YYYY-YYYY string corresponding to the 'horizon' coordinate of the reference period, or a xr.Dataset containing the climatological mean.\n",
+ "- *kind*: ['+', '/', '%'] Whether to provide absolute, relative, or percentage deltas. Can also be a dictionary separated per variable name."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "80d4dd95-7c1e-4a3a-bcfd-127edc7ed4e2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Here, we'll use a string from the 'horizon' dimension.\n",
+ "reference_horizon=\"1981-2010\"\n",
+ "kind = \"%\"\n",
+ "\n",
+ "ds_deltas = xh.cc.compute_deltas(ds_avg, reference_horizon=reference_horizon, kind=kind, rename_variables=False)\n",
+ "ds_deltas"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "772a1601-9760-45d0-87b2-595f137c3c8c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Show the results as Dataframes\n",
+ "print(\"30-year averages\")\n",
+ "display(ds_avg.QMOYAN.isel(station=0).to_dataframe())\n",
+ "print(\"Deltas\")\n",
+ "display(ds_deltas.QMOYAN.isel(station=0).to_dataframe())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cf570595-51bb-47c8-b682-94b859407d2c",
+ "metadata": {},
+ "source": [
+ "## Ensemble statistics"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fbc2fd92-d02d-4617-82d3-f549902a988b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# To save time, let's open pre-computed deltas for the RCP4.5 simulations used in the 2022 Hydroclimatic Atlas\n",
+ "ds_dict_deltas = {}\n",
+ "for f in deltas_files:\n",
+ " id = Path(f).stem\n",
+ " ds_dict_deltas[id] = xr.open_dataset(f)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7ce174e4-ff18-4d6a-952a-5918a05dfc12",
+ "metadata": {},
+ "source": [
+ "It is a good practice to use multiple climate models to perform climate change analyses, especially since the impacts on the hydrological cycle can be non linear. Once multiple hydrological simulations have been run and are ready to be analysed, `xh.cc.ensemble_stats` can be used to call a variety of functions available in `xclim.ensemble`, such as for getting ensemble quantiles or the agreement on the sign of the change.\n",
+ "\n",
+ "### Weighting simulations\n",
+ "If the ensemble of climate models is heterogeneous, for example if a given climate model has provided more simulations, it is recommended to weight the results accordingly. While this is not currently available through `xhydro`, `xscen.generate_weights` can create a first approximation of the weights to use, based on available metadata.\n",
+ "\n",
+ "The following attributes are required for the function to work:\n",
+ "\n",
+ "- 'cat:source' in all datasets\n",
+ "- 'cat:driving_model' in regional climate models\n",
+ "- 'cat:institution' in all datasets if independence_level='institution'\n",
+ "- 'cat:experiment' in all datasets if split_experiments=True\n",
+ "\n",
+ "That function has three possible independence levels:\n",
+ "\n",
+ "- *model*: 1 Model - 1 Vote\n",
+ "- *GCM*: 1 GCM - 1 Vote\n",
+ "- *institution*: 1 institution - 1 Vote"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "72ac8902-8b39-48bb-a6c3-4087747634ff",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import xscen\n",
+ "\n",
+ "independence_level = \"model\" # 1 Model - 1 Vote\n",
+ "\n",
+ "weights = xscen.generate_weights(ds_dict_deltas, independence_level=independence_level)\n",
+ "\n",
+ "# Show the results. We multiply by 6 for the showcase here simply because there are 6 hydrological platforms in the results.\n",
+ "weights.where(weights.realization.str.contains(\"LN24HA\"), drop=True) * 6"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b7a42731-241c-4950-8d88-de55de07f8b9",
+ "metadata": {},
+ "source": [
+ "### Use Case #1: Deterministic reference data\n",
+ "\n",
+ "In most cases, you'll likely have deterministic data for the reference period, meaning that for a given location, the 30-year average for the indicator is a single value."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e130841f-2f77-463e-a2a7-07ce0bc8a8b5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# The Hydrological Portrait produces probabilistic estimates, but we'll take the 50th percentile to fake deterministic data\n",
+ "ref = xr.open_dataset(reference_files[0]).sel(percentile=50).drop_vars(\"percentile\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e9ceac65-2c6f-4757-aa41-5a237499f239",
+ "metadata": {},
+ "source": [
+ "Multiple methodologies exist on how to combine the information of the observed and simulated data. Due to biases that may remain in the climate simulations even after bias adjustment and affect the hydrological modelling, we'll use a perturbation technique. This is especially relevant in hydrology with regards to non linear interactions between the climate and hydrological indicators.\n",
+ "\n",
+ "The perturbation technique consists in computing ensemble percentiles on the deltas, then apply them on the reference dataset.For this example, we'll compute the 10th, 25th, 50th, 75th, and 90th percentiles of the ensemble, as well as the agreement on the sign of change, using `xh.cc.ensemble_stats`. The inputs of that function are:\n",
+ "\n",
+ "- *datasets*: List of file paths or xarray Dataset/DataArray objects to include in the ensemble. A dictionary can be passed instead of a list, in which case the keys are used as coordinates along the new `realization` axis.\n",
+ "- *statistics*: dictionary of xclim.ensembles statistics to be called, with their arguments.\n",
+ "- *weights* (optional): Weights to apply along the 'realization' dimension."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "71095fec-ea0b-4dc6-a9ff-aa19bcf39837",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Statistics to compute\n",
+ "statistics = {\"ensemble_percentiles\": \n",
+ " {\"values\": [ 10, 25, 50, 75, 90 ], \n",
+ " \"split\": False},\n",
+ " \"robustness_fractions\": \n",
+ " {\"test\": None}} # Robustness fractions is the function that provides the agreement between models.\n",
+ "\n",
+ "# Here, we call ensemble_stats on the dictionary deltas, since this is the information that we want to extrapolate.\n",
+ "# If relevant, weights are added at this step\n",
+ "ens_stats = xh.cc.ensemble_stats(ds_dict_deltas, statistics, weights=weights)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1633add8-97d0-4377-8889-d3935e9934f5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Additional statistics not explicitly supported by ensemble_stats\n",
+ "from xclim.ensembles import robustness_categories\n",
+ "\n",
+ "# Interquartile range\n",
+ "ens_stats[\"QMOYAN_iqr\"] = ens_stats[\"QMOYAN\"].sel(percentiles=75) - ens_stats[\"QMOYAN\"].sel(percentiles=25)\n",
+ "\n",
+ "# Categories of agreement for the sign of change. This follows the Advanced IPCC Atlas categories.\n",
+ "# See the Cross-Chapter Box 1 for reference: https://www.cambridge.org/core/books/climate-change-2021-the-physical-science-basis/atlas/24E1C016DBBE4725BDFBC343695DE7DB\n",
+ "# For thresholds and ops, the first entry is related to the significance test, while the 2nd is related to the percentage of simulations that see a positive delta.\n",
+ "# For example, \"Agreement towards increase\" is met if more than 66% of simulations see a significant change AND 80% of simulations see a positive change.\n",
+ "categories = [\"Agreement towards increase\", \"Agreement towards decrease\", \"Conflicting signals\", \"No change or robust signal\"]\n",
+ "thresholds = [[0.66, 0.8], [0.66, 0.2], [0.66, 0.8], [0.66, np.nan]]\n",
+ "ops = [[\">=\", \">=\"], [\">=\", \"<=\"], [\">=\", \"<\"], [\"<\", None]]\n",
+ "\n",
+ "ens_stats[\"QMOYAN_robustness_categories\"] = robustness_categories(changed_or_fractions=ens_stats[\"QMOYAN_changed\"],\n",
+ " agree=ens_stats[\"QMOYAN_positive\"], categories=categories, thresholds=thresholds, ops=ops)\n",
+ "\n",
+ "# The future values for QMOYAN can be obtained by multiplying the reference indicator with the percentiles of the ensemble deltas\n",
+ "ens_stats[\"QMOYAN_projected\"] = (ref.QMOYAN * (1 + ens_stats.QMOYAN / 100))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bb4dadc9-b8a5-4647-996c-10035598be87",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ens_stats"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0543dd86-ef27-4a89-a2af-1bb5e223339d",
+ "metadata": {},
+ "source": [
+ "### Use Case #2: Probabilistic reference data\n",
+ "\n",
+ "This method follows a similar approach to Use Case #1, but for a case like the [Hydrological Atlas of Southern Quebec](https://cehq.gouv.qc.ca/atlas-hydroclimatique/), where the hydrological indicators computed for the historical period are represented by a probability density function (PDF), rather than a discrete value. This means that the ensemble percentiles can't simply be multiplied by the reference value.\n",
+ "\n",
+ " INFO\n",
+ "\n",
+ "Note that the percentiles in `ref` are not the interannual variability, but rather the uncertainty related, for example, to hydrological modelling or the quality of the input data. At this stage, the temporal average should already have been done.\n",
+ "\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "539334e8-b51e-45d2-af36-88caed3846ac",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ref = xr.open_mfdataset(reference_files, combine=\"nested\", concat_dim=\"platform\")\n",
+ "\n",
+ "# Rather than a single value, QMOYAN is represented by 21 percentiles that try to represent the uncertainty surrounding this statistics.\n",
+ "# Like for the future simulations, we also have 6 hydrological platforms to take into account.\n",
+ "ref"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "03b50ee9-f629-401d-9477-2b5fdd5d8dc6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# This can also be represented as a cumulative distribution function (CDF)\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "for platform in ref.platform:\n",
+ " plt.plot(ref.QMOYAN.isel(station=0).sel(platform=platform), ref.QMOYAN.percentile / 100, \"grey\")\n",
+ " plt.xlabel(\"Mean annual flow (m³/s)\")\n",
+ " plt.ylabel(\"Probability\")\n",
+ " plt.title(\"CDF for QMOYAN @ ABIT00057 \\nEach line is an hydrological platform\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "218f434c-3f67-434e-9d6f-6c3684dc0f95",
+ "metadata": {},
+ "source": [
+ "Because of their probabilistic nature, the historical reference values can't easily be combined to the future deltas. The `sampled_indicators` function has been created to circumvent this issue. That function will:\n",
+ "\n",
+ "1. Sample 'n' values from the historical distribution, weighting the percentiles by their associated coverage.\n",
+ "2. Sample 'n' values from the delta distribution, using the provided weights.\n",
+ "3. Create the future distribution by applying the sampled deltas to the sampled historical distribution, element-wise.\n",
+ "4. Compute the percentiles of the future distribution.\n",
+ "\n",
+ "The inputs of that function are:\n",
+ "\n",
+ "- *ds*: Dataset containing the historical indicators. The indicators are expected to be represented by a distribution of pre-computed percentiles.\n",
+ "- *deltas*: Dataset containing the future deltas to apply to the historical indicators.\n",
+ "- *delta_type*: Type of delta provided. Must be one of ['absolute', 'percentage'].\n",
+ "- *ds_weights* (optional): Weights to use when sampling the historical indicators, for dimensions other than 'percentile'/'quantile'. Dimensions not present in this Dataset, or if None, will be sampled uniformly unless they are shared with 'deltas'.\n",
+ "- *delta_weights* (optional): Weights to use when sampling the deltas, such as along the 'realization' dimension. Dimensions not present in this Dataset, or if None, will be sampled uniformly unless they are shared with 'ds'.\n",
+ "- *n*: Number of samples to generate.\n",
+ "- *seed* (optional): Seed to use for the random number generator.\n",
+ "- *return_dist*: Whether to return the full distributions (ds, deltas, fut) or only the percentiles."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "71a9c0c4-c0e5-4114-803d-65cc9148ad49",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "n = 300000\n",
+ "deltas = xclim.ensembles.create_ensemble(ds_dict_deltas) # The function expects an xarray object. This xclim function can be used to easily create the required input.\n",
+ "\n",
+ "# Compute the perturbed indicators\n",
+ "fut_pct, hist_dist, delta_dist, fut_dist = xh.cc.sampled_indicators(ref,\n",
+ " deltas=deltas, \n",
+ " delta_type=\"percentage\",\n",
+ " delta_weights=weights,\n",
+ " n=n, \n",
+ " seed=0,\n",
+ " return_dist=True)\n",
+ "\n",
+ "fut_pct"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6ff6665b-81e0-4853-b825-28ca73398c6a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# First, let's show how the historical distribution was sampled and reconstructed\n",
+ "\n",
+ "def _make_cdf(ds, bins):\n",
+ " count, bins_count = np.histogram(ds.QMOYAN.isel(station=0), bins=bins) \n",
+ " pdf = count / sum(count) \n",
+ " return bins_count, np.cumsum(pdf) \n",
+ "\n",
+ "# Barplot\n",
+ "plt.subplot(2, 1, 1)\n",
+ "uniquen = np.unique(hist_dist.QMOYAN.isel(station=0), return_counts=True)\n",
+ "plt.bar(uniquen[0], uniquen[1], width=0.01, color=\"k\")\n",
+ "plt.ylabel(\"Number of instances\")\n",
+ "plt.title(\"Sampling within the historical distribution\")\n",
+ "\n",
+ "# CDF\n",
+ "plt.subplot(2, 1, 2)\n",
+ "for i, platform in enumerate(ref.platform):\n",
+ " plt.plot(ref.QMOYAN.isel(station=0).sel(platform=platform), ref.percentile / 100, \"grey\", label=\"CDFs from the percentiles\" if i==0 else None)\n",
+ "bc, c = _make_cdf(hist_dist, bins=50)\n",
+ "plt.plot(bc[1:], c, \"r\", label=f\"Sampled historical CDF (n={n})\", linewidth=3) \n",
+ "plt.ylabel(\"Probability\")\n",
+ "plt.xlabel(\"QMOYAN (m³/s)\")\n",
+ "plt.legend()\n",
+ "\n",
+ "plt.tight_layout()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6d29c339-05d8-4699-98fb-000b0e263cb1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Then, let's show how the deltas were sampled, for the last horizon\n",
+ "\n",
+ "# Plot #3\n",
+ "plt.subplot(2, 1, 1)\n",
+ "uniquen = np.unique(delta_dist.QMOYAN.isel(station=0, horizon=-1), return_counts=True)\n",
+ "plt.bar(uniquen[0], uniquen[1], width=0.25, color=\"k\")\n",
+ "plt.ylabel(\"Number of instances\")\n",
+ "plt.title(\"Sampling within the historical distribution\")\n",
+ "\n",
+ "# Plot #2\n",
+ "plt.subplot(2, 1, 2)\n",
+ "bc, c = _make_cdf(delta_dist, bins=100)\n",
+ "plt.plot(bc[1:], c, \"k\", label=f\"Sampled deltas CDF (n={n})\", linewidth=3) \n",
+ "plt.ylabel(\"Probability\")\n",
+ "plt.xlabel(\"Deltas (%)\")\n",
+ "plt.legend() \n",
+ "\n",
+ "plt.tight_layout()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b152bc83-065c-4520-a222-8e2d7b07a88e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# The distributions can be used to quickly create boxplots\n",
+ "plt.boxplot([hist_dist.QMOYAN.isel(station=0), \n",
+ " fut_dist.QMOYAN.isel(station=0, horizon=0), \n",
+ " fut_dist.QMOYAN.isel(station=0, horizon=1), \n",
+ " fut_dist.QMOYAN.isel(station=0, horizon=2)], \n",
+ " labels=[\"Historical\", \"2011-2040\", \"2041-2070\", \"2071-2100\"])\n",
+ "\n",
+ "plt.ylabel(\"Mean summer flow (m³/s)\")\n",
+ "plt.tight_layout()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "991f6d99-97a2-418e-9442-a0bd28fa53bf",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# The same statistics as before can also be computed by using delta_dist\n",
+ "delta_dist = delta_dist.rename({\"sample\": \"realization\"}) # xclim compatibility\n",
+ "ens_stats_2 = xh.cc.ensemble_stats(delta_dist, statistics)\n",
+ "\n",
+ "# Interquartile range\n",
+ "ens_stats_2[\"QMOYAN_iqr\"] = ens_stats_2[\"QMOYAN\"].sel(percentiles=75) - ens_stats_2[\"QMOYAN\"].sel(percentiles=25)\n",
+ "\n",
+ "# Categories of agreement on the sign of change\n",
+ "ens_stats_2[\"QMOYAN_robustness_categories\"] = robustness_categories(changed_or_fractions=ens_stats_2[\"QMOYAN_changed\"],\n",
+ " agree=ens_stats_2[\"QMOYAN_positive\"], categories=categories, thresholds=thresholds, ops=ops)\n",
+ "\n",
+ "ens_stats_2"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/notebooks/local_frequency_analysis.ipynb b/docs/notebooks/local_frequency_analysis.ipynb
index 644bbc22..549a4599 100644
--- a/docs/notebooks/local_frequency_analysis.ipynb
+++ b/docs/notebooks/local_frequency_analysis.ipynb
@@ -69,7 +69,7 @@
"metadata": {},
"outputs": [],
"source": [
- "ds.streamflow.dropna('time', 'all').hvplot(x='time', grid=True, widget_location='bottom', groupby='id')"
+ "ds.streamflow.dropna('time', how='all').hvplot(x='time', grid=True, widget_location='bottom', groupby='id')"
]
},
{
@@ -100,7 +100,7 @@
" \"spring\": {\"date_bounds\": [\"02-11\", \"06-19\"]},\n",
" \"summer\": {\"doy_bounds\": [152, 243]},\n",
" \"fall\": {\"month\": [9, 10, 11]},\n",
- " \"winter\": {\"season\": ['DJF'], \"freq\": \"AS-DEC\"}, # To correctly wrap around the year, we need to specify the resampling frequency.\n",
+ " \"winter\": {\"season\": ['DJF'], \"freq\": \"YS-DEC\"}, # To correctly wrap around the year, we need to specify the resampling frequency.\n",
" \"august\": {\"month\": [8]},\n",
" \"annual\": {}\n",
" }"
@@ -140,13 +140,6 @@
"ds_4fa"
]
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
{
"cell_type": "code",
"execution_count": null,
@@ -155,7 +148,7 @@
},
"outputs": [],
"source": [
- "ds_4fa.streamflow_max_summer.dropna('time', 'all').hvplot(x='time', grid=True, widget_location='bottom', groupby='id')"
+ "ds_4fa.streamflow_max_summer.dropna('time', how='all').hvplot(x='time', grid=True, widget_location='bottom', groupby='id')"
]
},
{
@@ -219,7 +212,7 @@
"metadata": {},
"outputs": [],
"source": [
- "ds_4fa.streamflow_max_custom.dropna('time', 'all').hvplot(x='time', grid=True, widget_location='bottom', groupby='id')"
+ "ds_4fa.streamflow_max_custom.dropna('time', how='all').hvplot(x='time', grid=True, widget_location='bottom', groupby='id')"
]
},
{
@@ -256,7 +249,7 @@
"metadata": {},
"outputs": [],
"source": [
- "ds_4fa.volume_sum_spring.dropna('time', 'all').hvplot(x='time', grid=True, widget_location='bottom', groupby='id')"
+ "ds_4fa.volume_sum_spring.dropna('time', how='all').hvplot(x='time', grid=True, widget_location='bottom', groupby='id')"
]
},
{
@@ -329,7 +322,7 @@
"source": [
"In a future release, plotting will be handled by a proper function. For now, we'll show an example in this Notebook using preliminary utilities.\n",
"\n",
- "`xhfa.local._prepare_plots` generates datapoints required to plot the results of the frequency analysis. If `log=True`,will it return log-spaced x values between `xmin` and `xmax`.\n"
+ "`xhfa.local._prepare_plots` generates datapoints required to plot the results of the frequency analysis. If `log=True`, it will return log-spaced x values between `xmin` and `xmax`.\n"
]
},
{
@@ -390,20 +383,6 @@
"#And now combining the plots\n",
"p1 * p2"
]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
}
],
"metadata": {
@@ -422,7 +401,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.7"
+ "version": "3.12.2"
},
"vscode": {
"interpreter": {
diff --git a/environment-dev.yml b/environment-dev.yml
index b8bb2d65..1ac86bdc 100644
--- a/environment-dev.yml
+++ b/environment-dev.yml
@@ -10,12 +10,12 @@ dependencies:
- pydantic >=2.0,<2.5.3 # FIXME: Remove pin once our dependencies (xclim, xscen) support pydantic 2.5.3
- spotpy
- statsmodels
- - xarray
- - xclim >=0.47.0
- - xscen >=0.7.1
+ - xarray >=2023.11.0
+ - xclim >=0.48.2
+ - xscen >=0.8.3
- pip
- pip:
- - xdatasets
+ - xdatasets >=0.3.1
# Dev
- black ==24.1.1
- blackdoc ==0.3.9
diff --git a/environment.yml b/environment.yml
index 2b0a6268..3688b043 100644
--- a/environment.yml
+++ b/environment.yml
@@ -10,9 +10,9 @@ dependencies:
- pydantic >=2.0,<2.5.3 # FIXME: Remove pin once our dependencies (xclim, xscen) support pydantic 2.5.3
- spotpy
- statsmodels
- - xarray
- - xclim >=0.47.0
- - xscen >=0.7.1
+ - xarray >=2023.11.0
+ - xclim >=0.48.2
+ - xscen >=0.8.3
- pip
- pip:
- - xdatasets
+ - xdatasets >=0.3.1
diff --git a/pyproject.toml b/pyproject.toml
index 504f7f30..9aafecbb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -42,10 +42,10 @@ dependencies = [
"pydantic>=2.0,<2.5.3",
"spotpy",
"statsmodels",
- "xarray",
- "xclim>=0.47.0",
+ "xarray>=2023.11.0",
+ "xclim>=0.48.2",
"xdatasets>=0.3.1",
- "xscen>=0.7.1"
+ "xscen>=0.8.3"
]
[project.optional-dependencies]
diff --git a/tests/test_cc.py b/tests/test_cc.py
index 41918422..84510193 100644
--- a/tests/test_cc.py
+++ b/tests/test_cc.py
@@ -1,3 +1,7 @@
+import numpy as np
+import pytest
+import xarray as xr
+
import xhydro as xh
@@ -12,3 +16,384 @@ def test_xscen_imported():
"produce_horizon",
]
)
+
+
+class TestSampledIndicators:
+ @pytest.mark.parametrize("delta_type", ["absolute", "percentage", "foo"])
+ def test_sampled_indicators_type(self, delta_type):
+ ds = xr.DataArray(
+ np.arange(1, 8), coords={"percentile": [1, 10, 20, 50, 80, 90, 99]}
+ ).to_dataset(name="QMOYAN")
+ deltas = xr.DataArray(
+ np.arange(-10, 55, 5),
+ coords={"realization": np.arange(13)},
+ ).to_dataset(name="QMOYAN")
+ if delta_type in ["absolute", "percentage"]:
+ out = xh.cc.sampled_indicators(
+ ds, deltas, delta_type, n=10, seed=42, return_dist=True
+ )
+
+ np.testing.assert_array_equal(
+ out[0]["percentile"], [1, 10, 20, 50, 80, 90, 99]
+ )
+ assert all(
+ chosen in np.arange(1, 8) for chosen in np.unique(out[1].QMOYAN.values)
+ )
+ assert all(
+ chosen in np.arange(-10, 55, 5)
+ for chosen in np.unique(out[2].QMOYAN.values)
+ )
+
+ if delta_type == "absolute":
+ assert (
+ np.min(out[3].QMOYAN) >= 1 - 10
+            )  # Min of historical plus the smallest delta
+ assert (
+ np.max(out[3].QMOYAN) <= 7 + 50
+ ) # Max of historical plus max of deltas
+ np.testing.assert_array_almost_equal(
+ out[0]["QMOYAN"].values, [-3.0, -3.0, 14.6, 40.0, 46.2, 51.6, 56.46]
+ )
+ else:
+ assert np.min(out[3].QMOYAN) >= 1 * (
+ 1 - 10 / 100
+            )  # Min of historical times (1 + smallest delta in %)
+ assert np.max(out[3].QMOYAN) <= 7 * (
+ 1 + 50 / 100
+            )  # Max of historical times (1 + largest delta in %)
+ np.testing.assert_array_almost_equal(
+ out[0]["QMOYAN"].values, [1.9, 1.9, 4.06, 6.75, 7.34, 8.88, 10.338]
+ )
+
+ else:
+ with pytest.raises(
+ ValueError, match=f"Unknown operation '{delta_type}', expected one"
+ ):
+ xh.cc.sampled_indicators(ds, deltas, delta_type, n=10, seed=42)
+
+ def test_sampled_indicators_return(self):
+ ds = xr.DataArray(
+ np.arange(1, 8), coords={"percentile": [1, 10, 20, 50, 80, 90, 99]}
+ ).to_dataset(name="QMOYAN")
+ deltas = xr.DataArray(
+ np.arange(-10, 55, 5),
+ coords={"realization": np.arange(13)},
+ ).to_dataset(name="QMOYAN")
+
+ out1 = xh.cc.sampled_indicators(
+ ds, deltas, "absolute", n=10, seed=42, return_dist=False
+ )
+ assert isinstance(out1, xr.Dataset)
+
+ out2 = xh.cc.sampled_indicators(
+ ds, deltas, "absolute", n=10, seed=42, return_dist=True
+ )
+ assert isinstance(out2, tuple)
+ assert len(out2) == 4
+ assert all(isinstance(o, xr.Dataset) for o in out2)
+ assert out2[0].equals(out1)
+ for o in out2[1:]:
+ assert list(o.dims) == ["sample"]
+ assert len(o["sample"]) == 10
+
+ @pytest.mark.parametrize("weights", [None, "ds", "deltas", "both"])
+ def test_sampled_indicators_weights(self, weights):
+ ds = xr.DataArray(
+ np.array(
+ [
+ [[1, 2, 3, 4, 5], [101, 102, 103, 104, 105]],
+ [[6, 7, 8, 9, 10], [106, 107, 108, 109, 110]],
+ ]
+ ),
+ dims=("foo", "station", "percentile"),
+ coords={
+ "percentile": [1, 25, 50, 75, 99],
+ "station": ["a", "b"],
+ "foo": ["bar", "baz"],
+ },
+ ).to_dataset(name="QMOYAN")
+ deltas = xr.DataArray(
+ np.array(
+ [
+ [[-10, -5, 0, 5, 10], [-25, -20, -15, -10, -5]],
+ [[40, 45, 50, 55, 60], [115, 120, 125, 130, 135]],
+ ]
+ ),
+ dims=("platform", "station", "realization"),
+ coords={
+ "realization": [1, 25, 50, 75, 99],
+ "station": ["a", "b"],
+ "platform": ["x", "y"],
+ },
+ ).to_dataset(name="QMOYAN")
+ ds_weights = xr.DataArray(
+ np.array([0, 1]),
+ dims="foo",
+ coords={
+ "foo": ["bar", "baz"],
+ },
+ )
+ delta_weights = xr.DataArray(
+ np.array([[0, 0, 0, 1, 0], [0, 0, 5, 0, 0]]),
+ dims=("platform", "realization"),
+ coords={
+ "realization": [1, 25, 50, 75, 99],
+ "platform": ["x", "y"],
+ },
+ )
+
+ expected_out1 = (
+ np.arange(1, 11)
+ if (weights is None or weights == "deltas")
+ else np.arange(6, 11)
+ )
+ expected_out2 = (
+ [-10, -5, 0, 5, 10, 40, 45, 50, 55, 60]
+ if (weights is None or weights == "ds")
+ else [5, 50]
+ )
+
+ if weights is None:
+ out = xh.cc.sampled_indicators(
+ ds, deltas, "percentage", n=10, seed=42, return_dist=True
+ )
+ np.testing.assert_array_almost_equal(
+ out[0].QMOYAN.isel(station=0).values, [1.809, 5.5, 11.8, 12.0, 15.8155]
+ )
+ elif weights == "ds":
+ out = xh.cc.sampled_indicators(
+ ds,
+ deltas,
+ "percentage",
+ n=10,
+ seed=42,
+ return_dist=True,
+ ds_weights=ds_weights,
+ )
+ np.testing.assert_array_almost_equal(
+ out[0].QMOYAN.isel(station=0).values,
+ [5.427, 8.8, 13.275, 13.5, 15.8155],
+ )
+ elif weights == "deltas":
+ out = xh.cc.sampled_indicators(
+ ds,
+ deltas,
+ "percentage",
+ n=10,
+ seed=42,
+ return_dist=True,
+ delta_weights=delta_weights,
+ )
+ np.testing.assert_array_almost_equal(
+ out[0].QMOYAN.isel(station=0).values, [2.1, 7.5, 12.0, 12.0, 14.865]
+ )
+ assert sum(out[2].QMOYAN.isel(station=0).values == 5) < sum(
+ out[2].QMOYAN.isel(station=0).values == 50
+ ) # 50 should be sampled more often
+ elif weights == "both":
+ out = xh.cc.sampled_indicators(
+ ds,
+ deltas,
+ "percentage",
+ n=10,
+ seed=42,
+ return_dist=True,
+ ds_weights=ds_weights,
+ delta_weights=delta_weights,
+ )
+ np.testing.assert_array_almost_equal(
+ out[0].QMOYAN.isel(station=0).values, [6.3, 12.0, 13.5, 13.5, 14.865]
+ )
+
+ assert all(
+ chosen in expected_out1
+ for chosen in np.unique(out[1].QMOYAN.isel(station=0).values)
+ )
+ assert all(
+ chosen in expected_out2
+ for chosen in np.unique(out[2].QMOYAN.isel(station=0).values)
+ )
+ assert all(
+ "station" in o.dims for o in out
+ ) # "station" is a shared dimension, so it should be in all outputs
+
+ def test_sampled_indicators_weight_err(self):
+ ds = xr.DataArray(
+ np.array(
+ [
+ [[1, 2, 3, 4, 5], [101, 102, 103, 104, 105]],
+ [[6, 7, 8, 9, 10], [106, 107, 108, 109, 110]],
+ ]
+ ),
+ dims=("platform", "station", "percentile"),
+ coords={
+ "percentile": [1, 25, 50, 75, 99],
+ "station": ["a", "b"],
+ "platform": ["x", "y"],
+ },
+ ).to_dataset(name="QMOYAN")
+ deltas = xr.DataArray(
+ np.array(
+ [
+ [[-10, -5, 0, 5, 10], [-25, -20, -15, -10, -5]],
+ [[40, 45, 50, 55, 60], [115, 120, 125, 130, 135]],
+ ]
+ ),
+ dims=("platform", "station", "realization"),
+ coords={
+ "realization": [1, 25, 50, 75, 99],
+ "station": ["a", "b"],
+ "platform": ["x", "y"],
+ },
+ ).to_dataset(name="QMOYAN")
+ delta_weights = xr.DataArray(
+ np.array([[0, 0, 0, 1, 0], [0, 0, 5, 0, 0]]),
+ dims=("platform", "realization"),
+ coords={
+ "realization": [1, 25, 50, 75, 99],
+ "platform": ["x", "y"],
+ },
+ )
+
+ with pytest.raises(
+ ValueError,
+ match="is shared between 'ds' and 'deltas', but not between 'ds_weights' and 'delta_weights'.",
+ ):
+ xh.cc.sampled_indicators(
+ ds, deltas, "percentage", n=10, seed=42, delta_weights=delta_weights
+ )
+
+ def test_p_weights(self):
+ def _make_da(arr):
+ return xr.DataArray(arr, dims="percentile", coords={"percentile": arr})
+
+ out = xh.cc._percentile_weights(_make_da(np.linspace(0, 100, 101)))
+ ans = np.ones(101)
+ ans[0] = 0.5
+ ans[-1] = 0.5
+ np.testing.assert_array_equal(out, ans)
+
+ out = xh.cc._percentile_weights(
+ _make_da(np.array([1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 99]))
+ )
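+        # Each percentile is weighted by the half-distances to its neighbours;
+        # the edge percentiles also cover the gaps down to 0 and up to 100.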
+ ans = np.array([5.5, 9.5, 10, 10, 10, 10, 10, 10, 10, 9.5, 5.5])
+ np.testing.assert_array_equal(out, ans)
+
+ @pytest.mark.parametrize("weights_ndim", [1, 2])
+ def test_weighted_sampling(self, weights_ndim):
+ data = np.array([[[1, 2, 3], [101, 102, 103]], [[2, 3, 4], [102, 103, 104]]])
+ ds = xr.DataArray(
+ data,
+ dims=("platform", "station", "percentile"),
+ coords={
+ "percentile": [25, 50, 75],
+ "station": ["a", "b"],
+ "platform": ["x", "y"],
+ },
+ ).to_dataset(name="QMOYAN")
+ weights = xr.DataArray(
+ np.array([[15, 100, 33], [500, 0, 0]]).T,
+ dims=("percentile", "platform"),
+ coords={
+ "platform": ["y", "x"], # Invert order to test reordering
+ "percentile": [25, 50, 75],
+ },
+ )
+ if weights_ndim == 1:
+ weights = weights.isel(platform=0)
+
+ out = xh.cc._weighted_sampling(ds, weights, n=10, seed=42)
+ assert set(out.dims) == set(
+ ["station", "sample"] + (["platform"] if weights_ndim == 1 else [])
+ )
+ assert list(out["sample"].coords) == ["sample", "percentile"] + (
+ ["platform"] if weights_ndim == 2 else []
+ )
+ assert len(out["sample"]) == 10
+
+ np.testing.assert_array_equal(
+ out["percentile"],
+ {
+ 1: [50, 50, 75, 50, 25, 75, 50, 75, 50, 50],
+ 2: [25, 25, 50, 25, 25, 75, 25, 25, 25, 25],
+ }[weights_ndim],
+ ) # With seed=42
+ if weights_ndim == 2:
+ np.testing.assert_array_equal(
+ out["platform"], ["y", "x", "y", "x", "x", "y", "x", "y", "x", "x"]
+ )
+
+ def test_weighted_sampling_errors(self):
+ ds = xr.DataArray(
+ np.arange(10),
+ ).to_dataset(
+ name="foo"
+ ) # Doesn't matter what the data is here
+ weights = xr.DataArray(
+ [[1, 2, 3], [4, np.nan, 6]],
+ )
+ with pytest.raises(ValueError, match="The weights contain NaNs."):
+ xh.cc._weighted_sampling(ds, weights, n=10, seed=42)
+
+ @pytest.mark.parametrize("dim", ["percentile", "quantile"])
+ def test_p_or_q(self, dim):
+ if dim == "percentile":
+ da = xr.DataArray(
+ np.linspace(0, 100, 101),
+ dims="percentile",
+ attrs={"units": "%"},
+ )
+ da = da.assign_coords(percentile=da)
+ else:
+ da = xr.DataArray(np.linspace(0, 1, 101), dims="quantile")
+ da = da.assign_coords(quantile=da)
+ ds = da.to_dataset(name="foo").expand_dims({"time": np.arange(10)})
+ out_da, pdim_da, mult_da = xh.cc._perc_or_quantile(da)
+ out_ds, pdim_ds, mult_ds = xh.cc._perc_or_quantile(ds)
+
+ assert out_da.equals(da)
+ assert out_ds.equals(ds[dim])
+ assert pdim_da == pdim_ds == dim
+ assert mult_da == mult_ds == (100 if dim == "percentile" else 1)
+
+ def test_p_or_q_errors(self):
+ # Test 1: DataArray with >1 dimension
+ da = xr.DataArray(
+ np.linspace(0, 1, 101),
+ dims="quantile",
+ ).expand_dims({"time": np.arange(10)})
+ with pytest.raises(ValueError, match="DataArray has more than one dimension"):
+ xh.cc._perc_or_quantile(da)
+
+        # Test 2: DataArray/Dataset with no percentile or quantile dimension
+ da = xr.DataArray(
+ np.linspace(0, 1, 101),
+ dims="foo",
+ )
+ with pytest.raises(
+ ValueError, match="DataArray has no 'percentile' or 'quantile' dimension"
+ ):
+ xh.cc._perc_or_quantile(da)
+ ds = da.to_dataset(name="foo")
+ with pytest.raises(ValueError, match="The Dataset should contain one of "):
+ xh.cc._perc_or_quantile(ds)
+
+ # Wrong range
+ da = xr.DataArray(
+ np.linspace(0, 2, 101),
+ dims="quantile",
+ )
+ with pytest.raises(ValueError, match="values do not seem to be in the "):
+ xh.cc._perc_or_quantile(da)
+ da = xr.DataArray(
+ np.linspace(-1, 1, 101),
+ dims="quantile",
+ )
+ with pytest.raises(ValueError, match="values do not seem to be in the "):
+ xh.cc._perc_or_quantile(da)
+ da = xr.DataArray(
+ np.linspace(0, 1, 101),
+ dims="percentile",
+ )
+ with pytest.raises(ValueError, match="values do not seem to be in the "):
+ xh.cc._perc_or_quantile(da)
diff --git a/tests/test_indicators.py b/tests/test_indicators.py
index 2ab2b45e..4ea4006e 100644
--- a/tests/test_indicators.py
+++ b/tests/test_indicators.py
@@ -1,7 +1,5 @@
import numpy as np
import pytest
-from packaging.version import parse
-from xclim import __version__ as __xclim_version__
from xclim.testing.helpers import test_timeseries as timeseries
import xhydro as xh
@@ -29,11 +27,7 @@ def test_compute_volume(self, freq):
assert out.attrs["long_name"] == "Foo"
assert out.attrs["cell_methods"] == "time: sum"
assert out.attrs["description"] == "Volume of water"
-
- if parse(__xclim_version__) < parse("0.48.0"):
- assert out.attrs["units"] == "m^3"
- else:
- assert out.attrs["units"] == "m3"
+ assert out.attrs["units"] == "m3"
def test_units(self):
da = timeseries(
@@ -46,12 +40,8 @@ def test_units(self):
out_m3 = xh.indicators.compute_volume(da)
out_hm3 = xh.indicators.compute_volume(da, out_units="hm3")
- if parse(__xclim_version__) < parse("0.48.0"):
- assert out_m3.attrs["units"] == "m^3"
- assert out_hm3.attrs["units"] == "hm^3"
- else:
- assert out_m3.attrs["units"] == "m3"
- assert out_hm3.attrs["units"] == "hm3"
+ assert out_m3.attrs["units"] == "m3"
+ assert out_hm3.attrs["units"] == "hm3"
np.testing.assert_array_equal(out_m3 * 1e-6, out_hm3)
@@ -69,9 +59,9 @@ class TestGetYearlyOp:
def test_get_yearly_op(self, op):
timeargs = {
"annual": {},
- "winterdate": {"date_bounds": ["12-01", "02-28"], "freq": "AS-DEC"},
- "winterdoy": {"doy_bounds": [335, 59], "freq": "AS-DEC"},
- "winterdjf": {"season": ["DJF"], "freq": "AS-DEC"},
+ "winterdate": {"date_bounds": ["12-01", "02-28"], "freq": "YS-DEC"},
+ "winterdoy": {"doy_bounds": [335, 59], "freq": "YS-DEC"},
+ "winterdjf": {"season": ["DJF"], "freq": "YS-DEC"},
"summer": {"doy_bounds": [200, 300]},
}
@@ -121,7 +111,7 @@ def test_get_yearly_op(self, op):
)
def test_missing(self):
- timeargs = {"winterdate": {"date_bounds": ["12-01", "02-28"], "freq": "AS-DEC"}}
+ timeargs = {"winterdate": {"date_bounds": ["12-01", "02-28"], "freq": "YS-DEC"}}
out = xh.indicators.get_yearly_op(
self.ds,
op="max",
@@ -158,7 +148,7 @@ def test_sum(self):
timeargs = {
"annual": {},
- "winterdate": {"date_bounds": ["12-01", "02-28"], "freq": "AS-DEC"},
+ "winterdate": {"date_bounds": ["12-01", "02-28"], "freq": "YS-DEC"},
"summer": {"doy_bounds": [200, 300]},
}
out_sum = xh.indicators.get_yearly_op(
@@ -254,6 +244,6 @@ def test_errors(self):
self.ds,
op="max",
timeargs={
- "annual": {"date_bounds": ["06-01", "04-30"], "freq": "AS-DEC"}
+ "annual": {"date_bounds": ["06-01", "04-30"], "freq": "YS-DEC"}
},
)
diff --git a/xhydro/cc.py b/xhydro/cc.py
index ffaedd06..ea5063e5 100644
--- a/xhydro/cc.py
+++ b/xhydro/cc.py
@@ -1,43 +1,261 @@
"""Module to compute climate change statistics using xscen functions."""
-import xarray
+from typing import Optional, Union
+
+import numpy as np
+import xarray as xr
# Special imports from xscen
-from xscen import ( # FIXME: To be replaced with climatological_op once available
- climatological_mean,
- compute_deltas,
- ensemble_stats,
- produce_horizon,
-)
+from xscen import climatological_op, compute_deltas, ensemble_stats, produce_horizon
__all__ = [
"climatological_op",
"compute_deltas",
"ensemble_stats",
"produce_horizon",
+ "sampled_indicators",
]
-# FIXME: To be deleted once climatological_op is available in xscen
-def climatological_op(ds: xarray.Dataset, **kwargs: dict) -> xarray.Dataset:
- r"""Compute climatological operation.
+def sampled_indicators(
+ ds: xr.Dataset,
+ deltas: xr.Dataset,
+ delta_type: str,
+ *,
+ ds_weights: Optional[xr.DataArray] = None,
+ delta_weights: Optional[xr.DataArray] = None,
+ n: int = 50000,
+    seed: Optional[int] = None,
+ return_dist: bool = False,
+) -> Union[xr.Dataset, tuple[xr.Dataset, xr.Dataset, xr.Dataset, xr.Dataset]]:
+ """Compute future indicators using a perturbation approach and random sampling.
Parameters
----------
- ds : xarray.Dataset
- Input dataset.
- \*\*kwargs : dict
- Keyword arguments passed to :py:func:`xscen.aggregate.climatological_mean`.
+ ds : xr.Dataset
+ Dataset containing the historical indicators. The indicators are expected to be represented by a distribution of pre-computed percentiles.
+ The percentiles should be stored in either a dimension called "percentile" [0, 100] or "quantile" [0, 1].
+ deltas : xr.Dataset
+ Dataset containing the future deltas to apply to the historical indicators.
+ delta_type : str
+ Type of delta provided. Must be one of ['absolute', 'percentage'].
+ ds_weights : xr.DataArray, optional
+ Weights to use when sampling the historical indicators, for dimensions other than 'percentile'/'quantile'.
+ Dimensions not present in this Dataset, or if None, will be sampled uniformly unless they are shared with 'deltas'.
+ delta_weights : xr.DataArray, optional
+ Weights to use when sampling the deltas, such as along the 'realization' dimension.
+ Dimensions not present in this Dataset, or if None, will be sampled uniformly unless they are shared with 'ds'.
+ n : int
+ Number of samples to generate.
+ seed : int
+ Seed to use for the random number generator.
+ return_dist : bool
+ Whether to return the full distributions (ds, deltas, fut) or only the percentiles.
Returns
-------
- xarray.Dataset
- Output dataset.
+ fut_pct : xr.Dataset
+ Dataset containing the future percentiles.
+ ds_dist : xr.Dataset
+ The historical distribution, stacked along the 'sample' dimension.
+ deltas_dist : xr.Dataset
+ The delta distribution, stacked along the 'sample' dimension.
+ fut_dist : xr.Dataset
+ The future distribution, stacked along the 'sample' dimension.
Notes
-----
- This is a temporary wrapper to be deleted once climatological_op is available in xscen.
- For the time being, it is a simple wrapper around climatological_mean.
- See :py:func:`xscen.aggregate.climatological_mean` for more details.
+ The future percentiles are computed as follows:
+ 1. Sample 'n' values from the historical distribution, weighting the percentiles by their associated coverage.
+ 2. Sample 'n' values from the delta distribution, using the provided weights.
+ 3. Create the future distribution by applying the sampled deltas to the sampled historical distribution, element-wise.
+ 4. Compute the percentiles of the future distribution.
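+
+    Examples
+    --------
+    A sketch of a typical call, mirroring the documentation notebook (the `ref`,
+    `deltas` and `weights` variables are illustrative):
+
+    >>> fut_pct = sampled_indicators(
+    ...     ref, deltas=deltas, delta_type="percentage", delta_weights=weights, n=50000, seed=0
+    ... )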
+ """
+ # Prepare weights
+ shared_dims = set(ds.dims).intersection(set(deltas.dims))
+ exclude_dims = ["time", "horizon"]
+ percentile_weights = _percentile_weights(ds)
+ if ds_weights is not None:
+ percentile_weights = (
+ percentile_weights.expand_dims(
+ {dim: ds_weights[dim] for dim in ds_weights.dims}
+ )
+ * ds_weights
+ )
+ percentile_weights = percentile_weights.expand_dims(
+ {
+ dim: ds[dim]
+ for dim in set(ds.dims).difference(
+ list(shared_dims) + list(percentile_weights.dims) + exclude_dims
+ )
+ }
+ )
+ if delta_weights is None:
+ dims = set(deltas.dims).difference(list(shared_dims) + exclude_dims)
+ delta_weights = xr.DataArray(
+ np.ones([deltas.sizes[dim] for dim in dims]),
+ coords={dim: deltas[dim] for dim in dims},
+ dims=dims,
+ )
+ delta_weights = delta_weights.expand_dims(
+ {
+ dim: deltas[dim]
+ for dim in set(deltas.dims).difference(
+ list(shared_dims) + list(delta_weights.dims) + exclude_dims
+ )
+ }
+ )
+
+ unique_dims = set(percentile_weights.dims).symmetric_difference(
+ set(delta_weights.dims)
+ )
+    if any(dim in shared_dims for dim in unique_dims):
+ problem_dims = [dim for dim in unique_dims if dim in shared_dims]
+ raise ValueError(
+ f"Dimension(s) {problem_dims} is shared between 'ds' and 'deltas', but not between 'ds_weights' and 'delta_weights'."
+ )
+
+ # Sample the distributions
+ _, pdim, mult = _perc_or_quantile(ds)
+ ds_dist = (
+ _weighted_sampling(ds, percentile_weights, n=n, seed=seed)
+ .drop_vars(["sample", *percentile_weights.dims])
+ .assign_coords({"sample": np.arange(n)})
+ )
+ deltas_dist = (
+ _weighted_sampling(deltas, delta_weights, n=n, seed=seed)
+ .drop_vars(["sample", *delta_weights.dims])
+ .assign_coords({"sample": np.arange(n)})
+ )
+
+    # Element-wise combination of ds_dist and deltas_dist
+ if delta_type == "percentage":
+ fut_dist = ds_dist * (1 + deltas_dist / 100)
+ elif delta_type == "absolute":
+ fut_dist = ds_dist + deltas_dist
+ else:
+ raise ValueError(
+ f"Unknown operation '{delta_type}', expected one of ['absolute', 'percentage']."
+ )
+
+ # Compute future percentiles
+    fut_pct = fut_dist.quantile(ds[pdim] / mult, dim="sample")
+
+ if pdim == "percentile":
+ fut_pct = fut_pct.rename({"quantile": "percentile"})
+ fut_pct["percentile"] = ds.percentile
+
+ if return_dist:
+ return fut_pct, ds_dist, deltas_dist, fut_dist
+ else:
+ return fut_pct
+
+
+def _percentile_weights(da: Union[xr.DataArray, xr.Dataset]) -> xr.DataArray:
+ """Compute the weights associated with percentiles, with support for unevenly spaced percentiles.
+
+ Parameters
+ ----------
+ da : xr.DataArray or xr.Dataset
+ DataArray or Dataset containing the percentiles to use when sampling.
+ The percentiles are expected to be stored in either a dimension called "percentile" [0, 100] or "quantile" [0, 1].
+
+ Returns
+ -------
+ p : xr.DataArray
+ DataArray containing the weights associated with the percentiles.
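+
+    Notes
+    -----
+    Each percentile is weighted by the half-distances to its neighbours, with the
+    first and last percentiles also covering the gap down to 0 and up to 100. For
+    example, percentiles [1, 10, 20, ..., 90, 99] yield weights
+    [5.5, 9.5, 10, ..., 10, 9.5, 5.5].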
+ """
+ pct, pdim, multiplier = _perc_or_quantile(da)
+
+ # Temporarily add a 0 and 100th percentile
+ p0 = xr.DataArray([0], coords={pdim: [0]}, dims=[pdim])
+ p1 = xr.DataArray([multiplier], coords={pdim: [multiplier]}, dims=[pdim])
+ p = xr.concat([p0, pct, p1], dim=pdim)
+ p = p.diff(pdim) / 2
+    # The first and last weights need to be doubled to account for the fact
+    # that the first and last percentiles are not centered
+    p[0] = p[0] * 2
+    p[-1] = p[-1] * 2
+ p = p.rolling({pdim: 2}, center=True).sum().shift({pdim: -1})[:-1]
+
+ return p
+
+
+def _weighted_sampling(
+    ds: xr.Dataset, weights: xr.DataArray, n: int = 50000, seed: Optional[int] = None
+) -> xr.Dataset:
+ """Sample from a distribution with weights.
+
+ Parameters
+ ----------
+ ds : xr.Dataset
+ Dataset containing the distribution to sample from.
+ weights : xr.DataArray
+ Weights to use when sampling.
+ n : int
+ Number of samples to generate.
+ seed : int
+ Seed to use for the random number generator.
+
+ Returns
+ -------
+ ds_dist : xr.Dataset
+ Dataset containing the 'n' samples, stacked along the 'sample' dimension.
"""
- return climatological_mean(ds, **kwargs)
+ # Prepare the weights
+ if weights.isnull().any():
+ raise ValueError("The weights contain NaNs.")
+ weights = weights / weights.sum() # Must equal 1
+
+ # For performance reasons, remove the chunking along impacted dimensions
+ ds = ds.chunk({dim: -1 for dim in weights.dims})
+ weights = weights.chunk({dim: -1 for dim in weights.dims})
+
+ # Stack the dimensions containing weights
+ ds = ds.stack({"sample": sorted(list(weights.dims))})
+ weights = weights.stack({"sample": sorted(list(weights.dims))})
+ weights = weights.reindex_like(ds)
+
+ # Sample the dimensions with weights
+ rng = np.random.default_rng(seed=seed)
+ idx = rng.choice(weights.size, size=n, p=weights)
+
+ # Create the distribution dataset
+ ds_dist = ds.isel({"sample": idx})
+
+ return ds_dist
+
+
+def _perc_or_quantile(
+ da: Union[xr.DataArray, xr.Dataset]
+) -> tuple[xr.DataArray, str, int]:
+ """Return 'percentile' or 'quantile' depending on the name of the percentile dimension."""
+ if isinstance(da, xr.DataArray):
+ if len(da.dims) != 1:
+ raise ValueError(
+ f"DataArray has more than one dimension: received {da.dims}."
+ )
+ pdim = str(da.dims[0])
+ pct = da
+ if pdim not in ["percentile", "quantile"]:
+ raise ValueError(
+ f"DataArray has no 'percentile' or 'quantile' dimension: received {pdim}."
+ )
+ else:
+ pdim = [dim for dim in da.dims if dim in ["percentile", "quantile"]]
+ if len(pdim) != 1:
+ raise ValueError(
+ "The Dataset should contain one of ['percentile', 'quantile']."
+ )
+ pdim = pdim[0]
+ pct = da[pdim]
+
+ multiplier = 100 if pdim == "percentile" else 1
+ if (pct.min() < 0 or pct.max() > multiplier) or (
+ pdim == "percentile" and pct.max() <= 1
+ ):
+ raise ValueError(
+ f"The {pdim} values do not seem to be in the [0, {multiplier}] range."
+ )
+
+ return pct, pdim, multiplier
diff --git a/xhydro/indicators.py b/xhydro/indicators.py
index f42794a3..6909efe0 100644
--- a/xhydro/indicators.py
+++ b/xhydro/indicators.py
@@ -159,7 +159,7 @@ def get_yearly_op(
"DEC",
]
for i in timeargs:
- freq = timeargs[i].get("freq", "AS-JAN")
+ freq = timeargs[i].get("freq", "YS-JAN")
if not xc.core.calendar.compare_offsets(freq, "==", "YS"):
raise ValueError(
f"Frequency {freq} is not supported. Please use a yearly frequency."
diff --git a/xhydro/testing/utils.py b/xhydro/testing/utils.py
index ff52d2f4..385881c9 100644
--- a/xhydro/testing/utils.py
+++ b/xhydro/testing/utils.py
@@ -89,7 +89,7 @@ def publish_release_notes(
changes = re.sub(search, replacement, changes)
if not file:
- return
+ return changes
if isinstance(file, (Path, os.PathLike)):
file = Path(file).open("w")
print(changes, file=file)