From 82bf43c1a59f1fb2c5d8102278b6cabc5fa4cbce Mon Sep 17 00:00:00 2001
From: RondeauG <rondeau-genesse.gabriel@ouranos.ca>
Date: Thu, 7 Nov 2024 13:15:06 -0500
Subject: [PATCH 01/39] first batch of tests

---
 src/xscen/io.py       | 146 ++++++++++++++----------------
 src/xscen/testing.py  | 191 ++++++++++++++++++++++++++++++++++++++-
 src/xscen/utils.py    | 205 ++++++------------------------------------
 tests/test_io.py      | 178 +++++++++++++++++++++++++++++++++++-
 tests/test_testing.py |  66 ++++++++++++++
 tests/test_utils.py   |  47 ----------
 6 files changed, 527 insertions(+), 306 deletions(-)
 create mode 100644 tests/test_testing.py

diff --git a/src/xscen/io.py b/src/xscen/io.py
index f0c2e149..b93dd4f8 100644
--- a/src/xscen/io.py
+++ b/src/xscen/io.py
@@ -99,58 +99,30 @@ def estimate_chunks(  # noqa: C901
         A dictionary mapping dimensions to chunk sizes.
     """
 
-    def _estimate_chunks(ds, target_mb, size_of_slice, rechunk_dims):
-        # Approximate size of the chunks (equal across dims)
+    def _estimate_chunks(da, target_mb, size_of_slice, rechunk_dims):
+        # Divide the dimensions by the smallest dimension
+        min_dim = np.min([da[d].shape[0] for d in rechunk_dims])
+        ratio = {d: da[d].shape[0] / min_dim for d in rechunk_dims}
+
+        # Get the approximate number of chunks, supposing the chunks are cubes
         approx_chunks = np.power(target_mb / size_of_slice, 1 / len(rechunk_dims))
 
-        chunks_per_dim = dict()
-        if len(rechunk_dims) == 1:
-            rounding = (
-                1
-                if ds[rechunk_dims[0]].shape[0] <= 15
-                else 5 if ds[rechunk_dims[0]].shape[0] <= 250 else 10
-            )
-            chunks_per_dim[rechunk_dims[0]] = np.max(
-                [
-                    np.min(
-                        [
-                            int(rounding * np.round(approx_chunks / rounding)),
-                            ds[rechunk_dims[0]].shape[0],
-                        ]
-                    ),
-                    1,
-                ]
-            )
-        elif len(rechunk_dims) == 2:
-            # Adjust approx_chunks based on the ratio of the rectangle sizes
-            for d in rechunk_dims:
-                rounding = (
-                    1 if ds[d].shape[0] <= 15 else 5 if ds[d].shape[0] <= 250 else 10
-                )
-                adjusted_chunk = int(
-                    rounding
-                    * np.round(
-                        approx_chunks
-                        * (
-                            ds[d].shape[0]
-                            / np.prod(
-                                [
-                                    ds[dd].shape[0]
-                                    for dd in rechunk_dims
-                                    if dd not in [d]
-                                ]
-                            )
-                        )
-                        / rounding
-                    )
-                )
-                chunks_per_dim[d] = np.max(
-                    [np.min([adjusted_chunk, ds[d].shape[0]]), 1]
-                )
-        else:
-            raise NotImplementedError(
-                "estimating chunks on more than 2 dimensions is not implemented yet."
-            )
+        # Redistribute the chunks based on the ratio of the dimensions
+        x = (approx_chunks ** len(rechunk_dims) / np.prod(list(ratio.values()))) ** (
+            1 / len(rechunk_dims)
+        )
+        rounding_per_dim = {
+            d: 1 if da[d].shape[0] <= 15 else 5 if da[d].shape[0] <= 250 else 10
+            for d in rechunk_dims
+        }
+        chunks_per_dim = {
+            d: int(rounding_per_dim[d] * np.round(x * ratio[d] / rounding_per_dim[d]))
+            for d in rechunk_dims
+        }
+        chunks_per_dim = {
+            d: np.max([np.min([chunks_per_dim[d], da[d].shape[0]]), 1])
+            for d in rechunk_dims
+        }
 
         return chunks_per_dim
 
@@ -163,7 +135,7 @@ def _estimate_chunks(ds, target_mb, size_of_slice, rechunk_dims):
         for v in ds.variables:
             # Find dimensions to chunk
             rechunk_dims = list(set(dims).intersection(ds.variables[v].dimensions))
-            if not rechunk_dims:
+            if not rechunk_dims or v in ds.dimensions:
                 continue
 
             dtype_size = ds.variables[v].datatype.itemsize
@@ -219,7 +191,7 @@ def _estimate_chunks(ds, target_mb, size_of_slice, rechunk_dims):
 def subset_maxsize(
     ds: xr.Dataset,
     maxsize_gb: float,
-) -> list:
+) -> list[xr.Dataset]:
     """Estimate a dataset's size and, if higher than the given limit, subset it alongside the 'time' dimension.
 
     Parameters
@@ -232,7 +204,7 @@ def subset_maxsize(
 
     Returns
     -------
-    list
+    list of xr.Dataset
         List of xr.Dataset subsetted alongside 'time' to limit the filesize to the requested maximum.
     """
     # Estimate the size of the dataset
@@ -247,11 +219,11 @@ def subset_maxsize(
         logger.info(msg)
         return [ds]
 
-    elif "time" in ds:
+    elif "time" in ds.dims:
         years = np.unique(ds.time.dt.year)
-        ratio = int(len(years) / (size_of_file / maxsize_gb))
+        ratio = np.max([int(len(years) / (size_of_file / maxsize_gb)), 1])
         ds_sub = []
-        for y in range(years[0], years[-1], ratio):
+        for y in range(years[0], years[-1] + 1, ratio):
             ds_sub.extend([ds.sel({"time": slice(str(y), str(y + ratio - 1))})])
         return ds_sub
 
@@ -261,7 +233,11 @@ def subset_maxsize(
         )
 
 
-def clean_incomplete(path: str | os.PathLike, complete: Sequence[str]) -> None:
+def clean_incomplete(
+    path: str | os.PathLike,
+    complete: Sequence[str] | None = None,
+    incomplete: Sequence[str] | None = None,
+) -> None:
     """Delete un-catalogued variables from a zarr folder.
 
     The goal of this function is to clean up an incomplete calculation.
@@ -272,22 +248,42 @@ def clean_incomplete(path: str | os.PathLike, complete: Sequence[str]) -> None:
     ----------
     path : str, Path
         A path to a zarr folder.
-    complete : sequence of strings
-        Name of variables that were completed.
+    complete : sequence of strings, optional
+        Name of variables that were completed. All other variables (except coordinates) will be removed.
+        Use either `complete` or `incomplete`, not both.
+    incomplete : sequence of strings, optional
+        Name of variables that should be removed.
+        Use either `complete` or `incomplete`, not both.
 
     Returns
     -------
     None
     """
     path = Path(path)
-    with xr.open_zarr(path) as ds:
-        complete = set(complete).union(ds.coords.keys())
 
-    for fold in filter(lambda p: p.is_dir(), path.iterdir()):
-        if fold.name not in complete:
-            msg = f"Removing {fold} from disk"
-            logger.warning(msg)
-            sh.rmtree(fold)
+    if complete is not None and incomplete is not None:
+        raise ValueError("Use either `complete` or `incomplete`, not both.")
+
+    if complete is not None:
+        with xr.open_zarr(path) as ds:
+            complete = set(complete).union(ds.coords.keys())
+
+        for fold in filter(lambda p: p.is_dir(), path.iterdir()):
+            if fold.name not in complete:
+                msg = f"Removing {fold} from disk"
+                logger.warning(msg)
+                sh.rmtree(fold)
+
+    elif incomplete is not None:
+        for fold in filter(lambda p: p.is_dir(), path.iterdir()):
+            if fold.name in incomplete:
+                msg = f"Removing {fold} from disk"
+                logger.warning(msg)
+                sh.rmtree(fold)
+
+    # Remove .zmetadata to avoid issues with zarr and xarray
+    if (path / ".zmetadata").exists():
+        Path.unlink(path / ".zmetadata")
 
 
 def _coerce_attrs(attrs):
@@ -319,9 +315,11 @@ def round_bits(da: xr.DataArray, keepbits: int):
     keepbits : int
         The number of bits of the mantissa to keep.
     """
+    encoding = da.encoding
     da = xr.apply_ufunc(
         _np_bitround, da, keepbits, dask="parallelized", keep_attrs=True
     )
+    da.encoding = encoding
     da.attrs["_QuantizeBitRoundNumberOfSignificantDigits"] = keepbits
     new_history = f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Data compressed with BitRound by keeping {keepbits} bits."
     history = (
@@ -510,10 +508,7 @@ def _skip(var):
 
         if mode == "o":
             if exists:
-                var_path = path / var
-                msg = f"Removing {var_path} to overwrite."
-                logger.warning(msg)
-                sh.rmtree(var_path)
+                clean_incomplete(path, incomplete=[var])
             return False
 
         if mode == "a":
@@ -562,9 +557,7 @@ def _skip(var):
                 )
             except TimeoutException:
                 if timeout_cleanup:
-                    msg = f"Removing incomplete {name}."
-                    logger.info(msg)
-                    sh.rmtree(path / name)
+                    clean_incomplete(path, incomplete=[name])
                 raise
 
     else:
@@ -576,10 +569,7 @@ def _skip(var):
             )
         except TimeoutException:
             if timeout_cleanup:
-                msg = f"Removing incomplete {list(ds.data_vars.keys())} for {filename}."
-                logger.info(msg)
-                for name in ds.data_vars:
-                    sh.rmtree(path / name)
+                clean_incomplete(path, incomplete=list(ds.data_vars.keys()))
             raise
 
 
@@ -952,7 +942,7 @@ def rechunk_for_saving(ds: xr.Dataset, rechunk: dict):
             for d in ds[rechunk_var].dims
         )
         ds[rechunk_var].encoding.pop("chunks", None)
-        ds[rechunk_var].encoding.pop("preferred_chunks", None)
+        ds[rechunk_var].encoding["preferred_chunks"] = rechunk_dims
 
     return ds
 
diff --git a/src/xscen/testing.py b/src/xscen/testing.py
index 232437e1..a04cee31 100644
--- a/src/xscen/testing.py
+++ b/src/xscen/testing.py
@@ -1,12 +1,19 @@
 """Testing utilities for xscen."""
 
+import os
+import re
+from io import StringIO
+from pathlib import Path
+from typing import TextIO
+
 import cartopy.crs as ccrs
 import numpy as np
 import pandas as pd
 import xarray as xr
 from xclim.testing.helpers import test_timeseries as timeseries
+from xclim.testing.utils import show_versions as _show_versions
 
-__all__ = ["datablock_3d", "fake_data"]
+__all__ = ["datablock_3d", "fake_data", "publish_release_notes", "show_versions"]
 
 
 def datablock_3d(
@@ -242,3 +249,185 @@ def fake_data(
     data = data + offset - (np.random.random() * amplitude - amplitude / 2)
 
     return data
+
+
+def publish_release_notes(
+    style: str = "md",
+    file: os.PathLike | StringIO | TextIO | None = None,
+    changes: str | os.PathLike | None = None,
+    latest: bool = True,
+) -> str | None:
+    """Format release history in Markdown or ReStructuredText.
+
+    Parameters
+    ----------
+    style : {"rst", "md"}
+        Use ReStructuredText (`rst`) or Markdown (`md`) formatting. Default: Markdown.
+    file : {os.PathLike, StringIO, TextIO, None}
+        If provided, writes to the given path or file-like object and returns None. Otherwise, returns a string.
+    changes : {str, os.PathLike}, optional
+        Path to the changelog. Defaults to the `CHANGELOG.rst` found in the third parent directory of this file.
+    latest : bool
+        Whether to return the release notes of the latest version or all the content of the changelog.
+
+    Returns
+    -------
+    str, optional
+
+    Notes
+    -----
+    This function exists solely for development purposes. Adapted from xclim.testing.utils.publish_release_notes.
+    """
+    if isinstance(changes, str | os.PathLike):
+        changes_file = Path(changes).absolute()
+    else:
+        changes_file = Path(__file__).absolute().parents[2].joinpath("CHANGELOG.rst")
+
+    if not changes_file.exists():
+        raise FileNotFoundError("Changes file not found in xscen file tree.")
+
+    with Path(changes_file).open(encoding="utf-8") as f:
+        changes = f.read()
+
+    if style == "rst":
+        hyperlink_replacements = {
+            r":issue:`([0-9]+)`": r"`GH/\1 <https://github.com/Ouranosinc/xscen/issues/\1>`_",
+            r":pull:`([0-9]+)`": r"`PR/\1 <https://github.com/Ouranosinc/xscen/pull/\>`_",
+            r":user:`([a-zA-Z0-9_.-]+)`": r"`@\1 <https://github.com/\1>`_",
+        }
+    elif style == "md":
+        hyperlink_replacements = {
+            r":issue:`([0-9]+)`": r"[GH/\1](https://github.com/Ouranosinc/xscen/issues/\1)",
+            r":pull:`([0-9]+)`": r"[PR/\1](https://github.com/Ouranosinc/xscen/pull/\1)",
+            r":user:`([a-zA-Z0-9_.-]+)`": r"[@\1](https://github.com/\1)",
+        }
+    else:
+        raise NotImplementedError(f"Unsupported style: {style!r}. Use 'md' or 'rst'.")
+
+    for search, replacement in hyperlink_replacements.items():
+        changes = re.sub(search, replacement, changes)
+
+    if latest:
+        # Keep the header and the first "v0." section; a changelog without a second part is kept whole.
+        changes = "\n\nv0.".join(changes.split("\n\nv0.")[:2])
+
+    if style == "md":
+        changes = changes.replace("=========\nChangelog\n=========", "# Changelog")
+
+        titles = {r"\n(.*?)\n([\-]{1,})": "-", r"\n(.*?)\n([\^]{1,})": "^"}
+        for title_expression, level in titles.items():
+            found = re.findall(title_expression, changes)
+            for grouping in found:
+                fixed_grouping = (
+                    str(grouping[0]).replace("(", r"\(").replace(")", r"\)")
+                )
+                search = rf"({fixed_grouping})\n([\{level}]{'{' + str(len(grouping[1])) + '}'})"
+                replacement = f"{'##' if level == '-' else '###'} {grouping[0]}"
+                changes = re.sub(search, replacement, changes)
+
+        link_expressions = r"[\`]{1}([\w\s]+)\s<(.+)>`\_"
+        found = re.findall(link_expressions, changes)
+        for grouping in found:
+            search = rf"`{grouping[0]} <.+>`\_"
+            replacement = f"[{str(grouping[0]).strip()}]({grouping[1]})"
+            changes = re.sub(search, replacement, changes)
+
+    if not file:
+        return changes
+    if isinstance(file, os.PathLike):
+        Path(file).write_text(f"{changes}\n", encoding="utf-8")
+    else:
+        print(changes, file=file)
+
+
+def show_versions(
+    file: os.PathLike | StringIO | TextIO | None = None,
+    deps: list | None = None,
+) -> str | None:
+    """Print the versions of xscen and its dependencies.
+
+    Parameters
+    ----------
+    file : {os.PathLike, StringIO, TextIO}, optional
+        If provided, prints to the given file-like object. Otherwise, returns a string.
+    deps : list, optional
+        A list of dependencies to gather and print version information from. Otherwise, prints `xscen` dependencies.
+
+    Returns
+    -------
+    str or None
+    """
+    if deps is None:
+        deps = [
+            "xscen",
+            # Main packages
+            "cartopy",
+            "cftime",
+            "cf_xarray",
+            "clisops",
+            "dask",
+            "flox",
+            "fsspec",
+            "geopandas",
+            "h5netcdf",
+            "h5py",
+            "intake_esm",
+            "matplotlib",
+            "netCDF4",
+            "numcodecs",
+            "numpy",
+            "pandas",
+            "parse",
+            "pyyaml",
+            "rechunker",
+            "scipy",
+            "shapely",
+            "sparse",
+            "toolz",
+            "xarray",
+            "xclim",
+            "xesmf",
+            "zarr",
+            # Opt
+            "nc-time-axis",
+            "pyarrow",
+            # Dev
+            "babel",
+            "black",
+            "blackdoc",
+            "bump-my-version",
+            "coverage",
+            "coveralls",
+            "flake8",
+            "flake8-rst-docstrings",
+            "ipykernel",
+            "ipython",
+            "isort",
+            "jupyter_client",
+            "nbsphinx",
+            "nbval",
+            "pandoc",
+            "pooch",
+            "pre-commit",
+            "pytest",
+            "pytest-cov",
+            "ruff",
+            "setuptools",
+            "setuptools-scm",
+            "sphinx",
+            "sphinx-autoapi",
+            "sphinx-rtd-theme",
+            "sphinxcontrib-napoleon",
+            "sphinx-codeautolink",
+            "sphinx-copybutton",
+            "sphinx-mdinclude",
+            "watchdog",
+            "xdoctest",
+            "tox",
+            "build",
+            "wheel",
+            "pip",
+            "flake8-alphabetize",
+        ]
+
+    return _show_versions(file=file, deps=deps)
diff --git a/src/xscen/utils.py b/src/xscen/utils.py
index d68bebec..c9e86701 100644
--- a/src/xscen/utils.py
+++ b/src/xscen/utils.py
@@ -11,11 +11,9 @@
 from collections.abc import Sequence
 from copy import deepcopy
 from datetime import datetime
-from io import StringIO
 from itertools import chain
 from pathlib import Path
 from types import ModuleType
-from typing import TextIO
 
 import cftime
 import flox.xarray
@@ -28,7 +26,6 @@
 from xclim.core.options import METADATA_LOCALES
 from xclim.core.options import OPTIONS as XC_OPTIONS
 from xclim.core.utils import uses_dask
-from xclim.testing.utils import show_versions as _show_versions
 
 from .config import parse_config
 
@@ -46,7 +43,6 @@
     "maybe_unstack",
     "minimum_calendar",
     "natural_sort",
-    "publish_release_notes",
     "stack_drop_nans",
     "standardize_periods",
     "translate_time_chunk",
@@ -1089,88 +1085,6 @@ def clean_up(  # noqa: C901
     return ds
 
 
-def publish_release_notes(
-    style: str = "md",
-    file: os.PathLike | StringIO | TextIO | None = None,
-    changes: str | os.PathLike | None = None,
-) -> str | None:
-    """Format release history in Markdown or ReStructuredText.
-
-    Parameters
-    ----------
-    style : {"rst", "md"}
-        Use ReStructuredText (`rst`) or Markdown (`md`) formatting. Default: Markdown.
-    file : {os.PathLike, StringIO, TextIO, None}
-        If provided, prints to the given file-like object. Otherwise, returns a string.
-    changes : {str, os.PathLike}, optional
-        If provided, manually points to the file where the changelog can be found.
-        Assumes a relative path otherwise.
-
-    Returns
-    -------
-    str, optional
-
-    Notes
-    -----
-    This function exists solely for development purposes. Adapted from xclim.testing.utils.publish_release_notes.
-    """
-    if isinstance(changes, str | Path):
-        changes_file = Path(changes).absolute()
-    else:
-        changes_file = Path(__file__).absolute().parents[2].joinpath("CHANGELOG.rst")
-
-    if not changes_file.exists():
-        raise FileNotFoundError("Changes file not found in xscen file tree.")
-
-    with Path(changes_file).open(encoding="utf-8") as f:
-        changes = f.read()
-
-    if style == "rst":
-        hyperlink_replacements = {
-            r":issue:`([0-9]+)`": r"`GH/\1 <https://github.com/Ouranosinc/xscen/issues/\1>`_",
-            r":pull:`([0-9]+)`": r"`PR/\1 <https://github.com/Ouranosinc/xscen/pull/\>`_",
-            r":user:`([a-zA-Z0-9_.-]+)`": r"`@\1 <https://github.com/\1>`_",
-        }
-    elif style == "md":
-        hyperlink_replacements = {
-            r":issue:`([0-9]+)`": r"[GH/\1](https://github.com/Ouranosinc/xscen/issues/\1)",
-            r":pull:`([0-9]+)`": r"[PR/\1](https://github.com/Ouranosinc/xscen/pull/\1)",
-            r":user:`([a-zA-Z0-9_.-]+)`": r"[@\1](https://github.com/\1)",
-        }
-    else:
-        raise NotImplementedError()
-
-    for search, replacement in hyperlink_replacements.items():
-        changes = re.sub(search, replacement, changes)
-
-    if style == "md":
-        changes = changes.replace("=========\nChangelog\n=========", "# Changelog")
-
-        titles = {r"\n(.*?)\n([\-]{1,})": "-", r"\n(.*?)\n([\^]{1,})": "^"}
-        for title_expression, level in titles.items():
-            found = re.findall(title_expression, changes)
-            for grouping in found:
-                fixed_grouping = (
-                    str(grouping[0]).replace("(", r"\(").replace(")", r"\)")
-                )
-                search = rf"({fixed_grouping})\n([\{level}]{'{' + str(len(grouping[1])) + '}'})"
-                replacement = f"{'##' if level == '-' else '###'} {grouping[0]}"
-                changes = re.sub(search, replacement, changes)
-
-        link_expressions = r"[\`]{1}([\w\s]+)\s<(.+)>`\_"
-        found = re.findall(link_expressions, changes)
-        for grouping in found:
-            search = rf"`{grouping[0]} <.+>`\_"
-            replacement = f"[{str(grouping[0]).strip()}]({grouping[1]})"
-            changes = re.sub(search, replacement, changes)
-
-    if not file:
-        return changes
-    if isinstance(file, Path | os.PathLike):
-        file = Path(file).open("w")
-    print(changes, file=file)
-
-
 def unstack_dates(  # noqa: C901
     ds: xr.Dataset,
     seasons: dict[int, str] | None = None,
@@ -1346,99 +1260,6 @@ def reshape_da(da):
     return dso.assign_coords(**new_coords)
 
 
-def show_versions(
-    file: os.PathLike | StringIO | TextIO | None = None,
-    deps: list | None = None,
-) -> str | None:
-    """Print the versions of xscen and its dependencies.
-
-    Parameters
-    ----------
-    file : {os.PathLike, StringIO, TextIO}, optional
-        If provided, prints to the given file-like object. Otherwise, returns a string.
-    deps : list, optional
-        A list of dependencies to gather and print version information from. Otherwise, prints `xscen` dependencies.
-
-    Returns
-    -------
-    str or None
-    """
-    if deps is None:
-        deps = [
-            "xscen",
-            # Main packages
-            "cartopy",
-            "cftime",
-            "cf_xarray",
-            "clisops",
-            "dask",
-            "flox",
-            "fsspec",
-            "geopandas",
-            "h5netcdf",
-            "h5py",
-            "intake_esm",
-            "matplotlib",
-            "netCDF4",
-            "numcodecs",
-            "numpy",
-            "pandas",
-            "parse",
-            "pyyaml",
-            "rechunker",
-            "scipy",
-            "shapely",
-            "sparse",
-            "toolz",
-            "xarray",
-            "xclim",
-            "xesmf",
-            "zarr",
-            # Opt
-            "nc-time-axis",
-            "pyarrow",
-            # Dev
-            "babel",
-            "black",
-            "blackdoc",
-            "bump-my-version",
-            "coverage",
-            "coveralls",
-            "flake8",
-            "flake8-rst-docstrings",
-            "ipykernel",
-            "ipython",
-            "isort",
-            "jupyter_client",
-            "nbsphinx",
-            "nbval",
-            "pandoc",
-            "pooch",
-            "pre-commit",
-            "pytest",
-            "pytest-cov",
-            "ruff",
-            "setuptools",
-            "setuptools-scm",
-            "sphinx",
-            "sphinx-autoapi",
-            "sphinx-rtd-theme",
-            "sphinxcontrib-napoleon",
-            "sphinx-codeautolink",
-            "sphinx-copybutton",
-            "sphinx-mdinclude",
-            "watchdog",
-            "xdoctest",
-            "tox",
-            "build",
-            "wheel",
-            "pip",
-            "flake8-alphabetize",
-        ]
-
-    return _show_versions(file=file, deps=deps)
-
-
 def ensure_correct_time(ds: xr.Dataset, xrfreq: str) -> xr.Dataset:
     """Ensure a dataset has the correct time coordinate, as expected for the given frequency.
 
@@ -1616,3 +1437,29 @@ def rechunk_for_resample(obj: xr.DataArray | xr.Dataset, **resample_kwargs):
 
     res = obj.resample(**resample_kwargs)
     return flox.xarray.rechunk_for_blockwise(obj, res._dim, res._codes)
+
+
+def publish_release_notes(*args, **kwargs):
+    """Deprecated alias: warn, then forward all arguments to `xscen.testing.publish_release_notes`."""
+    warnings.warn(
+        "'xscen.utils.publish_release_notes' has been moved to 'xscen.testing.publish_release_notes'. "
+        "Support for this function will be removed in xscen v0.12.0.",
+        FutureWarning,
+        stacklevel=2,
+    )
+    from .testing import publish_release_notes as prn
+
+    return prn(*args, **kwargs)
+
+
+def show_versions(*args, **kwargs):
+    """Deprecated alias: warn, then forward all arguments to `xscen.testing.show_versions`."""
+    warnings.warn(
+        "'xscen.utils.show_versions' has been moved to 'xscen.testing.show_versions'. "
+        "Support for this function will be removed in xscen v0.12.0.",
+        FutureWarning,
+        stacklevel=2,
+    )
+    from .testing import show_versions as sv
+
+    return sv(*args, **kwargs)
diff --git a/tests/test_io.py b/tests/test_io.py
index 7db611c9..f486de09 100644
--- a/tests/test_io.py
+++ b/tests/test_io.py
@@ -1,9 +1,147 @@
+import os
+from pathlib import Path
+
 import numpy as np
 import pytest
 import xarray as xr
 import xclim as xc
+from xclim.testing.helpers import test_timeseries as timeseries
 
 import xscen as xs
+from xscen.testing import datablock_3d
+
+
+@pytest.mark.parametrize("suffix", [".zarr", ".zarr.zip", "h5", "nc"])
+def test_get_engine(tmpdir, suffix):
+    """Check the engine returned for zarr paths and for netCDF files written with each backend."""
+    if suffix in [".zarr", ".zarr.zip"]:
+        path = "some/path" + suffix
+        assert xs.io.get_engine(path) == "zarr"
+    else:
+        nc_path = Path(tmpdir) / "test.nc"
+        ds = timeseries(
+            np.zeros(60),
+            variable="tas",
+            as_dataset=True,
+        )
+        ds.to_netcdf(
+            nc_path,
+            engine="netcdf4" if suffix == "nc" else "h5netcdf",
+        )
+        # Hard to predict which one
+        assert xs.io.get_engine(nc_path) in ["netcdf4", "h5netcdf"]
+
+
+class TestEstimateChunks:
+    ds = datablock_3d(
+        np.zeros((50, 100, 150)),
+        variable="tas",
+        x="lon",
+        x_start=-70,
+        x_step=0.1,
+        y="lat",
+        y_start=45,
+        y_step=-0.1,
+        as_dataset=True,
+    )
+    ds2 = ds.copy()
+    ds2["tas"] = ds2["tas"].astype(np.float32)
+
+    def test_normal(self):
+        out1 = xs.io.estimate_chunks(self.ds, dims=["time", "lat", "lon"], target_mb=1)
+        assert out1 == {"time": 30, "lat": 55, "lon": 85}
+        out2 = xs.io.estimate_chunks(self.ds2, dims=["time", "lat", "lon"], target_mb=1)
+        assert out2 == {"time": 35, "lat": 70, "lon": 105}
+        out3 = xs.io.estimate_chunks(self.ds, dims=["lat", "lon"], target_mb=1)
+        assert out3 == {"lon": 65, "lat": 40, "time": -1}
+        out4 = xs.io.estimate_chunks(self.ds2, dims=["time"], target_mb=1)
+        assert out4 == {"time": 15, "lat": -1, "lon": -1}
+
+    @pytest.mark.parametrize("chunk_per_variable", [True, False])
+    @pytest.mark.parametrize("as_file", [True, False])
+    def test_multiple_vars(self, tmpdir, chunk_per_variable, as_file):
+        ds = self.ds.copy()
+        ds["pr"] = ds["tas"].isel(time=0)
+
+        if as_file:
+            ds.to_netcdf(Path(tmpdir) / "test.nc")
+            ds = Path(tmpdir) / "test.nc"
+
+        out = xs.io.estimate_chunks(
+            ds, dims=["lat", "lon"], target_mb=1, chunk_per_variable=chunk_per_variable
+        )
+        if chunk_per_variable is False:
+            assert out == {"lon": 65, "lat": 40, "time": -1}
+        else:
+            assert out == {
+                "tas": {"lon": 65, "lat": 40, "time": -1},
+                "pr": {"lon": 150, "lat": 100},
+            }
+
+
+class TestSubsetMaxsize:
+    def test_normal(self):
+        ds = datablock_3d(
+            np.zeros((1500, 5, 5)),
+            variable="tas",
+            x="lon",
+            x_start=-70,
+            x_step=0.1,
+            y="lat",
+            y_start=45,
+            y_step=-0.1,
+            as_dataset=True,
+        )
+        ds["pr"] = ds["tas"]
+        # First, test with a dataset that is already small enough
+        out = xs.io.subset_maxsize(ds, maxsize_gb=1)
+        assert len(out) == 1
+        assert out[0].equals(ds)
+
+        out = xs.io.subset_maxsize(ds, maxsize_gb=0.0005)
+        assert len(out) == 2
+        assert xr.concat(out, dim="time").equals(ds)
+
+    def test_error(self):
+        ds = datablock_3d(
+            np.zeros((1, 50, 10)),
+            variable="tas",
+            x="lon",
+            x_start=-70,
+            x_step=0.1,
+            y="lat",
+            y_start=45,
+            y_step=-0.1,
+            as_dataset=True,
+        )
+        ds = ds.isel(time=0)
+
+        with pytest.raises(NotImplementedError, match="does not contain a"):
+            xs.io.subset_maxsize(ds, maxsize_gb=1e-15)
+
+
+def test_clean_incomplete(tmpdir):
+    ds = datablock_3d(
+        np.ones((5, 5, 5)),
+        variable="tas",
+        x="lon",
+        x_start=-70,
+        x_step=0.1,
+        y="lat",
+        y_start=45,
+        y_step=-0.1,
+        as_dataset=True,
+    )
+    ds["pr"] = ds["tas"].copy()
+    ds.to_zarr(Path(tmpdir) / "test.zarr")
+
+    xs.io.clean_incomplete(Path(tmpdir) / "test.zarr", complete=["tas"])
+    assert Path.exists(Path(tmpdir) / "test.zarr/tas")
+    assert not Path.exists(Path(tmpdir) / "test.zarr/pr")
+
+    ds2 = xr.open_zarr(Path(tmpdir) / "test.zarr")
+    assert "pr" not in ds2
+    assert ds2.equals(ds[["tas"]])
 
 
 class TestRechunkForSaving:
@@ -205,7 +343,7 @@ def test_make_toc(self, as_dataset):
             assert toc.loc["tas", "Unités"] == "K"
 
 
-def test_round_bits(datablock_3d):
+def test_round_bits():
     da = datablock_3d(
         np.random.random((30, 30, 50)),
         variable="tas",
@@ -241,3 +379,41 @@ def test_guess_bitround(self, vname, vtype, bitr, exp):
                 xs.io._get_keepbits(bitr, vname, vtype)
         else:
             assert xs.io._get_keepbits(bitr, vname, vtype) == exp
+
+
+class TestSaveToNetcdf:
+    def test_normal(self, tmpdir):
+        ds = datablock_3d(
+            np.tile(np.arange(1111, 1121), 15).reshape(15, 5, 2) * 1e-7,
+            variable="tas",
+            x="lon",
+            x_start=-70,
+            y="lat",
+            y_start=45,
+            as_dataset=True,
+        )
+        ds["pr"] = ds["tas"].copy()
+        ds["other"] = ds["tas"].copy()
+
+        xs.save_to_netcdf(
+            ds,
+            Path(tmpdir) / "test.nc",
+            rechunk={"time": 5, "lon": 2, "lat": 2},
+            bitround={"tas": 2, "pr": 3},
+        )
+
+        ds2 = xr.open_dataset(Path(tmpdir) / "test.nc", chunks={})
+        assert ds2.tas.chunks == ((5, 5, 5), (2, 2, 1), (2,))
+
+        np.testing.assert_array_almost_equal(
+            ds2.tas.isel(time=0, lat=0, lon=0), [0.00010681], decimal=8
+        )
+        assert ds2.tas.attrs["_QuantizeBitRoundNumberOfSignificantDigits"] == 2
+        np.testing.assert_array_almost_equal(
+            ds2.pr.isel(time=0, lat=0, lon=0), [0.00011444], decimal=8
+        )
+        assert ds2.pr.attrs["_QuantizeBitRoundNumberOfSignificantDigits"] == 3
+        np.testing.assert_array_almost_equal(
+            ds2.other.isel(time=0, lat=0, lon=0), [0.0001111], decimal=8
+        )
+        assert ds2.other.attrs["_QuantizeBitRoundNumberOfSignificantDigits"] == 12
diff --git a/tests/test_testing.py b/tests/test_testing.py
new file mode 100644
index 00000000..74006677
--- /dev/null
+++ b/tests/test_testing.py
@@ -0,0 +1,66 @@
+from pathlib import Path
+
+import pytest
+
+import xscen as xs
+
+
+class TestPublish:
+    @pytest.mark.requires_netcdf
+    @pytest.mark.parametrize("fmt", ["md", "rst"])
+    def test_normal(self, fmt):
+        out = xs.testing.publish_release_notes(
+            fmt, changes=Path(__file__).parent.parent.joinpath("CHANGELOG.rst")
+        )
+        if fmt == "md":
+            assert out.startswith("# Changelog\n\n")
+            assert "[PR/413](https://github.com/Ouranosinc/xscen/pull/413)" in out
+        elif fmt == "rst":
+            assert out.startswith("=========\nChangelog\n=========\n\n")
+            assert "`PR/413 <https://github.com/Ouranosinc/xscen/pull/\\>`_" in out
+
+    def test_error(self):
+        with pytest.raises(FileNotFoundError):
+            xs.testing.publish_release_notes("md", changes="foo")
+        with pytest.raises(NotImplementedError):
+            xs.testing.publish_release_notes(
+                "foo", changes=Path(__file__).parent.parent.joinpath("CHANGELOG.rst")
+            )
+
+    @pytest.mark.requires_netcdf
+    def test_file(self, tmpdir):
+        xs.testing.publish_release_notes(
+            "md",
+            file=tmpdir / "foo.md",
+            changes=Path(__file__).parent.parent.joinpath("CHANGELOG.rst"),
+        )
+        with Path(tmpdir).joinpath("foo.md").open(encoding="utf-8") as f:
+            assert f.read().startswith("# Changelog\n\n")
+
+    @pytest.mark.parametrize("latest", [True, False])
+    @pytest.mark.requires_netcdf
+    def test_latest(self, tmpdir, latest):
+        out = xs.testing.publish_release_notes(
+            "md",
+            file=tmpdir / "foo.md",
+            changes=Path(__file__).parent.parent.joinpath("CHANGELOG.rst"),
+            latest=latest,
+        )
+        if latest:
+            assert len(out.split("\n\n## v0.")) == 2
+        else:
+            assert len(out.split("\n\n## v0.")) > 2
+
+
+def test_show_version(tmpdir):
+    xs.testing.show_versions(file=tmpdir / "versions.txt")
+    with Path(tmpdir).joinpath("versions.txt").open(encoding="utf-8") as f:
+        out = f.read()
+    assert "xscen" in out
+    assert "xclim" in out
+    assert "xarray" in out
+    assert "numpy" in out
+    assert "pandas" in out
+    assert "dask" in out
+    assert "cftime" in out
+    assert "netCDF4" in out
diff --git a/tests/test_utils.py b/tests/test_utils.py
index b5fff0f8..da978353 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -825,39 +825,6 @@ def test_change_prefix(self, change_prefix):
             }
 
 
-class TestPublish:
-    @pytest.mark.requires_netcdf
-    @pytest.mark.parametrize("fmt", ["md", "rst"])
-    def test_normal(self, fmt):
-        out = xs.utils.publish_release_notes(
-            fmt, changes=Path(__file__).parent.parent.joinpath("CHANGELOG.rst")
-        )
-        if fmt == "md":
-            assert out.startswith("# Changelog\n\n")
-            assert "[PR/413](https://github.com/Ouranosinc/xscen/pull/413)" in out
-        elif fmt == "rst":
-            assert out.startswith("=========\nChangelog\n=========\n\n")
-            assert "`PR/413 <https://github.com/Ouranosinc/xscen/pull/\\>`_" in out
-
-    def test_error(self):
-        with pytest.raises(FileNotFoundError):
-            xs.utils.publish_release_notes("md", changes="foo")
-        with pytest.raises(NotImplementedError):
-            xs.utils.publish_release_notes(
-                "foo", changes=Path(__file__).parent.parent.joinpath("CHANGELOG.rst")
-            )
-
-    @pytest.mark.requires_netcdf
-    def test_file(self, tmpdir):
-        xs.utils.publish_release_notes(
-            "md",
-            file=tmpdir / "foo.md",
-            changes=Path(__file__).parent.parent.joinpath("CHANGELOG.rst"),
-        )
-        with Path(tmpdir).joinpath("foo.md").open(encoding="utf-8") as f:
-            assert f.read().startswith("# Changelog\n\n")
-
-
 class TestUnstackDates:
     @pytest.mark.parametrize(
         "freq", ["MS", "2MS", "3MS", "QS-DEC", "QS", "2QS", "YS", "YS-DEC", "4YS"]
@@ -1043,20 +1010,6 @@ def test_errors(self):
             xs.utils.unstack_dates(ds)
 
 
-def test_show_version(tmpdir):
-    xs.utils.show_versions(file=tmpdir / "versions.txt")
-    with Path(tmpdir).joinpath("versions.txt").open(encoding="utf-8") as f:
-        out = f.read()
-    assert "xscen" in out
-    assert "xclim" in out
-    assert "xarray" in out
-    assert "numpy" in out
-    assert "pandas" in out
-    assert "dask" in out
-    assert "cftime" in out
-    assert "netCDF4" in out
-
-
 class TestEnsureTime:
     def test_xrfreq_ok(self):
         ds = timeseries(

From 0bfe1bd02de2d8695868c87cba334f7bd59e2da3 Mon Sep 17 00:00:00 2001
From: RondeauG <rondeau-genesse.gabriel@ouranos.ca>
Date: Mon, 11 Nov 2024 17:07:20 -0500
Subject: [PATCH 02/39] more tests and fixes

---
 environment-dev.yml |   1 +
 environment.yml     |   1 +
 pyproject.toml      |   1 +
 src/xscen/io.py     |  44 +++--
 tests/test_io.py    | 399 +++++++++++++++++++++++++++++++++++++-------
 5 files changed, 378 insertions(+), 68 deletions(-)

diff --git a/environment-dev.yml b/environment-dev.yml
index 5b06e841..35dd5d2a 100644
--- a/environment-dev.yml
+++ b/environment-dev.yml
@@ -21,6 +21,7 @@ dependencies:
   - netCDF4
   - numcodecs
   - numpy >=1.24
+  - openpyxl
   - pandas >=2.2
   - parse
   - pyyaml
diff --git a/environment.yml b/environment.yml
index 3e4232f3..b19c78ea 100644
--- a/environment.yml
+++ b/environment.yml
@@ -21,6 +21,7 @@ dependencies:
   - netCDF4
   - numcodecs
   - numpy >=1.24
+  - openpyxl
   - pandas >=2.2
   - parse
   - pyyaml
diff --git a/pyproject.toml b/pyproject.toml
index fc5f6214..20d61b7c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -54,6 +54,7 @@ dependencies = [
   "netCDF4",
   "numcodecs",
   "numpy >=1.24",
+  "openpyxl",
   "pandas >=2.2",
   "parse",
   # Used when opening catalogs.
diff --git a/src/xscen/io.py b/src/xscen/io.py
index b93dd4f8..d94d8d83 100644
--- a/src/xscen/io.py
+++ b/src/xscen/io.py
@@ -454,7 +454,8 @@ def save_to_zarr(  # noqa: C901
         if 'o', removes the existing variables.
         if 'a', skip existing variables, writes the others.
     encoding : dict, optional
-        If given, skipped variables are popped in place.
+        If given here instead of 'zarr_kwargs', encoding will only be applied to the variables that are being written,
+        skipping those that are already in the zarr.
     bitround : bool or int or dict
         If not False, float variables are bit-rounded by dropping a certain number of bits from their mantissa,
         allowing for a much better compression.
@@ -512,8 +513,21 @@ def _skip(var):
             return False
 
         if mode == "a":
+            # In all cases, we need to skip the encoding of existing variables.
+            if exists:
+                if encoding:
+                    encoding.pop(var, None)
+
+            # If we are not appending, we need to skip the writing of existing variables.
             if "append_dim" not in zarr_kwargs:
                 return exists
+
+            # If we are appending, we need to raise an error if there are new variables.
+            elif exists is False:
+                raise ValueError(
+                    "When 'append_dim' is set in zarr_kwargs, all variables must already exist in the dataset."
+                )
+
             return False
 
     for var in list(ds.data_vars.keys()):
@@ -521,8 +535,7 @@ def _skip(var):
             msg = f"Skipping {var} in {path}."
             logger.info(msg)
             ds = ds.drop_vars(var)
-            if encoding:
-                encoding.pop(var)
+            continue
         if keepbits := _get_keepbits(bitround, var, ds[var].dtype):
             ds = ds.assign({var: round_bits(ds[var], keepbits)})
         # Remove original_shape from encoding, since it can cause issues with some engines.
@@ -773,7 +786,6 @@ def make_toc(ds: xr.Dataset | xr.DataArray, loc: str | None = None) -> pd.DataFr
             for vv, da in ds.data_vars.items()
         ],
     ).set_index(_("Variable"))
-    toc.attrs["name"] = _("Content")
 
     # Add global attributes by using a fake variable and description
     if len(ds.attrs) > 0:
@@ -793,6 +805,7 @@ def make_toc(ds: xr.Dataset | xr.DataArray, loc: str | None = None) -> pd.DataFr
         toc = pd.concat([toc, pd.DataFrame(index=[""])])
         toc = pd.concat([toc, pd.DataFrame(index=[_("Global attributes")])])
         toc = pd.concat([toc, globattr])
+    toc.attrs["name"] = _("Content")
 
     return toc
 
@@ -800,13 +813,13 @@ def make_toc(ds: xr.Dataset | xr.DataArray, loc: str | None = None) -> pd.DataFr
 TABLE_FORMATS = {".csv": "csv", ".xls": "excel", ".xlsx": "excel"}
 
 
-def save_to_table(
+def save_to_table(  # noqa: C901
     ds: xr.Dataset | xr.DataArray,
     filename: str | os.PathLike,
     output_format: str | None = None,
     *,
     row: str | Sequence[str] | None = None,
-    column: None | str | Sequence[str] = "variable",
+    column: None | str | Sequence[str] = None,
     sheet: str | Sequence[str] | None = None,
     coords: bool | Sequence[str] = True,
     col_sep: str = "_",
@@ -814,7 +827,7 @@ def save_to_table(
     add_toc: bool | pd.DataFrame = False,
     **kwargs,
 ):
-    """Save the dataset to a tabular file (csv, excel, ...).
+    r"""Save the dataset to a tabular file (csv, excel, ...).
 
     This function will trigger a computation of the dataset.
 
@@ -835,7 +848,8 @@ def save_to_table(
       Default is all data dimensions.
     column : str or sequence of str, optional
       Name of the dimension(s) to use as columns.
-      Default is "variable", i.e. the name of the variable(s).
+      When using a Dataset with more than 1 variable, default is "variable", i.e. the name of the variable(s).
+      Otherwise (a DataArray or a single-variable Dataset), default is None.
     sheet : str or sequence of str, optional
       Name of the dimension(s) to use as sheet names.
       Only valid if the output format is excel.
@@ -851,7 +865,7 @@ def save_to_table(
       A table of content to add as the first sheet. Only valid if the output format is excel.
       If True, :py:func:`make_toc` is used to generate the toc.
       The sheet name of the toc can be given through the "name" attribute of the DataFrame, otherwise "Content" is used.
-    kwargs:
+    \*\*kwargs:
       Other arguments passed to the pandas function.
       If the output format is excel, kwargs to :py:class:`pandas.ExcelWriter` can be given here as well.
     """
@@ -864,6 +878,9 @@ def save_to_table(
             f"Output format could not be inferred from filename {filename.name}. Please pass `output_format`."
         )
 
+    if column is None and isinstance(ds, xr.Dataset) and len(ds.data_vars) > 1:
+        column = "variable"
+
     if sheet is not None and output_format != "excel":
         raise ValueError(
             f"Argument `sheet` is only valid with excel as the output format. Got {output_format}."
@@ -882,15 +899,22 @@ def save_to_table(
             add_toc = make_toc(ds)
         out = {(add_toc.attrs.get("name", "Content"),): add_toc, **out}
 
-    if sheet or (add_toc is not False):
+    # Get engine_kwargs
+    if output_format == "excel":
         engine_kwargs = {}  # Extract engine kwargs
         for arg in signature(pd.ExcelWriter).parameters:
             if arg in kwargs:
                 engine_kwargs[arg] = kwargs.pop(arg)
+    else:
+        engine_kwargs = {}
 
+    if sheet or (add_toc is not False):
         with pd.ExcelWriter(filename, **engine_kwargs) as writer:
             for sheet_name, df in out.items():
                 df.to_excel(writer, sheet_name=col_sep.join(sheet_name), **kwargs)
+    elif len(engine_kwargs) > 0:
+        with pd.ExcelWriter(filename, **engine_kwargs) as writer:
+            out.to_excel(writer, **kwargs)
     else:
         if output_format != "excel" and isinstance(out.columns, pd.MultiIndex):
             out.columns = out.columns.map(lambda lvls: col_sep.join(map(str, lvls)))
diff --git a/tests/test_io.py b/tests/test_io.py
index f486de09..c8376f73 100644
--- a/tests/test_io.py
+++ b/tests/test_io.py
@@ -1,7 +1,8 @@
-import os
+import datetime
 from pathlib import Path
 
 import numpy as np
+import pandas as pd
 import pytest
 import xarray as xr
 import xclim as xc
@@ -46,6 +47,7 @@ class TestEstimateChunks:
     )
     ds2 = ds.copy()
     ds2["tas"] = ds2["tas"].astype(np.float32)
+    ds["just_a_variable"] = xr.DataArray(np.zeros(50), dims="new_dim")
 
     def test_normal(self):
         out1 = xs.io.estimate_chunks(self.ds, dims=["time", "lat", "lon"], target_mb=1)
@@ -120,28 +122,53 @@ def test_error(self):
             xs.io.subset_maxsize(ds, maxsize_gb=1e-15)
 
 
-def test_clean_incomplete(tmpdir):
-    ds = datablock_3d(
-        np.ones((5, 5, 5)),
-        variable="tas",
-        x="lon",
-        x_start=-70,
-        x_step=0.1,
-        y="lat",
-        y_start=45,
-        y_step=-0.1,
-        as_dataset=True,
-    )
-    ds["pr"] = ds["tas"].copy()
-    ds.to_zarr(Path(tmpdir) / "test.zarr")
+class TestCleanIncomplete:
+    @pytest.mark.parametrize("which", ["complete", "incomplete"])
+    def test_complete(self, tmpdir, which):
+        ds = datablock_3d(
+            np.ones((5, 5, 5)),
+            variable="tas",
+            x="lon",
+            x_start=-70,
+            x_step=0.1,
+            y="lat",
+            y_start=45,
+            y_step=-0.1,
+            as_dataset=True,
+        )
+        ds["pr"] = ds["tas"].copy()
+        ds.to_zarr(Path(tmpdir) / "test.zarr")
 
-    xs.io.clean_incomplete(Path(tmpdir) / "test.zarr", complete=["tas"])
-    assert Path.exists(Path(tmpdir) / "test.zarr/tas")
-    assert not Path.exists(Path(tmpdir) / "test.zarr/pr")
+        if which == "complete":
+            xs.io.clean_incomplete(Path(tmpdir) / "test.zarr", complete=["tas"])
+        else:
+            xs.io.clean_incomplete(Path(tmpdir) / "test.zarr", incomplete=["pr"])
+        assert (Path(tmpdir) / "test.zarr/tas").exists()
+        assert not (Path(tmpdir) / "test.zarr/pr").exists()
 
-    ds2 = xr.open_zarr(Path(tmpdir) / "test.zarr")
-    assert "pr" not in ds2
-    assert ds2.equals(ds[["tas"]])
+        ds2 = xr.open_zarr(Path(tmpdir) / "test.zarr")
+        assert "pr" not in ds2
+        assert ds2.equals(ds[["tas"]])
+
+    def test_error(self, tmpdir):
+        ds = datablock_3d(
+            np.ones((5, 5, 5)),
+            variable="tas",
+            x="lon",
+            x_start=-70,
+            x_step=0.1,
+            y="lat",
+            y_start=45,
+            y_step=-0.1,
+            as_dataset=True,
+        )
+        ds["pr"] = ds["tas"].copy()
+        ds.to_zarr(Path(tmpdir) / "test.zarr")
+
+        with pytest.raises(ValueError, match="Use either"):
+            xs.io.clean_incomplete(
+                Path(tmpdir) / "test.zarr", complete=["tas"], incomplete=["pr"]
+            )
 
 
 class TestRechunkForSaving:
@@ -154,7 +181,7 @@ class TestRechunkForSaving:
             (["rlon", "rlat"], False),
         ],
     )
-    def test_options(self, datablock_3d, dims, xy):
+    def test_options(self, dims, xy):
         ds = datablock_3d(
             np.random.random((30, 30, 50)),
             variable="tas",
@@ -176,7 +203,7 @@ def test_options(self, datablock_3d, dims, xy):
             )
             assert chunks[0] == new_chunks[dim]
 
-    def test_variables(self, datablock_3d):
+    def test_variables(self):
         ds = datablock_3d(
             np.random.random((30, 30, 50)),
             variable="tas",
@@ -211,7 +238,7 @@ class TestToTable:
         xr.merge(
             [
                 xs.testing.datablock_3d(
-                    np.random.random_sample((20, 3, 2)),
+                    np.ones((20, 3, 2)),
                     v,
                     "lon",
                     0,
@@ -234,7 +261,7 @@ class TestToTable:
     @pytest.mark.parametrize(
         "multiple, as_dataset", [(True, True), (False, True), (False, False)]
     )
-    def test_normal(self, multiple, as_dataset):
+    def test_normal(self, tmpdir, multiple, as_dataset):
         if multiple is False:
             if as_dataset:
                 ds = self.ds[["tas"]].copy()
@@ -244,9 +271,25 @@ def test_normal(self, multiple, as_dataset):
             ds = self.ds.copy()
 
         # Default
+        xs.save_to_table(ds, Path(tmpdir) / "test.csv")
+        saved = pd.read_csv(Path(tmpdir) / "test.csv")
         tab = xs.io.to_table(ds)
-        assert tab.shape == (120, 5 if multiple else 3)  # 3 vars + 2 aux coords
+
+        assert tab.shape == (
+            120,
+            5 if multiple else 3,
+        )  # 3 variables + 2 coords that are not dimensions
+        assert saved.shape == (
+            120,
+            8 if multiple else 6,
+        )  # everything gets mapped, so dimensions are included in the columns
         assert tab.columns.names == ["variable"] if multiple else [None]
+        expected_columns = (  # index levels + aux coords + variable(s)
+            {"season", "time", "site", "lat", "lon", "pr", "snw", "tas"}
+            if multiple
+            else {"season", "time", "site", "lat", "lon", "tas"}
+        )
+        assert set(saved.columns) == expected_columns
         assert tab.index.names == ["season", "time", "site"]
         # Season order is chronological, rather than alphabetical
         np.testing.assert_array_equal(
@@ -255,13 +298,24 @@ def test_normal(self, multiple, as_dataset):
             .index.get_level_values("season"),
             ["JFM", "AMJ", "JAS", "OND"],
         )
+        np.testing.assert_array_equal(saved.loc[0, "season"], "JFM")
 
         if multiple:
             # Variable in the index, thus no coords
+            xs.save_to_table(
+                ds,
+                Path(tmpdir) / "test.xlsx",
+                row=["time", "variable"],
+                column=["season", "site"],
+                coords=False,
+            )
             tab = xs.io.to_table(
                 ds, row=["time", "variable"], column=["season", "site"], coords=False
             )
+            saved = pd.read_excel(Path(tmpdir) / "test.xlsx")
+
             assert tab.shape == (15, 24)
+            assert saved.shape == (17, 26)  # Because of the headers
             assert tab.columns.names == ["season", "site"]
             np.testing.assert_array_equal(
                 tab.loc[("1993", "pr"), ("JFM",)], ds.pr.sel(time="1993", season="JFM")
@@ -275,8 +329,23 @@ def test_normal(self, multiple, as_dataset):
                 )
                 == 0
             )
+            # Excel is not the prettiest thing to test
+            np.testing.assert_array_equal(saved.iloc[2, 2:], np.tile([1], 24))
+            assert saved.iloc[0, 2] == "a"
+            assert saved.iloc[2, 0] == datetime.datetime(1993, 1, 1, 0, 0)
 
-    def test_sheet(self):
+    def test_sheet(self, tmpdir):
+        xs.save_to_table(
+            self.ds,
+            Path(tmpdir) / "test.xlsx",
+            row=["time", "variable"],
+            column=["season"],
+            sheet="site",
+            coords=False,
+        )
+        saved = pd.read_excel(
+            Path(tmpdir) / "test.xlsx", sheet_name=["a", "b", "c", "d", "e", "f"]
+        )  # This is a test by itself
         tab = xs.io.to_table(
             self.ds,
             row=["time", "variable"],
@@ -284,31 +353,77 @@ def test_sheet(self):
             sheet="site",
             coords=False,
         )
+
         assert set(tab.keys()) == {("a",), ("b",), ("c",), ("d",), ("e",), ("f",)}
         assert tab[("a",)].shape == (15, 4)  # 5 time * 3 variable X 4 season
+        assert saved["a"].shape == (15, 6)  # Because of the headers
 
-    def test_error(self):
+    def test_kwargs(self, tmpdir):
+        xs.save_to_table(
+            self.ds,
+            Path(tmpdir) / "test.xlsx",
+            row=["time", "variable"],
+            column=["season", "site"],
+            coords=False,
+            datetime_format="dd/mm/yyyy",
+        )
+        saved = pd.read_excel(Path(tmpdir) / "test.xlsx")
+        assert saved.iloc[2, 0] == datetime.datetime(
+            1993, 1, 1, 0, 0
+        )  # No real way to test the format
+
+    def test_multiindex(self, tmpdir):
+        xs.save_to_table(
+            self.ds,
+            Path(tmpdir) / "test.csv",
+            row=["time", "variable"],
+            column=["season", "site"],
+            coords=False,
+            row_sep="|",
+            col_sep=";",
+        )
+        out = pd.read_csv(Path(tmpdir) / "test.csv")
+        assert out.shape == (15, 25)
+        assert out.columns[0] == "time|variable"
+        assert out.columns[1] == "JFM;a"
+
+    def test_error(self, tmpdir):
         with pytest.raises(ValueError, match="Repeated dimension names."):
-            xs.io.to_table(
-                self.ds, row=["time", "variable"], column=["season", "site", "time"]
+            xs.save_to_table(
+                self.ds,
+                Path(tmpdir) / "test.xlsx",
+                row=["time", "variable"],
+                column=["season", "site", "time"],
             )
         with pytest.raises(ValueError, match="Passed row, column and sheet"):
-            xs.io.to_table(
-                self.ds, row=["time", "variable"], column=["season", "site", "foo"]
+            xs.save_to_table(
+                self.ds,
+                Path(tmpdir) / "test.xlsx",
+                row=["time", "variable"],
+                column=["season", "site", "foo"],
             )
         with pytest.raises(
             NotImplementedError,
             match="Keeping auxiliary coords is not implemented when",
         ):
-            xs.io.to_table(
+            xs.save_to_table(
                 self.ds,
+                Path(tmpdir) / "test.xlsx",
                 row=["time", "variable"],
                 column=["season", "site"],
                 coords=True,
             )
+        with pytest.raises(ValueError, match="Output format could not be inferred"):
+            xs.save_to_table(self.ds, Path(tmpdir) / "test")
+        with pytest.raises(
+            ValueError, match="is only valid with excel as the output format"
+        ):
+            xs.save_to_table(self.ds, Path(tmpdir) / "test.csv", sheet="site")
+        with pytest.raises(ValueError, match="but the output format is not Excel."):
+            xs.save_to_table(self.ds, Path(tmpdir) / "test.csv", add_toc=True)
 
     @pytest.mark.parametrize("as_dataset", [True, False])
-    def test_make_toc(self, as_dataset):
+    def test_make_toc(self, tmpdir, as_dataset):
         ds = self.ds.copy()
         for v in ds.data_vars:
             ds[v].attrs["long_name"] = f"Long name for {v}"
@@ -318,7 +433,10 @@ def test_make_toc(self, as_dataset):
             ds = ds["tas"]
 
         with xc.set_options(metadata_locales="fr"):
-            toc = xs.io.make_toc(ds)
+            xs.save_to_table(ds, Path(tmpdir) / "test.xlsx", add_toc=True)
+
+        toc = pd.read_excel(Path(tmpdir) / "test.xlsx", sheet_name="Contenu")
+        toc = toc.set_index("Unnamed: 0" if as_dataset else "Variable")
 
         if as_dataset:
             assert toc.shape == (8, 2)
@@ -327,7 +445,7 @@ def test_make_toc(self, as_dataset):
                 "tas",
                 "pr",
                 "snw",
-                "",
+                np.nan,
                 "Attributs globaux",
                 "foo",
                 "baz",
@@ -380,11 +498,63 @@ def test_guess_bitround(self, vname, vtype, bitr, exp):
         else:
             assert xs.io._get_keepbits(bitr, vname, vtype) == exp
 
+    @pytest.mark.parametrize("mode", ["f", "o", "a"])
+    @pytest.mark.parametrize("itervar", [True, False])
+    def test_mode(self, tmpdir, mode, itervar):
+        ds1 = timeseries(
+            np.arange(1, 5),
+            variable="tas",
+            as_dataset=True,
+        )
+        xs.save_to_zarr(ds1, Path(tmpdir) / "test.zarr")
 
-class TestSaveToNetcdf:
-    def test_normal(self, tmpdir):
-        ds = datablock_3d(
-            np.tile(np.arange(1111, 1121), 15).reshape(15, 5, 2) * 1e-7,
+        ds2 = timeseries(
+            np.arange(10, 14),
+            variable="tas",
+            as_dataset=True,
+        )
+        ds2["pr"] = ds2["tas"].copy()
+        ds2 = ds2[["pr", "tas"]]
+
+        if mode == "f":
+            with pytest.raises(ValueError, match="exists in dataset"):
+                xs.save_to_zarr(
+                    ds2, Path(tmpdir) / "test.zarr", mode=mode, itervar=itervar
+                )
+            assert not (Path(tmpdir) / "test.zarr/pr").exists()
+            if itervar:
+                # Essentially just to reach 100% coverage and make sure the function doesn't crash with mode="f" and itervar=True
+                xs.save_to_zarr(
+                    ds2, Path(tmpdir) / "test2.zarr", mode=mode, itervar=itervar
+                )
+                ds3 = xr.open_zarr(Path(tmpdir) / "test2.zarr")
+                np.testing.assert_array_almost_equal(ds3.tas.isel(time=0), [10])
+                np.testing.assert_array_almost_equal(ds3.pr.isel(time=0), [10])
+
+        elif mode == "o":
+            xs.save_to_zarr(ds2, Path(tmpdir) / "test.zarr", mode=mode, itervar=itervar)
+            ds3 = xr.open_zarr(Path(tmpdir) / "test.zarr")
+            np.testing.assert_array_almost_equal(ds3.tas.isel(time=0), [10])
+            np.testing.assert_array_almost_equal(ds3.pr.isel(time=0), [10])
+
+        elif mode == "a":
+            # First, try only with variables that are already in the dataset
+            xs.save_to_zarr(
+                ds2[["tas"]], Path(tmpdir) / "test.zarr", mode=mode, itervar=itervar
+            )
+            ds3 = xr.open_zarr(Path(tmpdir) / "test.zarr")
+            np.testing.assert_array_almost_equal(ds3.tas.isel(time=0), [1])
+
+            # Now, try with a new variable
+            xs.save_to_zarr(ds2, Path(tmpdir) / "test.zarr", mode=mode, itervar=itervar)
+            ds3 = xr.open_zarr(Path(tmpdir) / "test.zarr")
+            np.testing.assert_array_almost_equal(ds3.tas.isel(time=0), [1])
+            np.testing.assert_array_almost_equal(ds3.pr.isel(time=0), [10])
+
+    @pytest.mark.parametrize("append", [True, False])
+    def test_append(self, tmpdir, append):
+        ds1 = datablock_3d(
+            np.array([[[1, 2], [3, 4]]]),
             variable="tas",
             x="lon",
             x_start=-70,
@@ -392,28 +562,141 @@ def test_normal(self, tmpdir):
             y_start=45,
             as_dataset=True,
         )
-        ds["pr"] = ds["tas"].copy()
-        ds["other"] = ds["tas"].copy()
+        ds2 = datablock_3d(
+            np.array([[[11, 12], [13, 14]]]),
+            variable="tas",
+            x="lon",
+            x_start=-70,
+            y="lat",
+            y_start=45,
+            start="2005-01-01",
+            as_dataset=True,
+        )
+        ds2["pr"] = ds2["tas"].copy()
+        xs.save_to_zarr(
+            ds1, Path(tmpdir) / "test.zarr", encoding={"tas": {"dtype": "float32"}}
+        )
+
+        encoding = {
+            "tas": {"dtype": "int32"}
+        }  # This should be ignored, as the variable is already in the dataset
+        if append:
+            with pytest.raises(
+                ValueError,
+                match="is set in zarr_kwargs, all variables must already exist in the dataset.",
+            ):
+                xs.save_to_zarr(
+                    ds2,
+                    Path(tmpdir) / "test.zarr",
+                    mode="a",
+                    zarr_kwargs={"append_dim": "time"},
+                    encoding=encoding,
+                )
+            xs.save_to_zarr(
+                ds2[["tas"]],
+                Path(tmpdir) / "test.zarr",
+                mode="a",
+                zarr_kwargs={"append_dim": "time"},
+                encoding=encoding,
+            )
+            out = xr.open_zarr(Path(tmpdir) / "test.zarr")
+            np.testing.assert_array_equal(
+                out.tas, np.array([[[1, 2], [3, 4]], [[11, 12], [13, 14]]])
+            )
+        else:
+            xs.save_to_zarr(
+                ds2, Path(tmpdir) / "test.zarr", mode="a", encoding=encoding
+            )
+            out = xr.open_zarr(Path(tmpdir) / "test.zarr")
+            np.testing.assert_array_equal(out.tas, np.array([[[1, 2], [3, 4]]]))
+            np.testing.assert_array_equal(out.pr, np.array([[[11, 12], [13, 14]]]))
+        assert out.tas.dtype == np.float32
+
+    def test_skip(self, tmpdir):
+        ds1 = timeseries(
+            np.arange(1, 5),
+            variable="tas",
+            as_dataset=True,
+        )
+        ds2 = timeseries(
+            np.arange(10, 14),
+            variable="tas",
+            as_dataset=True,
+        )
+        xs.save_to_zarr(ds1, Path(tmpdir) / "test.zarr")
+        xs.save_to_zarr(ds2, Path(tmpdir) / "test.zarr", mode="a")
+        ds3 = xr.open_zarr(Path(tmpdir) / "test.zarr")
+        np.testing.assert_array_almost_equal(ds3.tas.isel(time=0), [1])
+
+
+@pytest.mark.parametrize("engine", ["netcdf", "zarr"])
+def test_savefuncs_normal(tmpdir, engine):
+    ds = datablock_3d(
+        np.tile(np.arange(1111, 1121), 15).reshape(15, 5, 2) * 1e-7,
+        variable="tas",
+        x="lon",
+        x_start=-70,
+        y="lat",
+        y_start=45,
+        as_dataset=True,
+    )
+    ds["pr"] = ds["tas"].copy()
+    ds["other"] = ds["tas"].copy()
+    ds["other"].encoding = {"dtype": "float32"}
+    ds.attrs["foo"] = {"bar": 1}
+    ds["pr"].attrs["foo"] = {"bar": 2}
+
+    ds = ds.assign_coords(
+        some_coord=("lat", np.array(["hi", "how", "are", "you", "doing"]))
+    )
+    ds["some_coord"] = ds["some_coord"].astype(object)
+    ds["some_coord"].encoding = {"source": "this is a source"}
 
+    rechunk = {"time": 5, "lon": 2, "lat": 2}
+    bitround = {"tas": 2, "pr": 3}
+    if engine == "netcdf":
         xs.save_to_netcdf(
             ds,
             Path(tmpdir) / "test.nc",
-            rechunk={"time": 5, "lon": 2, "lat": 2},
-            bitround={"tas": 2, "pr": 3},
+            rechunk=rechunk,
+            bitround=bitround,
         )
-
         ds2 = xr.open_dataset(Path(tmpdir) / "test.nc", chunks={})
-        assert ds2.tas.chunks == ((5, 5, 5), (2, 2, 1), (2,))
-
-        np.testing.assert_array_almost_equal(
-            ds2.tas.isel(time=0, lat=0, lon=0), [0.00010681], decimal=8
-        )
-        assert ds2.tas.attrs["_QuantizeBitRoundNumberOfSignificantDigits"] == 2
-        np.testing.assert_array_almost_equal(
-            ds2.pr.isel(time=0, lat=0, lon=0), [0.00011444], decimal=8
-        )
-        assert ds2.pr.attrs["_QuantizeBitRoundNumberOfSignificantDigits"] == 3
-        np.testing.assert_array_almost_equal(
-            ds2.other.isel(time=0, lat=0, lon=0), [0.0001111], decimal=8
+    else:
+        xs.save_to_zarr(
+            ds,
+            Path(tmpdir) / "test.zarr",
+            rechunk=rechunk,
+            bitround=bitround,
         )
-        assert ds2.other.attrs["_QuantizeBitRoundNumberOfSignificantDigits"] == 12
+        ds2 = xr.open_zarr(Path(tmpdir) / "test.zarr")
+
+    # Chunks
+    assert ds2.tas.chunks == ((5, 5, 5), (2, 2, 1), (2,))
+
+    # Dtype
+    assert ds2.tas.dtype == np.float64
+    assert ds2.other.dtype == np.float32
+
+    # Bitround
+    np.testing.assert_array_almost_equal(
+        ds2.tas.isel(time=0, lat=0, lon=0), [0.00010681], decimal=8
+    )
+    assert ds2.tas.attrs["_QuantizeBitRoundNumberOfSignificantDigits"] == 2
+    np.testing.assert_array_almost_equal(
+        ds2.pr.isel(time=0, lat=0, lon=0), [0.00011444], decimal=8
+    )
+    assert ds2.pr.attrs["_QuantizeBitRoundNumberOfSignificantDigits"] == 3
+    np.testing.assert_array_almost_equal(
+        ds2.other.isel(time=0, lat=0, lon=0), [0.0001111], decimal=8
+    )
+    assert ds2.other.attrs["_QuantizeBitRoundNumberOfSignificantDigits"] == 12
+
+    # Attributes
+    assert ds2.attrs["foo"] == "{'bar': 1}"
+    assert ds2.pr.attrs["foo"] == "{'bar': 2}"
+
+    if engine == "netcdf":
+        assert ds.some_coord.encoding == {"source": "this is a source"}
+    else:
+        assert ds.some_coord.encoding == {}

From 042b36f6a4d14748c98e74d602313ac41b179869 Mon Sep 17 00:00:00 2001
From: RondeauG <rondeau-genesse.gabriel@ouranos.ca>
Date: Mon, 11 Nov 2024 17:38:04 -0500
Subject: [PATCH 03/39] more tests

---
 tests/test_io.py | 89 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 89 insertions(+)

diff --git a/tests/test_io.py b/tests/test_io.py
index c8376f73..6613eaf4 100644
--- a/tests/test_io.py
+++ b/tests/test_io.py
@@ -700,3 +700,92 @@ def test_savefuncs_normal(tmpdir, engine):
         assert ds.some_coord.encoding == {"source": "this is a source"}
     else:
         assert ds.some_coord.encoding == {}
+
+
+class TestRechunk:
+    @pytest.mark.parametrize("engine", ["nc", "zarr"])
+    def test_rechunk(self, tmpdir, engine):
+        ds = datablock_3d(
+            np.tile(np.arange(1111, 1121), 15).reshape(15, 5, 2) * 1e-7,
+            variable="tas",
+            x="lon",
+            x_start=-70,
+            y="lat",
+            y_start=45,
+            as_dataset=True,
+        )
+        ds["pr"] = ds["tas"].copy()
+
+        if engine == "nc":
+            xs.save_to_netcdf(
+                ds,
+                Path(tmpdir) / "test.nc",
+            )
+        else:
+            xs.save_to_zarr(
+                ds,
+                Path(tmpdir) / "test.zarr",
+            )
+
+        (Path(tmpdir) / f"test2.zarr").mkdir()
+
+        xs.io.rechunk(
+            Path(tmpdir) / f"test.{engine}",
+            Path(tmpdir) / "test2.zarr",
+            chunks_over_dim={"time": 5, "lon": 2, "lat": 2},
+            overwrite=True,
+            worker_mem="1GB",
+            temp_store=Path(tmpdir) / "temp",
+        )
+        xs.io.rechunk(
+            Path(tmpdir) / f"test.{engine}",
+            Path(tmpdir) / "test3.zarr",
+            chunks_over_var={"tas": {"time": 5, "lon": 2, "lat": 2}},
+            overwrite=True,
+            worker_mem="1GB",
+            temp_store=Path(tmpdir) / "temp",
+        )
+
+        ds2 = xr.open_zarr(Path(tmpdir) / "test2.zarr")
+        ds3 = xr.open_zarr(Path(tmpdir) / "test3.zarr")
+        assert ds2.tas.chunks == ((5, 5, 5), (2, 2, 1), (2,))
+        assert ds2.pr.chunks == ((5, 5, 5), (2, 2, 1), (2,))
+        assert ds3.tas.chunks == ((5, 5, 5), (2, 2, 1), (2,))
+        assert ds3.pr.chunks == ((15,), (5,), (2,))
+
+    def test_error(self, tmpdir):
+        ds = datablock_3d(
+            np.tile(np.arange(1111, 1121), 15).reshape(15, 5, 2) * 1e-7,
+            variable="tas",
+            x="lon",
+            x_start=-70,
+            y="lat",
+            y_start=45,
+            as_dataset=True,
+        )
+        with pytest.raises(ValueError, match="No chunks given. "):
+            xs.io.rechunk(ds, Path(tmpdir) / "test.nc", worker_mem="1GB")
+
+
+def test_zip_zip(tmpdir):
+    ds = datablock_3d(
+        np.tile(np.arange(1111, 1121), 15).reshape(15, 5, 2) * 1e-7,
+        variable="tas",
+        x="lon",
+        x_start=-70,
+        y="lat",
+        y_start=45,
+        as_dataset=True,
+    )
+    xs.save_to_zarr(ds, Path(tmpdir) / "test.zarr")
+    xs.io.zip_directory(
+        Path(tmpdir) / "test.zarr", Path(tmpdir) / "test.zarr.zip", delete=True
+    )
+    assert not (Path(tmpdir) / "test.zarr").exists()
+
+    with xr.open_zarr(Path(tmpdir) / "test.zarr.zip") as ds2:
+        assert ds2.equals(ds)
+
+    xs.io.unzip_directory(Path(tmpdir) / "test.zarr.zip", Path(tmpdir) / "test2.zarr")
+    with xr.open_zarr(Path(tmpdir) / "test2.zarr") as ds3:
+        assert ds3.equals(ds)

From b1424c2a3eebe0a6d4ccc883a19ede6ef203092c Mon Sep 17 00:00:00 2001
From: RondeauG <rondeau-genesse.gabriel@ouranos.ca>
Date: Tue, 12 Nov 2024 09:18:27 -0500
Subject: [PATCH 04/39] maybe fix lint: reorder msgids in French xscen.po

---
 src/xscen/data/fr/LC_MESSAGES/xscen.po | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/xscen/data/fr/LC_MESSAGES/xscen.po b/src/xscen/data/fr/LC_MESSAGES/xscen.po
index 269ebed3..be7efb40 100644
--- a/src/xscen/data/fr/LC_MESSAGES/xscen.po
+++ b/src/xscen/data/fr/LC_MESSAGES/xscen.po
@@ -19,8 +19,8 @@ msgstr "Description"
 msgid "Units"
 msgstr "Unités"
 
-msgid "Content"
-msgstr "Contenu"
-
 msgid "Global attributes"
 msgstr "Attributs globaux"
+
+msgid "Content"
+msgstr "Contenu"

From 2876f4abd0349b8278413b5126bf797f6df912b3 Mon Sep 17 00:00:00 2001
From: RondeauG <rondeau-genesse.gabriel@ouranos.ca>
Date: Tue, 12 Nov 2024 09:18:54 -0500
Subject: [PATCH 05/39] maybe fix lint pt2: update compiled xscen.mo to match xscen.po

---
 src/xscen/data/fr/LC_MESSAGES/xscen.mo | Bin 1015 -> 1015 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)

diff --git a/src/xscen/data/fr/LC_MESSAGES/xscen.mo b/src/xscen/data/fr/LC_MESSAGES/xscen.mo
index 3821b17700b0108cae356d9251107847dc4a137d..51b5812af3b98854aae679b7238a00236247820d 100644
GIT binary patch
delta 26
hcmey){+)fp7e-z~T|*-U14}DI3ta=#&5TUn7y)?f2ipJu

delta 26
hcmey){+)fp7e-zKT>~QpLlY}Q6I}z7&5TUn7y)>32hacj


From 4c0b4dd3c71404ab05204c8fa16be01a940bb338 Mon Sep 17 00:00:00 2001
From: RondeauG <rondeau-genesse.gabriel@ouranos.ca>
Date: Tue, 12 Nov 2024 09:42:22 -0500
Subject: [PATCH 06/39] fix some tests

---
 tests/test_testing.py | 5 +++--
 tests/test_utils.py   | 4 ++--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/tests/test_testing.py b/tests/test_testing.py
index 74006677..8bec8021 100644
--- a/tests/test_testing.py
+++ b/tests/test_testing.py
@@ -10,7 +10,9 @@ class TestPublish:
     @pytest.mark.parametrize("fmt", ["md", "rst"])
     def test_normal(self, fmt):
         out = xs.testing.publish_release_notes(
-            fmt, changes=Path(__file__).parent.parent.joinpath("CHANGELOG.rst")
+            fmt,
+            changes=Path(__file__).parent.parent.joinpath("CHANGELOG.rst"),
+            latest=False,
         )
         if fmt == "md":
             assert out.startswith("# Changelog\n\n")
@@ -42,7 +44,6 @@ def test_file(self, tmpdir):
     def test_latest(self, tmpdir, latest):
         out = xs.testing.publish_release_notes(
             "md",
-            file=tmpdir / "foo.md",
             changes=Path(__file__).parent.parent.joinpath("CHANGELOG.rst"),
             latest=latest,
         )
diff --git a/tests/test_utils.py b/tests/test_utils.py
index da978353..9b49b6c3 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -71,7 +71,7 @@ class TestDateParser:
             ("2001", True, "datetime", pd.Timestamp("2001-12-31 23:59:59")),
             ("150004", True, "datetime", pd.Timestamp("1500-04-30 23:59:59")),
             ("31231212", None, "datetime", pd.Timestamp("3123-12-12")),
-            ("2001-07-08", None, "period", pd.Period("2001-07-08", "H")),
+            ("2001-07-08", None, "period", pd.Period("2001-07-08", "h")),
             (pd.Timestamp("1993-05-20T12:07"), None, "str", "1993-05-20"),
             (
                 cftime.Datetime360Day(1981, 2, 30),
@@ -94,7 +94,7 @@ class TestDateParser:
             ("abc", None, "datetime", pd.Timestamp("NaT")),
             ("", True, "datetime", pd.Timestamp("NaT")),
             (
-                pd.Period("2001-07-08", "H"),
+                pd.Period("2001-07-08", "h"),
                 None,
                 "datetime",
                 pd.Timestamp("2001-07-08"),

From d15e740f7f4e2a3747154d128e12a6df640e042d Mon Sep 17 00:00:00 2001
From: RondeauG <rondeau-genesse.gabriel@ouranos.ca>
Date: Tue, 12 Nov 2024 10:18:29 -0500
Subject: [PATCH 07/39] try excluding flox 0.9.14 to fall back to the previous version

---
 environment-dev.yml | 2 +-
 environment.yml     | 2 +-
 pyproject.toml      | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/environment-dev.yml b/environment-dev.yml
index 35dd5d2a..44704ece 100644
--- a/environment-dev.yml
+++ b/environment-dev.yml
@@ -11,7 +11,7 @@ dependencies:
   - cf_xarray >=0.7.6
   - clisops >=0.10
   - dask
-  - flox
+  - flox !=0.9.14
   - fsspec
   - geopandas
   - h5netcdf
diff --git a/environment.yml b/environment.yml
index b19c78ea..f4c76e82 100644
--- a/environment.yml
+++ b/environment.yml
@@ -11,7 +11,7 @@ dependencies:
   - cf_xarray >=0.7.6
   - clisops >=0.10
   - dask
-  - flox
+  - flox !=0.9.14
   - fsspec
   - geopandas
   - h5netcdf
diff --git a/pyproject.toml b/pyproject.toml
index 20d61b7c..3bf7347d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -44,7 +44,7 @@ dependencies = [
   "cf_xarray >=0.7.6",
   "clisops >=0.10",
   "dask",
-  "flox",
+  "flox !=0.9.14",
   "fsspec",
   "geopandas",
   "h5netcdf",

From fa8206d5670b892fc4a7ac1fcab8a781bb29e046 Mon Sep 17 00:00:00 2001
From: RondeauG <rondeau-genesse.gabriel@ouranos.ca>
Date: Tue, 12 Nov 2024 10:32:24 -0500
Subject: [PATCH 08/39] pin dask

---
 environment-dev.yml | 4 ++--
 environment.yml     | 4 ++--
 pyproject.toml      | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/environment-dev.yml b/environment-dev.yml
index 44704ece..917f1b4b 100644
--- a/environment-dev.yml
+++ b/environment-dev.yml
@@ -10,8 +10,8 @@ dependencies:
   - cftime
   - cf_xarray >=0.7.6
   - clisops >=0.10
-  - dask
-  - flox !=0.9.14
+  - dask !=2024.11  # FIXME: https://github.com/Ouranosinc/xclim/issues/1992
+  - flox !=0.9.14  # FIXME: Remove this line once _datetime_nanmin() in flox.xrutils is fixed
   - fsspec
   - geopandas
   - h5netcdf
diff --git a/environment.yml b/environment.yml
index f4c76e82..5b444878 100644
--- a/environment.yml
+++ b/environment.yml
@@ -10,8 +10,8 @@ dependencies:
   - cftime
   - cf_xarray >=0.7.6
   - clisops >=0.10
-  - dask
-  - flox !=0.9.14
+  - dask !=2024.11  # FIXME: https://github.com/Ouranosinc/xclim/issues/1992
+  - flox !=0.9.14  # FIXME: Remove this line once _datetime_nanmin() in flox.xrutils is fixed
   - fsspec
   - geopandas
   - h5netcdf
diff --git a/pyproject.toml b/pyproject.toml
index 3bf7347d..ed4b5f01 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -43,8 +43,8 @@ dependencies = [
   "cftime",
   "cf_xarray >=0.7.6",
   "clisops >=0.10",
-  "dask",
-  "flox !=0.9.14",
+  "dask !=2024.11", # FIXME: https://github.com/Ouranosinc/xclim/issues/1992
+  "flox !=0.9.14", # FIXME: Remove this line once _datetime_nanmin() in flox.xrutils is fixed
   "fsspec",
   "geopandas",
   "h5netcdf",

From 02f0d81ca2fe4aacc0e0614ec408ee60a9f60b60 Mon Sep 17 00:00:00 2001
From: RondeauG <rondeau-genesse.gabriel@ouranos.ca>
Date: Tue, 12 Nov 2024 10:44:22 -0500
Subject: [PATCH 09/39] more explicit pins

---
 environment-dev.yml | 2 +-
 environment.yml     | 2 +-
 pyproject.toml      | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/environment-dev.yml b/environment-dev.yml
index 917f1b4b..5c1fb63a 100644
--- a/environment-dev.yml
+++ b/environment-dev.yml
@@ -10,7 +10,7 @@ dependencies:
   - cftime
   - cf_xarray >=0.7.6
   - clisops >=0.10
-  - dask !=2024.11  # FIXME: https://github.com/Ouranosinc/xclim/issues/1992
+  - dask !=2024.11.0, !=2024.11.1  # FIXME: https://github.com/Ouranosinc/xclim/issues/1992
   - flox !=0.9.14  # FIXME: Remove this line once _datetime_nanmin() in flox.xrutils is fixed
   - fsspec
   - geopandas
diff --git a/environment.yml b/environment.yml
index 5b444878..92ba2025 100644
--- a/environment.yml
+++ b/environment.yml
@@ -10,7 +10,7 @@ dependencies:
   - cftime
   - cf_xarray >=0.7.6
   - clisops >=0.10
-  - dask !=2024.11  # FIXME: https://github.com/Ouranosinc/xclim/issues/1992
+  - dask !=2024.11.0, !=2024.11.1  # FIXME: https://github.com/Ouranosinc/xclim/issues/1992
   - flox !=0.9.14  # FIXME: Remove this line once _datetime_nanmin() in flox.xrutils is fixed
   - fsspec
   - geopandas
diff --git a/pyproject.toml b/pyproject.toml
index ed4b5f01..d6b5285b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -43,7 +43,7 @@ dependencies = [
   "cftime",
   "cf_xarray >=0.7.6",
   "clisops >=0.10",
-  "dask !=2024.11", # FIXME: https://github.com/Ouranosinc/xclim/issues/1992
+  "dask !=2024.11.0, !=2024.11.1", # FIXME: https://github.com/Ouranosinc/xclim/issues/1992
   "flox !=0.9.14", # FIXME: Remove this line once _datetime_nanmin() in flox.xrutils is fixed
   "fsspec",
   "geopandas",

From 0081ead6450150c09572e3abb0b3136e21280ddf Mon Sep 17 00:00:00 2001
From: RondeauG <rondeau-genesse.gabriel@ouranos.ca>
Date: Tue, 12 Nov 2024 11:09:22 -0500
Subject: [PATCH 10/39] upd changelog

---
 CHANGELOG.rst | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 3209d695..290ca20b 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -2,6 +2,35 @@
 Changelog
 =========
 
+v0.11.0 (unreleased)
+--------------------
+Contributors to this version: Gabriel Rondeau-Genesse (:user:`RondeauG`).
+
+New features and enhancements
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+* N/A
+
+Breaking changes
+^^^^^^^^^^^^^^^^
+* ``xs.utils.publish_release_notes`` and ``xs.utils.show_versions`` have been moved to ``xs.testing``. (:pull:`492`).
+
+Bug fixes
+^^^^^^^^^
+* Added a missing library (``openpyxl``) to the requirements. (:pull:`492`).
+* Fixed a bug in ``xs.io.subset_maxsize`` where the function would drop the last year. (:pull:`492`).
+* Fixed a bug in ``xs.io.clean_incomplete`` where the `.zmetadata` file was not removed. (:pull:`492`).
+* Fixed a bug in the saving of datasets where encoding was sometimes not applied, resulting for example in rechunking not being respected. (:pull:`492`).
+* Fixed multiple bugs in ``xs.io.save_to_zarr`` with `mode='a'`. (:pull:`492`).
+* Fixed a few minor bugs in ``xs.io.save_to_table``. (:pull:`492`).
+
+Internal changes
+^^^^^^^^^^^^^^^^
+* Added a new parameter `latest` to ``xs.testing.publish_release_notes`` to only print the latest release notes. (:pull:`492`).
+* The estimation method in ``xs.io.estimate_chunks`` has been improved. (:pull:`492`).
+* A new parameter `incomplete` has been added to ``xs.io.clean_incomplete`` to remove incomplete variables. (:pull:`492`).
+* Continued work on adding tests. (:pull:`492`).
+
+
 v0.10.1 (2024-11-04)
 --------------------
 Contributors to this version: Gabriel Rondeau-Genesse (:user:`RondeauG`), Pascal Bourgault (:user:`aulemahal`), Éric Dupuis (:user:`coxipi`).

From 311965f83fc2017308ba8312f38f5216526d3c78 Mon Sep 17 00:00:00 2001
From: RondeauG <38501935+RondeauG@users.noreply.github.com>
Date: Thu, 14 Nov 2024 13:41:21 -0500
Subject: [PATCH 11/39] Update environment-dev.yml

---
 environment-dev.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/environment-dev.yml b/environment-dev.yml
index 5c1fb63a..7d5d664b 100644
--- a/environment-dev.yml
+++ b/environment-dev.yml
@@ -11,7 +11,7 @@ dependencies:
   - cf_xarray >=0.7.6
   - clisops >=0.10
   - dask !=2024.11.0, !=2024.11.1  # FIXME: https://github.com/Ouranosinc/xclim/issues/1992
-  - flox !=0.9.14  # FIXME: Remove this line once _datetime_nanmin() in flox.xrutils is fixed
+  - flox !=0.9.14  # FIXME: 0.9.14 is a broken version. This pin could be removed eventually.
   - fsspec
   - geopandas
   - h5netcdf

From ffb66c143ad5056981220e09ab2b8ff5eb2bee99 Mon Sep 17 00:00:00 2001
From: RondeauG <38501935+RondeauG@users.noreply.github.com>
Date: Thu, 14 Nov 2024 13:41:26 -0500
Subject: [PATCH 12/39] Update environment.yml

---
 environment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/environment.yml b/environment.yml
index 92ba2025..c1d18229 100644
--- a/environment.yml
+++ b/environment.yml
@@ -11,7 +11,7 @@ dependencies:
   - cf_xarray >=0.7.6
   - clisops >=0.10
   - dask !=2024.11.0, !=2024.11.1  # FIXME: https://github.com/Ouranosinc/xclim/issues/1992
-  - flox !=0.9.14  # FIXME: Remove this line once _datetime_nanmin() in flox.xrutils is fixed
+  - flox !=0.9.14  # FIXME: 0.9.14 is a broken version. This pin could be removed eventually.
   - fsspec
   - geopandas
   - h5netcdf

From 45ba4e8bfefbeb62edf7d00f8a3f71be406767ab Mon Sep 17 00:00:00 2001
From: RondeauG <38501935+RondeauG@users.noreply.github.com>
Date: Thu, 14 Nov 2024 13:41:31 -0500
Subject: [PATCH 13/39] Update pyproject.toml

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index d6b5285b..29b66e68 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -44,7 +44,7 @@ dependencies = [
   "cf_xarray >=0.7.6",
   "clisops >=0.10",
   "dask !=2024.11.0, !=2024.11.1", # FIXME: https://github.com/Ouranosinc/xclim/issues/1992
-  "flox !=0.9.14", # FIXME: Remove this line once _datetime_nanmin() in flox.xrutils is fixed
+  "flox !=0.9.14", # FIXME: 0.9.14 is a broken version. This pin could be removed eventually.
   "fsspec",
   "geopandas",
   "h5netcdf",

From a085e79a3afdbb852736acc779127ea627df3928 Mon Sep 17 00:00:00 2001
From: RondeauG <38501935+RondeauG@users.noreply.github.com>
Date: Thu, 14 Nov 2024 14:09:01 -0500
Subject: [PATCH 14/39] Update environment-dev.yml

---
 environment-dev.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/environment-dev.yml b/environment-dev.yml
index 7d5d664b..92df31aa 100644
--- a/environment-dev.yml
+++ b/environment-dev.yml
@@ -10,7 +10,7 @@ dependencies:
   - cftime
   - cf_xarray >=0.7.6
   - clisops >=0.10
-  - dask !=2024.11.0, !=2024.11.1  # FIXME: https://github.com/Ouranosinc/xclim/issues/1992
+  - dask >=2024.8.1,<2024.11  # FIXME: https://github.com/Ouranosinc/xclim/issues/1992
   - flox !=0.9.14  # FIXME: 0.9.14 is a broken version. This pin could be removed eventually.
   - fsspec
   - geopandas

From abc2a40e0175238055d474a9a6848f971d14da16 Mon Sep 17 00:00:00 2001
From: RondeauG <38501935+RondeauG@users.noreply.github.com>
Date: Thu, 14 Nov 2024 14:09:21 -0500
Subject: [PATCH 15/39] Update environment.yml

---
 environment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/environment.yml b/environment.yml
index c1d18229..ad2a08f4 100644
--- a/environment.yml
+++ b/environment.yml
@@ -10,7 +10,7 @@ dependencies:
   - cftime
   - cf_xarray >=0.7.6
   - clisops >=0.10
-  - dask !=2024.11.0, !=2024.11.1  # FIXME: https://github.com/Ouranosinc/xclim/issues/1992
+  - dask >=2024.8.1,<2024.11  # FIXME: https://github.com/Ouranosinc/xclim/issues/1992
   - flox !=0.9.14  # FIXME: 0.9.14 is a broken version. This pin could be removed eventually.
   - fsspec
   - geopandas

From d0fb03d4ea2d441644e556e7b76dd7ea0fdadb74 Mon Sep 17 00:00:00 2001
From: RondeauG <38501935+RondeauG@users.noreply.github.com>
Date: Thu, 14 Nov 2024 14:09:44 -0500
Subject: [PATCH 16/39] Update pyproject.toml

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 29b66e68..8ed0a1d6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -43,7 +43,7 @@ dependencies = [
   "cftime",
   "cf_xarray >=0.7.6",
   "clisops >=0.10",
-  "dask !=2024.11.0, !=2024.11.1", # FIXME: https://github.com/Ouranosinc/xclim/issues/1992
+  "dask >=2024.8.1,<2024.11", # FIXME: https://github.com/Ouranosinc/xclim/issues/1992
   "flox !=0.9.14", # FIXME: 0.9.14 is a broken version. This pin could be removed eventually.
   "fsspec",
   "geopandas",

From e7e98532984d3c23767549bae7228689ca8df763 Mon Sep 17 00:00:00 2001
From: RondeauG <rondeau-genesse.gabriel@ouranos.ca>
Date: Fri, 15 Nov 2024 10:24:48 -0500
Subject: [PATCH 17/39] open_dataset to nimbus

---
 docs/notebooks/4_ensembles.ipynb | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/docs/notebooks/4_ensembles.ipynb b/docs/notebooks/4_ensembles.ipynb
index dad0407f..86a11809 100644
--- a/docs/notebooks/4_ensembles.ipynb
+++ b/docs/notebooks/4_ensembles.ipynb
@@ -40,7 +40,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from xclim.testing import open_dataset\n",
+    "import xarray as xr\n",
+    "from xclim.testing.utils import nimbus\n",
     "\n",
     "import xscen as xs\n",
     "\n",
@@ -53,7 +54,8 @@
     "}\n",
     "\n",
     "for d in datasets:\n",
-    "    ds = open_dataset(datasets[d]).isel(lon=slice(0, 4), lat=slice(0, 4))\n",
+    "    file = nimbus().fetch(datasets[d])\n",
+    "    ds = xr.open_dataset(file).isel(lon=slice(0, 4), lat=slice(0, 4))\n",
     "    ds = xs.climatological_op(\n",
     "        ds,\n",
     "        op=\"mean\",\n",

From 40f6075df1a165b8ab6bf170669761dd82e0a600 Mon Sep 17 00:00:00 2001
From: RondeauG <rondeau-genesse.gabriel@ouranos.ca>
Date: Fri, 15 Nov 2024 10:38:08 -0500
Subject: [PATCH 18/39] more nimbus

---
 tests/test_ensembles.py | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/tests/test_ensembles.py b/tests/test_ensembles.py
index e21a409a..ce1e5532 100644
--- a/tests/test_ensembles.py
+++ b/tests/test_ensembles.py
@@ -11,20 +11,12 @@
     import xesmf as xe
 except ImportError:
     xe = None
-# temp fix for changes to xclim-testdata
-from functools import partial
 
-from xclim.testing import open_dataset
 from xclim.testing.helpers import test_timeseries as timeseries
+from xclim.testing.utils import nimbus
 
 import xscen as xs
 
-# FIXME: Remove if-else when updating minimum xclim version to 0.53
-if Version(xc.__version__) < Version("0.53.0"):
-    # Hack to revert to old testdata with old xclim
-    open_dataset = partial(open_dataset, branch="v2023.12.14")
-
-
 LOGGER = logging.getLogger(__name__)
 
 
@@ -1103,7 +1095,7 @@ def test_build_partition_data(self, samplecat, tmp_path):
 
 class TestReduceEnsemble:
     def test_with_criteria(self):
-        ds = open_dataset("EnsembleReduce/TestEnsReduceCriteria.nc")
+        ds = xr.open_dataset(nimbus().fetch("EnsembleReduce/TestEnsReduceCriteria.nc"))
         selected, clusters, fig_data = xs.reduce_ensemble(
             ds["data"], method="kmeans", max_clusters=3
         )
@@ -1122,7 +1114,9 @@ def test_without_criteria(self, horizon):
             "CNRM-CM5": "EnsembleStats/BCCAQv2+ANUSPLIN300_CNRM-CM5_historical+rcp45_r1i1p1_1970-2050_tg_mean_YS.nc",
         }
         for d in datasets:
-            ds = open_dataset(datasets[d]).isel(lon=slice(0, 4), lat=slice(0, 4))
+            ds = xr.open_dataset(nimbus().fetch(datasets[d])).isel(
+                lon=slice(0, 4), lat=slice(0, 4)
+            )
             ds = xs.climatological_op(
                 ds,
                 op="mean",
@@ -1146,7 +1140,7 @@ def test_without_criteria(self, horizon):
         assert fig_data == {}
 
     def test_errors(self):
-        ds = open_dataset("EnsembleReduce/TestEnsReduceCriteria.nc")
+        ds = xr.open_dataset(nimbus().fetch("EnsembleReduce/TestEnsReduceCriteria.nc"))
         with pytest.raises(
             ValueError, match="Data must have a 'horizon' dimension to be subsetted."
         ):

From 1a8e0294b25e752185fcd6cb67e0ccc5ba742962 Mon Sep 17 00:00:00 2001
From: RondeauG <rondeau-genesse.gabriel@ouranos.ca>
Date: Fri, 15 Nov 2024 12:29:51 -0500
Subject: [PATCH 19/39] try to fix bad nimbus url

---
 docs/notebooks/4_ensembles.ipynb | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/docs/notebooks/4_ensembles.ipynb b/docs/notebooks/4_ensembles.ipynb
index 86a11809..c2ef14c4 100644
--- a/docs/notebooks/4_ensembles.ipynb
+++ b/docs/notebooks/4_ensembles.ipynb
@@ -40,11 +40,15 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "# from xclim.testing.utils import nimbus\n",
+    "import urllib\n",
+    "\n",
+    "import pooch\n",
     "import xarray as xr\n",
-    "from xclim.testing.utils import nimbus\n",
     "\n",
     "import xscen as xs\n",
     "\n",
+    "url = \"https://github.com/Ouranosinc/xclim-testdata/tree/main/data/\"\n",
     "datasets = {\n",
     "    \"ACCESS\": \"EnsembleStats/BCCAQv2+ANUSPLIN300_ACCESS1-0_historical+rcp45_r1i1p1_1950-2100_tg_mean_YS.nc\",\n",
     "    \"BNU-ESM\": \"EnsembleStats/BCCAQv2+ANUSPLIN300_BNU-ESM_historical+rcp45_r1i1p1_1950-2100_tg_mean_YS.nc\",\n",
@@ -54,7 +58,8 @@
     "}\n",
     "\n",
     "for d in datasets:\n",
-    "    file = nimbus().fetch(datasets[d])\n",
+    "    # file = nimbus().fetch(datasets[d])  # There is currently a bug in xclim 0.53.2\n",
+    "    file = pooch.retrieve(urllib.parse.urljoin(url, datasets[d]), known_hash=None)\n",
     "    ds = xr.open_dataset(file).isel(lon=slice(0, 4), lat=slice(0, 4))\n",
     "    ds = xs.climatological_op(\n",
     "        ds,\n",

From 184852d3b2eadc1e14cb1194cdb14f3a7ec49f16 Mon Sep 17 00:00:00 2001
From: RondeauG <rondeau-genesse.gabriel@ouranos.ca>
Date: Fri, 15 Nov 2024 13:28:19 -0500
Subject: [PATCH 20/39] revert change to nb

---
 docs/notebooks/4_ensembles.ipynb | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/docs/notebooks/4_ensembles.ipynb b/docs/notebooks/4_ensembles.ipynb
index c2ef14c4..fe595372 100644
--- a/docs/notebooks/4_ensembles.ipynb
+++ b/docs/notebooks/4_ensembles.ipynb
@@ -40,15 +40,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# from xclim.testing.utils import nimbus\n",
-    "import urllib\n",
-    "\n",
-    "import pooch\n",
     "import xarray as xr\n",
     "\n",
+    "# from xclim.testing.utils import nimbus\n",
+    "from xclim.testing import open_dataset\n",
+    "\n",
     "import xscen as xs\n",
     "\n",
-    "url = \"https://github.com/Ouranosinc/xclim-testdata/tree/main/data/\"\n",
     "datasets = {\n",
     "    \"ACCESS\": \"EnsembleStats/BCCAQv2+ANUSPLIN300_ACCESS1-0_historical+rcp45_r1i1p1_1950-2100_tg_mean_YS.nc\",\n",
     "    \"BNU-ESM\": \"EnsembleStats/BCCAQv2+ANUSPLIN300_BNU-ESM_historical+rcp45_r1i1p1_1950-2100_tg_mean_YS.nc\",\n",
@@ -59,8 +57,8 @@
     "\n",
     "for d in datasets:\n",
     "    # file = nimbus().fetch(datasets[d])  # There is currently a bug in xclim 0.53.2\n",
-    "    file = pooch.retrieve(urllib.parse.urljoin(url, datasets[d]), known_hash=None)\n",
-    "    ds = xr.open_dataset(file).isel(lon=slice(0, 4), lat=slice(0, 4))\n",
+    "    # ds = xr.open_dataset(file).isel(lon=slice(0, 4), lat=slice(0, 4))\n",
+    "    ds = open_dataset(datasets[d]).isel(lon=slice(0, 4), lat=slice(0, 4))\n",
     "    ds = xs.climatological_op(\n",
     "        ds,\n",
     "        op=\"mean\",\n",

From e7dd5c335b5f6e98d560c8eaffb79a974196a2d2 Mon Sep 17 00:00:00 2001
From: RondeauG <rondeau-genesse.gabriel@ouranos.ca>
Date: Mon, 18 Nov 2024 12:07:50 -0500
Subject: [PATCH 21/39] address comments from code review

---
 src/xscen/io.py         | 36 ++++++++++++-----
 src/xscen/testing.py    | 89 +++++++++--------------------------------
 tests/test_ensembles.py |  1 -
 tests/test_io.py        |  1 +
 4 files changed, 44 insertions(+), 83 deletions(-)

diff --git a/src/xscen/io.py b/src/xscen/io.py
index d94d8d83..7b451e1e 100644
--- a/src/xscen/io.py
+++ b/src/xscen/io.py
@@ -1,6 +1,7 @@
 """Input/Output functions for xscen."""
 
 import datetime
+import json
 import logging
 import os
 import shutil as sh
@@ -252,7 +253,7 @@ def clean_incomplete(
         Name of variables that were completed. All other variables (except coordinates) will be removed.
         Use either `complete` or `incomplete`, not both.
     incomplete : sequence of strings, optional
-        Name of variables that should be removed.
+        Name of variables that should be removed. Coordinates and dimensions will never be removed through this function.
         Use either `complete` or `incomplete`, not both.
 
     Returns
@@ -261,6 +262,22 @@ def clean_incomplete(
     """
     path = Path(path)
 
+    def _del_var(pth):
+        msg = f"Removing {pth} from disk"
+        logger.warning(msg)
+        sh.rmtree(pth)
+
+        # Update the .zmetadata file
+        with (Path(path) / ".zmetadata").open("r") as f:
+            metadata = json.load(f)
+        [
+            metadata["metadata"].pop(k)
+            for k in list(metadata["metadata"].keys())
+            if k.startswith(f"{pth.name}/.")
+        ]
+        with (Path(path) / ".zmetadata").open("w") as f:
+            json.dump(metadata, f, indent=2)
+
     if complete is not None and incomplete is not None:
         raise ValueError("Use either `complete` or `incomplete`, not both.")
 
@@ -270,20 +287,17 @@ def clean_incomplete(
 
         for fold in filter(lambda p: p.is_dir(), path.iterdir()):
             if fold.name not in complete:
-                msg = f"Removing {fold} from disk"
-                logger.warning(msg)
-                sh.rmtree(fold)
+                _del_var(fold)
 
     elif incomplete is not None:
+        with xr.open_zarr(path) as ds:
+            incomplete = [
+                v for v in incomplete if (v not in ds.coords) and (v not in ds.dims)
+            ]
+
         for fold in filter(lambda p: p.is_dir(), path.iterdir()):
             if fold.name in incomplete:
-                msg = f"Removing {fold} from disk"
-                logger.warning(msg)
-                sh.rmtree(fold)
-
-    # Remove .zmetadata to avoid issues with zarr and xarray
-    if (path / ".zmetadata").exists():
-        Path.unlink(path / ".zmetadata")
+                _del_var(fold)
 
 
 def _coerce_attrs(attrs):
diff --git a/src/xscen/testing.py b/src/xscen/testing.py
index a04cee31..c68e5264 100644
--- a/src/xscen/testing.py
+++ b/src/xscen/testing.py
@@ -1,5 +1,6 @@
 """Testing utilities for xscen."""
 
+import importlib.metadata
 import os
 import re
 from io import StringIO
@@ -357,77 +358,23 @@ def show_versions(
     -------
     str or None
     """
-    if deps is None:
-        deps = [
-            "xscen",
-            # Main packages
-            "cartopy",
-            "cftime",
-            "cf_xarray",
-            "clisops",
-            "dask",
-            "flox",
-            "fsspec",
-            "geopandas",
-            "h5netcdf",
-            "h5py",
-            "intake_esm",
-            "matplotlib",
-            "netCDF4",
-            "numcodecs",
-            "numpy",
-            "pandas",
-            "parse",
-            "pyyaml",
-            "rechunker",
-            "scipy",
-            "shapely",
-            "sparse",
-            "toolz",
-            "xarray",
-            "xclim",
-            "xesmf",
-            "zarr",
-            # Opt
-            "nc-time-axis",
-            "pyarrow",
-            # Dev
-            "babel",
-            "black",
-            "blackdoc",
-            "bump-my-version",
-            "coverage",
-            "coveralls",
-            "flake8",
-            "flake8-rst-docstrings",
-            "ipykernel",
-            "ipython",
-            "isort",
-            "jupyter_client",
-            "nbsphinx",
-            "nbval",
-            "pandoc",
-            "pooch",
-            "pre-commit",
-            "pytest",
-            "pytest-cov",
-            "ruff",
-            "setuptools",
-            "setuptools-scm",
-            "sphinx",
-            "sphinx-autoapi",
-            "sphinx-rtd-theme",
-            "sphinxcontrib-napoleon",
-            "sphinx-codeautolink",
-            "sphinx-copybutton",
-            "sphinx-mdinclude",
-            "watchdog",
-            "xdoctest",
-            "tox",
-            "build",
-            "wheel",
-            "pip",
-            "flake8-alphabetize",
+
+    def _get_xscen_dependencies():
+        xscen_metadata = importlib.metadata.metadata("xscen")
+        requires = xscen_metadata.get_all("Requires-Dist")
+        requires = [
+            req.split("[")[0]
+            .split(";")[0]
+            .split(">")[0]
+            .split("<")[0]
+            .split("=")[0]
+            .split("!")[0]
+            for req in requires
         ]
 
+        return ["xscen"] + requires
+
+    if deps is None:
+        deps = _get_xscen_dependencies()
+
     return _show_versions(file=file, deps=deps)
diff --git a/tests/test_ensembles.py b/tests/test_ensembles.py
index ce1e5532..43dc383e 100644
--- a/tests/test_ensembles.py
+++ b/tests/test_ensembles.py
@@ -5,7 +5,6 @@
 import pytest
 import xarray as xr
 import xclim as xc
-from packaging.version import Version
 
 try:
     import xesmf as xe
diff --git a/tests/test_io.py b/tests/test_io.py
index 6613eaf4..87e46a4c 100644
--- a/tests/test_io.py
+++ b/tests/test_io.py
@@ -145,6 +145,7 @@ def test_complete(self, tmpdir, which):
             xs.io.clean_incomplete(Path(tmpdir) / "test.zarr", incomplete=["pr"])
         assert (Path(tmpdir) / "test.zarr/tas").exists()
         assert not (Path(tmpdir) / "test.zarr/pr").exists()
+        assert (Path(tmpdir) / "test.zarr/.zmetadata").exists()
 
         ds2 = xr.open_zarr(Path(tmpdir) / "test.zarr")
         assert "pr" not in ds2

From f31b798c27288f3abc587703a24e6664ca15e96f Mon Sep 17 00:00:00 2001
From: RondeauG <rondeau-genesse.gabriel@ouranos.ca>
Date: Mon, 18 Nov 2024 15:06:37 -0500
Subject: [PATCH 22/39] maybe fix nimbus

---
 docs/notebooks/4_ensembles.ipynb | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/docs/notebooks/4_ensembles.ipynb b/docs/notebooks/4_ensembles.ipynb
index fe595372..97b444dd 100644
--- a/docs/notebooks/4_ensembles.ipynb
+++ b/docs/notebooks/4_ensembles.ipynb
@@ -41,9 +41,7 @@
    "outputs": [],
    "source": [
     "import xarray as xr\n",
-    "\n",
-    "# from xclim.testing.utils import nimbus\n",
-    "from xclim.testing import open_dataset\n",
+    "from xclim.testing.utils import nimbus\n",
     "\n",
     "import xscen as xs\n",
     "\n",
@@ -56,9 +54,10 @@
     "}\n",
     "\n",
     "for d in datasets:\n",
-    "    # file = nimbus().fetch(datasets[d])  # There is currently a bug in xclim 0.53.2\n",
-    "    # ds = xr.open_dataset(file).isel(lon=slice(0, 4), lat=slice(0, 4))\n",
-    "    ds = open_dataset(datasets[d]).isel(lon=slice(0, 4), lat=slice(0, 4))\n",
+    "    file = nimbus(repo=\"https://github.com/Ouranosinc/xclim-testdata\").fetch(\n",
+    "        datasets[d]\n",
+    "    )\n",
+    "    ds = xr.open_dataset(file).isel(lon=slice(0, 4), lat=slice(0, 4))\n",
     "    ds = xs.climatological_op(\n",
     "        ds,\n",
     "        op=\"mean\",\n",

From 7b9f8adb40e9372ed1d423682803884c22ba8e30 Mon Sep 17 00:00:00 2001
From: RondeauG <38501935+RondeauG@users.noreply.github.com>
Date: Mon, 18 Nov 2024 15:19:04 -0500
Subject: [PATCH 23/39] Update docs/notebooks/4_ensembles.ipynb

---
 docs/notebooks/4_ensembles.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/notebooks/4_ensembles.ipynb b/docs/notebooks/4_ensembles.ipynb
index 97b444dd..2ec88b06 100644
--- a/docs/notebooks/4_ensembles.ipynb
+++ b/docs/notebooks/4_ensembles.ipynb
@@ -54,7 +54,7 @@
     "}\n",
     "\n",
     "for d in datasets:\n",
-    "    file = nimbus(repo=\"https://github.com/Ouranosinc/xclim-testdata\").fetch(\n",
+    "    file = nimbus(repo=\"https://raw.githubusercontent.com/Ouranosinc/xclim-testdata\").fetch(\n",
     "        datasets[d]\n",
     "    )\n",
     "    ds = xr.open_dataset(file).isel(lon=slice(0, 4), lat=slice(0, 4))\n",

From 53a8aeb787e7a764089582e6447afbf68951690e Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 18 Nov 2024 20:19:28 +0000
Subject: [PATCH 24/39] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 docs/notebooks/4_ensembles.ipynb | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/notebooks/4_ensembles.ipynb b/docs/notebooks/4_ensembles.ipynb
index 2ec88b06..75d28732 100644
--- a/docs/notebooks/4_ensembles.ipynb
+++ b/docs/notebooks/4_ensembles.ipynb
@@ -54,9 +54,9 @@
     "}\n",
     "\n",
     "for d in datasets:\n",
-    "    file = nimbus(repo=\"https://raw.githubusercontent.com/Ouranosinc/xclim-testdata\").fetch(\n",
-    "        datasets[d]\n",
-    "    )\n",
+    "    file = nimbus(\n",
+    "        repo=\"https://raw.githubusercontent.com/Ouranosinc/xclim-testdata\"\n",
+    "    ).fetch(datasets[d])\n",
     "    ds = xr.open_dataset(file).isel(lon=slice(0, 4), lat=slice(0, 4))\n",
     "    ds = xs.climatological_op(\n",
     "        ds,\n",

From ebf7deda4dc992d2641a258d81e2d349d6ea4ba5 Mon Sep 17 00:00:00 2001
From: RondeauG <rondeau-genesse.gabriel@ouranos.ca>
Date: Tue, 19 Nov 2024 13:00:05 -0500
Subject: [PATCH 25/39] try without zmetadata

---
 src/xscen/io.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/xscen/io.py b/src/xscen/io.py
index 7b451e1e..899605af 100644
--- a/src/xscen/io.py
+++ b/src/xscen/io.py
@@ -267,16 +267,16 @@ def _del_var(pth):
         logger.warning(msg)
         sh.rmtree(pth)
 
-        # Update the .zmetadata file
-        with (Path(path) / ".zmetadata").open("r") as f:
-            metadata = json.load(f)
-        [
-            metadata["metadata"].pop(k)
-            for k in list(metadata["metadata"].keys())
-            if k.startswith(f"{pth.name}/.")
-        ]
-        with (Path(path) / ".zmetadata").open("w") as f:
-            json.dump(metadata, f, indent=2)
+        # # Update the .zmetadata file
+        # with (Path(path) / ".zmetadata").open("r") as f:
+        #     metadata = json.load(f)
+        # [
+        #     metadata["metadata"].pop(k)
+        #     for k in list(metadata["metadata"].keys())
+        #     if k.startswith(f"{pth.name}/.")
+        # ]
+        # with (Path(path) / ".zmetadata").open("w") as f:
+        #     json.dump(metadata, f, indent=2)
 
     if complete is not None and incomplete is not None:
         raise ValueError("Use either `complete` or `incomplete`, not both.")

From c676877aed91178caccd6cdd8c7228a12651283b Mon Sep 17 00:00:00 2001
From: RondeauG <rondeau-genesse.gabriel@ouranos.ca>
Date: Tue, 19 Nov 2024 13:29:53 -0500
Subject: [PATCH 26/39] revert

---
 src/xscen/io.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/xscen/io.py b/src/xscen/io.py
index 899605af..7b451e1e 100644
--- a/src/xscen/io.py
+++ b/src/xscen/io.py
@@ -267,16 +267,16 @@ def _del_var(pth):
         logger.warning(msg)
         sh.rmtree(pth)
 
-        # # Update the .zmetadata file
-        # with (Path(path) / ".zmetadata").open("r") as f:
-        #     metadata = json.load(f)
-        # [
-        #     metadata["metadata"].pop(k)
-        #     for k in list(metadata["metadata"].keys())
-        #     if k.startswith(f"{pth.name}/.")
-        # ]
-        # with (Path(path) / ".zmetadata").open("w") as f:
-        #     json.dump(metadata, f, indent=2)
+        # Update the .zmetadata file
+        with (Path(path) / ".zmetadata").open("r") as f:
+            metadata = json.load(f)
+        [
+            metadata["metadata"].pop(k)
+            for k in list(metadata["metadata"].keys())
+            if k.startswith(f"{pth.name}/.")
+        ]
+        with (Path(path) / ".zmetadata").open("w") as f:
+            json.dump(metadata, f, indent=2)
 
     if complete is not None and incomplete is not None:
         raise ValueError("Use either `complete` or `incomplete`, not both.")

From c3169938ccd2602ccfc33a95e7e6676583ada67f Mon Sep 17 00:00:00 2001
From: RondeauG <rondeau-genesse.gabriel@ouranos.ca>
Date: Tue, 19 Nov 2024 13:58:45 -0500
Subject: [PATCH 27/39] comment it all

---
 src/xscen/io.py | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/xscen/io.py b/src/xscen/io.py
index 7b451e1e..9d7a28c3 100644
--- a/src/xscen/io.py
+++ b/src/xscen/io.py
@@ -267,16 +267,16 @@ def _del_var(pth):
         logger.warning(msg)
         sh.rmtree(pth)
 
-        # Update the .zmetadata file
-        with (Path(path) / ".zmetadata").open("r") as f:
-            metadata = json.load(f)
-        [
-            metadata["metadata"].pop(k)
-            for k in list(metadata["metadata"].keys())
-            if k.startswith(f"{pth.name}/.")
-        ]
-        with (Path(path) / ".zmetadata").open("w") as f:
-            json.dump(metadata, f, indent=2)
+        # # Update the .zmetadata file
+        # with (Path(path) / ".zmetadata").open("r") as f:
+        #     metadata = json.load(f)
+        # [
+        #     metadata["metadata"].pop(k)
+        #     for k in list(metadata["metadata"].keys())
+        #     if k.startswith(f"{pth.name}/.")
+        # ]
+        # with (Path(path) / ".zmetadata").open("w") as f:
+        #     json.dump(metadata, f, indent=2)
 
     if complete is not None and incomplete is not None:
         raise ValueError("Use either `complete` or `incomplete`, not both.")
@@ -290,10 +290,10 @@ def _del_var(pth):
                 _del_var(fold)
 
     elif incomplete is not None:
-        with xr.open_zarr(path) as ds:
-            incomplete = [
-                v for v in incomplete if (v not in ds.coords) and (v not in ds.dims)
-            ]
+        # with xr.open_zarr(path) as ds:
+        #     incomplete = [
+        #         v for v in incomplete if (v not in ds.coords) and (v not in ds.dims)
+        #     ]
 
         for fold in filter(lambda p: p.is_dir(), path.iterdir()):
             if fold.name in incomplete:

From 2269cb1e4614e8c1e2478d7dcf189b3641d0b101 Mon Sep 17 00:00:00 2001
From: RondeauG <rondeau-genesse.gabriel@ouranos.ca>
Date: Tue, 19 Nov 2024 14:50:54 -0500
Subject: [PATCH 28/39] remove subfunction

---
 src/xscen/io.py | 38 +++++++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/src/xscen/io.py b/src/xscen/io.py
index 9d7a28c3..48a5bd1b 100644
--- a/src/xscen/io.py
+++ b/src/xscen/io.py
@@ -262,21 +262,21 @@ def clean_incomplete(
     """
     path = Path(path)
 
-    def _del_var(pth):
-        msg = f"Removing {pth} from disk"
-        logger.warning(msg)
-        sh.rmtree(pth)
-
-        # # Update the .zmetadata file
-        # with (Path(path) / ".zmetadata").open("r") as f:
-        #     metadata = json.load(f)
-        # [
-        #     metadata["metadata"].pop(k)
-        #     for k in list(metadata["metadata"].keys())
-        #     if k.startswith(f"{pth.name}/.")
-        # ]
-        # with (Path(path) / ".zmetadata").open("w") as f:
-        #     json.dump(metadata, f, indent=2)
+    # def _del_var(pth):
+    #     msg = f"Removing {pth} from disk"
+    #     logger.warning(msg)
+    #     sh.rmtree(pth)
+    #
+    #     # Update the .zmetadata file
+    #     with (Path(path) / ".zmetadata").open("r") as f:
+    #         metadata = json.load(f)
+    #     [
+    #         metadata["metadata"].pop(k)
+    #         for k in list(metadata["metadata"].keys())
+    #         if k.startswith(f"{pth.name}/.")
+    #     ]
+    #     with (Path(path) / ".zmetadata").open("w") as f:
+    #         json.dump(metadata, f, indent=2)
 
     if complete is not None and incomplete is not None:
         raise ValueError("Use either `complete` or `incomplete`, not both.")
@@ -287,7 +287,9 @@ def _del_var(pth):
 
         for fold in filter(lambda p: p.is_dir(), path.iterdir()):
             if fold.name not in complete:
-                _del_var(fold)
+                msg = f"Removing {fold} from disk"
+                logger.warning(msg)
+                sh.rmtree(fold)
 
     elif incomplete is not None:
         # with xr.open_zarr(path) as ds:
@@ -297,7 +299,9 @@ def _del_var(pth):
 
         for fold in filter(lambda p: p.is_dir(), path.iterdir()):
             if fold.name in incomplete:
-                _del_var(fold)
+                msg = f"Removing {fold} from disk"
+                logger.warning(msg)
+                sh.rmtree(fold)
 
 
 def _coerce_attrs(attrs):

From 5919bd4165f95e20ff4b2b284aefb7e49d5189ac Mon Sep 17 00:00:00 2001
From: RondeauG <rondeau-genesse.gabriel@ouranos.ca>
Date: Tue, 19 Nov 2024 15:11:05 -0500
Subject: [PATCH 29/39] more comments

---
 src/xscen/testing.py | 42 +++++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/src/xscen/testing.py b/src/xscen/testing.py
index c68e5264..733d11dc 100644
--- a/src/xscen/testing.py
+++ b/src/xscen/testing.py
@@ -1,6 +1,6 @@
 """Testing utilities for xscen."""
 
-import importlib.metadata
+# import importlib.metadata
 import os
 import re
 from io import StringIO
@@ -358,23 +358,23 @@ def show_versions(
     -------
     str or None
     """
-
-    def _get_xscen_dependencies():
-        xscen_metadata = importlib.metadata.metadata("xscen")
-        requires = xscen_metadata.get_all("Requires-Dist")
-        requires = [
-            req.split("[")[0]
-            .split(";")[0]
-            .split(">")[0]
-            .split("<")[0]
-            .split("=")[0]
-            .split("!")[0]
-            for req in requires
-        ]
-
-        return ["xscen"] + requires
-
-    if deps is None:
-        deps = _get_xscen_dependencies()
-
-    return _show_versions(file=file, deps=deps)
+    # def _get_xscen_dependencies():
+    #     xscen_metadata = importlib.metadata.metadata("xscen")
+    #     requires = xscen_metadata.get_all("Requires-Dist")
+    #     requires = [
+    #         req.split("[")[0]
+    #         .split(";")[0]
+    #         .split(">")[0]
+    #         .split("<")[0]
+    #         .split("=")[0]
+    #         .split("!")[0]
+    #         for req in requires
+    #     ]
+    #
+    #     return ["xscen"] + requires
+    #
+    # if deps is None:
+    #     deps = _get_xscen_dependencies()
+    #
+    # return _show_versions(file=file, deps=deps)
+    pass

From 779a001463722e030e73941eac3f0a40e513e59c Mon Sep 17 00:00:00 2001
From: RondeauG <rondeau-genesse.gabriel@ouranos.ca>
Date: Tue, 19 Nov 2024 15:41:56 -0500
Subject: [PATCH 30/39] try to limit esmf

---
 environment-dev.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/environment-dev.yml b/environment-dev.yml
index 92df31aa..61d23e65 100644
--- a/environment-dev.yml
+++ b/environment-dev.yml
@@ -11,6 +11,7 @@ dependencies:
   - cf_xarray >=0.7.6
   - clisops >=0.10
   - dask >=2024.8.1,<2024.11  # FIXME: https://github.com/Ouranosinc/xclim/issues/1992
+  - esmpy <8.7.0
   - flox !=0.9.14  # FIXME: 0.9.14 is a broken version. This pin could be removed eventually.
   - fsspec
   - geopandas
@@ -32,7 +33,7 @@ dependencies:
   - toolz
   - xarray >=2023.11.0, !=2024.6.0
   - xclim >=0.53.2, <0.54
-  - xesmf >=0.7
+  - xesmf >=0.7, <0.8.8
   - zarr >=2.13
   # Opt
   - nc-time-axis >=1.3.1

From 93389c110b5703f030ed1c34d8c6bf615459d2a0 Mon Sep 17 00:00:00 2001
From: RondeauG <rondeau-genesse.gabriel@ouranos.ca>
Date: Tue, 19 Nov 2024 16:06:01 -0500
Subject: [PATCH 31/39] try without xesmf pin

---
 environment-dev.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/environment-dev.yml b/environment-dev.yml
index 61d23e65..20be0122 100644
--- a/environment-dev.yml
+++ b/environment-dev.yml
@@ -11,7 +11,7 @@ dependencies:
   - cf_xarray >=0.7.6
   - clisops >=0.10
   - dask >=2024.8.1,<2024.11  # FIXME: https://github.com/Ouranosinc/xclim/issues/1992
-  - esmpy <8.7.0
+  - esmpy !=8.7.0  # FIXME: 8.7.0 produces a segfault in xesmf
   - flox !=0.9.14  # FIXME: 0.9.14 is a broken version. This pin could be removed eventually.
   - fsspec
   - geopandas
@@ -33,7 +33,7 @@ dependencies:
   - toolz
   - xarray >=2023.11.0, !=2024.6.0
   - xclim >=0.53.2, <0.54
-  - xesmf >=0.7, <0.8.8
+  - xesmf >=0.7
   - zarr >=2.13
   # Opt
   - nc-time-axis >=1.3.1

From b09da876bf2e406247d9b004684849e7989b1b74 Mon Sep 17 00:00:00 2001
From: RondeauG <rondeau-genesse.gabriel@ouranos.ca>
Date: Tue, 19 Nov 2024 16:20:04 -0500
Subject: [PATCH 32/39] switch pins

---
 environment-dev.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/environment-dev.yml b/environment-dev.yml
index 20be0122..56345653 100644
--- a/environment-dev.yml
+++ b/environment-dev.yml
@@ -11,7 +11,6 @@ dependencies:
   - cf_xarray >=0.7.6
   - clisops >=0.10
   - dask >=2024.8.1,<2024.11  # FIXME: https://github.com/Ouranosinc/xclim/issues/1992
-  - esmpy !=8.7.0  # FIXME: 8.7.0 produces a segfault in xesmf
   - flox !=0.9.14  # FIXME: 0.9.14 is a broken version. This pin could be removed eventually.
   - fsspec
   - geopandas
@@ -33,7 +32,7 @@ dependencies:
   - toolz
   - xarray >=2023.11.0, !=2024.6.0
   - xclim >=0.53.2, <0.54
-  - xesmf >=0.7
+  - xesmf >=0.7, <0.8.8  # FIXME: 0.8.8 currently creates segfaults on ReadTheDocs.
   - zarr >=2.13
   # Opt
   - nc-time-axis >=1.3.1

From a159f72716a96566ae2eef286141eb30605187bc Mon Sep 17 00:00:00 2001
From: RondeauG <rondeau-genesse.gabriel@ouranos.ca>
Date: Tue, 19 Nov 2024 16:23:06 -0500
Subject: [PATCH 33/39] uncomment tests

---
 src/xscen/io.py      | 39 +++++++++++++++++++--------------------
 src/xscen/testing.py | 42 +++++++++++++++++++++---------------------
 2 files changed, 40 insertions(+), 41 deletions(-)

diff --git a/src/xscen/io.py b/src/xscen/io.py
index 48a5bd1b..ed7df22c 100644
--- a/src/xscen/io.py
+++ b/src/xscen/io.py
@@ -262,24 +262,9 @@ def clean_incomplete(
     """
     path = Path(path)
 
-    # def _del_var(pth):
-    #     msg = f"Removing {pth} from disk"
-    #     logger.warning(msg)
-    #     sh.rmtree(pth)
-    #
-    #     # Update the .zmetadata file
-    #     with (Path(path) / ".zmetadata").open("r") as f:
-    #         metadata = json.load(f)
-    #     [
-    #         metadata["metadata"].pop(k)
-    #         for k in list(metadata["metadata"].keys())
-    #         if k.startswith(f"{pth.name}/.")
-    #     ]
-    #     with (Path(path) / ".zmetadata").open("w") as f:
-    #         json.dump(metadata, f, indent=2)
-
     if complete is not None and incomplete is not None:
         raise ValueError("Use either `complete` or `incomplete`, not both.")
+    v_to_rm = []
 
     if complete is not None:
         with xr.open_zarr(path) as ds:
@@ -287,22 +272,36 @@ def clean_incomplete(
 
         for fold in filter(lambda p: p.is_dir(), path.iterdir()):
             if fold.name not in complete:
+                v_to_rm.append(fold.name)
                 msg = f"Removing {fold} from disk"
                 logger.warning(msg)
                 sh.rmtree(fold)
 
     elif incomplete is not None:
-        # with xr.open_zarr(path) as ds:
-        #     incomplete = [
-        #         v for v in incomplete if (v not in ds.coords) and (v not in ds.dims)
-        #     ]
+        with xr.open_zarr(path) as ds:
+            incomplete = [
+                v for v in incomplete if (v not in ds.coords) and (v not in ds.dims)
+            ]
 
         for fold in filter(lambda p: p.is_dir(), path.iterdir()):
             if fold.name in incomplete:
+                v_to_rm.append(fold.name)
                 msg = f"Removing {fold} from disk"
                 logger.warning(msg)
                 sh.rmtree(fold)
 
+    # Update the .zmetadata file
+    with (path / ".zmetadata").open("r") as f:
+        metadata = json.load(f)
+    for v in v_to_rm:
+        [
+            metadata["metadata"].pop(k)
+            for k in list(metadata["metadata"].keys())
+            if k.startswith(f"{v}/.")
+        ]
+    with (path / ".zmetadata").open("w") as f:
+        json.dump(metadata, f, indent=2)
+
 
 def _coerce_attrs(attrs):
     """Ensure no funky objects in attrs."""
diff --git a/src/xscen/testing.py b/src/xscen/testing.py
index 733d11dc..c68e5264 100644
--- a/src/xscen/testing.py
+++ b/src/xscen/testing.py
@@ -1,6 +1,6 @@
 """Testing utilities for xscen."""
 
-# import importlib.metadata
+import importlib.metadata
 import os
 import re
 from io import StringIO
@@ -358,23 +358,23 @@ def show_versions(
     -------
     str or None
     """
-    # def _get_xscen_dependencies():
-    #     xscen_metadata = importlib.metadata.metadata("xscen")
-    #     requires = xscen_metadata.get_all("Requires-Dist")
-    #     requires = [
-    #         req.split("[")[0]
-    #         .split(";")[0]
-    #         .split(">")[0]
-    #         .split("<")[0]
-    #         .split("=")[0]
-    #         .split("!")[0]
-    #         for req in requires
-    #     ]
-    #
-    #     return ["xscen"] + requires
-    #
-    # if deps is None:
-    #     deps = _get_xscen_dependencies()
-    #
-    # return _show_versions(file=file, deps=deps)
-    pass
+
+    def _get_xscen_dependencies():
+        xscen_metadata = importlib.metadata.metadata("xscen")
+        requires = xscen_metadata.get_all("Requires-Dist")
+        requires = [
+            req.split("[")[0]
+            .split(";")[0]
+            .split(">")[0]
+            .split("<")[0]
+            .split("=")[0]
+            .split("!")[0]
+            for req in requires
+        ]
+
+        return ["xscen"] + requires
+
+    if deps is None:
+        deps = _get_xscen_dependencies()
+
+    return _show_versions(file=file, deps=deps)

From 5c40c98536e6f532fc5377d61a7ec8c4b5a0e7b7 Mon Sep 17 00:00:00 2001
From: RondeauG <rondeau-genesse.gabriel@ouranos.ca>
Date: Tue, 19 Nov 2024 16:29:23 -0500
Subject: [PATCH 34/39] add pins

---
 environment.yml       | 2 +-
 pyproject.toml        | 2 +-
 tests/test_testing.py | 1 +
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/environment.yml b/environment.yml
index ad2a08f4..126cb271 100644
--- a/environment.yml
+++ b/environment.yml
@@ -32,7 +32,7 @@ dependencies:
   - toolz
   - xarray >=2023.11.0, !=2024.6.0
   - xclim >=0.53.2, <0.54
-  - xesmf >=0.7
+  - xesmf >=0.7, <0.8.8  # FIXME: 0.8.8 currently creates segfaults on ReadTheDocs.
   - zarr >=2.13
   # To install from source
   - setuptools >=65.0.0
diff --git a/pyproject.toml b/pyproject.toml
index 8ed0a1d6..8b3a62f5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -112,7 +112,7 @@ docs = [
   "sphinxcontrib-napoleon"
 ]
 extra = [
-  "xesmf>=0.7"
+  "xesmf>=0.7, <0.8.8" # FIXME: 0.8.8 currently creates segfaults on ReadTheDocs.
 ]
 all = ["xscen[dev]", "xscen[docs]", "xscen[extra]"]
 
diff --git a/tests/test_testing.py b/tests/test_testing.py
index 8bec8021..db95d242 100644
--- a/tests/test_testing.py
+++ b/tests/test_testing.py
@@ -59,6 +59,7 @@ def test_show_version(tmpdir):
         out = f.read()
     assert "xscen" in out
     assert "xclim" in out
+    assert "xesmf" in out
     assert "xarray" in out
     assert "numpy" in out
     assert "pandas" in out

From 71f6dda80dbbf46b203ec42653ed8ef20f707961 Mon Sep 17 00:00:00 2001
From: RondeauG <rondeau-genesse.gabriel@ouranos.ca>
Date: Tue, 19 Nov 2024 17:10:36 -0500
Subject: [PATCH 35/39] explicit nimbus function

---
 docs/notebooks/4_ensembles.ipynb | 33 ++++++++++++++++++++++++++++----
 1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/docs/notebooks/4_ensembles.ipynb b/docs/notebooks/4_ensembles.ipynb
index 75d28732..7c4585dd 100644
--- a/docs/notebooks/4_ensembles.ipynb
+++ b/docs/notebooks/4_ensembles.ipynb
@@ -34,6 +34,34 @@
     "This tutorial will explore ensemble reduction (also known as ensemble selection) using `xscen`. This will use pre-computed annual mean temperatures from `xclim.testing`."
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# from xclim.testing.utils import nimbus\n",
+    "from pathlib import Path\n",
+    "from urllib.parse import urljoin\n",
+    "\n",
+    "import pooch\n",
+    "from xclim.testing.utils import load_registry\n",
+    "\n",
+    "\n",
+    "def nimbus():\n",
+    "    repo = \"https://raw.githubusercontent.com/Ouranosinc/xclim-testdata\"\n",
+    "    branch = \"v2024.8.23\"\n",
+    "    remote = urljoin(urljoin(repo, branch), \"data\")\n",
+    "    return pooch.create(\n",
+    "        path=Path(pooch.os_cache(\"xclim-testdata\")),\n",
+    "        base_url=remote,\n",
+    "        version=branch,\n",
+    "        version_dev=branch,\n",
+    "        allow_updates=True,\n",
+    "        registry=load_registry(branch=branch, repo=repo),\n",
+    "    )"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -41,7 +69,6 @@
    "outputs": [],
    "source": [
     "import xarray as xr\n",
-    "from xclim.testing.utils import nimbus\n",
     "\n",
     "import xscen as xs\n",
     "\n",
@@ -54,9 +81,7 @@
     "}\n",
     "\n",
     "for d in datasets:\n",
-    "    file = nimbus(\n",
-    "        repo=\"https://raw.githubusercontent.com/Ouranosinc/xclim-testdata\"\n",
-    "    ).fetch(datasets[d])\n",
+    "    file = nimbus().fetch(datasets[d])\n",
     "    ds = xr.open_dataset(file).isel(lon=slice(0, 4), lat=slice(0, 4))\n",
     "    ds = xs.climatological_op(\n",
     "        ds,\n",

From 623ce8670192a1ace2fc850cabc9fcaa9a249732 Mon Sep 17 00:00:00 2001
From: RondeauG <38501935+RondeauG@users.noreply.github.com>
Date: Wed, 20 Nov 2024 09:32:40 -0500
Subject: [PATCH 36/39] Update docs/notebooks/4_ensembles.ipynb

Co-authored-by: Pascal Bourgault <bourgault.pascal@ouranos.ca>
---
 docs/notebooks/4_ensembles.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/notebooks/4_ensembles.ipynb b/docs/notebooks/4_ensembles.ipynb
index 7c4585dd..8567afe4 100644
--- a/docs/notebooks/4_ensembles.ipynb
+++ b/docs/notebooks/4_ensembles.ipynb
@@ -49,7 +49,7 @@
     "\n",
     "\n",
     "def nimbus():\n",
-    "    repo = \"https://raw.githubusercontent.com/Ouranosinc/xclim-testdata\"\n",
+    "    repo = \"https://raw.githubusercontent.com/Ouranosinc/xclim-testdata/\"\n",
     "    branch = \"v2024.8.23\"\n",
     "    remote = urljoin(urljoin(repo, branch), \"data\")\n",
     "    return pooch.create(\n",

From 4448bfad6008d01853c2e34c1b00b09fd83d5ad9 Mon Sep 17 00:00:00 2001
From: RondeauG <rondeau-genesse.gabriel@ouranos.ca>
Date: Wed, 20 Nov 2024 11:09:46 -0500
Subject: [PATCH 37/39] add downloader

---
 docs/notebooks/4_ensembles.ipynb | 38 +++++++++++++++++---------------
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/docs/notebooks/4_ensembles.ipynb b/docs/notebooks/4_ensembles.ipynb
index 8567afe4..3a0a281b 100644
--- a/docs/notebooks/4_ensembles.ipynb
+++ b/docs/notebooks/4_ensembles.ipynb
@@ -40,26 +40,25 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# from xclim.testing.utils import nimbus\n",
-    "from pathlib import Path\n",
-    "from urllib.parse import urljoin\n",
+    "# from pathlib import Path\n",
+    "# from urllib.parse import urljoin\n",
     "\n",
-    "import pooch\n",
-    "from xclim.testing.utils import load_registry\n",
+    "# import pooch\n",
+    "# from xclim.testing.utils import load_registry\n",
     "\n",
     "\n",
-    "def nimbus():\n",
-    "    repo = \"https://raw.githubusercontent.com/Ouranosinc/xclim-testdata/\"\n",
-    "    branch = \"v2024.8.23\"\n",
-    "    remote = urljoin(urljoin(repo, branch), \"data\")\n",
-    "    return pooch.create(\n",
-    "        path=Path(pooch.os_cache(\"xclim-testdata\")),\n",
-    "        base_url=remote,\n",
-    "        version=branch,\n",
-    "        version_dev=branch,\n",
-    "        allow_updates=True,\n",
-    "        registry=load_registry(branch=branch, repo=repo),\n",
-    "    )"
+    "# def nimbus():\n",
+    "#     repo = \"https://raw.githubusercontent.com/Ouranosinc/xclim-testdata/\"\n",
+    "#     branch = \"v2024.8.23\"\n",
+    "#     remote = urljoin(urljoin(repo, branch), \"data\")\n",
+    "#     return pooch.create(\n",
+    "#         path=Path(pooch.os_cache(\"xclim-testdata\")),\n",
+    "#         base_url=remote,\n",
+    "#         version=branch,\n",
+    "#         version_dev=branch,\n",
+    "#         allow_updates=True,\n",
+    "#         registry=load_registry(branch=branch, repo=repo),\n",
+    "#     )"
    ]
   },
   {
@@ -69,9 +68,12 @@
    "outputs": [],
    "source": [
     "import xarray as xr\n",
+    "from xclim.testing.utils import nimbus\n",
     "\n",
     "import xscen as xs\n",
     "\n",
+    "downloader = pooch.HTTPDownloader(headers={\"User-Agent\": \"agent\"})\n",
+    "\n",
     "datasets = {\n",
     "    \"ACCESS\": \"EnsembleStats/BCCAQv2+ANUSPLIN300_ACCESS1-0_historical+rcp45_r1i1p1_1950-2100_tg_mean_YS.nc\",\n",
     "    \"BNU-ESM\": \"EnsembleStats/BCCAQv2+ANUSPLIN300_BNU-ESM_historical+rcp45_r1i1p1_1950-2100_tg_mean_YS.nc\",\n",
@@ -81,7 +83,7 @@
     "}\n",
     "\n",
     "for d in datasets:\n",
-    "    file = nimbus().fetch(datasets[d])\n",
+    "    file = nimbus().fetch(datasets[d], downloader=downloader)\n",
     "    ds = xr.open_dataset(file).isel(lon=slice(0, 4), lat=slice(0, 4))\n",
     "    ds = xs.climatological_op(\n",
     "        ds,\n",

From fdf85d649d2b7b0dcac2232fa20ca665b7dedb46 Mon Sep 17 00:00:00 2001
From: RondeauG <rondeau-genesse.gabriel@ouranos.ca>
Date: Wed, 20 Nov 2024 11:12:23 -0500
Subject: [PATCH 38/39] forgot pooch

---
 docs/notebooks/4_ensembles.ipynb | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/notebooks/4_ensembles.ipynb b/docs/notebooks/4_ensembles.ipynb
index 3a0a281b..afdf9094 100644
--- a/docs/notebooks/4_ensembles.ipynb
+++ b/docs/notebooks/4_ensembles.ipynb
@@ -67,6 +67,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "import pooch\n",
     "import xarray as xr\n",
     "from xclim.testing.utils import nimbus\n",
     "\n",

From 55720c259afbf64ec4eaf53e8edbb6fb282c3ae7 Mon Sep 17 00:00:00 2001
From: RondeauG <rondeau-genesse.gabriel@ouranos.ca>
Date: Wed, 20 Nov 2024 11:36:27 -0500
Subject: [PATCH 39/39] cleanup

---
 docs/notebooks/4_ensembles.ipynb | 29 +----------------------------
 1 file changed, 1 insertion(+), 28 deletions(-)

diff --git a/docs/notebooks/4_ensembles.ipynb b/docs/notebooks/4_ensembles.ipynb
index afdf9094..fa76d1ac 100644
--- a/docs/notebooks/4_ensembles.ipynb
+++ b/docs/notebooks/4_ensembles.ipynb
@@ -34,33 +34,6 @@
     "This tutorial will explore ensemble reduction (also known as ensemble selection) using `xscen`. This will use pre-computed annual mean temperatures from `xclim.testing`."
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# from pathlib import Path\n",
-    "# from urllib.parse import urljoin\n",
-    "\n",
-    "# import pooch\n",
-    "# from xclim.testing.utils import load_registry\n",
-    "\n",
-    "\n",
-    "# def nimbus():\n",
-    "#     repo = \"https://raw.githubusercontent.com/Ouranosinc/xclim-testdata/\"\n",
-    "#     branch = \"v2024.8.23\"\n",
-    "#     remote = urljoin(urljoin(repo, branch), \"data\")\n",
-    "#     return pooch.create(\n",
-    "#         path=Path(pooch.os_cache(\"xclim-testdata\")),\n",
-    "#         base_url=remote,\n",
-    "#         version=branch,\n",
-    "#         version_dev=branch,\n",
-    "#         allow_updates=True,\n",
-    "#         registry=load_registry(branch=branch, repo=repo),\n",
-    "#     )"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -73,7 +46,7 @@
     "\n",
     "import xscen as xs\n",
     "\n",
-    "downloader = pooch.HTTPDownloader(headers={\"User-Agent\": \"agent\"})\n",
+    "downloader = pooch.HTTPDownloader(headers={\"User-Agent\": f\"xscen-{xs.__version__}\"})\n",
     "\n",
     "datasets = {\n",
     "    \"ACCESS\": \"EnsembleStats/BCCAQv2+ANUSPLIN300_ACCESS1-0_historical+rcp45_r1i1p1_1950-2100_tg_mean_YS.nc\",\n",