From 82bf43c1a59f1fb2c5d8102278b6cabc5fa4cbce Mon Sep 17 00:00:00 2001 From: RondeauG Date: Thu, 7 Nov 2024 13:15:06 -0500 Subject: [PATCH 01/39] first batch of tests --- src/xscen/io.py | 146 ++++++++++++++---------------- src/xscen/testing.py | 191 ++++++++++++++++++++++++++++++++++++++- src/xscen/utils.py | 205 ++++++------------------------------------ tests/test_io.py | 178 +++++++++++++++++++++++++++++++++++- tests/test_testing.py | 66 ++++++++++++++ tests/test_utils.py | 47 ---------- 6 files changed, 527 insertions(+), 306 deletions(-) create mode 100644 tests/test_testing.py diff --git a/src/xscen/io.py b/src/xscen/io.py index f0c2e149..b93dd4f8 100644 --- a/src/xscen/io.py +++ b/src/xscen/io.py @@ -99,58 +99,30 @@ def estimate_chunks( # noqa: C901 A dictionary mapping dimensions to chunk sizes. """ - def _estimate_chunks(ds, target_mb, size_of_slice, rechunk_dims): - # Approximate size of the chunks (equal across dims) + def _estimate_chunks(da, target_mb, size_of_slice, rechunk_dims): + # Divide the dimensions by the smallest dimension + min_dim = np.min([da[d].shape[0] for d in rechunk_dims]) + ratio = {d: da[d].shape[0] / min_dim for d in rechunk_dims} + + # Get the approximate number of chunks, supposing the chunks are cubes approx_chunks = np.power(target_mb / size_of_slice, 1 / len(rechunk_dims)) - chunks_per_dim = dict() - if len(rechunk_dims) == 1: - rounding = ( - 1 - if ds[rechunk_dims[0]].shape[0] <= 15 - else 5 if ds[rechunk_dims[0]].shape[0] <= 250 else 10 - ) - chunks_per_dim[rechunk_dims[0]] = np.max( - [ - np.min( - [ - int(rounding * np.round(approx_chunks / rounding)), - ds[rechunk_dims[0]].shape[0], - ] - ), - 1, - ] - ) - elif len(rechunk_dims) == 2: - # Adjust approx_chunks based on the ratio of the rectangle sizes - for d in rechunk_dims: - rounding = ( - 1 if ds[d].shape[0] <= 15 else 5 if ds[d].shape[0] <= 250 else 10 - ) - adjusted_chunk = int( - rounding - * np.round( - approx_chunks - * ( - ds[d].shape[0] - / np.prod( - [ - ds[dd].shape[0] - for dd in rechunk_dims - if dd not in [d] - ] - ) - ) - / rounding - ) - ) - chunks_per_dim[d] = np.max( - [np.min([adjusted_chunk, ds[d].shape[0]]), 1] - ) - else: - raise NotImplementedError( - "estimating chunks on more than 2 dimensions is not implemented yet." - ) + # Redistribute the chunks based on the ratio of the dimensions + x = (approx_chunks ** len(rechunk_dims) / np.prod(list(ratio.values()))) ** ( + 1 / len(rechunk_dims) + ) + rounding_per_dim = { + d: 1 if da[d].shape[0] <= 15 else 5 if da[d].shape[0] <= 250 else 10 + for d in rechunk_dims + } + chunks_per_dim = { + d: int(rounding_per_dim[d] * np.round(x * ratio[d] / rounding_per_dim[d])) + for d in rechunk_dims + } + chunks_per_dim = { + d: np.max([np.min([chunks_per_dim[d], da[d].shape[0]]), 1]) + for d in rechunk_dims + } return chunks_per_dim @@ -163,7 +135,7 @@ def _estimate_chunks(ds, target_mb, size_of_slice, rechunk_dims): for v in ds.variables: # Find dimensions to chunk rechunk_dims = list(set(dims).intersection(ds.variables[v].dimensions)) - if not rechunk_dims: + if not rechunk_dims or v in ds.dimensions: continue dtype_size = ds.variables[v].datatype.itemsize @@ -219,7 +191,7 @@ def _estimate_chunks(ds, target_mb, size_of_slice, rechunk_dims): def subset_maxsize( ds: xr.Dataset, maxsize_gb: float, -) -> list: +) -> list[xr.Dataset]: """Estimate a dataset's size and, if higher than the given limit, subset it alongside the 'time' dimension. Parameters @@ -232,7 +204,7 @@ def subset_maxsize( Returns ------- - list + list of xr.Dataset List of xr.Dataset subsetted alongside 'time' to limit the filesize to the requested maximum. """ # Estimate the size of the dataset @@ -247,11 +219,11 @@ def subset_maxsize( logger.info(msg) return [ds] - elif "time" in ds: + elif "time" in ds.dims: years = np.unique(ds.time.dt.year) - ratio = int(len(years) / (size_of_file / maxsize_gb)) + ratio = np.max([int(len(years) / (size_of_file / maxsize_gb)), 1]) ds_sub = [] - for y in range(years[0], years[-1], ratio): + for y in range(years[0], years[-1] + 1, ratio): ds_sub.extend([ds.sel({"time": slice(str(y), str(y + ratio - 1))})]) return ds_sub @@ -261,7 +233,11 @@ def subset_maxsize( ) -def clean_incomplete(path: str | os.PathLike, complete: Sequence[str]) -> None: +def clean_incomplete( + path: str | os.PathLike, + complete: Sequence[str] | None = None, + incomplete: Sequence[str] | None = None, +) -> None: """Delete un-catalogued variables from a zarr folder. The goal of this function is to clean up an incomplete calculation. @@ -272,22 +248,42 @@ def clean_incomplete(path: str | os.PathLike, complete: Sequence[str]) -> None: ---------- path : str, Path A path to a zarr folder. - complete : sequence of strings - Name of variables that were completed. + complete : sequence of strings, optional + Name of variables that were completed. All other variables (except coordinates) will be removed. + Use either `complete` or `incomplete`, not both. + incomplete : sequence of strings, optional + Name of variables that should be removed. + Use either `complete` or `incomplete`, not both. Returns ------- None """ path = Path(path) - with xr.open_zarr(path) as ds: - complete = set(complete).union(ds.coords.keys()) - for fold in filter(lambda p: p.is_dir(), path.iterdir()): - if fold.name not in complete: - msg = f"Removing {fold} from disk" - logger.warning(msg) - sh.rmtree(fold) + if complete is not None and incomplete is not None: + raise ValueError("Use either `complete` or `incomplete`, not both.") + + if complete is not None: + with xr.open_zarr(path) as ds: + complete = set(complete).union(ds.coords.keys()) + + for fold in filter(lambda p: p.is_dir(), path.iterdir()): + if fold.name not in complete: + msg = f"Removing {fold} from disk" + logger.warning(msg) + sh.rmtree(fold) + + elif incomplete is not None: + for fold in filter(lambda p: p.is_dir(), path.iterdir()): + if fold.name in incomplete: + msg = f"Removing {fold} from disk" + logger.warning(msg) + sh.rmtree(fold) + + # Remove .zmetadata to avoid issues with zarr and xarray + if (path / ".zmetadata").exists(): + Path.unlink(path / ".zmetadata") def _coerce_attrs(attrs): @@ -319,9 +315,11 @@ def round_bits(da: xr.DataArray, keepbits: int): keepbits : int The number of bits of the mantissa to keep. """ + encoding = da.encoding da = xr.apply_ufunc( _np_bitround, da, keepbits, dask="parallelized", keep_attrs=True ) + da.encoding = encoding da.attrs["_QuantizeBitRoundNumberOfSignificantDigits"] = keepbits new_history = f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Data compressed with BitRound by keeping {keepbits} bits." history = ( @@ -510,10 +508,7 @@ def _skip(var): if mode == "o": if exists: - var_path = path / var - msg = f"Removing {var_path} to overwrite." - logger.warning(msg) - sh.rmtree(var_path) + clean_incomplete(path, incomplete=[var]) return False if mode == "a": @@ -562,9 +557,7 @@ def _skip(var): ) except TimeoutException: if timeout_cleanup: - msg = f"Removing incomplete {name}." - logger.info(msg) - sh.rmtree(path / name) + clean_incomplete(path, incomplete=[name]) raise else: @@ -576,10 +569,7 @@ def _skip(var): ) except TimeoutException: if timeout_cleanup: - msg = f"Removing incomplete {list(ds.data_vars.keys())} for {filename}." - logger.info(msg) - for name in ds.data_vars: - sh.rmtree(path / name) + clean_incomplete(path, incomplete=list(ds.data_vars.keys())) raise @@ -952,7 +942,7 @@ def rechunk_for_saving(ds: xr.Dataset, rechunk: dict): for d in ds[rechunk_var].dims ) ds[rechunk_var].encoding.pop("chunks", None) - ds[rechunk_var].encoding.pop("preferred_chunks", None) + ds[rechunk_var].encoding["preferred_chunks"] = rechunk_dims return ds diff --git a/src/xscen/testing.py b/src/xscen/testing.py index 232437e1..a04cee31 100644 --- a/src/xscen/testing.py +++ b/src/xscen/testing.py @@ -1,12 +1,19 @@ """Testing utilities for xscen.""" +import os +import re +from io import StringIO +from pathlib import Path +from typing import TextIO + import cartopy.crs as ccrs import numpy as np import pandas as pd import xarray as xr from xclim.testing.helpers import test_timeseries as timeseries +from xclim.testing.utils import show_versions as _show_versions -__all__ = ["datablock_3d", "fake_data"] +__all__ = ["datablock_3d", "fake_data", "publish_release_notes", "show_versions"] def datablock_3d( @@ -242,3 +249,185 @@ def fake_data( data = data + offset - (np.random.random() * amplitude - amplitude / 2) return data + + +def publish_release_notes( + style: str = "md", + file: os.PathLike | StringIO | TextIO | None = None, + changes: str | os.PathLike | None = None, + latest: bool = True, +) -> str | None: + """Format release history in Markdown or ReStructuredText. + + Parameters + ---------- + style : {"rst", "md"} + Use ReStructuredText (`rst`) or Markdown (`md`) formatting. Default: Markdown. + file : {os.PathLike, StringIO, TextIO, None} + If provided, prints to the given file-like object. Otherwise, returns a string. + changes : {str, os.PathLike}, optional + If provided, manually points to the file where the changelog can be found. + Assumes a relative path otherwise. + latest : bool + Whether to return the release notes of the latest version or all the content of the changelog. + + Returns + ------- + str, optional + + Notes + ----- + This function exists solely for development purposes. Adapted from xclim.testing.utils.publish_release_notes. + """ + if isinstance(changes, str | Path): + changes_file = Path(changes).absolute() + else: + changes_file = Path(__file__).absolute().parents[2].joinpath("CHANGELOG.rst") + + if not changes_file.exists(): + raise FileNotFoundError("Changes file not found in xscen file tree.") + + with Path(changes_file).open(encoding="utf-8") as f: + changes = f.read() + + if style == "rst": + hyperlink_replacements = { + r":issue:`([0-9]+)`": r"`GH/\1 `_", + r":pull:`([0-9]+)`": r"`PR/\1 `_", + r":user:`([a-zA-Z0-9_.-]+)`": r"`@\1 `_", + } + elif style == "md": + hyperlink_replacements = { + r":issue:`([0-9]+)`": r"[GH/\1](https://github.com/Ouranosinc/xscen/issues/\1)", + r":pull:`([0-9]+)`": r"[PR/\1](https://github.com/Ouranosinc/xscen/pull/\1)", + r":user:`([a-zA-Z0-9_.-]+)`": r"[@\1](https://github.com/\1)", + } + else: + raise NotImplementedError() + + for search, replacement in hyperlink_replacements.items(): + changes = re.sub(search, replacement, changes) + + if latest: + changes_split = changes.split("\n\nv0.") + changes = changes_split[0] + "\n\nv0." + changes_split[1] + + if style == "md": + changes = changes.replace("=========\nChangelog\n=========", "# Changelog") + + titles = {r"\n(.*?)\n([\-]{1,})": "-", r"\n(.*?)\n([\^]{1,})": "^"} + for title_expression, level in titles.items(): + found = re.findall(title_expression, changes) + for grouping in found: + fixed_grouping = ( + str(grouping[0]).replace("(", r"\(").replace(")", r"\)") + ) + search = rf"({fixed_grouping})\n([\{level}]{'{' + str(len(grouping[1])) + '}'})" + replacement = f"{'##' if level == '-' else '###'} {grouping[0]}" + changes = re.sub(search, replacement, changes) + + link_expressions = r"[\`]{1}([\w\s]+)\s<(.+)>`\_" + found = re.findall(link_expressions, changes) + for grouping in found: + search = rf"`{grouping[0]} <.+>`\_" + replacement = f"[{str(grouping[0]).strip()}]({grouping[1]})" + changes = re.sub(search, replacement, changes) + + if not file: + return changes + if isinstance(file, Path | os.PathLike): + file = Path(file).open("w") + print(changes, file=file) + + +def show_versions( + file: os.PathLike | StringIO | TextIO | None = None, + deps: list | None = None, +) -> str | None: + """Print the versions of xscen and its dependencies. + + Parameters + ---------- + file : {os.PathLike, StringIO, TextIO}, optional + If provided, prints to the given file-like object. Otherwise, returns a string. + deps : list, optional + A list of dependencies to gather and print version information from. Otherwise, prints `xscen` dependencies. + + Returns + ------- + str or None + """ + if deps is None: + deps = [ + "xscen", + # Main packages + "cartopy", + "cftime", + "cf_xarray", + "clisops", + "dask", + "flox", + "fsspec", + "geopandas", + "h5netcdf", + "h5py", + "intake_esm", + "matplotlib", + "netCDF4", + "numcodecs", + "numpy", + "pandas", + "parse", + "pyyaml", + "rechunker", + "scipy", + "shapely", + "sparse", + "toolz", + "xarray", + "xclim", + "xesmf", + "zarr", + # Opt + "nc-time-axis", + "pyarrow", + # Dev + "babel", + "black", + "blackdoc", + "bump-my-version", + "coverage", + "coveralls", + "flake8", + "flake8-rst-docstrings", + "ipykernel", + "ipython", + "isort", + "jupyter_client", + "nbsphinx", + "nbval", + "pandoc", + "pooch", + "pre-commit", + "pytest", + "pytest-cov", + "ruff", + "setuptools", + "setuptools-scm", + "sphinx", + "sphinx-autoapi", + "sphinx-rtd-theme", + "sphinxcontrib-napoleon", + "sphinx-codeautolink", + "sphinx-copybutton", + "sphinx-mdinclude", + "watchdog", + "xdoctest", + "tox", + "build", + "wheel", + "pip", + "flake8-alphabetize", + ] + + return _show_versions(file=file, deps=deps) diff --git a/src/xscen/utils.py b/src/xscen/utils.py index d68bebec..c9e86701 100644 --- a/src/xscen/utils.py +++ b/src/xscen/utils.py @@ -11,11 +11,9 @@ from collections.abc import Sequence from copy import deepcopy from datetime import datetime -from io import StringIO from itertools import chain from pathlib import Path from types import ModuleType -from typing import TextIO import cftime import flox.xarray @@ -28,7 +26,6 @@ from xclim.core.options import METADATA_LOCALES from xclim.core.options import OPTIONS as XC_OPTIONS from xclim.core.utils import uses_dask -from xclim.testing.utils import show_versions as _show_versions from .config import parse_config @@ -46,7 +43,6 @@ "maybe_unstack", "minimum_calendar", "natural_sort", - "publish_release_notes", "stack_drop_nans", "standardize_periods", "translate_time_chunk", @@ -1089,88 +1085,6 @@ def clean_up( # noqa: C901 return ds -def publish_release_notes( - style: str = "md", - file: os.PathLike | StringIO | TextIO | None = None, - changes: str | os.PathLike | None = None, -) -> str | None: - """Format release history in Markdown or ReStructuredText. - - Parameters - ---------- - style : {"rst", "md"} - Use ReStructuredText (`rst`) or Markdown (`md`) formatting. Default: Markdown. - file : {os.PathLike, StringIO, TextIO, None} - If provided, prints to the given file-like object. Otherwise, returns a string. - changes : {str, os.PathLike}, optional - If provided, manually points to the file where the changelog can be found. - Assumes a relative path otherwise. - - Returns - ------- - str, optional - - Notes - ----- - This function exists solely for development purposes. Adapted from xclim.testing.utils.publish_release_notes. - """ - if isinstance(changes, str | Path): - changes_file = Path(changes).absolute() - else: - changes_file = Path(__file__).absolute().parents[2].joinpath("CHANGELOG.rst") - - if not changes_file.exists(): - raise FileNotFoundError("Changes file not found in xscen file tree.") - - with Path(changes_file).open(encoding="utf-8") as f: - changes = f.read() - - if style == "rst": - hyperlink_replacements = { - r":issue:`([0-9]+)`": r"`GH/\1 `_", - r":pull:`([0-9]+)`": r"`PR/\1 `_", - r":user:`([a-zA-Z0-9_.-]+)`": r"`@\1 `_", - } - elif style == "md": - hyperlink_replacements = { - r":issue:`([0-9]+)`": r"[GH/\1](https://github.com/Ouranosinc/xscen/issues/\1)", - r":pull:`([0-9]+)`": r"[PR/\1](https://github.com/Ouranosinc/xscen/pull/\1)", - r":user:`([a-zA-Z0-9_.-]+)`": r"[@\1](https://github.com/\1)", - } - else: - raise NotImplementedError() - - for search, replacement in hyperlink_replacements.items(): - changes = re.sub(search, replacement, changes) - - if style == "md": - changes = changes.replace("=========\nChangelog\n=========", "# Changelog") - - titles = {r"\n(.*?)\n([\-]{1,})": "-", r"\n(.*?)\n([\^]{1,})": "^"} - for title_expression, level in titles.items(): - found = re.findall(title_expression, changes) - for grouping in found: - fixed_grouping = ( - str(grouping[0]).replace("(", r"\(").replace(")", r"\)") - ) - search = rf"({fixed_grouping})\n([\{level}]{'{' + str(len(grouping[1])) + '}'})" - replacement = f"{'##' if level == '-' else '###'} {grouping[0]}" - changes = re.sub(search, replacement, changes) - - link_expressions = r"[\`]{1}([\w\s]+)\s<(.+)>`\_" - found = re.findall(link_expressions, changes) - for grouping in found: - search = rf"`{grouping[0]} <.+>`\_" - replacement = f"[{str(grouping[0]).strip()}]({grouping[1]})" - changes = re.sub(search, replacement, changes) - - if not file: - return changes - if isinstance(file, Path | os.PathLike): - file = Path(file).open("w") - print(changes, file=file) - - def unstack_dates( # noqa: C901 ds: xr.Dataset, seasons: dict[int, str] | None = None, @@ -1346,99 +1260,6 @@ def reshape_da(da): return dso.assign_coords(**new_coords) -def show_versions( - file: os.PathLike | StringIO | TextIO | None = None, - deps: list | None = None, -) -> str | None: - """Print the versions of xscen and its dependencies. - - Parameters - ---------- - file : {os.PathLike, StringIO, TextIO}, optional - If provided, prints to the given file-like object. Otherwise, returns a string. - deps : list, optional - A list of dependencies to gather and print version information from. Otherwise, prints `xscen` dependencies. - - Returns - ------- - str or None - """ - if deps is None: - deps = [ - "xscen", - # Main packages - "cartopy", - "cftime", - "cf_xarray", - "clisops", - "dask", - "flox", - "fsspec", - "geopandas", - "h5netcdf", - "h5py", - "intake_esm", - "matplotlib", - "netCDF4", - "numcodecs", - "numpy", - "pandas", - "parse", - "pyyaml", - "rechunker", - "scipy", - "shapely", - "sparse", - "toolz", - "xarray", - "xclim", - "xesmf", - "zarr", - # Opt - "nc-time-axis", - "pyarrow", - # Dev - "babel", - "black", - "blackdoc", - "bump-my-version", - "coverage", - "coveralls", - "flake8", - "flake8-rst-docstrings", - "ipykernel", - "ipython", - "isort", - "jupyter_client", - "nbsphinx", - "nbval", - "pandoc", - "pooch", - "pre-commit", - "pytest", - "pytest-cov", - "ruff", - "setuptools", - "setuptools-scm", - "sphinx", - "sphinx-autoapi", - "sphinx-rtd-theme", - "sphinxcontrib-napoleon", - "sphinx-codeautolink", - "sphinx-copybutton", - "sphinx-mdinclude", - "watchdog", - "xdoctest", - "tox", - "build", - "wheel", - "pip", - "flake8-alphabetize", - ] - - return _show_versions(file=file, deps=deps) - - def ensure_correct_time(ds: xr.Dataset, xrfreq: str) -> xr.Dataset: """Ensure a dataset has the correct time coordinate, as expected for the given frequency. @@ -1616,3 +1437,29 @@ def rechunk_for_resample(obj: xr.DataArray | xr.Dataset, **resample_kwargs): res = obj.resample(**resample_kwargs) return flox.xarray.rechunk_for_blockwise(obj, res._dim, res._codes) + + +def publish_release_notes(*args, **kwargs): + """Backward compatibility for the old function.""" + warnings.warn( + "'xscen.utils.publish_release_notes' has been moved to 'xscen.testing.publish_release_notes'." + "Support for this function will be removed in xscen v0.12.0.", + FutureWarning, + ) + + from .testing import publish_release_notes as prn + + return prn(*args, **kwargs) + + +def show_versions(*args, **kwargs): + """Backward compatibility for the old function.""" + warnings.warn( + "'xscen.utils.show_versions' has been moved to 'xscen.testing.show_versions'." + "Support for this function will be removed in xscen v0.12.0.", + FutureWarning, + ) + + from .testing import show_versions as sv + + return sv(*args, **kwargs) diff --git a/tests/test_io.py b/tests/test_io.py index 7db611c9..f486de09 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -1,9 +1,147 @@ +import os +from pathlib import Path + import numpy as np import pytest import xarray as xr import xclim as xc +from xclim.testing.helpers import test_timeseries as timeseries import xscen as xs +from xscen.testing import datablock_3d + + +@pytest.mark.parametrize("suffix", [".zarr", ".zarr.zip", "h5", "nc"]) +def test_get_engine(tmpdir, suffix): + if suffix in [".zarr", ".zarr.zip"]: + path = "some/path" + suffix + assert xs.io.get_engine(path) == "zarr" + else: + ds = timeseries( + np.zeros(60), + variable="tas", + as_dataset=True, + ) + ds.to_netcdf( + Path(tmpdir) / f"test.nc", + engine="netcdf4" if suffix == "nc" else "h5netcdf", + ) + assert xs.io.get_engine(Path(tmpdir) / f"test.nc") in [ + "netcdf4", + "h5netcdf", + ] # Hard to predict which one + + +class TestEstimateChunks: + ds = datablock_3d( + np.zeros((50, 100, 150)), + variable="tas", + x="lon", + x_start=-70, + x_step=0.1, + y="lat", + y_start=45, + y_step=-0.1, + as_dataset=True, + ) + ds2 = ds.copy() + ds2["tas"] = ds2["tas"].astype(np.float32) + + def test_normal(self): + out1 = xs.io.estimate_chunks(self.ds, dims=["time", "lat", "lon"], target_mb=1) + assert out1 == {"time": 30, "lat": 55, "lon": 85} + out2 = xs.io.estimate_chunks(self.ds2, dims=["time", "lat", "lon"], target_mb=1) + assert out2 == {"time": 35, "lat": 70, "lon": 105} + out3 = xs.io.estimate_chunks(self.ds, dims=["lat", "lon"], target_mb=1) + assert out3 == {"lon": 65, "lat": 40, "time": -1} + out4 = xs.io.estimate_chunks(self.ds2, dims=["time"], target_mb=1) + assert out4 == {"time": 15, "lat": -1, "lon": -1} + + @pytest.mark.parametrize("chunk_per_variable", [True, False]) + @pytest.mark.parametrize("as_file", [True, False]) + def test_multiple_vars(self, tmpdir, chunk_per_variable, as_file): + ds = self.ds.copy() + ds["pr"] = ds["tas"].isel(time=0) + + if as_file: + ds.to_netcdf(Path(tmpdir) / "test.nc") + ds = Path(tmpdir) / "test.nc" + + out = xs.io.estimate_chunks( + ds, dims=["lat", "lon"], target_mb=1, chunk_per_variable=chunk_per_variable + ) + if chunk_per_variable is False: + assert out == {"lon": 65, "lat": 40, "time": -1} + else: + assert out == { + "tas": {"lon": 65, "lat": 40, "time": -1}, + "pr": {"lon": 150, "lat": 100}, + } + + +class TestSubsetMaxsize: + def test_normal(self): + ds = datablock_3d( + np.zeros((1500, 5, 5)), + variable="tas", + x="lon", + x_start=-70, + x_step=0.1, + y="lat", + y_start=45, + y_step=-0.1, + as_dataset=True, + ) + ds["pr"] = ds["tas"] + # First, test with a dataset that is already small enough + out = xs.io.subset_maxsize(ds, maxsize_gb=1) + assert len(out) == 1 + assert out[0].equals(ds) + + out = xs.io.subset_maxsize(ds, maxsize_gb=0.0005) + assert len(out) == 2 + assert xr.concat(out, dim="time").equals(ds) + + def test_error(self): + ds = datablock_3d( + np.zeros((1, 50, 10)), + variable="tas", + x="lon", + x_start=-70, + x_step=0.1, + y="lat", + y_start=45, + y_step=-0.1, + as_dataset=True, + ) + ds = ds.isel(time=0) + + with pytest.raises(NotImplementedError, match="does not contain a"): + xs.io.subset_maxsize(ds, maxsize_gb=1e-15) + + +def test_clean_incomplete(tmpdir): + ds = datablock_3d( + np.ones((5, 5, 5)), + variable="tas", + x="lon", + x_start=-70, + x_step=0.1, + y="lat", + y_start=45, + y_step=-0.1, + as_dataset=True, + ) + ds["pr"] = ds["tas"].copy() + ds.to_zarr(Path(tmpdir) / "test.zarr") + + xs.io.clean_incomplete(Path(tmpdir) / "test.zarr", complete=["tas"]) + assert Path.exists(Path(tmpdir) / "test.zarr/tas") + assert not Path.exists(Path(tmpdir) / "test.zarr/pr") + + ds2 = xr.open_zarr(Path(tmpdir) / "test.zarr") + assert "pr" not in ds2 + assert ds2.equals(ds[["tas"]]) class TestRechunkForSaving: @@ -205,7 +343,7 @@ def test_make_toc(self, as_dataset): assert toc.loc["tas", "Unités"] == "K" -def test_round_bits(datablock_3d): +def test_round_bits(): da = datablock_3d( np.random.random((30, 30, 50)), variable="tas", @@ -241,3 +379,41 @@ def test_guess_bitround(self, vname, vtype, bitr, exp): xs.io._get_keepbits(bitr, vname, vtype) else: assert xs.io._get_keepbits(bitr, vname, vtype) == exp + + +class TestSaveToNetcdf: + def test_normal(self, tmpdir): + ds = datablock_3d( + np.tile(np.arange(1111, 1121), 15).reshape(15, 5, 2) * 1e-7, + variable="tas", + x="lon", + x_start=-70, + y="lat", + y_start=45, + as_dataset=True, + ) + ds["pr"] = ds["tas"].copy() + ds["other"] = ds["tas"].copy() + + xs.save_to_netcdf( + ds, + Path(tmpdir) / "test.nc", + rechunk={"time": 5, "lon": 2, "lat": 2}, + bitround={"tas": 2, "pr": 3}, + ) + + ds2 = xr.open_dataset(Path(tmpdir) / "test.nc", chunks={}) + assert ds2.tas.chunks == ((5, 5, 5), (2, 2, 1), (2,)) + + np.testing.assert_array_almost_equal( + ds2.tas.isel(time=0, lat=0, lon=0), [0.00010681], decimal=8 + ) + assert ds2.tas.attrs["_QuantizeBitRoundNumberOfSignificantDigits"] == 2 + np.testing.assert_array_almost_equal( + ds2.pr.isel(time=0, lat=0, lon=0), [0.00011444], decimal=8 + ) + assert ds2.pr.attrs["_QuantizeBitRoundNumberOfSignificantDigits"] == 3 + np.testing.assert_array_almost_equal( + ds2.other.isel(time=0, lat=0, lon=0), [0.0001111], decimal=8 + ) + assert ds2.other.attrs["_QuantizeBitRoundNumberOfSignificantDigits"] == 12 diff --git a/tests/test_testing.py b/tests/test_testing.py new file mode 100644 index 00000000..74006677 --- /dev/null +++ b/tests/test_testing.py @@ -0,0 +1,66 @@ +from pathlib import Path + +import pytest + +import xscen as xs + + +class TestPublish: + @pytest.mark.requires_netcdf + @pytest.mark.parametrize("fmt", ["md", "rst"]) + def test_normal(self, fmt): + out = xs.testing.publish_release_notes( + fmt, changes=Path(__file__).parent.parent.joinpath("CHANGELOG.rst") + ) + if fmt == "md": + assert out.startswith("# Changelog\n\n") + assert "[PR/413](https://github.com/Ouranosinc/xscen/pull/413)" in out + elif fmt == "rst": + assert out.startswith("=========\nChangelog\n=========\n\n") + assert "`PR/413 `_" in out + + def test_error(self): + with pytest.raises(FileNotFoundError): + xs.testing.publish_release_notes("md", changes="foo") + with pytest.raises(NotImplementedError): + xs.testing.publish_release_notes( + "foo", changes=Path(__file__).parent.parent.joinpath("CHANGELOG.rst") + ) + + @pytest.mark.requires_netcdf + def test_file(self, tmpdir): + xs.testing.publish_release_notes( + "md", + file=tmpdir / "foo.md", + changes=Path(__file__).parent.parent.joinpath("CHANGELOG.rst"), + ) + with Path(tmpdir).joinpath("foo.md").open(encoding="utf-8") as f: + assert f.read().startswith("# Changelog\n\n") + + @pytest.mark.parametrize("latest", [True, False]) + @pytest.mark.requires_netcdf + def test_latest(self, tmpdir, latest): + out = xs.testing.publish_release_notes( + "md", + file=tmpdir / "foo.md", + changes=Path(__file__).parent.parent.joinpath("CHANGELOG.rst"), + latest=latest, + ) + if latest: + assert len(out.split("\n\n## v0.")) == 2 + else: + assert len(out.split("\n\n## v0.")) > 2 + + +def test_show_version(tmpdir): + xs.testing.show_versions(file=tmpdir / "versions.txt") + with Path(tmpdir).joinpath("versions.txt").open(encoding="utf-8") as f: + out = f.read() + assert "xscen" in out + assert "xclim" in out + assert "xarray" in out + assert "numpy" in out + assert "pandas" in out + assert "dask" in out + assert "cftime" in out + assert "netCDF4" in out diff --git a/tests/test_utils.py b/tests/test_utils.py index b5fff0f8..da978353 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -825,39 +825,6 @@ def test_change_prefix(self, change_prefix): } -class TestPublish: - @pytest.mark.requires_netcdf - @pytest.mark.parametrize("fmt", ["md", "rst"]) - def test_normal(self, fmt): - out = xs.utils.publish_release_notes( - fmt, changes=Path(__file__).parent.parent.joinpath("CHANGELOG.rst") - ) - if fmt == "md": - assert out.startswith("# Changelog\n\n") - assert "[PR/413](https://github.com/Ouranosinc/xscen/pull/413)" in out - elif fmt == "rst": - assert out.startswith("=========\nChangelog\n=========\n\n") - assert "`PR/413 `_" in out - - def test_error(self): - with pytest.raises(FileNotFoundError): - xs.utils.publish_release_notes("md", changes="foo") - with pytest.raises(NotImplementedError): - xs.utils.publish_release_notes( - "foo", changes=Path(__file__).parent.parent.joinpath("CHANGELOG.rst") - ) - - @pytest.mark.requires_netcdf - def test_file(self, tmpdir): - xs.utils.publish_release_notes( - "md", - file=tmpdir / "foo.md", - changes=Path(__file__).parent.parent.joinpath("CHANGELOG.rst"), - ) - with Path(tmpdir).joinpath("foo.md").open(encoding="utf-8") as f: - assert f.read().startswith("# Changelog\n\n") - - class TestUnstackDates: @pytest.mark.parametrize( "freq", ["MS", "2MS", "3MS", "QS-DEC", "QS", "2QS", "YS", "YS-DEC", "4YS"] @@ -1043,20 +1010,6 @@ def test_errors(self): xs.utils.unstack_dates(ds) -def test_show_version(tmpdir): - xs.utils.show_versions(file=tmpdir / "versions.txt") - with Path(tmpdir).joinpath("versions.txt").open(encoding="utf-8") as f: - out = f.read() - assert "xscen" in out - assert "xclim" in out - assert "xarray" in out - assert "numpy" in out - assert "pandas" in out - assert "dask" in out - assert "cftime" in out - assert "netCDF4" in out - - class TestEnsureTime: def test_xrfreq_ok(self): ds = timeseries( From 0bfe1bd02de2d8695868c87cba334f7bd59e2da3 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Mon, 11 Nov 2024 17:07:20 -0500 Subject: [PATCH 02/39] more tests and fixes --- environment-dev.yml | 1 + environment.yml | 1 + pyproject.toml | 1 + src/xscen/io.py | 44 +++-- tests/test_io.py | 399 +++++++++++++++++++++++++++++++++++++------- 5 files changed, 378 insertions(+), 68 deletions(-) diff --git a/environment-dev.yml b/environment-dev.yml index 5b06e841..35dd5d2a 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -21,6 +21,7 @@ dependencies: - netCDF4 - numcodecs - numpy >=1.24 + - openpyxl - pandas >=2.2 - parse - pyyaml diff --git a/environment.yml b/environment.yml index 3e4232f3..b19c78ea 100644 --- a/environment.yml +++ b/environment.yml @@ -21,6 +21,7 @@ dependencies: - netCDF4 - numcodecs - numpy >=1.24 + - openpyxl - pandas >=2.2 - parse - pyyaml diff --git a/pyproject.toml b/pyproject.toml index fc5f6214..20d61b7c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,6 +54,7 @@ dependencies = [ "netCDF4", "numcodecs", "numpy >=1.24", + "openpyxl", "pandas >=2.2", "parse", # Used when opening catalogs. diff --git a/src/xscen/io.py b/src/xscen/io.py index b93dd4f8..d94d8d83 100644 --- a/src/xscen/io.py +++ b/src/xscen/io.py @@ -454,7 +454,8 @@ def save_to_zarr( # noqa: C901 if 'o', removes the existing variables. if 'a', skip existing variables, writes the others. encoding : dict, optional - If given, skipped variables are popped in place. + If given here instead of 'zarr_kwargs', encoding will only be applied to the variables that are being written, + skipping those that are already in the zarr. bitround : bool or int or dict If not False, float variables are bit-rounded by dropping a certain number of bits from their mantissa, allowing for a much better compression. @@ -512,8 +513,21 @@ def _skip(var): return False if mode == "a": + # In all cases, we need to skip the encoding of existing variables. + if exists: + if encoding: + encoding.pop(var, None) + + # If we are not appending, we need to skip the writing of existing variables. if "append_dim" not in zarr_kwargs: return exists + + # If we are appending, we need to raise an error if there are new variables. + elif exists is False: + raise ValueError( + f"When 'append_dim' is set in zarr_kwargs, all variables must already exist in the dataset." + ) + return False for var in list(ds.data_vars.keys()): @@ -521,8 +535,7 @@ def _skip(var): msg = f"Skipping {var} in {path}." logger.info(msg) ds = ds.drop_vars(var) - if encoding: - encoding.pop(var) + continue if keepbits := _get_keepbits(bitround, var, ds[var].dtype): ds = ds.assign({var: round_bits(ds[var], keepbits)}) # Remove original_shape from encoding, since it can cause issues with some engines. @@ -773,7 +786,6 @@ def make_toc(ds: xr.Dataset | xr.DataArray, loc: str | None = None) -> pd.DataFr for vv, da in ds.data_vars.items() ], ).set_index(_("Variable")) - toc.attrs["name"] = _("Content") # Add global attributes by using a fake variable and description if len(ds.attrs) > 0: @@ -793,6 +805,7 @@ def make_toc(ds: xr.Dataset | xr.DataArray, loc: str | None = None) -> pd.DataFr toc = pd.concat([toc, pd.DataFrame(index=[""])]) toc = pd.concat([toc, pd.DataFrame(index=[_("Global attributes")])]) toc = pd.concat([toc, globattr]) + toc.attrs["name"] = _("Content") return toc @@ -800,13 +813,13 @@ def make_toc(ds: xr.Dataset | xr.DataArray, loc: str | None = None) -> pd.DataFr TABLE_FORMATS = {".csv": "csv", ".xls": "excel", ".xlsx": "excel"} -def save_to_table( +def save_to_table( # noqa: C901 ds: xr.Dataset | xr.DataArray, filename: str | os.PathLike, output_format: str | None = None, *, row: str | Sequence[str] | None = None, - column: None | str | Sequence[str] = "variable", + column: None | str | Sequence[str] = None, sheet: str | Sequence[str] | None = None, coords: bool | Sequence[str] = True, col_sep: str = "_", @@ -814,7 +827,7 @@ def save_to_table( add_toc: bool | pd.DataFrame = False, **kwargs, ): - """Save the dataset to a tabular file (csv, excel, ...). + r"""Save the dataset to a tabular file (csv, excel, ...). This function will trigger a computation of the dataset. @@ -835,7 +848,8 @@ def save_to_table( Default is all data dimensions. column : str or sequence of str, optional Name of the dimension(s) to use as columns. - Default is "variable", i.e. the name of the variable(s). + When using a Dataset with more than 1 variable, default is "variable", i.e. the name of the variable(s). + When using a DataArray, default is None. sheet : str or sequence of str, optional Name of the dimension(s) to use as sheet names. Only valid if the output format is excel. @@ -851,7 +865,7 @@ def save_to_table( A table of content to add as the first sheet. Only valid if the output format is excel. If True, :py:func:`make_toc` is used to generate the toc. The sheet name of the toc can be given through the "name" attribute of the DataFrame, otherwise "Content" is used. - kwargs: + \*\*kwargs: Other arguments passed to the pandas function. If the output format is excel, kwargs to :py:class:`pandas.ExcelWriter` can be given here as well. """ @@ -864,6 +878,9 @@ def save_to_table( f"Output format could not be inferred from filename {filename.name}. Please pass `output_format`." ) + if column is None and isinstance(ds, xr.Dataset) and len(ds.data_vars) > 1: + column = "variable" + if sheet is not None and output_format != "excel": raise ValueError( f"Argument `sheet` is only valid with excel as the output format. Got {output_format}." @@ -882,15 +899,22 @@ def save_to_table( add_toc = make_toc(ds) out = {(add_toc.attrs.get("name", "Content"),): add_toc, **out} - if sheet or (add_toc is not False): + # Get engine_kwargs + if output_format == "excel": engine_kwargs = {} # Extract engine kwargs for arg in signature(pd.ExcelWriter).parameters: if arg in kwargs: engine_kwargs[arg] = kwargs.pop(arg) + else: + engine_kwargs = {} + if sheet or (add_toc is not False): with pd.ExcelWriter(filename, **engine_kwargs) as writer: for sheet_name, df in out.items(): df.to_excel(writer, sheet_name=col_sep.join(sheet_name), **kwargs) + elif len(engine_kwargs) > 0: + with pd.ExcelWriter(filename, **engine_kwargs) as writer: + out.to_excel(writer, **kwargs) else: if output_format != "excel" and isinstance(out.columns, pd.MultiIndex): out.columns = out.columns.map(lambda lvls: col_sep.join(map(str, lvls))) diff --git a/tests/test_io.py b/tests/test_io.py index f486de09..c8376f73 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -1,7 +1,8 @@ -import os +import datetime from pathlib import Path import numpy as np +import pandas as pd import pytest import xarray as xr import xclim as xc @@ -46,6 +47,7 @@ class TestEstimateChunks: ) ds2 = ds.copy() ds2["tas"] = ds2["tas"].astype(np.float32) + ds["just_a_variable"] = xr.DataArray(np.zeros(50), dims="new_dim") def test_normal(self): out1 = xs.io.estimate_chunks(self.ds, dims=["time", "lat", "lon"], target_mb=1) @@ -120,28 +122,53 @@ def test_error(self): xs.io.subset_maxsize(ds, maxsize_gb=1e-15) -def test_clean_incomplete(tmpdir): - ds = datablock_3d( - np.ones((5, 5, 5)), - variable="tas", - x="lon", - x_start=-70, - x_step=0.1, - y="lat", - y_start=45, - y_step=-0.1, - as_dataset=True, - ) - ds["pr"] = ds["tas"].copy() - ds.to_zarr(Path(tmpdir) / "test.zarr") +class TestCleanIncomplete: + @pytest.mark.parametrize("which", ["complete", "incomplete"]) + def test_complete(self, tmpdir, which): + ds = datablock_3d( + np.ones((5, 5, 5)), + variable="tas", + x="lon", + x_start=-70, + x_step=0.1, + y="lat", + y_start=45, + y_step=-0.1, + as_dataset=True, + ) + ds["pr"] = ds["tas"].copy() + ds.to_zarr(Path(tmpdir) / "test.zarr") - xs.io.clean_incomplete(Path(tmpdir) / "test.zarr", complete=["tas"]) - assert Path.exists(Path(tmpdir) / "test.zarr/tas") - assert not Path.exists(Path(tmpdir) / "test.zarr/pr") + if which == "complete": + xs.io.clean_incomplete(Path(tmpdir) / "test.zarr", complete=["tas"]) + else: + xs.io.clean_incomplete(Path(tmpdir) / "test.zarr", incomplete=["pr"]) + assert (Path(tmpdir) / "test.zarr/tas").exists() + assert not (Path(tmpdir) / "test.zarr/pr").exists() - ds2 = xr.open_zarr(Path(tmpdir) / "test.zarr") - assert "pr" not in ds2 - assert ds2.equals(ds[["tas"]]) + ds2 = xr.open_zarr(Path(tmpdir) / "test.zarr") + assert "pr" not in ds2 + assert ds2.equals(ds[["tas"]]) + + def test_error(self, tmpdir): + ds = datablock_3d( + np.ones((5, 5, 5)), + variable="tas", + x="lon", + x_start=-70, + x_step=0.1, + y="lat", + y_start=45, + y_step=-0.1, + as_dataset=True, + ) + ds["pr"] = ds["tas"].copy() + ds.to_zarr(Path(tmpdir) / "test.zarr") + + with pytest.raises(ValueError, match="Use either"): + xs.io.clean_incomplete( + Path(tmpdir) / "test.zarr", complete=["tas"], incomplete=["pr"] + ) class TestRechunkForSaving: @@ -154,7 +181,7 @@ class TestRechunkForSaving: (["rlon", "rlat"], False), ], ) - def test_options(self, datablock_3d, dims, xy): + def test_options(self, dims, xy): ds = datablock_3d( np.random.random((30, 30, 50)), variable="tas", @@ -176,7 +203,7 @@ def test_options(self, datablock_3d, dims, xy): ) assert chunks[0] == new_chunks[dim] - def test_variables(self, datablock_3d): + def test_variables(self): ds = datablock_3d( np.random.random((30, 30, 50)), variable="tas", @@ -211,7 +238,7 @@ class TestToTable: xr.merge( [ xs.testing.datablock_3d( - np.random.random_sample((20, 3, 2)), + np.ones((20, 3, 2)), v, "lon", 0, @@ -234,7 +261,7 @@ class TestToTable: @pytest.mark.parametrize( "multiple, as_dataset", [(True, True), (False, True), (False, False)] ) - def test_normal(self, multiple, as_dataset): + def test_normal(self, tmpdir, multiple, as_dataset): if multiple is False: if as_dataset: ds = self.ds[["tas"]].copy() @@ -244,9 +271,25 @@ def test_normal(self, multiple, as_dataset): ds = self.ds.copy() # Default + xs.save_to_table(ds, Path(tmpdir) / "test.csv") + saved = pd.read_csv(Path(tmpdir) / "test.csv") tab = xs.io.to_table(ds) - assert tab.shape == (120, 5 if multiple else 3) # 3 vars + 2 aux coords + + assert tab.shape == ( + 120, + 5 if multiple else 3, + ) # 3 variables + 2 coords that are not dimensions + assert saved.shape == ( + 120, + 8 if multiple else 6, + ) # everything gets mapped, so dimensions are included in the columns assert tab.columns.names == ["variable"] if multiple else [None] + assert ( + set(saved.columns) + == {"season", "time", "site", "lat", "lon", "pr", "snw", "tas"} + if multiple + else {"season", "time", "site", "tas"} + ) assert tab.index.names == ["season", "time", "site"] # Season order is chronological, rather than alphabetical np.testing.assert_array_equal( @@ -255,13 +298,24 @@ def test_normal(self, multiple, as_dataset): .index.get_level_values("season"), ["JFM", "AMJ", "JAS", "OND"], ) + np.testing.assert_array_equal(saved.loc[0, "season"], "JFM") if multiple: # Variable in the index, thus no coords + xs.save_to_table( + ds, + Path(tmpdir) / "test.xlsx", + row=["time", "variable"], + column=["season", "site"], + coords=False, + ) tab = xs.io.to_table( ds, row=["time", "variable"], column=["season", "site"], coords=False ) + saved = pd.read_excel(Path(tmpdir) / "test.xlsx") + assert tab.shape == (15, 24) + assert saved.shape == (17, 26) # Because of the headers assert tab.columns.names == ["season", "site"] np.testing.assert_array_equal( tab.loc[("1993", "pr"), ("JFM",)], ds.pr.sel(time="1993", season="JFM") @@ -275,8 +329,23 @@ def test_normal(self, multiple, as_dataset): ) == 0 ) + # Excel is not the prettiest thing to test + np.testing.assert_array_equal(saved.iloc[2, 2:], np.tile([1], 24)) + assert saved.iloc[0, 2] == "a" + assert saved.iloc[2, 0] == datetime.datetime(1993, 1, 1, 0, 0) - def test_sheet(self): + def test_sheet(self, tmpdir): + xs.save_to_table( + self.ds, + Path(tmpdir) / "test.xlsx", + row=["time", "variable"], + column=["season"], + sheet="site", + coords=False, + ) + saved = pd.read_excel( + Path(tmpdir) / "test.xlsx", sheet_name=["a", "b", "c", "d", "e", "f"] + ) # This is a test by itself tab = xs.io.to_table( self.ds, row=["time", "variable"], @@ -284,31 +353,77 @@ def test_sheet(self): sheet="site", coords=False, ) + assert set(tab.keys()) == {("a",), ("b",), ("c",), ("d",), ("e",), ("f",)} assert tab[("a",)].shape == (15, 4) # 5 time * 3 variable X 4 season + assert saved["a"].shape == (15, 6) # Because of the headers - def test_error(self): + def test_kwargs(self, tmpdir): + xs.save_to_table( + self.ds, + Path(tmpdir) / "test.xlsx", + row=["time", "variable"], + column=["season", "site"], + coords=False, + datetime_format="dd/mm/yyyy", + ) + saved = pd.read_excel(Path(tmpdir) / "test.xlsx") + assert saved.iloc[2, 0] == datetime.datetime( + 1993, 1, 1, 0, 0 + ) # No real way to test the format + + def test_multiindex(self, tmpdir): + xs.save_to_table( + self.ds, + Path(tmpdir) / "test.csv", + row=["time", "variable"], + column=["season", "site"], + coords=False, + row_sep="|", + col_sep=";", + ) + out = pd.read_csv(Path(tmpdir) / "test.csv") + assert out.shape == (15, 25) + assert out.columns[0] == "time|variable" + assert out.columns[1] == "JFM;a" + + def test_error(self, tmpdir): with pytest.raises(ValueError, match="Repeated dimension names."): - xs.io.to_table( - self.ds, row=["time", "variable"], column=["season", "site", "time"] + xs.save_to_table( + self.ds, + Path(tmpdir) / "test.xlsx", + row=["time", "variable"], + column=["season", "site", "time"], ) with pytest.raises(ValueError, match="Passed row, column and sheet"): - xs.io.to_table( - self.ds, row=["time", "variable"], column=["season", "site", "foo"] + xs.save_to_table( + self.ds, + Path(tmpdir) / "test.xlsx", + row=["time", "variable"], + column=["season", "site", "foo"], ) with pytest.raises( NotImplementedError, match="Keeping auxiliary coords is not implemented when", ): - xs.io.to_table( + xs.save_to_table( self.ds, + Path(tmpdir) / "test.xlsx", row=["time", "variable"], column=["season", "site"], coords=True, ) + with pytest.raises(ValueError, match="Output format could not be inferred"): + xs.save_to_table(self.ds, Path(tmpdir) / "test") + with pytest.raises( + ValueError, match="is only valid with excel as the output format" + ): + xs.save_to_table(self.ds, Path(tmpdir) / "test.csv", sheet="site") + with pytest.raises(ValueError, match="but the output format is not Excel."): + xs.save_to_table(self.ds, Path(tmpdir) / "test.csv", add_toc=True) @pytest.mark.parametrize("as_dataset", [True, False]) - def test_make_toc(self, as_dataset): + def test_make_toc(self, tmpdir, as_dataset): ds = self.ds.copy() for v in ds.data_vars: ds[v].attrs["long_name"] = f"Long name for {v}" @@ -318,7 +433,10 @@ def test_make_toc(self, as_dataset): ds = ds["tas"] with xc.set_options(metadata_locales="fr"): - toc = xs.io.make_toc(ds) + xs.save_to_table(ds, Path(tmpdir) / "test.xlsx", add_toc=True) + + toc = pd.read_excel(Path(tmpdir) / "test.xlsx", sheet_name="Contenu") + toc = toc.set_index("Unnamed: 0" if as_dataset else "Variable") if as_dataset: assert toc.shape == (8, 2) @@ -327,7 +445,7 @@ def test_make_toc(self, as_dataset): "tas", "pr", "snw", - "", + np.nan, "Attributs globaux", "foo", "baz", @@ -380,11 +498,63 @@ def test_guess_bitround(self, vname, vtype, bitr, exp): else: assert xs.io._get_keepbits(bitr, vname, vtype) == exp + @pytest.mark.parametrize("mode", ["f", "o", "a"]) + @pytest.mark.parametrize("itervar", [True, False]) + def test_mode(self, tmpdir, mode, itervar): + ds1 = timeseries( + np.arange(1, 5), + variable="tas", + as_dataset=True, + ) + xs.save_to_zarr(ds1, Path(tmpdir) / "test.zarr") -class TestSaveToNetcdf: - def test_normal(self, tmpdir): - ds = datablock_3d( - np.tile(np.arange(1111, 1121), 15).reshape(15, 5, 2) * 1e-7, + ds2 = timeseries( + np.arange(10, 14), + variable="tas", + as_dataset=True, + ) + ds2["pr"] = ds2["tas"].copy() + ds2 = ds2[["pr", "tas"]] + + if mode == "f": + with pytest.raises(ValueError, match="exists in dataset"): + xs.save_to_zarr( + ds2, Path(tmpdir) / "test.zarr", mode=mode, itervar=itervar + ) + assert not (Path(tmpdir) / "test.zarr/pr").exists() + if itervar: + # Essentially just to reach 100% coverage and make sure the function doesn't crash with mode="f" and itervar=True + xs.save_to_zarr( + ds2, Path(tmpdir) / "test2.zarr", mode=mode, itervar=itervar + ) + ds3 = xr.open_zarr(Path(tmpdir) / "test2.zarr") + np.testing.assert_array_almost_equal(ds3.tas.isel(time=0), [10]) + np.testing.assert_array_almost_equal(ds3.pr.isel(time=0), [10]) + + elif mode == "o": + xs.save_to_zarr(ds2, Path(tmpdir) / "test.zarr", mode=mode, itervar=itervar) + ds3 = xr.open_zarr(Path(tmpdir) / "test.zarr") + np.testing.assert_array_almost_equal(ds3.tas.isel(time=0), [10]) + np.testing.assert_array_almost_equal(ds3.pr.isel(time=0), [10]) + + elif mode == "a": + # First, try only with variables that are already in the dataset + xs.save_to_zarr( + ds2[["tas"]], Path(tmpdir) / "test.zarr", mode=mode, itervar=itervar + ) + ds3 = xr.open_zarr(Path(tmpdir) / "test.zarr") + np.testing.assert_array_almost_equal(ds3.tas.isel(time=0), [1]) + + # Now, try with a new variable + xs.save_to_zarr(ds2, Path(tmpdir) / "test.zarr", mode=mode, itervar=itervar) + ds3 = xr.open_zarr(Path(tmpdir) / "test.zarr") + np.testing.assert_array_almost_equal(ds3.tas.isel(time=0), [1]) + np.testing.assert_array_almost_equal(ds3.pr.isel(time=0), [10]) + + @pytest.mark.parametrize("append", [True, False]) + def test_append(self, tmpdir, append): + ds1 = datablock_3d( + np.array([[[1, 2], [3, 4]]]), variable="tas", x="lon", x_start=-70, @@ -392,28 +562,141 @@ def test_normal(self, tmpdir): y_start=45, as_dataset=True, ) - ds["pr"] = ds["tas"].copy() - ds["other"] = ds["tas"].copy() + ds2 = datablock_3d( + np.array([[[11, 12], [13, 14]]]), + variable="tas", + x="lon", + x_start=-70, + y="lat", + y_start=45, + start="2005-01-01", + as_dataset=True, + ) + ds2["pr"] = ds2["tas"].copy() + xs.save_to_zarr( + ds1, Path(tmpdir) / "test.zarr", encoding={"tas": {"dtype": "float32"}} + ) + + encoding = { + "tas": {"dtype": "int32"} + } # This should be ignored, as the variable is already in the dataset + if append: + with pytest.raises( + ValueError, + match="is set in zarr_kwargs, all variables must already exist in the dataset.", + ): + xs.save_to_zarr( + ds2, + Path(tmpdir) / "test.zarr", + mode="a", + zarr_kwargs={"append_dim": "time"}, + encoding=encoding, + ) + xs.save_to_zarr( + ds2[["tas"]], + Path(tmpdir) / "test.zarr", + mode="a", + zarr_kwargs={"append_dim": "time"}, + encoding=encoding, + ) + out = xr.open_zarr(Path(tmpdir) / "test.zarr") + np.testing.assert_array_equal( + out.tas, np.array([[[1, 2], [3, 4]], [[11, 12], [13, 14]]]) + ) + else: + xs.save_to_zarr( + ds2, Path(tmpdir) / "test.zarr", mode="a", encoding=encoding + ) + out = xr.open_zarr(Path(tmpdir) / "test.zarr") + np.testing.assert_array_equal(out.tas, np.array([[[1, 2], [3, 4]]])) + np.testing.assert_array_equal(out.pr, np.array([[[11, 12], [13, 14]]])) + assert out.tas.dtype == np.float32 + + def test_skip(self, tmpdir): + ds1 = timeseries( + np.arange(1, 5), + variable="tas", + as_dataset=True, + ) + ds2 = timeseries( + np.arange(10, 14), + variable="tas", + as_dataset=True, + ) + xs.save_to_zarr(ds1, Path(tmpdir) / "test.zarr") + xs.save_to_zarr(ds2, Path(tmpdir) / "test.zarr", mode="a") + ds3 = xr.open_zarr(Path(tmpdir) / "test.zarr") + np.testing.assert_array_almost_equal(ds3.tas.isel(time=0), [1]) + + +@pytest.mark.parametrize("engine", ["netcdf", "zarr"]) +def test_savefuncs_normal(tmpdir, engine): + ds = datablock_3d( + np.tile(np.arange(1111, 1121), 15).reshape(15, 5, 2) * 1e-7, + variable="tas", + x="lon", + x_start=-70, + y="lat", + y_start=45, + as_dataset=True, + ) + ds["pr"] = ds["tas"].copy() + ds["other"] = ds["tas"].copy() + ds["other"].encoding = {"dtype": "float32"} + ds.attrs["foo"] = {"bar": 1} + ds["pr"].attrs["foo"] = {"bar": 2} + + ds = ds.assign_coords( + some_coord=("lat", np.array(["hi", "how", "are", "you", "doing"])) + ) + ds["some_coord"] = ds["some_coord"].astype(object) + ds["some_coord"].encoding = {"source": "this is a source"} + rechunk = {"time": 5, "lon": 2, "lat": 2} + bitround = {"tas": 2, "pr": 3} + if engine == "netcdf": xs.save_to_netcdf( ds, Path(tmpdir) / "test.nc", - rechunk={"time": 5, "lon": 2, "lat": 2}, - bitround={"tas": 2, "pr": 3}, + rechunk=rechunk, + bitround=bitround, ) - ds2 = xr.open_dataset(Path(tmpdir) / "test.nc", chunks={}) - assert ds2.tas.chunks == ((5, 5, 5), (2, 2, 1), (2,)) - - np.testing.assert_array_almost_equal( - ds2.tas.isel(time=0, lat=0, lon=0), [0.00010681], decimal=8 - ) - assert ds2.tas.attrs["_QuantizeBitRoundNumberOfSignificantDigits"] == 2 - np.testing.assert_array_almost_equal( - ds2.pr.isel(time=0, lat=0, lon=0), [0.00011444], decimal=8 - ) - assert ds2.pr.attrs["_QuantizeBitRoundNumberOfSignificantDigits"] == 3 - np.testing.assert_array_almost_equal( - ds2.other.isel(time=0, lat=0, lon=0), [0.0001111], decimal=8 + else: + xs.save_to_zarr( + ds, + Path(tmpdir) / "test.zarr", + rechunk=rechunk, + bitround=bitround, ) - assert ds2.other.attrs["_QuantizeBitRoundNumberOfSignificantDigits"] == 12 + ds2 = xr.open_zarr(Path(tmpdir) / "test.zarr") + + # Chunks + assert ds2.tas.chunks == ((5, 5, 5), (2, 2, 1), (2,)) + + # Dtype + assert ds2.tas.dtype == np.float64 + assert ds2.other.dtype == np.float32 + + # Bitround + np.testing.assert_array_almost_equal( + ds2.tas.isel(time=0, lat=0, lon=0), [0.00010681], decimal=8 + ) + assert ds2.tas.attrs["_QuantizeBitRoundNumberOfSignificantDigits"] == 2 + np.testing.assert_array_almost_equal( + ds2.pr.isel(time=0, lat=0, lon=0), [0.00011444], decimal=8 + ) + assert ds2.pr.attrs["_QuantizeBitRoundNumberOfSignificantDigits"] == 3 + np.testing.assert_array_almost_equal( + ds2.other.isel(time=0, lat=0, lon=0), [0.0001111], decimal=8 + ) + assert ds2.other.attrs["_QuantizeBitRoundNumberOfSignificantDigits"] == 12 + + # Attributes + assert ds2.attrs["foo"] == "{'bar': 1}" + assert ds2.pr.attrs["foo"] == "{'bar': 2}" + + if engine == "netcdf": + assert ds.some_coord.encoding == {"source": "this is a source"} + else: + assert ds.some_coord.encoding == {} From 042b36f6a4d14748c98e74d602313ac41b179869 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Mon, 11 Nov 2024 17:38:04 -0500 Subject: [PATCH 03/39] more tests --- tests/test_io.py | 89 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/tests/test_io.py b/tests/test_io.py index c8376f73..6613eaf4 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -700,3 +700,92 @@ def test_savefuncs_normal(tmpdir, engine): assert ds.some_coord.encoding == {"source": "this is a source"} else: assert ds.some_coord.encoding == {} + + +class TestRechunk: + @pytest.mark.parametrize("engine", ["nc", "zarr"]) + def test_rechunk(self, tmpdir, engine): + ds = datablock_3d( + np.tile(np.arange(1111, 1121), 15).reshape(15, 5, 2) * 1e-7, + variable="tas", + x="lon", + x_start=-70, + y="lat", + y_start=45, + as_dataset=True, + ) + ds["pr"] = ds["tas"].copy() + + if engine == "nc": + xs.save_to_netcdf( + ds, + Path(tmpdir) / "test.nc", + ) + else: + xs.save_to_zarr( + ds, + Path(tmpdir) / "test.zarr", + ) + + (Path(tmpdir) / f"test2.zarr").mkdir() + + xs.io.rechunk( + Path(tmpdir) / f"test.{engine}", + Path(tmpdir) / "test2.zarr", + chunks_over_dim={"time": 5, "lon": 2, "lat": 2}, + overwrite=True, + worker_mem="1GB", + temp_store=Path(tmpdir) / "temp", + ) + xs.io.rechunk( + Path(tmpdir) / f"test.{engine}", + Path(tmpdir) / "test3.zarr", + chunks_over_var={"tas": {"time": 5, "lon": 2, "lat": 2}}, + overwrite=True, + worker_mem="1GB", + temp_store=Path(tmpdir) / "temp", + ) + + ds2 = xr.open_zarr(Path(tmpdir) / "test2.zarr") + ds3 = xr.open_zarr(Path(tmpdir) / "test3.zarr") + assert ds2.tas.chunks == ((5, 5, 5), (2, 2, 1), (2,)) + assert ds2.pr.chunks == ((5, 5, 5), (2, 2, 1), (2,)) + assert ds3.tas.chunks == ((5, 5, 5), (2, 2, 1), (2,)) + assert ds3.pr.chunks == ((15,), (5,), (2,)) + + def test_error(self, tmpdir): + ds = datablock_3d( + np.tile(np.arange(1111, 1121), 15).reshape(15, 5, 2) * 1e-7, + variable="tas", + x="lon", + x_start=-70, + y="lat", + y_start=45, + as_dataset=True, + ) + with pytest.raises(ValueError, match="No chunks given. "): + xs.io.rechunk(ds, Path(tmpdir) / "test.nc", worker_mem="1GB") + + +def test_zip_zip(tmpdir): + ds = datablock_3d( + np.tile(np.arange(1111, 1121), 15).reshape(15, 5, 2) * 1e-7, + variable="tas", + x="lon", + x_start=-70, + y="lat", + y_start=45, + as_dataset=True, + ) + xs.save_to_zarr(ds, Path(tmpdir) / "test.zarr") + xs.io.zip_directory( + Path(tmpdir) / "test.zarr", Path(tmpdir) / "test.zarr.zip", delete=True + ) + assert not (Path(tmpdir) / "test.zarr").exists() + + with xr.open_zarr(Path(tmpdir) / "test.zarr.zip") as ds2: + assert ds2.equals(ds) + + xs.io.unzip_directory(Path(tmpdir) / "test.zarr.zip", Path(tmpdir) / "test2.zarr") + with xr.open_zarr(Path(tmpdir) / "test2.zarr") as ds3: + assert ds3.equals(ds) From b1424c2a3eebe0a6d4ccc883a19ede6ef203092c Mon Sep 17 00:00:00 2001 From: RondeauG Date: Tue, 12 Nov 2024 09:18:27 -0500 Subject: [PATCH 04/39] maybe fix lint --- src/xscen/data/fr/LC_MESSAGES/xscen.po | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/xscen/data/fr/LC_MESSAGES/xscen.po b/src/xscen/data/fr/LC_MESSAGES/xscen.po index 269ebed3..be7efb40 100644 --- a/src/xscen/data/fr/LC_MESSAGES/xscen.po +++ b/src/xscen/data/fr/LC_MESSAGES/xscen.po @@ -19,8 +19,8 @@ msgstr "Description" msgid "Units" msgstr "Unités" -msgid "Content" -msgstr "Contenu" - msgid "Global attributes" msgstr "Attributs globaux" + +msgid "Content" +msgstr "Contenu" From 2876f4abd0349b8278413b5126bf797f6df912b3 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Tue, 12 Nov 2024 09:18:54 -0500 Subject: [PATCH 05/39] maybe fix lint pt2 --- src/xscen/data/fr/LC_MESSAGES/xscen.mo | Bin 1015 -> 1015 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/src/xscen/data/fr/LC_MESSAGES/xscen.mo b/src/xscen/data/fr/LC_MESSAGES/xscen.mo index 3821b17700b0108cae356d9251107847dc4a137d..51b5812af3b98854aae679b7238a00236247820d 100644 GIT binary patch delta 26 hcmey){+)fp7e-z~T|*-U14}DI3ta=#&5TUn7y)?f2ipJu delta 26 hcmey){+)fp7e-zKT>~QpLlY}Q6I}z7&5TUn7y)>32hacj From 4c0b4dd3c71404ab05204c8fa16be01a940bb338 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Tue, 12 Nov 2024 09:42:22 -0500 Subject: [PATCH 06/39] fix some tests --- tests/test_testing.py | 5 +++-- tests/test_utils.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/test_testing.py b/tests/test_testing.py index 74006677..8bec8021 100644 --- a/tests/test_testing.py +++ b/tests/test_testing.py @@ -10,7 +10,9 @@ class TestPublish: @pytest.mark.parametrize("fmt", ["md", "rst"]) def test_normal(self, fmt): out = xs.testing.publish_release_notes( - fmt, changes=Path(__file__).parent.parent.joinpath("CHANGELOG.rst") + fmt, + changes=Path(__file__).parent.parent.joinpath("CHANGELOG.rst"), + latest=False, ) if fmt == "md": assert out.startswith("# Changelog\n\n") @@ -42,7 +44,6 @@ def test_file(self, tmpdir): def test_latest(self, tmpdir, latest): out = xs.testing.publish_release_notes( "md", - file=tmpdir / "foo.md", changes=Path(__file__).parent.parent.joinpath("CHANGELOG.rst"), latest=latest, ) diff --git a/tests/test_utils.py b/tests/test_utils.py index da978353..9b49b6c3 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -71,7 +71,7 @@ class TestDateParser: ("2001", True, "datetime", pd.Timestamp("2001-12-31 23:59:59")), ("150004", True, "datetime", pd.Timestamp("1500-04-30 23:59:59")), ("31231212", None, "datetime", pd.Timestamp("3123-12-12")), - ("2001-07-08", None, "period", pd.Period("2001-07-08", "H")), + ("2001-07-08", None, "period", pd.Period("2001-07-08", "h")), (pd.Timestamp("1993-05-20T12:07"), None, "str", "1993-05-20"), ( cftime.Datetime360Day(1981, 2, 30), @@ -94,7 +94,7 @@ class TestDateParser: ("abc", None, "datetime", pd.Timestamp("NaT")), ("", True, "datetime", pd.Timestamp("NaT")), ( - pd.Period("2001-07-08", "H"), + pd.Period("2001-07-08", "h"), None, "datetime", pd.Timestamp("2001-07-08"), From d15e740f7f4e2a3747154d128e12a6df640e042d Mon Sep 17 00:00:00 2001 From: RondeauG Date: Tue, 12 Nov 2024 10:18:29 -0500 Subject: [PATCH 07/39] try with previous flox version --- environment-dev.yml | 2 +- environment.yml | 2 +- pyproject.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/environment-dev.yml b/environment-dev.yml index 35dd5d2a..44704ece 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -11,7 +11,7 @@ dependencies: - cf_xarray >=0.7.6 - clisops >=0.10 - dask - - flox + - flox !=0.9.14 - fsspec - geopandas - h5netcdf diff --git a/environment.yml b/environment.yml index b19c78ea..f4c76e82 100644 --- a/environment.yml +++ b/environment.yml @@ -11,7 +11,7 @@ dependencies: - cf_xarray >=0.7.6 - clisops >=0.10 - dask - - flox + - flox !=0.9.14 - fsspec - geopandas - h5netcdf diff --git a/pyproject.toml b/pyproject.toml index 20d61b7c..3bf7347d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,7 +44,7 @@ dependencies = [ "cf_xarray >=0.7.6", "clisops >=0.10", "dask", - "flox", + "flox !=0.9.14", "fsspec", "geopandas", "h5netcdf", From fa8206d5670b892fc4a7ac1fcab8a781bb29e046 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Tue, 12 Nov 2024 10:32:24 -0500 Subject: [PATCH 08/39] pin dask --- environment-dev.yml | 4 ++-- environment.yml | 4 ++-- pyproject.toml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/environment-dev.yml b/environment-dev.yml index 44704ece..917f1b4b 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -10,8 +10,8 @@ dependencies: - cftime - cf_xarray >=0.7.6 - clisops >=0.10 - - dask - - flox !=0.9.14 + - dask !=2024.11 # FIXME: https://github.com/Ouranosinc/xclim/issues/1992 + - flox !=0.9.14 # FIXME: Remove this line once _datetime_nanmin() in flox.xrutils is fixed - fsspec - geopandas - h5netcdf diff --git a/environment.yml b/environment.yml index f4c76e82..5b444878 100644 --- a/environment.yml +++ b/environment.yml @@ -10,8 +10,8 @@ dependencies: - cftime - cf_xarray >=0.7.6 - clisops >=0.10 - - dask - - flox !=0.9.14 + - dask !=2024.11 # FIXME: https://github.com/Ouranosinc/xclim/issues/1992 + - flox !=0.9.14 # FIXME: Remove this line once _datetime_nanmin() in flox.xrutils is fixed - fsspec - geopandas - h5netcdf diff --git a/pyproject.toml b/pyproject.toml index 3bf7347d..ed4b5f01 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,8 +43,8 @@ dependencies = [ "cftime", "cf_xarray >=0.7.6", "clisops >=0.10", - "dask", - "flox !=0.9.14", + "dask !=2024.11", # FIXME: https://github.com/Ouranosinc/xclim/issues/1992 + "flox !=0.9.14", # FIXME: Remove this line once _datetime_nanmin() in flox.xrutils is fixed "fsspec", "geopandas", "h5netcdf", From 02f0d81ca2fe4aacc0e0614ec408ee60a9f60b60 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Tue, 12 Nov 2024 10:44:22 -0500 Subject: [PATCH 09/39] more explicit pins --- environment-dev.yml | 2 +- environment.yml | 2 +- pyproject.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/environment-dev.yml b/environment-dev.yml index 917f1b4b..5c1fb63a 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -10,7 +10,7 @@ dependencies: - cftime - cf_xarray >=0.7.6 - clisops >=0.10 - - dask !=2024.11 # FIXME: https://github.com/Ouranosinc/xclim/issues/1992 + - dask !=2024.11.0, !=2024.11.1 # FIXME: https://github.com/Ouranosinc/xclim/issues/1992 - flox !=0.9.14 # FIXME: Remove this line once _datetime_nanmin() in flox.xrutils is fixed - fsspec - geopandas diff --git a/environment.yml b/environment.yml index 5b444878..92ba2025 100644 --- a/environment.yml +++ b/environment.yml @@ -10,7 +10,7 @@ dependencies: - cftime - cf_xarray >=0.7.6 - clisops >=0.10 - - dask !=2024.11 # FIXME: https://github.com/Ouranosinc/xclim/issues/1992 + - dask !=2024.11.0, !=2024.11.1 # FIXME: https://github.com/Ouranosinc/xclim/issues/1992 - flox !=0.9.14 # FIXME: Remove this line once _datetime_nanmin() in flox.xrutils is fixed - fsspec - geopandas diff --git a/pyproject.toml b/pyproject.toml index ed4b5f01..d6b5285b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,7 @@ dependencies = [ "cftime", "cf_xarray >=0.7.6", "clisops >=0.10", - "dask !=2024.11", # FIXME: https://github.com/Ouranosinc/xclim/issues/1992 + "dask !=2024.11.0, !=2024.11.1", # FIXME: https://github.com/Ouranosinc/xclim/issues/1992 "flox !=0.9.14", # FIXME: Remove this line once _datetime_nanmin() in flox.xrutils is fixed "fsspec", "geopandas", From 0081ead6450150c09572e3abb0b3136e21280ddf Mon Sep 17 00:00:00 2001 From: RondeauG Date: Tue, 12 Nov 2024 11:09:22 -0500 Subject: [PATCH 10/39] upd changelog --- CHANGELOG.rst | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 3209d695..290ca20b 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,35 @@ Changelog ========= +v0.11.0 (unreleased) +-------------------- +Contributors to this version: Gabriel Rondeau-Genesse (:user:`RondeauG`). + +New features and enhancements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +* N/A + +Breaking changes +^^^^^^^^^^^^^^^^ +* ``xs.utils.publish_release_notes`` and ``xs.utils.show_versions`` have been moved to ``xs.testing``. (:pull:`492`). + +Bug fixes +^^^^^^^^^ +* Added a missing library (``openpyxl``) to the requirements. (:pull:`492`). +* Fixed a bug in ``xs.io.subset_maxsize`` where the function would drop the last year. (:pull:`492`). +* Fixed a bug in ``xs.io.clean_incomplete`` where the `.zmetadata` file was not removed. (:pull:`492`). +* Fixed a bug in the saving of datasets where encoding was sometimes not applied, resulting for example in rechunking not being respected. (:pull:`492`). +* Fixed multiple bugs in ``xs.io.save_to_zarr`` with `mode='a'`. (:pull:`492`). +* Fixed a few minor bugs in ``xs.io.save_to_table``. (:pull:`492`). + +Internal changes +^^^^^^^^^^^^^^^^ +* Added a new parameter `latest` to ``xs.testing.publish_release_notes`` to only print the latest release notes. (:pull:`492`). +* The estimation method in ``xs.io.estimate_chunks`` has been improved. (:pull:`492`). +* A new parameter `incomplete` has been added to ``xs.io.clean_incomplete`` to remove incomplete variables. (:pull:`492`). +* Continued work on adding tests. (:pull:`492`). + + v0.10.1 (2024-11-04) -------------------- Contributors to this version: Gabriel Rondeau-Genesse (:user:`RondeauG`), Pascal Bourgault (:user:`aulemahal`), Éric Dupuis (:user:`coxipi`). From 311965f83fc2017308ba8312f38f5216526d3c78 Mon Sep 17 00:00:00 2001 From: RondeauG <38501935+RondeauG@users.noreply.github.com> Date: Thu, 14 Nov 2024 13:41:21 -0500 Subject: [PATCH 11/39] Update environment-dev.yml --- environment-dev.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment-dev.yml b/environment-dev.yml index 5c1fb63a..7d5d664b 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -11,7 +11,7 @@ dependencies: - cf_xarray >=0.7.6 - clisops >=0.10 - dask !=2024.11.0, !=2024.11.1 # FIXME: https://github.com/Ouranosinc/xclim/issues/1992 - - flox !=0.9.14 # FIXME: Remove this line once _datetime_nanmin() in flox.xrutils is fixed + - flox !=0.9.14 # FIXME: 0.9.14 is a broken version. This pin could be removed eventually. - fsspec - geopandas - h5netcdf From ffb66c143ad5056981220e09ab2b8ff5eb2bee99 Mon Sep 17 00:00:00 2001 From: RondeauG <38501935+RondeauG@users.noreply.github.com> Date: Thu, 14 Nov 2024 13:41:26 -0500 Subject: [PATCH 12/39] Update environment.yml --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 92ba2025..c1d18229 100644 --- a/environment.yml +++ b/environment.yml @@ -11,7 +11,7 @@ dependencies: - cf_xarray >=0.7.6 - clisops >=0.10 - dask !=2024.11.0, !=2024.11.1 # FIXME: https://github.com/Ouranosinc/xclim/issues/1992 - - flox !=0.9.14 # FIXME: Remove this line once _datetime_nanmin() in flox.xrutils is fixed + - flox !=0.9.14 # FIXME: 0.9.14 is a broken version. This pin could be removed eventually. - fsspec - geopandas - h5netcdf From 45ba4e8bfefbeb62edf7d00f8a3f71be406767ab Mon Sep 17 00:00:00 2001 From: RondeauG <38501935+RondeauG@users.noreply.github.com> Date: Thu, 14 Nov 2024 13:41:31 -0500 Subject: [PATCH 13/39] Update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d6b5285b..29b66e68 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,7 +44,7 @@ dependencies = [ "cf_xarray >=0.7.6", "clisops >=0.10", "dask !=2024.11.0, !=2024.11.1", # FIXME: https://github.com/Ouranosinc/xclim/issues/1992 - "flox !=0.9.14", # FIXME: Remove this line once _datetime_nanmin() in flox.xrutils is fixed + "flox !=0.9.14", # FIXME: 0.9.14 is a broken version. This pin could be removed eventually. "fsspec", "geopandas", "h5netcdf", From a085e79a3afdbb852736acc779127ea627df3928 Mon Sep 17 00:00:00 2001 From: RondeauG <38501935+RondeauG@users.noreply.github.com> Date: Thu, 14 Nov 2024 14:09:01 -0500 Subject: [PATCH 14/39] Update environment-dev.yml --- environment-dev.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment-dev.yml b/environment-dev.yml index 7d5d664b..92df31aa 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -10,7 +10,7 @@ dependencies: - cftime - cf_xarray >=0.7.6 - clisops >=0.10 - - dask !=2024.11.0, !=2024.11.1 # FIXME: https://github.com/Ouranosinc/xclim/issues/1992 + - dask >=2024.8.1,<2024.11 # FIXME: https://github.com/Ouranosinc/xclim/issues/1992 - flox !=0.9.14 # FIXME: 0.9.14 is a broken version. This pin could be removed eventually. - fsspec - geopandas From abc2a40e0175238055d474a9a6848f971d14da16 Mon Sep 17 00:00:00 2001 From: RondeauG <38501935+RondeauG@users.noreply.github.com> Date: Thu, 14 Nov 2024 14:09:21 -0500 Subject: [PATCH 15/39] Update environment.yml --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index c1d18229..ad2a08f4 100644 --- a/environment.yml +++ b/environment.yml @@ -10,7 +10,7 @@ dependencies: - cftime - cf_xarray >=0.7.6 - clisops >=0.10 - - dask !=2024.11.0, !=2024.11.1 # FIXME: https://github.com/Ouranosinc/xclim/issues/1992 + - dask >=2024.8.1,<2024.11 # FIXME: https://github.com/Ouranosinc/xclim/issues/1992 - flox !=0.9.14 # FIXME: 0.9.14 is a broken version. This pin could be removed eventually. - fsspec - geopandas From d0fb03d4ea2d441644e556e7b76dd7ea0fdadb74 Mon Sep 17 00:00:00 2001 From: RondeauG <38501935+RondeauG@users.noreply.github.com> Date: Thu, 14 Nov 2024 14:09:44 -0500 Subject: [PATCH 16/39] Update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 29b66e68..8ed0a1d6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,7 @@ dependencies = [ "cftime", "cf_xarray >=0.7.6", "clisops >=0.10", - "dask !=2024.11.0, !=2024.11.1", # FIXME: https://github.com/Ouranosinc/xclim/issues/1992 + "dask >=2024.8.1,<2024.11", # FIXME: https://github.com/Ouranosinc/xclim/issues/1992 "flox !=0.9.14", # FIXME: 0.9.14 is a broken version. This pin could be removed eventually. "fsspec", "geopandas", From e7e98532984d3c23767549bae7228689ca8df763 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Fri, 15 Nov 2024 10:24:48 -0500 Subject: [PATCH 17/39] open_dataset to nimbus --- docs/notebooks/4_ensembles.ipynb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/notebooks/4_ensembles.ipynb b/docs/notebooks/4_ensembles.ipynb index dad0407f..86a11809 100644 --- a/docs/notebooks/4_ensembles.ipynb +++ b/docs/notebooks/4_ensembles.ipynb @@ -40,7 +40,8 @@ "metadata": {}, "outputs": [], "source": [ - "from xclim.testing import open_dataset\n", + "import xarray as xr\n", + "from xclim.testing.utils import nimbus\n", "\n", "import xscen as xs\n", "\n", @@ -53,7 +54,8 @@ "}\n", "\n", "for d in datasets:\n", - " ds = open_dataset(datasets[d]).isel(lon=slice(0, 4), lat=slice(0, 4))\n", + " file = nimbus().fetch(datasets[d])\n", + " ds = xr.open_dataset(file).isel(lon=slice(0, 4), lat=slice(0, 4))\n", " ds = xs.climatological_op(\n", " ds,\n", " op=\"mean\",\n", From 40f6075df1a165b8ab6bf170669761dd82e0a600 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Fri, 15 Nov 2024 10:38:08 -0500 Subject: [PATCH 18/39] more nimbus --- tests/test_ensembles.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/tests/test_ensembles.py b/tests/test_ensembles.py index e21a409a..ce1e5532 100644 --- a/tests/test_ensembles.py +++ b/tests/test_ensembles.py @@ -11,20 +11,12 @@ import xesmf as xe except ImportError: xe = None -# temp fix for changes to xclim-testdata -from functools import partial -from xclim.testing import open_dataset from xclim.testing.helpers import test_timeseries as timeseries +from xclim.testing.utils import nimbus import xscen as xs -# FIXME: Remove if-else when updating minimum xclim version to 0.53 -if Version(xc.__version__) < Version("0.53.0"): - # Hack to revert to old testdata with old xclim - open_dataset = partial(open_dataset, branch="v2023.12.14") - - LOGGER = logging.getLogger(__name__) @@ -1103,7 +1095,7 @@ def test_build_partition_data(self, samplecat, tmp_path): class TestReduceEnsemble: def test_with_criteria(self): - ds = open_dataset("EnsembleReduce/TestEnsReduceCriteria.nc") + ds = xr.open_dataset(nimbus().fetch("EnsembleReduce/TestEnsReduceCriteria.nc")) selected, clusters, fig_data = xs.reduce_ensemble( ds["data"], method="kmeans", max_clusters=3 ) @@ -1122,7 +1114,9 @@ def test_without_criteria(self, horizon): "CNRM-CM5": "EnsembleStats/BCCAQv2+ANUSPLIN300_CNRM-CM5_historical+rcp45_r1i1p1_1970-2050_tg_mean_YS.nc", } for d in datasets: - ds = open_dataset(datasets[d]).isel(lon=slice(0, 4), lat=slice(0, 4)) + ds = xr.open_dataset(nimbus().fetch(datasets[d])).isel( + lon=slice(0, 4), lat=slice(0, 4) + ) ds = xs.climatological_op( ds, op="mean", @@ -1146,7 +1140,7 @@ def test_without_criteria(self, horizon): assert fig_data == {} def test_errors(self): - ds = open_dataset("EnsembleReduce/TestEnsReduceCriteria.nc") + ds = xr.open_dataset(nimbus().fetch("EnsembleReduce/TestEnsReduceCriteria.nc")) with pytest.raises( ValueError, match="Data must have a 'horizon' dimension to be subsetted." ): From 1a8e0294b25e752185fcd6cb67e0ccc5ba742962 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Fri, 15 Nov 2024 12:29:51 -0500 Subject: [PATCH 19/39] try to fix bad nimbus url --- docs/notebooks/4_ensembles.ipynb | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/docs/notebooks/4_ensembles.ipynb b/docs/notebooks/4_ensembles.ipynb index 86a11809..c2ef14c4 100644 --- a/docs/notebooks/4_ensembles.ipynb +++ b/docs/notebooks/4_ensembles.ipynb @@ -40,11 +40,15 @@ "metadata": {}, "outputs": [], "source": [ + "# from xclim.testing.utils import nimbus\n", + "import urllib\n", + "\n", + "import pooch\n", "import xarray as xr\n", - "from xclim.testing.utils import nimbus\n", "\n", "import xscen as xs\n", "\n", + "url = \"https://github.com/Ouranosinc/xclim-testdata/tree/main/data/\"\n", "datasets = {\n", " \"ACCESS\": \"EnsembleStats/BCCAQv2+ANUSPLIN300_ACCESS1-0_historical+rcp45_r1i1p1_1950-2100_tg_mean_YS.nc\",\n", " \"BNU-ESM\": \"EnsembleStats/BCCAQv2+ANUSPLIN300_BNU-ESM_historical+rcp45_r1i1p1_1950-2100_tg_mean_YS.nc\",\n", @@ -54,7 +58,8 @@ "}\n", "\n", "for d in datasets:\n", - " file = nimbus().fetch(datasets[d])\n", + " # file = nimbus().fetch(datasets[d]) # There is currently a bug in xclim 0.53.2\n", + " file = pooch.retrieve(urllib.parse.urljoin(url, datasets[d]), known_hash=None)\n", " ds = xr.open_dataset(file).isel(lon=slice(0, 4), lat=slice(0, 4))\n", " ds = xs.climatological_op(\n", " ds,\n", From 184852d3b2eadc1e14cb1194cdb14f3a7ec49f16 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Fri, 15 Nov 2024 13:28:19 -0500 Subject: [PATCH 20/39] revert change to nb --- docs/notebooks/4_ensembles.ipynb | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/docs/notebooks/4_ensembles.ipynb b/docs/notebooks/4_ensembles.ipynb index c2ef14c4..fe595372 100644 --- a/docs/notebooks/4_ensembles.ipynb +++ b/docs/notebooks/4_ensembles.ipynb @@ -40,15 +40,13 @@ "metadata": {}, "outputs": [], "source": [ - "# from xclim.testing.utils import nimbus\n", - "import urllib\n", - "\n", - "import pooch\n", "import xarray as xr\n", "\n", + "# from xclim.testing.utils import nimbus\n", + "from xclim.testing import open_dataset\n", + "\n", "import xscen as xs\n", "\n", - "url = \"https://github.com/Ouranosinc/xclim-testdata/tree/main/data/\"\n", "datasets = {\n", " \"ACCESS\": \"EnsembleStats/BCCAQv2+ANUSPLIN300_ACCESS1-0_historical+rcp45_r1i1p1_1950-2100_tg_mean_YS.nc\",\n", " \"BNU-ESM\": \"EnsembleStats/BCCAQv2+ANUSPLIN300_BNU-ESM_historical+rcp45_r1i1p1_1950-2100_tg_mean_YS.nc\",\n", @@ -59,8 +57,8 @@ "\n", "for d in datasets:\n", " # file = nimbus().fetch(datasets[d]) # There is currently a bug in xclim 0.53.2\n", - " file = pooch.retrieve(urllib.parse.urljoin(url, datasets[d]), known_hash=None)\n", - " ds = xr.open_dataset(file).isel(lon=slice(0, 4), lat=slice(0, 4))\n", + " # ds = xr.open_dataset(file).isel(lon=slice(0, 4), lat=slice(0, 4))\n", + " ds = open_dataset(datasets[d]).isel(lon=slice(0, 4), lat=slice(0, 4))\n", " ds = xs.climatological_op(\n", " ds,\n", " op=\"mean\",\n", From e7dd5c335b5f6e98d560c8eaffb79a974196a2d2 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Mon, 18 Nov 2024 12:07:50 -0500 Subject: [PATCH 21/39] address comments from code review --- src/xscen/io.py | 36 ++++++++++++----- src/xscen/testing.py | 89 +++++++++-------------------------------- tests/test_ensembles.py | 1 - tests/test_io.py | 1 + 4 files changed, 44 insertions(+), 83 deletions(-) diff --git a/src/xscen/io.py b/src/xscen/io.py index d94d8d83..7b451e1e 100644 --- a/src/xscen/io.py +++ b/src/xscen/io.py @@ -1,6 +1,7 @@ """Input/Output functions for xscen.""" import datetime +import json import logging import os import shutil as sh @@ -252,7 +253,7 @@ def clean_incomplete( Name of variables that were completed. All other variables (except coordinates) will be removed. Use either `complete` or `incomplete`, not both. incomplete : sequence of strings, optional - Name of variables that should be removed. + Name of variables that should be removed. Coordinates and dimensions will never be removed through this function. Use either `complete` or `incomplete`, not both. Returns @@ -261,6 +262,22 @@ def clean_incomplete( """ path = Path(path) + def _del_var(pth): + msg = f"Removing {pth} from disk" + logger.warning(msg) + sh.rmtree(pth) + + # Update the .zmetadata file + with (Path(path) / ".zmetadata").open("r") as f: + metadata = json.load(f) + [ + metadata["metadata"].pop(k) + for k in list(metadata["metadata"].keys()) + if k.startswith(f"{pth.name}/.") + ] + with (Path(path) / ".zmetadata").open("w") as f: + json.dump(metadata, f, indent=2) + if complete is not None and incomplete is not None: raise ValueError("Use either `complete` or `incomplete`, not both.") @@ -270,20 +287,17 @@ def clean_incomplete( for fold in filter(lambda p: p.is_dir(), path.iterdir()): if fold.name not in complete: - msg = f"Removing {fold} from disk" - logger.warning(msg) - sh.rmtree(fold) + _del_var(fold) elif incomplete is not None: + with xr.open_zarr(path) as ds: + incomplete = [ + v for v in incomplete if (v not in ds.coords) and (v not in ds.dims) + ] + for fold in filter(lambda p: p.is_dir(), path.iterdir()): if fold.name in incomplete: - msg = f"Removing {fold} from disk" - logger.warning(msg) - sh.rmtree(fold) - - # Remove .zmetadata to avoid issues with zarr and xarray - if (path / ".zmetadata").exists(): - Path.unlink(path / ".zmetadata") + _del_var(fold) def _coerce_attrs(attrs): diff --git a/src/xscen/testing.py b/src/xscen/testing.py index a04cee31..c68e5264 100644 --- a/src/xscen/testing.py +++ b/src/xscen/testing.py @@ -1,5 +1,6 @@ """Testing utilities for xscen.""" +import importlib.metadata import os import re from io import StringIO @@ -357,77 +358,23 @@ def show_versions( ------- str or None """ - if deps is None: - deps = [ - "xscen", - # Main packages - "cartopy", - "cftime", - "cf_xarray", - "clisops", - "dask", - "flox", - "fsspec", - "geopandas", - "h5netcdf", - "h5py", - "intake_esm", - "matplotlib", - "netCDF4", - "numcodecs", - "numpy", - "pandas", - "parse", - "pyyaml", - "rechunker", - "scipy", - "shapely", - "sparse", - "toolz", - "xarray", - "xclim", - "xesmf", - "zarr", - # Opt - "nc-time-axis", - "pyarrow", - # Dev - "babel", - "black", - "blackdoc", - "bump-my-version", - "coverage", - "coveralls", - "flake8", - "flake8-rst-docstrings", - "ipykernel", - "ipython", - "isort", - "jupyter_client", - "nbsphinx", - "nbval", - "pandoc", - "pooch", - "pre-commit", - "pytest", - "pytest-cov", - "ruff", - "setuptools", - "setuptools-scm", - "sphinx", - "sphinx-autoapi", - "sphinx-rtd-theme", - "sphinxcontrib-napoleon", - "sphinx-codeautolink", - "sphinx-copybutton", - "sphinx-mdinclude", - "watchdog", - "xdoctest", - "tox", - "build", - "wheel", - "pip", - "flake8-alphabetize", + + def _get_xscen_dependencies(): + xscen_metadata = importlib.metadata.metadata("xscen") + requires = xscen_metadata.get_all("Requires-Dist") + requires = [ + req.split("[")[0] + .split(";")[0] + .split(">")[0] + .split("<")[0] + .split("=")[0] + .split("!")[0] + for req in requires ] + return ["xscen"] + requires + + if deps is None: + deps = _get_xscen_dependencies() + return _show_versions(file=file, deps=deps) diff --git a/tests/test_ensembles.py b/tests/test_ensembles.py index ce1e5532..43dc383e 100644 --- a/tests/test_ensembles.py +++ b/tests/test_ensembles.py @@ -5,7 +5,6 @@ import pytest import xarray as xr import xclim as xc -from packaging.version import Version try: import xesmf as xe diff --git a/tests/test_io.py b/tests/test_io.py index 6613eaf4..87e46a4c 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -145,6 +145,7 @@ def test_complete(self, tmpdir, which): xs.io.clean_incomplete(Path(tmpdir) / "test.zarr", incomplete=["pr"]) assert (Path(tmpdir) / "test.zarr/tas").exists() assert not (Path(tmpdir) / "test.zarr/pr").exists() + assert (Path(tmpdir) / "test.zarr/.zmetadata").exists() ds2 = xr.open_zarr(Path(tmpdir) / "test.zarr") assert "pr" not in ds2 From f31b798c27288f3abc587703a24e6664ca15e96f Mon Sep 17 00:00:00 2001 From: RondeauG Date: Mon, 18 Nov 2024 15:06:37 -0500 Subject: [PATCH 22/39] maybe fix nimbus --- docs/notebooks/4_ensembles.ipynb | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/docs/notebooks/4_ensembles.ipynb b/docs/notebooks/4_ensembles.ipynb index fe595372..97b444dd 100644 --- a/docs/notebooks/4_ensembles.ipynb +++ b/docs/notebooks/4_ensembles.ipynb @@ -41,9 +41,7 @@ "outputs": [], "source": [ "import xarray as xr\n", - "\n", - "# from xclim.testing.utils import nimbus\n", - "from xclim.testing import open_dataset\n", + "from xclim.testing.utils import nimbus\n", "\n", "import xscen as xs\n", "\n", @@ -56,9 +54,10 @@ "}\n", "\n", "for d in datasets:\n", - " # file = nimbus().fetch(datasets[d]) # There is currently a bug in xclim 0.53.2\n", - " # ds = xr.open_dataset(file).isel(lon=slice(0, 4), lat=slice(0, 4))\n", - " ds = open_dataset(datasets[d]).isel(lon=slice(0, 4), lat=slice(0, 4))\n", + " file = nimbus(repo=\"https://github.com/Ouranosinc/xclim-testdata\").fetch(\n", + " datasets[d]\n", + " )\n", + " ds = xr.open_dataset(file).isel(lon=slice(0, 4), lat=slice(0, 4))\n", " ds = xs.climatological_op(\n", " ds,\n", " op=\"mean\",\n", From 7b9f8adb40e9372ed1d423682803884c22ba8e30 Mon Sep 17 00:00:00 2001 From: RondeauG <38501935+RondeauG@users.noreply.github.com> Date: Mon, 18 Nov 2024 15:19:04 -0500 Subject: [PATCH 23/39] Update docs/notebooks/4_ensembles.ipynb --- docs/notebooks/4_ensembles.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/notebooks/4_ensembles.ipynb b/docs/notebooks/4_ensembles.ipynb index 97b444dd..2ec88b06 100644 --- a/docs/notebooks/4_ensembles.ipynb +++ b/docs/notebooks/4_ensembles.ipynb @@ -54,7 +54,7 @@ "}\n", "\n", "for d in datasets:\n", - " file = nimbus(repo=\"https://github.com/Ouranosinc/xclim-testdata\").fetch(\n", + " file = nimbus(repo=\"https://raw.githubusercontent.com/Ouranosinc/xclim-testdata\").fetch(\n", " datasets[d]\n", " )\n", " ds = xr.open_dataset(file).isel(lon=slice(0, 4), lat=slice(0, 4))\n", From 53a8aeb787e7a764089582e6447afbf68951690e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 18 Nov 2024 20:19:28 +0000 Subject: [PATCH 24/39] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- docs/notebooks/4_ensembles.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/notebooks/4_ensembles.ipynb b/docs/notebooks/4_ensembles.ipynb index 2ec88b06..75d28732 100644 --- a/docs/notebooks/4_ensembles.ipynb +++ b/docs/notebooks/4_ensembles.ipynb @@ -54,9 +54,9 @@ "}\n", "\n", "for d in datasets:\n", - " file = nimbus(repo=\"https://raw.githubusercontent.com/Ouranosinc/xclim-testdata\").fetch(\n", - " datasets[d]\n", - " )\n", + " file = nimbus(\n", + " repo=\"https://raw.githubusercontent.com/Ouranosinc/xclim-testdata\"\n", + " ).fetch(datasets[d])\n", " ds = xr.open_dataset(file).isel(lon=slice(0, 4), lat=slice(0, 4))\n", " ds = xs.climatological_op(\n", " ds,\n", From ebf7deda4dc992d2641a258d81e2d349d6ea4ba5 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Tue, 19 Nov 2024 13:00:05 -0500 Subject: [PATCH 25/39] try without zmetadata --- src/xscen/io.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/xscen/io.py b/src/xscen/io.py index 7b451e1e..899605af 100644 --- a/src/xscen/io.py +++ b/src/xscen/io.py @@ -267,16 +267,16 @@ def _del_var(pth): logger.warning(msg) sh.rmtree(pth) - # Update the .zmetadata file - with (Path(path) / ".zmetadata").open("r") as f: - metadata = json.load(f) - [ - metadata["metadata"].pop(k) - for k in list(metadata["metadata"].keys()) - if k.startswith(f"{pth.name}/.") - ] - with (Path(path) / ".zmetadata").open("w") as f: - json.dump(metadata, f, indent=2) + # # Update the .zmetadata file + # with (Path(path) / ".zmetadata").open("r") as f: + # metadata = json.load(f) + # [ + # metadata["metadata"].pop(k) + # for k in list(metadata["metadata"].keys()) + # if k.startswith(f"{pth.name}/.") + # ] + # with (Path(path) / ".zmetadata").open("w") as f: + # json.dump(metadata, f, indent=2) if complete is not None and incomplete is not None: raise ValueError("Use either `complete` or `incomplete`, not both.") From c676877aed91178caccd6cdd8c7228a12651283b Mon Sep 17 00:00:00 2001 From: RondeauG Date: Tue, 19 Nov 2024 13:29:53 -0500 Subject: [PATCH 26/39] revert --- src/xscen/io.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/xscen/io.py b/src/xscen/io.py index 899605af..7b451e1e 100644 --- a/src/xscen/io.py +++ b/src/xscen/io.py @@ -267,16 +267,16 @@ def _del_var(pth): logger.warning(msg) sh.rmtree(pth) - # # Update the .zmetadata file - # with (Path(path) / ".zmetadata").open("r") as f: - # metadata = json.load(f) - # [ - # metadata["metadata"].pop(k) - # for k in list(metadata["metadata"].keys()) - # if k.startswith(f"{pth.name}/.") - # ] - # with (Path(path) / ".zmetadata").open("w") as f: - # json.dump(metadata, f, indent=2) + # Update the .zmetadata file + with (Path(path) / ".zmetadata").open("r") as f: + metadata = json.load(f) + [ + metadata["metadata"].pop(k) + for k in list(metadata["metadata"].keys()) + if k.startswith(f"{pth.name}/.") + ] + with (Path(path) / ".zmetadata").open("w") as f: + json.dump(metadata, f, indent=2) if complete is not None and incomplete is not None: raise ValueError("Use either `complete` or `incomplete`, not both.") From c3169938ccd2602ccfc33a95e7e6676583ada67f Mon Sep 17 00:00:00 2001 From: RondeauG Date: Tue, 19 Nov 2024 13:58:45 -0500 Subject: [PATCH 27/39] comment it all --- src/xscen/io.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/xscen/io.py b/src/xscen/io.py index 7b451e1e..9d7a28c3 100644 --- a/src/xscen/io.py +++ b/src/xscen/io.py @@ -267,16 +267,16 @@ def _del_var(pth): logger.warning(msg) sh.rmtree(pth) - # Update the .zmetadata file - with (Path(path) / ".zmetadata").open("r") as f: - metadata = json.load(f) - [ - metadata["metadata"].pop(k) - for k in list(metadata["metadata"].keys()) - if k.startswith(f"{pth.name}/.") - ] - with (Path(path) / ".zmetadata").open("w") as f: - json.dump(metadata, f, indent=2) + # # Update the .zmetadata file + # with (Path(path) / ".zmetadata").open("r") as f: + # metadata = json.load(f) + # [ + # metadata["metadata"].pop(k) + # for k in list(metadata["metadata"].keys()) + # if k.startswith(f"{pth.name}/.") + # ] + # with (Path(path) / ".zmetadata").open("w") as f: + # json.dump(metadata, f, indent=2) if complete is not None and incomplete is not None: raise ValueError("Use either `complete` or `incomplete`, not both.") @@ -290,10 +290,10 @@ def _del_var(pth): _del_var(fold) elif incomplete is not None: - with xr.open_zarr(path) as ds: - incomplete = [ - v for v in incomplete if (v not in ds.coords) and (v not in ds.dims) - ] + # with xr.open_zarr(path) as ds: + # incomplete = [ + # v for v in incomplete if (v not in ds.coords) and (v not in ds.dims) + # ] for fold in filter(lambda p: p.is_dir(), path.iterdir()): if fold.name in incomplete: From 2269cb1e4614e8c1e2478d7dcf189b3641d0b101 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Tue, 19 Nov 2024 14:50:54 -0500 Subject: [PATCH 28/39] remove subfunction --- src/xscen/io.py | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/src/xscen/io.py b/src/xscen/io.py index 9d7a28c3..48a5bd1b 100644 --- a/src/xscen/io.py +++ b/src/xscen/io.py @@ -262,21 +262,21 @@ def clean_incomplete( """ path = Path(path) - def _del_var(pth): - msg = f"Removing {pth} from disk" - logger.warning(msg) - sh.rmtree(pth) - - # # Update the .zmetadata file - # with (Path(path) / ".zmetadata").open("r") as f: - # metadata = json.load(f) - # [ - # metadata["metadata"].pop(k) - # for k in list(metadata["metadata"].keys()) - # if k.startswith(f"{pth.name}/.") - # ] - # with (Path(path) / ".zmetadata").open("w") as f: - # json.dump(metadata, f, indent=2) + # def _del_var(pth): + # msg = f"Removing {pth} from disk" + # logger.warning(msg) + # sh.rmtree(pth) + # + # # Update the .zmetadata file + # with (Path(path) / ".zmetadata").open("r") as f: + # metadata = json.load(f) + # [ + # metadata["metadata"].pop(k) + # for k in list(metadata["metadata"].keys()) + # if k.startswith(f"{pth.name}/.") + # ] + # with (Path(path) / ".zmetadata").open("w") as f: + # json.dump(metadata, f, indent=2) if complete is not None and incomplete is not None: raise ValueError("Use either `complete` or `incomplete`, not both.") @@ -287,7 +287,9 @@ def _del_var(pth): for fold in filter(lambda p: p.is_dir(), path.iterdir()): if fold.name not in complete: - _del_var(fold) + msg = f"Removing {fold} from disk" + logger.warning(msg) + sh.rmtree(fold) elif incomplete is not None: # with xr.open_zarr(path) as ds: @@ -297,7 +299,9 @@ def _del_var(pth): for fold in filter(lambda p: p.is_dir(), path.iterdir()): if fold.name in incomplete: - _del_var(fold) + msg = f"Removing {fold} from disk" + logger.warning(msg) + sh.rmtree(fold) def _coerce_attrs(attrs): From 5919bd4165f95e20ff4b2b284aefb7e49d5189ac Mon Sep 17 00:00:00 2001 From: RondeauG Date: Tue, 19 Nov 2024 15:11:05 -0500 Subject: [PATCH 29/39] more comments --- src/xscen/testing.py | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/src/xscen/testing.py b/src/xscen/testing.py index c68e5264..733d11dc 100644 --- a/src/xscen/testing.py +++ b/src/xscen/testing.py @@ -1,6 +1,6 @@ """Testing utilities for xscen.""" -import importlib.metadata +# import importlib.metadata import os import re from io import StringIO @@ -358,23 +358,23 @@ def show_versions( ------- str or None """ - - def _get_xscen_dependencies(): - xscen_metadata = importlib.metadata.metadata("xscen") - requires = xscen_metadata.get_all("Requires-Dist") - requires = [ - req.split("[")[0] - .split(";")[0] - .split(">")[0] - .split("<")[0] - .split("=")[0] - .split("!")[0] - for req in requires - ] - - return ["xscen"] + requires - - if deps is None: - deps = _get_xscen_dependencies() - - return _show_versions(file=file, deps=deps) + # def _get_xscen_dependencies(): + # xscen_metadata = importlib.metadata.metadata("xscen") + # requires = xscen_metadata.get_all("Requires-Dist") + # requires = [ + # req.split("[")[0] + # .split(";")[0] + # .split(">")[0] + # .split("<")[0] + # .split("=")[0] + # .split("!")[0] + # for req in requires + # ] + # + # return ["xscen"] + requires + # + # if deps is None: + # deps = _get_xscen_dependencies() + # + # return _show_versions(file=file, deps=deps) + pass From 779a001463722e030e73941eac3f0a40e513e59c Mon Sep 17 00:00:00 2001 From: RondeauG Date: Tue, 19 Nov 2024 15:41:56 -0500 Subject: [PATCH 30/39] try to limit esmf --- environment-dev.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/environment-dev.yml b/environment-dev.yml index 92df31aa..61d23e65 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -11,6 +11,7 @@ dependencies: - cf_xarray >=0.7.6 - clisops >=0.10 - dask >=2024.8.1,<2024.11 # FIXME: https://github.com/Ouranosinc/xclim/issues/1992 + - esmpy <8.7.0 - flox !=0.9.14 # FIXME: 0.9.14 is a broken version. This pin could be removed eventually. - fsspec - geopandas @@ -32,7 +33,7 @@ dependencies: - toolz - xarray >=2023.11.0, !=2024.6.0 - xclim >=0.53.2, <0.54 - - xesmf >=0.7 + - xesmf >=0.7, <0.8.8 - zarr >=2.13 # Opt - nc-time-axis >=1.3.1 From 93389c110b5703f030ed1c34d8c6bf615459d2a0 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Tue, 19 Nov 2024 16:06:01 -0500 Subject: [PATCH 31/39] try without xesmf pin --- environment-dev.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/environment-dev.yml b/environment-dev.yml index 61d23e65..20be0122 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -11,7 +11,7 @@ dependencies: - cf_xarray >=0.7.6 - clisops >=0.10 - dask >=2024.8.1,<2024.11 # FIXME: https://github.com/Ouranosinc/xclim/issues/1992 - - esmpy <8.7.0 + - esmpy !=8.7.0 # FIXME: 8.7.0 produces a segfault in xesmf - flox !=0.9.14 # FIXME: 0.9.14 is a broken version. This pin could be removed eventually. - fsspec - geopandas @@ -33,7 +33,7 @@ dependencies: - toolz - xarray >=2023.11.0, !=2024.6.0 - xclim >=0.53.2, <0.54 - - xesmf >=0.7, <0.8.8 + - xesmf >=0.7 - zarr >=2.13 # Opt - nc-time-axis >=1.3.1 From b09da876bf2e406247d9b004684849e7989b1b74 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Tue, 19 Nov 2024 16:20:04 -0500 Subject: [PATCH 32/39] switch pins --- environment-dev.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/environment-dev.yml b/environment-dev.yml index 20be0122..56345653 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -11,7 +11,6 @@ dependencies: - cf_xarray >=0.7.6 - clisops >=0.10 - dask >=2024.8.1,<2024.11 # FIXME: https://github.com/Ouranosinc/xclim/issues/1992 - - esmpy !=8.7.0 # FIXME: 8.7.0 produces a segfault in xesmf - flox !=0.9.14 # FIXME: 0.9.14 is a broken version. This pin could be removed eventually. - fsspec - geopandas @@ -33,7 +32,7 @@ dependencies: - toolz - xarray >=2023.11.0, !=2024.6.0 - xclim >=0.53.2, <0.54 - - xesmf >=0.7 + - xesmf >=0.7, <0.8.8 # FIXME: 0.8.8 currently creates segfaults on ReadTheDocs. - zarr >=2.13 # Opt - nc-time-axis >=1.3.1 From a159f72716a96566ae2eef286141eb30605187bc Mon Sep 17 00:00:00 2001 From: RondeauG Date: Tue, 19 Nov 2024 16:23:06 -0500 Subject: [PATCH 33/39] uncomment tests --- src/xscen/io.py | 39 +++++++++++++++++++-------------------- src/xscen/testing.py | 42 +++++++++++++++++++++--------------------- 2 files changed, 40 insertions(+), 41 deletions(-) diff --git a/src/xscen/io.py b/src/xscen/io.py index 48a5bd1b..ed7df22c 100644 --- a/src/xscen/io.py +++ b/src/xscen/io.py @@ -262,24 +262,9 @@ def clean_incomplete( """ path = Path(path) - # def _del_var(pth): - # msg = f"Removing {pth} from disk" - # logger.warning(msg) - # sh.rmtree(pth) - # - # # Update the .zmetadata file - # with (Path(path) / ".zmetadata").open("r") as f: - # metadata = json.load(f) - # [ - # metadata["metadata"].pop(k) - # for k in list(metadata["metadata"].keys()) - # if k.startswith(f"{pth.name}/.") - # ] - # with (Path(path) / ".zmetadata").open("w") as f: - # json.dump(metadata, f, indent=2) - if complete is not None and incomplete is not None: raise ValueError("Use either `complete` or `incomplete`, not both.") + v_to_rm = [] if complete is not None: with xr.open_zarr(path) as ds: @@ -287,22 +272,36 @@ def clean_incomplete( for fold in filter(lambda p: p.is_dir(), path.iterdir()): if fold.name not in complete: + v_to_rm.append(fold.name) msg = f"Removing {fold} from disk" logger.warning(msg) sh.rmtree(fold) elif incomplete is not None: - # with xr.open_zarr(path) as ds: - # incomplete = [ - # v for v in incomplete if (v not in ds.coords) and (v not in ds.dims) - # ] + with xr.open_zarr(path) as ds: + incomplete = [ + v for v in incomplete if (v not in ds.coords) and (v not in ds.dims) + ] for fold in filter(lambda p: p.is_dir(), path.iterdir()): if fold.name in incomplete: + v_to_rm.append(fold.name) msg = f"Removing {fold} from disk" logger.warning(msg) sh.rmtree(fold) + # Update the .zmetadata file + with (path / ".zmetadata").open("r") as f: + metadata = json.load(f) + for v in v_to_rm: + [ + metadata["metadata"].pop(k) + for k in list(metadata["metadata"].keys()) + if k.startswith(f"{v}/.") + ] + with (path / ".zmetadata").open("w") as f: + json.dump(metadata, f, indent=2) + def _coerce_attrs(attrs): """Ensure no funky objects in attrs.""" diff --git a/src/xscen/testing.py b/src/xscen/testing.py index 733d11dc..c68e5264 100644 --- a/src/xscen/testing.py +++ b/src/xscen/testing.py @@ -1,6 +1,6 @@ """Testing utilities for xscen.""" -# import importlib.metadata +import importlib.metadata import os import re from io import StringIO @@ -358,23 +358,23 @@ def show_versions( ------- str or None """ - # def _get_xscen_dependencies(): - # xscen_metadata = importlib.metadata.metadata("xscen") - # requires = xscen_metadata.get_all("Requires-Dist") - # requires = [ - # req.split("[")[0] - # .split(";")[0] - # .split(">")[0] - # .split("<")[0] - # .split("=")[0] - # .split("!")[0] - # for req in requires - # ] - # - # return ["xscen"] + requires - # - # if deps is None: - # deps = _get_xscen_dependencies() - # - # return _show_versions(file=file, deps=deps) - pass + + def _get_xscen_dependencies(): + xscen_metadata = importlib.metadata.metadata("xscen") + requires = xscen_metadata.get_all("Requires-Dist") + requires = [ + req.split("[")[0] + .split(";")[0] + .split(">")[0] + .split("<")[0] + .split("=")[0] + .split("!")[0] + for req in requires + ] + + return ["xscen"] + requires + + if deps is None: + deps = _get_xscen_dependencies() + + return _show_versions(file=file, deps=deps) From 5c40c98536e6f532fc5377d61a7ec8c4b5a0e7b7 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Tue, 19 Nov 2024 16:29:23 -0500 Subject: [PATCH 34/39] add pins --- environment.yml | 2 +- pyproject.toml | 2 +- tests/test_testing.py | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/environment.yml b/environment.yml index ad2a08f4..126cb271 100644 --- a/environment.yml +++ b/environment.yml @@ -32,7 +32,7 @@ dependencies: - toolz - xarray >=2023.11.0, !=2024.6.0 - xclim >=0.53.2, <0.54 - - xesmf >=0.7 + - xesmf >=0.7, <0.8.8 # FIXME: 0.8.8 currently creates segfaults on ReadTheDocs. - zarr >=2.13 # To install from source - setuptools >=65.0.0 diff --git a/pyproject.toml b/pyproject.toml index 8ed0a1d6..8b3a62f5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -112,7 +112,7 @@ docs = [ "sphinxcontrib-napoleon" ] extra = [ - "xesmf>=0.7" + "xesmf>=0.7, <0.8.8" # FIXME: 0.8.8 currently creates segfaults on ReadTheDocs. ] all = ["xscen[dev]", "xscen[docs]", "xscen[extra]"] diff --git a/tests/test_testing.py b/tests/test_testing.py index 8bec8021..db95d242 100644 --- a/tests/test_testing.py +++ b/tests/test_testing.py @@ -59,6 +59,7 @@ def test_show_version(tmpdir): out = f.read() assert "xscen" in out assert "xclim" in out + assert "xesmf" in out assert "xarray" in out assert "numpy" in out assert "pandas" in out From 71f6dda80dbbf46b203ec42653ed8ef20f707961 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Tue, 19 Nov 2024 17:10:36 -0500 Subject: [PATCH 35/39] explicit nimbus function --- docs/notebooks/4_ensembles.ipynb | 33 ++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/docs/notebooks/4_ensembles.ipynb b/docs/notebooks/4_ensembles.ipynb index 75d28732..7c4585dd 100644 --- a/docs/notebooks/4_ensembles.ipynb +++ b/docs/notebooks/4_ensembles.ipynb @@ -34,6 +34,34 @@ "This tutorial will explore ensemble reduction (also known as ensemble selection) using `xscen`. This will use pre-computed annual mean temperatures from `xclim.testing`." ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# from xclim.testing.utils import nimbus\n", + "from pathlib import Path\n", + "from urllib.parse import urljoin\n", + "\n", + "import pooch\n", + "from xclim.testing.utils import load_registry\n", + "\n", + "\n", + "def nimbus():\n", + " repo = \"https://raw.githubusercontent.com/Ouranosinc/xclim-testdata\"\n", + " branch = \"v2024.8.23\"\n", + " remote = urljoin(urljoin(repo, branch), \"data\")\n", + " return pooch.create(\n", + " path=Path(pooch.os_cache(\"xclim-testdata\")),\n", + " base_url=remote,\n", + " version=branch,\n", + " version_dev=branch,\n", + " allow_updates=True,\n", + " registry=load_registry(branch=branch, repo=repo),\n", + " )" + ] + }, { "cell_type": "code", "execution_count": null, @@ -41,7 +69,6 @@ "outputs": [], "source": [ "import xarray as xr\n", - "from xclim.testing.utils import nimbus\n", "\n", "import xscen as xs\n", "\n", @@ -54,9 +81,7 @@ "}\n", "\n", "for d in datasets:\n", - " file = nimbus(\n", - " repo=\"https://raw.githubusercontent.com/Ouranosinc/xclim-testdata\"\n", - " ).fetch(datasets[d])\n", + " file = nimbus().fetch(datasets[d])\n", " ds = xr.open_dataset(file).isel(lon=slice(0, 4), lat=slice(0, 4))\n", " ds = xs.climatological_op(\n", " ds,\n", From 623ce8670192a1ace2fc850cabc9fcaa9a249732 Mon Sep 17 00:00:00 2001 From: RondeauG <38501935+RondeauG@users.noreply.github.com> Date: Wed, 20 Nov 2024 09:32:40 -0500 Subject: [PATCH 36/39] Update docs/notebooks/4_ensembles.ipynb Co-authored-by: Pascal Bourgault --- docs/notebooks/4_ensembles.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/notebooks/4_ensembles.ipynb b/docs/notebooks/4_ensembles.ipynb index 7c4585dd..8567afe4 100644 --- a/docs/notebooks/4_ensembles.ipynb +++ b/docs/notebooks/4_ensembles.ipynb @@ -49,7 +49,7 @@ "\n", "\n", "def nimbus():\n", - " repo = \"https://raw.githubusercontent.com/Ouranosinc/xclim-testdata\"\n", + " repo = \"https://raw.githubusercontent.com/Ouranosinc/xclim-testdata/\"\n", " branch = \"v2024.8.23\"\n", " remote = urljoin(urljoin(repo, branch), \"data\")\n", " return pooch.create(\n", From 4448bfad6008d01853c2e34c1b00b09fd83d5ad9 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Wed, 20 Nov 2024 11:09:46 -0500 Subject: [PATCH 37/39] add downloader --- docs/notebooks/4_ensembles.ipynb | 38 +++++++++++++++++--------------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/docs/notebooks/4_ensembles.ipynb b/docs/notebooks/4_ensembles.ipynb index 8567afe4..3a0a281b 100644 --- a/docs/notebooks/4_ensembles.ipynb +++ b/docs/notebooks/4_ensembles.ipynb @@ -40,26 +40,25 @@ "metadata": {}, "outputs": [], "source": [ - "# from xclim.testing.utils import nimbus\n", - "from pathlib import Path\n", - "from urllib.parse import urljoin\n", + "# from pathlib import Path\n", + "# from urllib.parse import urljoin\n", "\n", - "import pooch\n", - "from xclim.testing.utils import load_registry\n", + "# import pooch\n", + "# from xclim.testing.utils import load_registry\n", "\n", "\n", - "def nimbus():\n", - " repo = \"https://raw.githubusercontent.com/Ouranosinc/xclim-testdata/\"\n", - " branch = \"v2024.8.23\"\n", - " remote = urljoin(urljoin(repo, branch), \"data\")\n", - " return pooch.create(\n", - " path=Path(pooch.os_cache(\"xclim-testdata\")),\n", - " base_url=remote,\n", - " version=branch,\n", - " version_dev=branch,\n", - " allow_updates=True,\n", - " registry=load_registry(branch=branch, repo=repo),\n", - " )" + "# def nimbus():\n", + "# repo = \"https://raw.githubusercontent.com/Ouranosinc/xclim-testdata/\"\n", + "# branch = \"v2024.8.23\"\n", + "# remote = urljoin(urljoin(repo, branch), \"data\")\n", + "# return pooch.create(\n", + "# path=Path(pooch.os_cache(\"xclim-testdata\")),\n", + "# base_url=remote,\n", + "# version=branch,\n", + "# version_dev=branch,\n", + "# allow_updates=True,\n", + "# registry=load_registry(branch=branch, repo=repo),\n", + "# )" ] }, { @@ -69,9 +68,12 @@ "outputs": [], "source": [ "import xarray as xr\n", + "from xclim.testing.utils import nimbus\n", "\n", "import xscen as xs\n", "\n", + "downloader = pooch.HTTPDownloader(headers={\"User-Agent\": \"agent\"})\n", + "\n", "datasets = {\n", " \"ACCESS\": \"EnsembleStats/BCCAQv2+ANUSPLIN300_ACCESS1-0_historical+rcp45_r1i1p1_1950-2100_tg_mean_YS.nc\",\n", " \"BNU-ESM\": \"EnsembleStats/BCCAQv2+ANUSPLIN300_BNU-ESM_historical+rcp45_r1i1p1_1950-2100_tg_mean_YS.nc\",\n", @@ -81,7 +83,7 @@ "}\n", "\n", "for d in datasets:\n", - " file = nimbus().fetch(datasets[d])\n", + " file = nimbus().fetch(datasets[d], downloader=downloader)\n", " ds = xr.open_dataset(file).isel(lon=slice(0, 4), lat=slice(0, 4))\n", " ds = xs.climatological_op(\n", " ds,\n", From fdf85d649d2b7b0dcac2232fa20ca665b7dedb46 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Wed, 20 Nov 2024 11:12:23 -0500 Subject: [PATCH 38/39] forgot pooch --- docs/notebooks/4_ensembles.ipynb | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/notebooks/4_ensembles.ipynb b/docs/notebooks/4_ensembles.ipynb index 3a0a281b..afdf9094 100644 --- a/docs/notebooks/4_ensembles.ipynb +++ b/docs/notebooks/4_ensembles.ipynb @@ -67,6 +67,7 @@ "metadata": {}, "outputs": [], "source": [ + "import pooch\n", "import xarray as xr\n", "from xclim.testing.utils import nimbus\n", "\n", From 55720c259afbf64ec4eaf53e8edbb6fb282c3ae7 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Wed, 20 Nov 2024 11:36:27 -0500 Subject: [PATCH 39/39] cleanup --- docs/notebooks/4_ensembles.ipynb | 29 +---------------------------- 1 file changed, 1 insertion(+), 28 deletions(-) diff --git a/docs/notebooks/4_ensembles.ipynb b/docs/notebooks/4_ensembles.ipynb index afdf9094..fa76d1ac 100644 --- a/docs/notebooks/4_ensembles.ipynb +++ b/docs/notebooks/4_ensembles.ipynb @@ -34,33 +34,6 @@ "This tutorial will explore ensemble reduction (also known as ensemble selection) using `xscen`. This will use pre-computed annual mean temperatures from `xclim.testing`." ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# from pathlib import Path\n", - "# from urllib.parse import urljoin\n", - "\n", - "# import pooch\n", - "# from xclim.testing.utils import load_registry\n", - "\n", - "\n", - "# def nimbus():\n", - "# repo = \"https://raw.githubusercontent.com/Ouranosinc/xclim-testdata/\"\n", - "# branch = \"v2024.8.23\"\n", - "# remote = urljoin(urljoin(repo, branch), \"data\")\n", - "# return pooch.create(\n", - "# path=Path(pooch.os_cache(\"xclim-testdata\")),\n", - "# base_url=remote,\n", - "# version=branch,\n", - "# version_dev=branch,\n", - "# allow_updates=True,\n", - "# registry=load_registry(branch=branch, repo=repo),\n", - "# )" - ] - }, { "cell_type": "code", "execution_count": null, @@ -73,7 +46,7 @@ "\n", "import xscen as xs\n", "\n", - "downloader = pooch.HTTPDownloader(headers={\"User-Agent\": \"agent\"})\n", + "downloader = pooch.HTTPDownloader(headers={\"User-Agent\": f\"xscen-{xs.__version__}\"})\n", "\n", "datasets = {\n", " \"ACCESS\": \"EnsembleStats/BCCAQv2+ANUSPLIN300_ACCESS1-0_historical+rcp45_r1i1p1_1950-2100_tg_mean_YS.nc\",\n",