From cfd42189f3eaff28de44737207bb7fcfc3a6e0b2 Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Fri, 19 Jan 2024 15:36:27 -0500
Subject: [PATCH 01/37] add calendar

---
 xscen/ensembles.py | 35 ++++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/xscen/ensembles.py b/xscen/ensembles.py
index 063e966a..aee4ced4 100644
--- a/xscen/ensembles.py
+++ b/xscen/ensembles.py
@@ -10,6 +10,7 @@
 
 import numpy as np
 import xarray as xr
+import xclim as xc
 from xclim import ensembles
 
 from .config import parse_config
@@ -679,6 +680,7 @@ def build_partition_data(
     subset_kw: dict = None,
     regrid_kw: dict = None,
     indicators_kw: dict = None,
+    calendar_kw: dict = None,
     rename_dict: dict = None,
 ):
     """Get the input for the xclim partition functions.
@@ -688,7 +690,7 @@ def build_partition_data(
     (https://xclim.readthedocs.io/en/stable/api.html#uncertainty-partitioning).
     If the inputs have different grids,
     they have to be subsetted and regridded to a common grid/point.
-    Indicators can also be computed before combining the datasets.
+    Indicators can also be computed and calendar converted before combining the datasets.
 
 
     Parameters
@@ -708,6 +710,14 @@ def build_partition_data(
     indicators_kw:
         Arguments to pass to `xs.indicators.compute_indicators()`.
         All indicators have to be for the same frequency, in order to be put on a single time axis.
+    calendar_kw : dict, optional
+        Arguments to pass to `xclim.core.calendar.convert_calendar`.
+        If None, the smallest common calendar is chosen.
+        For example, a mixed input of “noleap” and “360_day” will default to “noleap”.
+        ‘default’ is the standard calendar using np.datetime64 objects (xarray’s “standard” with use_cftime=False).
+        This is the same behavior as `calendar` in xclim.create_ensemble.
+        For conversions involving '360_day', the align_on='date' option is used by default.
+        If False, no conversion is done.
     rename_dict:
         Dictionary to rename the dimensions from xscen names to xclim names.
         The default is {'source': 'model', 'bias_adjust_project': 'downscaling', 'experiment': 'scenario'}.
@@ -727,11 +737,17 @@ def build_partition_data(
     # initialize dict
     subset_kw = subset_kw or {}
     regrid_kw = regrid_kw or {}
+    calendar_kw = calendar_kw or {}
 
     list_ds = []
+    calendars = []
     for ds in datasets:
         if subset_kw:
             ds = subset(ds, **subset_kw)
+            # clean coords that might not match exactly
+            for c in ["rlat", "rlon", "lat", "lon", "rotated_pole"]:
+                if c in ds.coords:
+                    ds = ds.drop_vars(c)
 
         if regrid_kw:
             ds = regrid_dataset(ds, **regrid_kw)
@@ -745,6 +761,13 @@ def build_partition_data(
             else:
                 ds = list(dict_ind.values())[0]
 
+        # get calendar of each dataset
+        if calendar_kw is None:
+            if "time" in ds.coords:
+                time = xr.decode_cf(ds).time
+                ds["time"] = time
+                calendars.append(xc.core.calendar.get_calendar(time))
+
         for dim in partition_dim:
             if f"cat:{dim}" in ds.attrs:
                 ds = ds.expand_dims(**{dim: [ds.attrs[f"cat:{dim}"]]})
@@ -752,7 +775,17 @@ def build_partition_data(
         if "source" in partition_dim:
             new_source = f"{ds.attrs['cat:institution']}_{ds.attrs['cat:source']}_{ds.attrs['cat:member']}"
             ds = ds.assign_coords(source=[new_source])
+
         list_ds.append(ds)
+
+    # convert calendars
+    if calendar_kw:
+        common_cal = xc.core.calendar.common_calendar(calendars, join="outer")
+        calendar_kw.setdefault("target", common_cal)
+        calendar_kw.setdefault("align_on", "date")
+        list_ds = [
+            xc.core.calendar.convert_calendar(ds, **calendar_kw) for ds in list_ds
+        ]
     ens = xr.merge(list_ds)
 
     rename_dict = rename_dict or {}

From 2dede98d6278b3cd690c3ced9f9bc2766761024f Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Thu, 25 Jan 2024 14:55:31 -0500
Subject: [PATCH 02/37] improve cal

---
 xscen/ensembles.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/xscen/ensembles.py b/xscen/ensembles.py
index aee4ced4..78917ef1 100644
--- a/xscen/ensembles.py
+++ b/xscen/ensembles.py
@@ -775,11 +775,12 @@ def build_partition_data(
         if "source" in partition_dim:
             new_source = f"{ds.attrs['cat:institution']}_{ds.attrs['cat:source']}_{ds.attrs['cat:member']}"
             ds = ds.assign_coords(source=[new_source])
-
+        ds = ds.chunk({"time": 5, "lat": 50, "lon": 50})
+        print(ds.chunks)
         list_ds.append(ds)
 
     # convert calendars
-    if calendar_kw:
+    if isinstance(calendar_kw, dict):
         common_cal = xc.core.calendar.common_calendar(calendars, join="outer")
         calendar_kw.setdefault("target", common_cal)
         calendar_kw.setdefault("align_on", "date")

From 48ed051ab47d6840b61d82a9e3a8ffa12e2b8d8d Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Fri, 26 Jan 2024 14:21:22 -0500
Subject: [PATCH 03/37] remove chunks

---
 xscen/ensembles.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/xscen/ensembles.py b/xscen/ensembles.py
index 78917ef1..8569c832 100644
--- a/xscen/ensembles.py
+++ b/xscen/ensembles.py
@@ -775,8 +775,6 @@ def build_partition_data(
         if "source" in partition_dim:
             new_source = f"{ds.attrs['cat:institution']}_{ds.attrs['cat:source']}_{ds.attrs['cat:member']}"
             ds = ds.assign_coords(source=[new_source])
-        ds = ds.chunk({"time": 5, "lat": 50, "lon": 50})
-        print(ds.chunks)
         list_ds.append(ds)
 
     # convert calendars

From 34739f686c29dd5d558ba9563b1faee43ba1f2c7 Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Tue, 30 Jan 2024 14:41:39 -0500
Subject: [PATCH 04/37] drop_vars

---
 xscen/ensembles.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/xscen/ensembles.py b/xscen/ensembles.py
index 5a861bf9..79c06230 100644
--- a/xscen/ensembles.py
+++ b/xscen/ensembles.py
@@ -745,10 +745,9 @@ def build_partition_data(
     for ds in datasets:
         if subset_kw:
             ds = subset(ds, **subset_kw)
-            # clean coords that might not match exactly
-            for c in ["rlat", "rlon", "lat", "lon", "rotated_pole"]:
-                if c in ds.coords:
-                    ds = ds.drop_vars(c)
+            ds = ds.drop_vars(
+                ["lat", "lon", "rlat", "rlon", "rotated_pole"], errors="ignore"
+            )
 
         if regrid_kw:
             ds = regrid_dataset(ds, **regrid_kw)

From c2c648e7a5ed305c6b71d29dbe58cba247eb1a22 Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Wed, 21 Feb 2024 16:04:31 -0500
Subject: [PATCH 05/37] add subcat possibility to avoid merge

---
 xscen/ensembles.py | 130 ++++++++++++++++++++++++++++++---------------
 1 file changed, 87 insertions(+), 43 deletions(-)

diff --git a/xscen/ensembles.py b/xscen/ensembles.py
index 79c06230..4fbb61e1 100644
--- a/xscen/ensembles.py
+++ b/xscen/ensembles.py
@@ -683,6 +683,7 @@ def build_partition_data(
     indicators_kw: dict = None,
     calendar_kw: dict = None,
     rename_dict: dict = None,
+    to_dataset_kw: dict = None,
 ):
     """Get the input for the xclim partition functions.
 
@@ -740,52 +741,95 @@ def build_partition_data(
     regrid_kw = regrid_kw or {}
     calendar_kw = calendar_kw or {}
 
-    list_ds = []
-    calendars = []
-    for ds in datasets:
-        if subset_kw:
-            ds = subset(ds, **subset_kw)
-            ds = ds.drop_vars(
-                ["lat", "lon", "rlat", "rlon", "rotated_pole"], errors="ignore"
-            )
+    if isinstance(datasets, list):
+        list_ds = []
+        calendars = []
+        merged = False
+        for ds in datasets:
+            if subset_kw:
+                ds = subset(ds, **subset_kw)
+                ds = ds.drop_vars(
+                    ["lat", "lon", "rlat", "rlon", "rotated_pole"], errors="ignore"
+                )
 
-        if regrid_kw:
-            ds = regrid_dataset(ds, **regrid_kw)
+            if regrid_kw:
+                ds = regrid_dataset(ds, **regrid_kw)
 
-        if indicators_kw:
-            dict_ind = compute_indicators(ds, **indicators_kw)
-            if len(dict_ind) > 1:
-                raise ValueError(
-                    f"The indicators computation should return only indicators of the same frequency.Returned frequencies: {dict_ind.keys()}"
-                )
+            if indicators_kw:
+                dict_ind = compute_indicators(ds, **indicators_kw)
+                if len(dict_ind) > 1:
+                    raise ValueError(
+                        f"The indicators computation should return only indicators of the same frequency.Returned frequencies: {dict_ind.keys()}"
+                    )
+                else:
+                    ds = list(dict_ind.values())[0]
+
+            # get calendar of each dataset
+            if calendar_kw is None:
+                if "time" in ds.coords:
+                    time = xr.decode_cf(ds).time
+                    ds["time"] = time
+                    calendars.append(xc.core.calendar.get_calendar(time))
+
+            for dim in partition_dim:
+                if f"cat:{dim}" in ds.attrs:
+                    ds = ds.expand_dims(**{dim: [ds.attrs[f"cat:{dim}"]]})
+
+            if "source" in partition_dim:
+                new_source = f"{ds.attrs['cat:institution']}_{ds.attrs['cat:source']}_{ds.attrs['cat:member']}"
+                ds = ds.assign_coords(source=[new_source])
+            list_ds.append(ds)
+            if not merged:
+                merged = ds
             else:
-                ds = list(dict_ind.values())[0]
-
-        # get calendar of each dataset
-        if calendar_kw is None:
-            if "time" in ds.coords:
-                time = xr.decode_cf(ds).time
-                ds["time"] = time
-                calendars.append(xc.core.calendar.get_calendar(time))
-
-        for dim in partition_dim:
-            if f"cat:{dim}" in ds.attrs:
-                ds = ds.expand_dims(**{dim: [ds.attrs[f"cat:{dim}"]]})
-
-        if "source" in partition_dim:
-            new_source = f"{ds.attrs['cat:institution']}_{ds.attrs['cat:source']}_{ds.attrs['cat:member']}"
-            ds = ds.assign_coords(source=[new_source])
-        list_ds.append(ds)
-
-    # convert calendars
-    if isinstance(calendar_kw, dict):
-        common_cal = xc.core.calendar.common_calendar(calendars, join="outer")
-        calendar_kw.setdefault("target", common_cal)
-        calendar_kw.setdefault("align_on", "date")
-        list_ds = [
-            xc.core.calendar.convert_calendar(ds, **calendar_kw) for ds in list_ds
-        ]
-    ens = xr.merge(list_ds)
+                merged = xr.merge([merged, ds])
+        ens = merged
+
+    # elif isinstance(datasets, xscen.DataCatalog):
+    #     # special case to handle source (create one dimension with institution_source_member)
+    #     ensemble_on_list = None
+    #     if "source" in partition_dim:
+    #         partition_dim.remove("source")
+    #         ensemble_on_list = ["institution", "source", "member"]
+    #
+    #     subcat = datasets
+    #
+    #     # create a dataset for each bias_adjust_project, modify grid and concat them
+    #     dim_with_different_grid = (
+    #         "bias_adjust_project"
+    #         if "bias_adjust_project" in partition_dim
+    #         else "source"
+    #     )
+    #     list_ds = []
+    #     for d in subcat.df[dim_with_different_grid].unique():
+    #         ds = subcat.search(**{dim_with_different_grid: d}).to_dataset(
+    #             concat_on=partition_dim,
+    #             create_ensemble_on=ensemble_on_list,
+    #             **to_dataset_kw,
+    #         )
+    #         if "realization" in ds:
+    #             ds = ds.rename({"realization": "source"})
+    #         if subset_kw:
+    #             ds = subset(ds, **subset_kw)
+    #         if regrid_kw:
+    #             ds = regrid_dataset(ds, **regrid_kw)
+    #         list_ds.append(ds)
+    #     ens = xr.concat(list_ds, dim=dim_with_different_grid)
+
+    else:
+        raise ValueError(
+            "datasets should be a list or a dictionary of xarray datasets or a xscen.DataCatalog"
+        )
+
+    # # convert calendars
+    # if isinstance(calendar_kw, dict):
+    #     common_cal = xc.core.calendar.common_calendar(calendars, join="outer")
+    #     calendar_kw.setdefault("target", common_cal)
+    #     calendar_kw.setdefault("align_on", "date")
+    #     list_ds = [
+    #         xc.core.calendar.convert_calendar(ds, **calendar_kw) for ds in list_ds
+    #     ]
+    # ens = xr.merge(list_ds)
 
     rename_dict = rename_dict or {}
     rename_dict.setdefault("source", "model")

From 4519048f831a33e4fde5ce336aa7a24d186f3cd4 Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Wed, 21 Feb 2024 17:09:10 -0500
Subject: [PATCH 06/37] real

---
 xscen/ensembles.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/xscen/ensembles.py b/xscen/ensembles.py
index 4fbb61e1..d5735369 100644
--- a/xscen/ensembles.py
+++ b/xscen/ensembles.py
@@ -777,7 +777,7 @@ def build_partition_data(
 
             if "source" in partition_dim:
                 new_source = f"{ds.attrs['cat:institution']}_{ds.attrs['cat:source']}_{ds.attrs['cat:member']}"
-                ds = ds.assign_coords(source=[new_source])
+                ds = ds.assign_coords(realization=[new_source])
             list_ds.append(ds)
             if not merged:
                 merged = ds
@@ -832,7 +832,7 @@ def build_partition_data(
     # ens = xr.merge(list_ds)
 
     rename_dict = rename_dict or {}
-    rename_dict.setdefault("source", "model")
+    rename_dict.setdefault("realization", "model")
     rename_dict.setdefault("experiment", "scenario")
     rename_dict.setdefault("bias_adjust_project", "downscaling")
     rename_dict = {k: v for k, v in rename_dict.items() if k in ens.dims}

From 13fa1978cafe5dbb69810253d80601c11b12478a Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Tue, 27 Feb 2024 15:35:07 -0500
Subject: [PATCH 07/37] add datacatalog option

---
 xscen/ensembles.py | 114 ++++++++++++++++++++++-----------------------
 1 file changed, 57 insertions(+), 57 deletions(-)

diff --git a/xscen/ensembles.py b/xscen/ensembles.py
index d5735369..ae4df7cd 100644
--- a/xscen/ensembles.py
+++ b/xscen/ensembles.py
@@ -11,11 +11,10 @@
 
 import numpy as np
 import xarray as xr
-import xclim as xc
 from xclim import ensembles
 
+from .catalog import DataCatalog
 from .config import parse_config
-from .indicators import compute_indicators
 from .regrid import regrid_dataset
 from .spatial import subset
 from .utils import clean_up, get_cat_attrs
@@ -698,9 +697,14 @@ def build_partition_data(
     Parameters
     ----------
     datasets : dict
-        List or dictionnary of Dataset objects that will be included in the ensemble.
+        List, dictionnary or DataCatalog of Datasets that will be included in the ensemble.
         The datasets should include the necessary ("cat:") attributes to understand their metadata.
-        Tip: With a project catalog, you can do: `datasets = pcat.search(**search_dict).to_dataset_dict()`.
+        Tip: A dictionnary can be created with `datasets = pcat.search(**search_dict).to_dataset_dict()`.
+
+        The use of a DataCatalog is recommended for large ensembles.
+        In that case, the ensembles will be loaded separately for each `bias_adjust_project`,
+        the subsetting or regridding can be applied before combining the datasets through concatenation.
+        If `bias_adjust_project` is not in `partition_dim`, `source` will be used instead.
     partition_dim: list[str]
         Components of the partition. They will become the dimension of the output.
         The default is ['source', 'experiment', 'bias_adjust_project'].
@@ -743,8 +747,7 @@ def build_partition_data(
 
     if isinstance(datasets, list):
         list_ds = []
-        calendars = []
-        merged = False
+        # calendars = []
         for ds in datasets:
             if subset_kw:
                 ds = subset(ds, **subset_kw)
@@ -755,21 +758,21 @@ def build_partition_data(
             if regrid_kw:
                 ds = regrid_dataset(ds, **regrid_kw)
 
-            if indicators_kw:
-                dict_ind = compute_indicators(ds, **indicators_kw)
-                if len(dict_ind) > 1:
-                    raise ValueError(
-                        f"The indicators computation should return only indicators of the same frequency.Returned frequencies: {dict_ind.keys()}"
-                    )
-                else:
-                    ds = list(dict_ind.values())[0]
-
-            # get calendar of each dataset
-            if calendar_kw is None:
-                if "time" in ds.coords:
-                    time = xr.decode_cf(ds).time
-                    ds["time"] = time
-                    calendars.append(xc.core.calendar.get_calendar(time))
+            # if indicators_kw:
+            #     dict_ind = compute_indicators(ds, **indicators_kw)
+            #     if len(dict_ind) > 1:
+            #         raise ValueError(
+            #             f"The indicators computation should return only indicators of the same frequency.Returned frequencies: {dict_ind.keys()}"
+            #         )
+            #     else:
+            #         ds = list(dict_ind.values())[0]
+
+            # # get calendar of each dataset
+            # if calendar_kw is None:
+            #     if "time" in ds.coords:
+            #         time = xr.decode_cf(ds).time
+            #         ds["time"] = time
+            #         calendars.append(xc.core.calendar.get_calendar(time))
 
             for dim in partition_dim:
                 if f"cat:{dim}" in ds.attrs:
@@ -779,42 +782,39 @@ def build_partition_data(
                 new_source = f"{ds.attrs['cat:institution']}_{ds.attrs['cat:source']}_{ds.attrs['cat:member']}"
                 ds = ds.assign_coords(realization=[new_source])
             list_ds.append(ds)
-            if not merged:
-                merged = ds
-            else:
-                merged = xr.merge([merged, ds])
-        ens = merged
-
-    # elif isinstance(datasets, xscen.DataCatalog):
-    #     # special case to handle source (create one dimension with institution_source_member)
-    #     ensemble_on_list = None
-    #     if "source" in partition_dim:
-    #         partition_dim.remove("source")
-    #         ensemble_on_list = ["institution", "source", "member"]
-    #
-    #     subcat = datasets
-    #
-    #     # create a dataset for each bias_adjust_project, modify grid and concat them
-    #     dim_with_different_grid = (
-    #         "bias_adjust_project"
-    #         if "bias_adjust_project" in partition_dim
-    #         else "source"
-    #     )
-    #     list_ds = []
-    #     for d in subcat.df[dim_with_different_grid].unique():
-    #         ds = subcat.search(**{dim_with_different_grid: d}).to_dataset(
-    #             concat_on=partition_dim,
-    #             create_ensemble_on=ensemble_on_list,
-    #             **to_dataset_kw,
-    #         )
-    #         if "realization" in ds:
-    #             ds = ds.rename({"realization": "source"})
-    #         if subset_kw:
-    #             ds = subset(ds, **subset_kw)
-    #         if regrid_kw:
-    #             ds = regrid_dataset(ds, **regrid_kw)
-    #         list_ds.append(ds)
-    #     ens = xr.concat(list_ds, dim=dim_with_different_grid)
+        ens = xr.merge(list_ds)
+
+    elif isinstance(datasets, DataCatalog):
+        # special case to handle source (create one dimension with institution_source_member)
+        ensemble_on_list = None
+        if "source" in partition_dim:
+            partition_dim.remove("source")
+            ensemble_on_list = ["institution", "source", "member"]
+
+        subcat = datasets
+
+        # create a dataset for each bias_adjust_project, modify grid and concat them
+        # if no bias_adjust_project, use source
+        dim_with_different_grid = (
+            "bias_adjust_project"
+            if "bias_adjust_project" in partition_dim
+            else "source"
+        )
+        list_ds = []
+        for d in subcat.df[dim_with_different_grid].unique():
+            ds = subcat.search(**{dim_with_different_grid: d}).to_dataset(
+                concat_on=partition_dim,
+                create_ensemble_on=ensemble_on_list,
+                **to_dataset_kw,
+            )
+            if "realization" in ds:
+                ds = ds.rename({"realization": "source"})
+            if subset_kw:
+                ds = subset(ds, **subset_kw)
+            if regrid_kw:
+                ds = regrid_dataset(ds, **regrid_kw)
+            list_ds.append(ds)
+        ens = xr.concat(list_ds, dim=dim_with_different_grid)
 
     else:
         raise ValueError(

From d4ffe92469a08b3e24b3e976e63adaeee087bbd1 Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Wed, 6 Mar 2024 10:20:30 -0500
Subject: [PATCH 08/37] add real to part_dim

---
 xscen/ensembles.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/xscen/ensembles.py b/xscen/ensembles.py
index ae4df7cd..9cbf4c14 100644
--- a/xscen/ensembles.py
+++ b/xscen/ensembles.py
@@ -676,7 +676,7 @@ def generate_weights(  # noqa: C901
 
 def build_partition_data(
     datasets: Union[dict, list[xr.Dataset]],
-    partition_dim: list[str] = ["source", "experiment", "bias_adjust_project"],
+    partition_dim: list[str] = ["realization", "experiment", "bias_adjust_project"],
     subset_kw: dict = None,
     regrid_kw: dict = None,
     indicators_kw: dict = None,
@@ -738,11 +738,13 @@ def build_partition_data(
     xclim.ensembles
 
     """
+    # TODO: add warning if both realization and source in partition_dim
     if isinstance(datasets, dict):
         datasets = list(datasets.values())
     # initialize dict
     subset_kw = subset_kw or {}
     regrid_kw = regrid_kw or {}
+    to_dataset_kw = to_dataset_kw or {}
     calendar_kw = calendar_kw or {}
 
     if isinstance(datasets, list):
@@ -778,9 +780,9 @@ def build_partition_data(
                 if f"cat:{dim}" in ds.attrs:
                     ds = ds.expand_dims(**{dim: [ds.attrs[f"cat:{dim}"]]})
 
-            if "source" in partition_dim:
+            if "realization" in partition_dim:
                 new_source = f"{ds.attrs['cat:institution']}_{ds.attrs['cat:source']}_{ds.attrs['cat:member']}"
-                ds = ds.assign_coords(realization=[new_source])
+                ds = ds.expand_dims(realization=[new_source])
             list_ds.append(ds)
         ens = xr.merge(list_ds)
 
@@ -807,12 +809,11 @@ def build_partition_data(
                 create_ensemble_on=ensemble_on_list,
                 **to_dataset_kw,
             )
-            if "realization" in ds:
-                ds = ds.rename({"realization": "source"})
             if subset_kw:
                 ds = subset(ds, **subset_kw)
             if regrid_kw:
                 ds = regrid_dataset(ds, **regrid_kw)
+
             list_ds.append(ds)
         ens = xr.concat(list_ds, dim=dim_with_different_grid)
 
@@ -833,6 +834,7 @@ def build_partition_data(
 
     rename_dict = rename_dict or {}
     rename_dict.setdefault("realization", "model")
+    rename_dict.setdefault("source", "model")
     rename_dict.setdefault("experiment", "scenario")
     rename_dict.setdefault("bias_adjust_project", "downscaling")
     rename_dict = {k: v for k, v in rename_dict.items() if k in ens.dims}

From d0a097d760f6a6ef5b3da7edbaf21f5b4a3bf025 Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Thu, 9 May 2024 16:51:57 -0400
Subject: [PATCH 09/37] remove moving_yearly_window

---
 xscen/catalog.py   | 5 +++--
 xscen/ensembles.py | 2 ++
 xscen/spatial.py   | 1 +
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/xscen/catalog.py b/xscen/catalog.py
index d10d3fb0..e1c3b488 100644
--- a/xscen/catalog.py
+++ b/xscen/catalog.py
@@ -383,13 +383,14 @@ def check_variables(row):
             if len_df > 0:
                 self.esmcat._df["variable"] = self.df.apply(check_variables, axis=1)
 
-    def exists_in_cat(self, **columns) -> bool:
+    def exists_in_cat(self, verbose=True, **columns) -> bool:
         """
         Check if there is an entry in the catalogue corresponding to the arguments given.
 
         Parameters
         ----------
         columns: Arguments that will be given to `catalog.search`
+        verbose: Log the result of the search.
 
         Returns
         -------
@@ -397,7 +398,7 @@ def exists_in_cat(self, **columns) -> bool:
             True if there is an entry in the catalogue corresponding to the arguments given.
         """
         exists = bool(len(self.search(**columns)))
-        if exists:
+        if exists and verbose:
             logger.info(f"An entry exists for: {columns}")
         return exists
 
diff --git a/xscen/ensembles.py b/xscen/ensembles.py
index 9cbf4c14..15063bcc 100644
--- a/xscen/ensembles.py
+++ b/xscen/ensembles.py
@@ -787,6 +787,8 @@ def build_partition_data(
         ens = xr.merge(list_ds)
 
     elif isinstance(datasets, DataCatalog):
+        # TODO: add possibility of method and ref
+
         # special case to handle source (create one dimension with institution_source_member)
         ensemble_on_list = None
         if "source" in partition_dim:
diff --git a/xscen/spatial.py b/xscen/spatial.py
index 48ffe9b3..ceaf8f76 100644
--- a/xscen/spatial.py
+++ b/xscen/spatial.py
@@ -184,6 +184,7 @@ def subset(  # noqa: C901
         else:
             tile_buffer = tile_buffer or region.get("tile_buffer", 0)
         kwargs = deepcopy(region[region["method"]])
+        name = region.get("name", None)
 
     if uses_dask(ds.lon) or uses_dask(ds.lat):
         warnings.warn("Loading longitude and latitude for more efficient subsetting.")

From 337875761cacd5bb40e7297839835c8c53800af6 Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Mon, 13 May 2024 09:07:22 -0400
Subject: [PATCH 10/37] fix A-DEC

---
 xscen/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xscen/utils.py b/xscen/utils.py
index 4e0adad3..35d97e2f 100644
--- a/xscen/utils.py
+++ b/xscen/utils.py
@@ -243,7 +243,7 @@ def _parse_date(date, fmts):
         quasiday = (pd.Timedelta(1, "d") - pd.Timedelta(1, "s")).as_unit(date.unit)
         if end_of_period == "Y" or "m" not in fmt:
             date = (
-                pd.tseries.frequencies.to_offset("A-DEC").rollforward(date) + quasiday
+                pd.tseries.frequencies.to_offset("YE-DEC").rollforward(date) + quasiday
             )
         elif end_of_period == "M" or "d" not in fmt:
             date = pd.tseries.frequencies.to_offset("M").rollforward(date) + quasiday

From 95f50f9c74100599e6ee8abbbf84df286073c456 Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Thu, 23 May 2024 11:21:39 -0400
Subject: [PATCH 11/37] subdivide

---
 xscen/ensembles.py | 170 +++++++++++++++++++++++++++------------------
 1 file changed, 103 insertions(+), 67 deletions(-)

diff --git a/xscen/ensembles.py b/xscen/ensembles.py
index a9490d95..1c03f75a 100644
--- a/xscen/ensembles.py
+++ b/xscen/ensembles.py
@@ -675,6 +675,106 @@ def generate_weights(  # noqa: C901
     return weights
 
 
+def _partition_from_list(datasets, partition_dim, subset_kw, regrid_kw):
+    list_ds = []
+    # calendars = []
+    for ds in datasets:
+        if subset_kw:
+            ds = subset(ds, **subset_kw)
+            ds = ds.drop_vars(
+                ["lat", "lon", "rlat", "rlon", "rotated_pole"], errors="ignore"
+            )
+
+        if regrid_kw:
+            ds = regrid_dataset(ds, **regrid_kw)
+
+        # if indicators_kw:
+        #     dict_ind = compute_indicators(ds, **indicators_kw)
+        #     if len(dict_ind) > 1:
+        #         raise ValueError(
+        #             f"The indicators computation should return only indicators of the same frequency.Returned frequencies: {dict_ind.keys()}"
+        #         )
+        #     else:
+        #         ds = list(dict_ind.values())[0]
+
+        # # get calendar of each dataset
+        # if calendar_kw is None:
+        #     if "time" in ds.coords:
+        #         time = xr.decode_cf(ds).time
+        #         ds["time"] = time
+        #         calendars.append(xc.core.calendar.get_calendar(time))
+
+        for dim in partition_dim:
+            if f"cat:{dim}" in ds.attrs:
+                ds = ds.expand_dims(**{dim: [ds.attrs[f"cat:{dim}"]]})
+
+        if "bias_adjust_project" in ds.dims:
+            ds = ds.assign_coords(
+                method=("bias_adjust_project", ds.attrs.get("cat:method", np.nan))
+            )
+            ds = ds.assign_coords(
+                reference=(
+                    "bias_adjust_project",
+                    ds.attrs.get("cat:reference", np.nan),
+                )
+            )
+
+        if "realization" in partition_dim:
+            new_source = f"{ds.attrs['cat:institution']}_{ds.attrs['cat:source']}_{ds.attrs['cat:member']}"
+            ds = ds.expand_dims(realization=[new_source])
+        list_ds.append(ds)
+    ens = xr.merge(list_ds, combine_attrs="drop_conflicts")
+    return ens
+
+
+def _partition_from_catalog(
+    datasets, partition_dim, subset_kw, regrid_kw, to_dataset_kw
+):
+    # TODO: add possibility of method and ref
+
+    # special case to handle source (create one dimension with institution_source_member)
+    ensemble_on_list = None
+    if "source" in partition_dim:
+        partition_dim.remove("source")
+        ensemble_on_list = ["institution", "source", "member"]
+
+    subcat = datasets
+
+    # create a dataset for each bias_adjust_project, modify grid and concat them
+    # if no bias_adjust_project, use source
+    dim_with_different_grid = (
+        "bias_adjust_project" if "bias_adjust_project" in partition_dim else "source"
+    )
+    list_ds = []
+    for d in subcat.df[dim_with_different_grid].unique():
+        ds = subcat.search(**{dim_with_different_grid: d}).to_dataset(
+            concat_on=partition_dim,
+            create_ensemble_on=ensemble_on_list,
+            **to_dataset_kw,
+        )
+        if subset_kw:
+            ds = subset(ds, **subset_kw)
+        if regrid_kw:
+            ds = regrid_dataset(ds, **regrid_kw)
+
+        if "bias_adjust_project" in ds.dims:
+            ds = ds.assign_coords(
+                method=("bias_adjust_project", ds.attrs.get("cat:method", np.nan))
+            )
+            ds = ds.assign_coords(
+                reference=(
+                    "bias_adjust_project",
+                    ds.attrs.get("cat:reference", np.nan),
+                )
+            )
+
+        list_ds.append(ds)
+    ens = xr.concat(
+        list_ds, dim=dim_with_different_grid, combine_attrs="drop_conflicts"
+    )
+    return ens
+
+
 def build_partition_data(
     datasets: Union[dict, list[xr.Dataset]],
     partition_dim: list[str] = ["realization", "experiment", "bias_adjust_project"],
@@ -749,76 +849,12 @@ def build_partition_data(
     calendar_kw = calendar_kw or {}
 
     if isinstance(datasets, list):
-        list_ds = []
-        # calendars = []
-        for ds in datasets:
-            if subset_kw:
-                ds = subset(ds, **subset_kw)
-                ds = ds.drop_vars(
-                    ["lat", "lon", "rlat", "rlon", "rotated_pole"], errors="ignore"
-                )
-
-            if regrid_kw:
-                ds = regrid_dataset(ds, **regrid_kw)
-
-            # if indicators_kw:
-            #     dict_ind = compute_indicators(ds, **indicators_kw)
-            #     if len(dict_ind) > 1:
-            #         raise ValueError(
-            #             f"The indicators computation should return only indicators of the same frequency.Returned frequencies: {dict_ind.keys()}"
-            #         )
-            #     else:
-            #         ds = list(dict_ind.values())[0]
-
-            # # get calendar of each dataset
-            # if calendar_kw is None:
-            #     if "time" in ds.coords:
-            #         time = xr.decode_cf(ds).time
-            #         ds["time"] = time
-            #         calendars.append(xc.core.calendar.get_calendar(time))
-
-            for dim in partition_dim:
-                if f"cat:{dim}" in ds.attrs:
-                    ds = ds.expand_dims(**{dim: [ds.attrs[f"cat:{dim}"]]})
-
-            if "realization" in partition_dim:
-                new_source = f"{ds.attrs['cat:institution']}_{ds.attrs['cat:source']}_{ds.attrs['cat:member']}"
-                ds = ds.expand_dims(realization=[new_source])
-            list_ds.append(ds)
-        ens = xr.merge(list_ds)
+        ens = _partition_from_list(datasets, partition_dim, subset_kw, regrid_kw)
 
     elif isinstance(datasets, DataCatalog):
-        # TODO: add possibility of method and ref
-
-        # special case to handle source (create one dimension with institution_source_member)
-        ensemble_on_list = None
-        if "source" in partition_dim:
-            partition_dim.remove("source")
-            ensemble_on_list = ["institution", "source", "member"]
-
-        subcat = datasets
-
-        # create a dataset for each bias_adjust_project, modify grid and concat them
-        # if no bias_adjust_project, use source
-        dim_with_different_grid = (
-            "bias_adjust_project"
-            if "bias_adjust_project" in partition_dim
-            else "source"
+        ens = _partition_from_catalog(
+            datasets, partition_dim, subset_kw, regrid_kw, to_dataset_kw
         )
-        list_ds = []
-        for d in subcat.df[dim_with_different_grid].unique():
-            ds = subcat.search(**{dim_with_different_grid: d}).to_dataset(
-                concat_on=partition_dim,
-                create_ensemble_on=ensemble_on_list,
-                **to_dataset_kw,
-            )
-            if subset_kw:
-                ds = subset(ds, **subset_kw)
-            if regrid_kw:
-                ds = regrid_dataset(ds, **regrid_kw)
-
-            list_ds.append(ds)
-        ens = xr.concat(list_ds, dim=dim_with_different_grid)
 
     else:
         raise ValueError(

From 650803e4c4c878c0d45ddf1850e3a62db9ad6a14 Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Thu, 23 May 2024 12:03:34 -0400
Subject: [PATCH 12/37] common_attrs

---
 xscen/ensembles.py | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/xscen/ensembles.py b/xscen/ensembles.py
index 1c03f75a..93cda8c1 100644
--- a/xscen/ensembles.py
+++ b/xscen/ensembles.py
@@ -678,6 +678,8 @@ def generate_weights(  # noqa: C901
 def _partition_from_list(datasets, partition_dim, subset_kw, regrid_kw):
     list_ds = []
     # calendars = []
+    # only keep attrs common to all datasets
+    common_attrs = False
     for ds in datasets:
         if subset_kw:
             ds = subset(ds, **subset_kw)
@@ -710,20 +712,25 @@ def _partition_from_list(datasets, partition_dim, subset_kw, regrid_kw):
 
         if "bias_adjust_project" in ds.dims:
             ds = ds.assign_coords(
-                method=("bias_adjust_project", ds.attrs.get("cat:method", np.nan))
+                method=("bias_adjust_project", [ds.attrs.get("cat:method", np.nan)])
             )
             ds = ds.assign_coords(
                 reference=(
                     "bias_adjust_project",
-                    ds.attrs.get("cat:reference", np.nan),
+                    [ds.attrs.get("cat:reference", np.nan)],
                 )
             )
 
         if "realization" in partition_dim:
             new_source = f"{ds.attrs['cat:institution']}_{ds.attrs['cat:source']}_{ds.attrs['cat:member']}"
             ds = ds.expand_dims(realization=[new_source])
+
+        a = ds.attrs
+        a.pop("intake_esm_vars", None)  # remove list for intersection to work
+        common_attrs = dict(common_attrs.items() & a.items()) if common_attrs else a
         list_ds.append(ds)
-    ens = xr.merge(list_ds, combine_attrs="drop_conflicts")
+    ens = xr.merge(list_ds)
+    ens.attrs = common_attrs
     return ens
 
 
@@ -746,6 +753,7 @@ def _partition_from_catalog(
         "bias_adjust_project" if "bias_adjust_project" in partition_dim else "source"
     )
     list_ds = []
+    common_attrs = False
     for d in subcat.df[dim_with_different_grid].unique():
         ds = subcat.search(**{dim_with_different_grid: d}).to_dataset(
             concat_on=partition_dim,
@@ -759,19 +767,20 @@ def _partition_from_catalog(
 
         if "bias_adjust_project" in ds.dims:
             ds = ds.assign_coords(
-                method=("bias_adjust_project", ds.attrs.get("cat:method", np.nan))
+                method=("bias_adjust_project", [ds.attrs.get("cat:method", np.nan)])
             )
             ds = ds.assign_coords(
                 reference=(
                     "bias_adjust_project",
-                    ds.attrs.get("cat:reference", np.nan),
+                    [ds.attrs.get("cat:reference", np.nan)],
                 )
             )
-
+        a = ds.attrs
+        a.pop("intake_esm_vars", None)  # remove list for intersection to work
+        common_attrs = dict(common_attrs.items() & a.items()) if common_attrs else a
         list_ds.append(ds)
-    ens = xr.concat(
-        list_ds, dim=dim_with_different_grid, combine_attrs="drop_conflicts"
-    )
+    ens = xr.concat(list_ds, dim=dim_with_different_grid)
+    ens.attrs = common_attrs
     return ens
 
 
From b8d66d0045ce46da946b3744e4ade1184af7ccad Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Thu, 23 May 2024 14:42:13 -0400
Subject: [PATCH 13/37] add ref and method to cat

---
 xscen/ensembles.py | 43 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 34 insertions(+), 9 deletions(-)

diff --git a/xscen/ensembles.py b/xscen/ensembles.py
index 93cda8c1..e89e1094 100644
--- a/xscen/ensembles.py
+++ b/xscen/ensembles.py
@@ -14,6 +14,7 @@
 from xclim import ensembles
 
 from .catalog import DataCatalog
+from .catutils import generate_id
 from .config import parse_config
 from .regrid import regrid_dataset
 from .spatial import subset
@@ -737,7 +738,6 @@ def _partition_from_list(datasets, partition_dim, subset_kw, regrid_kw):
 def _partition_from_catalog(
     datasets, partition_dim, subset_kw, regrid_kw, to_dataset_kw
 ):
-    # TODO: add possibility of method and ref
 
     # special case to handle source (create one dimension with institution_source_member)
     ensemble_on_list = None
@@ -748,18 +748,46 @@ def _partition_from_catalog(
     subcat = datasets
 
     # create a dataset for each bias_adjust_project, modify grid and concat them
-    # if no bias_adjust_project, use source
-    dim_with_different_grid = (
-        "bias_adjust_project" if "bias_adjust_project" in partition_dim else "source"
-    )
+    # choose with dim that exists in partition_dim and is the first in the order of preference
+    order_of_preference = ["reference", "bias_adjust_project", "source"]
+    dim_with_different_grid = list(set(partition_dim) & set(order_of_preference))[0]
+    # dim_with_different_grid = (
+    #     "bias_adjust_project" if "bias_adjust_project" in partition_dim else "source"
+    # )
+
+    # trick for method
+    if "method" in partition_dim:
+        # replace id with bias_adjust_project with method and ref.
+        datasets.df["id"] = generate_id(
+            datasets.df,
+            [
+                "method",
+                "reference",
+                "mip_era",
+                "activity",
+                "driving_model",
+                "institution",
+                "source",
+                "experiment",
+                "member",
+                "domain",
+            ],
+        )
+
+    # get attrs that are common to all datasets
+    common_attrs = {}
+    for col, series in subcat.df.items():
+        if (series[0] == series).all():
+            common_attrs[f"cat:{col}"] = series[0]
+
     list_ds = []
-    common_attrs = False
     for d in subcat.df[dim_with_different_grid].unique():
         ds = subcat.search(**{dim_with_different_grid: d}).to_dataset(
             concat_on=partition_dim,
             create_ensemble_on=ensemble_on_list,
             **to_dataset_kw,
         )
+
         if subset_kw:
             ds = subset(ds, **subset_kw)
         if regrid_kw:
@@ -775,9 +803,6 @@ def _partition_from_catalog(
                     [ds.attrs.get("cat:reference", np.nan)],
                 )
             )
-        a = ds.attrs
-        a.pop("intake_esm_vars", None)  # remove list for intersection to work
-        common_attrs = dict(common_attrs.items() & a.items()) if common_attrs else a
         list_ds.append(ds)
     ens = xr.concat(list_ds, dim=dim_with_different_grid)
     ens.attrs = common_attrs

From 2a4198491da964024b8521080bef4abb8b1fd0ad Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Thu, 23 May 2024 16:18:22 -0400
Subject: [PATCH 14/37] index

---
 xscen/ensembles.py | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/xscen/ensembles.py b/xscen/ensembles.py
index e89e1094..1d0d9d6e 100644
--- a/xscen/ensembles.py
+++ b/xscen/ensembles.py
@@ -747,22 +747,20 @@ def _partition_from_catalog(
 
     subcat = datasets
 
-    # create a dataset for each bias_adjust_project, modify grid and concat them
-    # choose with dim that exists in partition_dim and is the first in the order of preference
-    order_of_preference = ["reference", "bias_adjust_project", "source"]
-    dim_with_different_grid = list(set(partition_dim) & set(order_of_preference))[0]
-    # dim_with_different_grid = (
-    #     "bias_adjust_project" if "bias_adjust_project" in partition_dim else "source"
-    # )
+    # get attrs that are common to all datasets
+    common_attrs = {}
+    for col, series in subcat.df.items():
+        if (series[0] == series).all():
+            common_attrs[f"cat:{col}"] = series[0]
 
-    # trick for method
+    # trick when using method/ref, instead of bias_adjust_project
     if "method" in partition_dim:
         # replace id with bias_adjust_project with method and ref.
         datasets.df["id"] = generate_id(
             datasets.df,
             [
-                "method",
-                "reference",
+                "method",  # instead of bias_adjust_project
+                "reference",  # instead of bias_adjust_project
                 "mip_era",
                 "activity",
                 "driving_model",
@@ -774,11 +772,10 @@ def _partition_from_catalog(
             ],
         )
 
-    # get attrs that are common to all datasets
-    common_attrs = {}
-    for col, series in subcat.df.items():
-        if (series[0] == series).all():
-            common_attrs[f"cat:{col}"] = series[0]
+    # create a dataset for each bias_adjust_project, modify grid and concat them
+    # choose with dim that exists in partition_dim and is the first in the order of preference
+    order_of_preference = ["reference", "bias_adjust_project", "source"]
+    dim_with_different_grid = list(set(partition_dim) & set(order_of_preference))[0]
 
     list_ds = []
     for d in subcat.df[dim_with_different_grid].unique():
@@ -793,6 +790,7 @@ def _partition_from_catalog(
         if regrid_kw:
             ds = regrid_dataset(ds, **regrid_kw)
 
+        # add coords method and reference
         if "bias_adjust_project" in ds.dims:
             ds = ds.assign_coords(
                 method=("bias_adjust_project", [ds.attrs.get("cat:method", np.nan)])
@@ -913,6 +911,9 @@ def build_partition_data(
     rename_dict = {k: v for k, v in rename_dict.items() if k in ens.dims}
     ens = ens.rename(rename_dict)
 
+    ens.attrs["cat:processing_level"] = "partition_ensemble"
+    ens.attrs["cat:id"] = generate_id(ens)[0]
+
     return ens
 
 
From f667026bd3f241d447b3b348415d7017ad7004d2 Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Fri, 24 May 2024 10:25:47 -0400
Subject: [PATCH 15/37] real in from cat

---
 xscen/ensembles.py | 54 ++++++++++++++++++++++++++++------------------
 1 file changed, 33 insertions(+), 21 deletions(-)

diff --git a/xscen/ensembles.py b/xscen/ensembles.py
index 1d0d9d6e..a41e53a5 100644
--- a/xscen/ensembles.py
+++ b/xscen/ensembles.py
@@ -738,11 +738,22 @@ def _partition_from_list(datasets, partition_dim, subset_kw, regrid_kw):
 def _partition_from_catalog(
     datasets, partition_dim, subset_kw, regrid_kw, to_dataset_kw
 ):
+    if ("method" in partition_dim or "reference" in partition_dim) and (
+        "bias_adjust_project" in partition_dim
+    ):
+        raise ValueError(
+            "The partition_dim can have either method and reference or bias_adjust_project, not both."
+        )
+
+    if ("realization" in partition_dim) and ("source" in partition_dim):
+        raise ValueError(
+            "The partition_dim can have either realization or source, not both."
+        )
 
     # special case to handle source (create one dimension with institution_source_member)
     ensemble_on_list = None
-    if "source" in partition_dim:
-        partition_dim.remove("source")
+    if "realization" in partition_dim:
+        partition_dim.remove("realization")
         ensemble_on_list = ["institution", "source", "member"]
 
     subcat = datasets
@@ -753,24 +764,25 @@ def _partition_from_catalog(
         if (series[0] == series).all():
             common_attrs[f"cat:{col}"] = series[0]
 
-    # trick when using method/ref, instead of bias_adjust_project
-    if "method" in partition_dim:
-        # replace id with bias_adjust_project with method and ref.
-        datasets.df["id"] = generate_id(
-            datasets.df,
-            [
-                "method",  # instead of bias_adjust_project
-                "reference",  # instead of bias_adjust_project
-                "mip_era",
-                "activity",
-                "driving_model",
-                "institution",
-                "source",
-                "experiment",
-                "member",
-                "domain",
-            ],
-        )
+    col_id = [
+        (
+            "method" if "method" in partition_dim else None
+        ),  # instead of bias_adjust_project
+        (
+            "reference" if "reference" in partition_dim else None
+        ),  # instead of bias_adjust_project
+        "bias_adjust_project" if "bias_adjust_project" in partition_dim else None,
+        "mip_era",
+        "activity",
+        "driving_model",
+        "institution" if "realization" in partition_dim else None,
+        "source",
+        "experiment",
+        "member" if "realization" in partition_dim else None,
+        "domain",
+    ]
+
+    datasets.df["id"] = generate_id(datasets.df, col_id)
 
     # create a dataset for each bias_adjust_project, modify grid and concat them
     # choose with dim that exists in partition_dim and is the first in the order of preference
@@ -911,7 +923,7 @@ def build_partition_data(
     rename_dict = {k: v for k, v in rename_dict.items() if k in ens.dims}
     ens = ens.rename(rename_dict)
 
-    ens.attrs["cat:processing_level"] = "partition_ensemble"
+    ens.attrs["cat:processing_level"] = "partition-ensemble"
     ens.attrs["cat:id"] = generate_id(ens)[0]
 
     return ens

From 8414c10a90692f628281d810c5a3bf1efd34309f Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Fri, 7 Jun 2024 15:08:25 -0400
Subject: [PATCH 16/37] to level

---
 xscen/ensembles.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/xscen/ensembles.py b/xscen/ensembles.py
index a41e53a5..7e601c13 100644
--- a/xscen/ensembles.py
+++ b/xscen/ensembles.py
@@ -828,6 +828,7 @@ def build_partition_data(
     calendar_kw: dict = None,
     rename_dict: dict = None,
     to_dataset_kw: dict = None,
+    to_level: dict = "partition-ensemble",
 ):
     """Get the input for the xclim partition functions.
 
@@ -872,6 +873,8 @@ def build_partition_data(
     rename_dict:
         Dictionary to rename the dimensions from xscen names to xclim names.
         The default is {'source': 'model', 'bias_adjust_project': 'downscaling', 'experiment': 'scenario'}.
+    to_level: str
+        The processing level of the output dataset. Default is 'partition-ensemble'.
 
     Returns
     -------
@@ -923,7 +926,7 @@ def build_partition_data(
     rename_dict = {k: v for k, v in rename_dict.items() if k in ens.dims}
     ens = ens.rename(rename_dict)
 
-    ens.attrs["cat:processing_level"] = "partition-ensemble"
+    ens.attrs["cat:processing_level"] = to_level
     ens.attrs["cat:id"] = generate_id(ens)[0]
 
     return ens

From e27ebb73138c456b6cd10f95ab0715e9ac33ae1c Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Fri, 7 Jun 2024 16:07:52 -0400
Subject: [PATCH 17/37] to level type

---
 xscen/ensembles.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xscen/ensembles.py b/xscen/ensembles.py
index 7e601c13..2dfdbdc1 100644
--- a/xscen/ensembles.py
+++ b/xscen/ensembles.py
@@ -828,7 +828,7 @@ def build_partition_data(
     calendar_kw: dict = None,
     rename_dict: dict = None,
     to_dataset_kw: dict = None,
-    to_level: dict = "partition-ensemble",
+    to_level: str = "partition-ensemble",
 ):
     """Get the input for the xclim partition functions.
 

From d9d05353f61d0d58651f33fd7384ff8e1a33aefa Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Mon, 10 Jun 2024 10:34:07 -0400
Subject: [PATCH 18/37] adjustment instead

---
 xscen/ensembles.py | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/xscen/ensembles.py b/xscen/ensembles.py
index 2dfdbdc1..f5045607 100644
--- a/xscen/ensembles.py
+++ b/xscen/ensembles.py
@@ -713,7 +713,10 @@ def _partition_from_list(datasets, partition_dim, subset_kw, regrid_kw):
 
         if "bias_adjust_project" in ds.dims:
             ds = ds.assign_coords(
-                method=("bias_adjust_project", [ds.attrs.get("cat:method", np.nan)])
+                adjustment=(
+                    "bias_adjust_project",
+                    [ds.attrs.get("cat:adjustment", np.nan)],
+                )
             )
             ds = ds.assign_coords(
                 reference=(
@@ -738,11 +741,12 @@ def _partition_from_list(datasets, partition_dim, subset_kw, regrid_kw):
 def _partition_from_catalog(
     datasets, partition_dim, subset_kw, regrid_kw, to_dataset_kw
 ):
-    if ("method" in partition_dim or "reference" in partition_dim) and (
+
+    if ("adjustment" in partition_dim or "reference" in partition_dim) and (
         "bias_adjust_project" in partition_dim
     ):
         raise ValueError(
-            "The partition_dim can have either method and reference or bias_adjust_project, not both."
+            "The partition_dim can have either adjustment and reference or bias_adjust_project, not both."
         )
 
     if ("realization" in partition_dim) and ("source" in partition_dim):
@@ -766,8 +770,8 @@ def _partition_from_catalog(
 
     col_id = [
         (
-            "method" if "method" in partition_dim else None
-        ),  # instead of bias_adjust_project
+            "adjustment" if "adjustment" in partition_dim else None
+        ),  # instead of bias_adjust_project, need to use adjustment, not method bc .sel
         (
             "reference" if "reference" in partition_dim else None
         ),  # instead of bias_adjust_project
@@ -802,11 +806,14 @@ def _partition_from_catalog(
         if regrid_kw:
             ds = regrid_dataset(ds, **regrid_kw)
 
-        # add coords method and reference
+        # add coords adjustment and reference
         if "bias_adjust_project" in ds.dims:
             ds = ds.assign_coords(
-                method=("bias_adjust_project", [ds.attrs.get("cat:method", np.nan)])
-            )
+                adjustment=(
+                    "bias_adjust_project",
+                    [ds.attrs.get("cat:adjustment", np.nan)],
+                )
+            )  # need to use adjustment, not method bc .sel
             ds = ds.assign_coords(
                 reference=(
                     "bias_adjust_project",

From 79dd1b0f340f0d239a24ff9379c19719a2aa8c26 Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Mon, 10 Jun 2024 17:03:39 -0400
Subject: [PATCH 19/37] subcat

---
 xscen/ensembles.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/xscen/ensembles.py b/xscen/ensembles.py
index f5045607..fa9fe392 100644
--- a/xscen/ensembles.py
+++ b/xscen/ensembles.py
@@ -786,10 +786,10 @@ def _partition_from_catalog(
         "domain",
     ]
 
-    datasets.df["id"] = generate_id(datasets.df, col_id)
+    subcat.df["id"] = generate_id(subcat.df, col_id)
 
     # create a dataset for each bias_adjust_project, modify grid and concat them
-    # choose with dim that exists in partition_dim and is the first in the order of preference
+    # choose dim that exists in partition_dim and first in the order of preference
     order_of_preference = ["reference", "bias_adjust_project", "source"]
     dim_with_different_grid = list(set(partition_dim) & set(order_of_preference))[0]
 

From 0f53e79227a5a4eee1094fd8543c122a68e19d34 Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Mon, 6 Jan 2025 16:36:26 -0500
Subject: [PATCH 20/37] cleanup

---
 src/xscen/ensembles.py | 30 ------------------------------
 1 file changed, 30 deletions(-)

diff --git a/src/xscen/ensembles.py b/src/xscen/ensembles.py
index b1897c9b..2bf7e7ff 100644
--- a/src/xscen/ensembles.py
+++ b/src/xscen/ensembles.py
@@ -671,22 +671,6 @@ def _partition_from_list(datasets, partition_dim, subset_kw, regrid_kw):
         if regrid_kw:
             ds = regrid_dataset(ds, **regrid_kw)
 
-        # if indicators_kw:
-        #     dict_ind = compute_indicators(ds, **indicators_kw)
-        #     if len(dict_ind) > 1:
-        #         raise ValueError(
-        #             f"The indicators computation should return only indicators of the same frequency.Returned frequencies: {dict_ind.keys()}"
-        #         )
-        #     else:
-        #         ds = list(dict_ind.values())[0]
-
-        # # get calendar of each dataset
-        # if calendar_kw is None:
-        #     if "time" in ds.coords:
-        #         time = xr.decode_cf(ds).time
-        #         ds["time"] = time
-        #         calendars.append(xc.core.calendar.get_calendar(time))
-
         for dim in partition_dim:
             if f"cat:{dim}" in ds.attrs:
                 ds = ds.expand_dims(**{dim: [ds.attrs[f"cat:{dim}"]]})
@@ -811,8 +795,6 @@ def build_partition_data(
     partition_dim: list[str] = ["realization", "experiment", "bias_adjust_project"],
     subset_kw: dict | None = None,
     regrid_kw: dict | None = None,
-    indicators_kw: dict | None = None,
-    calendar_kw: dict | None = None,
     rename_dict: dict | None = None,
     to_dataset_kw: dict | None = None,
     to_level: str = "partition-ensemble",
@@ -847,17 +829,6 @@ def build_partition_data(
         Arguments to pass to `xs.spatial.subset()`.
     regrid_kw : dict, optional
         Arguments to pass to `xs.regrid_dataset()`.
-    indicators_kw : dict, optional
-        Arguments to pass to `xs.indicators.compute_indicators()`.
-        All indicators have to be for the same frequency, in order to be put on a single time axis.
-    calendar_kw : dict, optional
-        Arguments to pass to `xclim.core.calendar.convert_calendar`.
-        If None, the smallest common calendar is chosen.
-        For example, a mixed input of “noleap” and “360_day” will default to “noleap”.
-        ‘default’ is the standard calendar using np.datetime64 objects (xarray’s “standard” with use_cftime=False).
-        This is the same behavior as `calendar` in xclim.create_ensemble.
-        For conversions involving '360_day', the align_on='date' option is used by default.
-        If False, no conversion is done.
     rename_dict : dict, optional
         Dictionary to rename the dimensions from xscen names to xclim names.
         The default is {'source': 'model', 'bias_adjust_project': 'downscaling', 'experiment': 'scenario'}.
@@ -880,7 +851,6 @@ def build_partition_data(
     subset_kw = subset_kw or {}
     regrid_kw = regrid_kw or {}
     to_dataset_kw = to_dataset_kw or {}
-    calendar_kw = calendar_kw or {}
 
     if isinstance(datasets, list):
         ens = _partition_from_list(datasets, partition_dim, subset_kw, regrid_kw)

From 7894515b6b9f3fd7b921d76114d7740cd91f5160 Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Tue, 7 Jan 2025 11:44:10 -0500
Subject: [PATCH 21/37] add tests

---
 CHANGELOG.rst           |  3 ++-
 src/xscen/ensembles.py  | 16 +++++++-----
 tests/test_ensembles.py | 55 ++++++++++++++++++++++++++++++-----------
 3 files changed, 52 insertions(+), 22 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 26043b0c..7e6946c8 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -4,11 +4,12 @@ Changelog
 
 v0.11.0 (unreleased)
 --------------------
-Contributors to this version: Gabriel Rondeau-Genesse (:user:`RondeauG`).
+Contributors to this version: Gabriel Rondeau-Genesse (:user:`RondeauG`), Juliette Lavoie (:user:`juliettelavoie`).
 
 New features and enhancements
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 * ``xs.io.make_toc`` now includes the global attributes of the dataset after the information about the variables. (:pull:`473`).
+* Improve ``xs.ensemles.build_partition_data``. (:pull:``).
 
 Bug fixes
 ^^^^^^^^^
diff --git a/src/xscen/ensembles.py b/src/xscen/ensembles.py
index 2bf7e7ff..36f7e92f 100644
--- a/src/xscen/ensembles.py
+++ b/src/xscen/ensembles.py
@@ -767,6 +767,9 @@ def _partition_from_catalog(
 
         if subset_kw:
             ds = subset(ds, **subset_kw)
+            ds = ds.drop_vars(
+                ["lat", "lon", "rlat", "rlon", "rotated_pole"], errors="ignore"
+            )
         if regrid_kw:
             ds = regrid_dataset(ds, **regrid_kw)
 
@@ -806,14 +809,12 @@ def build_partition_data(
     `partition_dim` dimensions (and time) to pass to one of the xclim partition functions
     (https://xclim.readthedocs.io/en/stable/api.html#uncertainty-partitioning).
     If the inputs have different grids,
-    they have to be subsetted and regridded to a common grid/point.
-    Indicators can also be computed and calendar converted before combining the datasets.
-
+    they have to be subsetted and/or regridded to a common grid/point.
 
     Parameters
     ----------
-    datasets : dict
-        List, dictionnary or DataCatalog of Datasets that will be included in the ensemble.
+    datasets : list, dict, DataCatalog
+        List or dictionnary of Datasets or DataCatalog that will be included in the ensemble.
         The datasets should include the necessary ("cat:") attributes to understand their metadata.
         Tip: A dictionnary can be created with `datasets = pcat.search(**search_dict).to_dataset_dict()`.
 
@@ -829,9 +830,13 @@ def build_partition_data(
         Arguments to pass to `xs.spatial.subset()`.
     regrid_kw : dict, optional
         Arguments to pass to `xs.regrid_dataset()`.
+        Note thet regriding is computationnaly expensive. For large datasets,
+        it might be worth it to do do regridding first, outside of this function.
     rename_dict : dict, optional
         Dictionary to rename the dimensions from xscen names to xclim names.
         The default is {'source': 'model', 'bias_adjust_project': 'downscaling', 'experiment': 'scenario'}.
+    to_dataset_kw : dict, optional
+        Arguments to pass to `xscen.DataCatalog.to_dataset()` if datasets is a DataCatalog.
     to_level: str
         The processing level of the output dataset. Default is 'partition-ensemble'.
 
@@ -844,7 +849,6 @@ def build_partition_data(
     --------
     xclim.ensembles
     """
-    # TODO: add warning if both realization and source in partition_dim
     if isinstance(datasets, dict):
         datasets = list(datasets.values())
     # initialize dict
diff --git a/tests/test_ensembles.py b/tests/test_ensembles.py
index 6aa596e2..b7c33541 100644
--- a/tests/test_ensembles.py
+++ b/tests/test_ensembles.py
@@ -1071,21 +1071,18 @@ class TestEnsemblePartition:
     @pytest.mark.skipif(xe is None, reason="xesmf needed for testing regrdding")
     def test_build_partition_data(self, samplecat, tmp_path):
         # test subset
-        datasets = samplecat.search(variable="tas").to_dataset_dict(
+        datasets = samplecat.search(variable="tas", member="r1i1p1f1").to_dataset_dict(
             xarray_open_kwargs={"engine": "h5netcdf"}
         )
         ds = xs.ensembles.build_partition_data(
             datasets=datasets,
             partition_dim=["source", "experiment"],
             subset_kw=dict(name="mtl", method="gridpoint", lat=[45.0], lon=[-74]),
-            indicators_kw=dict(indicators=[xc.atmos.tg_mean]),
             rename_dict={"source": "new-name"},
         )
 
-        assert ds.dims == {"time": 2, "scenario": 4, "new-name": 2}
-        assert ds.lat.values == 45.0
-        assert ds.lon.values == -74
-        assert [i for i in ds.data_vars] == ["tg_mean"]
+        assert ds.dims == {"time": 730, "scenario": 4, "new-name": 1}
+        assert ds.attrs["cat:processing_level"] == "partition-ensemble"
 
         # test regrid
         ds_grid = xe.util.cf_grid_2d(-75, -74, 0.25, 45, 48, 0.55)
@@ -1095,6 +1092,7 @@ def test_build_partition_data(self, samplecat, tmp_path):
         ds = xs.ensembles.build_partition_data(
             datasets=datasets,
             regrid_kw=dict(ds_grid=ds_grid, weights_location=tmp_path),
+            to_level="test",
         )
 
         assert ds.dims == {
@@ -1105,16 +1103,43 @@ def test_build_partition_data(self, samplecat, tmp_path):
             "lon": 4,
         }
         assert [i for i in ds.data_vars] == ["tas"]
+        assert ds.attrs["cat:processing_level"] == "test"
 
-        # test error
-        with pytest.raises(
-            ValueError,
-        ):
-            ds = xs.ensembles.build_partition_data(
-                datasets=datasets,
-                subset_kw=dict(name="mtl", method="gridpoint", lat=[45.0], lon=[-74]),
-                indicators_kw=dict(indicators=[xc.atmos.tg_mean, xc.indicators.cf.tg]),
-            )
+    def test_partition_from_catalog(self, samplecat):
+        datasets = samplecat.search(variable="tas", member="r1i1p1f1")
+        ds_from_dict = xs.ensembles.build_partition_data(
+            datasets=datasets.to_dataset_dict(
+                xarray_open_kwargs={"engine": "h5netcdf"}
+            ),
+            partition_dim=["source", "experiment"],
+            subset_kw=dict(name="mtl", method="gridpoint", lat=[45.0], lon=[-74]),
+        )
+
+        ds_from_cat = xs.ensembles.build_partition_data(
+            datasets=datasets,
+            partition_dim=["source", "experiment"],
+            subset_kw=dict(name="mtl", method="gridpoint", lat=[45.0], lon=[-74]),
+            to_dataset_kw=dict(xarray_open_kwargs={"engine": "h5netcdf"}),
+        )
+        # fix order
+        ds_from_cat = ds_from_cat[["time", "model", "scenario", "tas"]]
+        ds_from_cat["tas"] = ds_from_cat["tas"].transpose("scenario", "model", "time")
+
+        assert ds_from_dict.equals(ds_from_cat)
+
+    def test_realization_partition(self, samplecat):
+
+        datasets = samplecat.search(variable="tas").to_dataset_dict(
+            xarray_open_kwargs={"engine": "h5netcdf"}
+        )
+        ds = xs.ensembles.build_partition_data(
+            datasets=datasets,
+            partition_dim=["realization", "experiment"],
+            subset_kw=dict(name="mtl", method="gridpoint", lat=[45.0], lon=[-74]),
+        )
+
+        assert "NCC_NorESM2-MM_r1i1p1f1" in ds.model.values
+        assert ds.dims == {"time": 730, "scenario": 4, "model": 2}
 
 
 class TestReduceEnsemble:

From 61eb239595f745d2d67542746535578d22268abb Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Tue, 7 Jan 2025 14:09:18 -0500
Subject: [PATCH 22/37] pr num

---
 CHANGELOG.rst          | 2 +-
 src/xscen/ensembles.py | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 7e6946c8..ad56c54f 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -9,7 +9,7 @@ Contributors to this version: Gabriel Rondeau-Genesse (:user:`RondeauG`), Juliet
 New features and enhancements
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 * ``xs.io.make_toc`` now includes the global attributes of the dataset after the information about the variables. (:pull:`473`).
-* Improve ``xs.ensemles.build_partition_data``. (:pull:``).
+* Improve ``xs.ensemles.build_partition_data``. (:pull:`504`).
 
 Bug fixes
 ^^^^^^^^^
diff --git a/src/xscen/ensembles.py b/src/xscen/ensembles.py
index 36f7e92f..a5d1f057 100644
--- a/src/xscen/ensembles.py
+++ b/src/xscen/ensembles.py
@@ -658,7 +658,6 @@ def generate_weights(  # noqa: C901
 
 def _partition_from_list(datasets, partition_dim, subset_kw, regrid_kw):
     list_ds = []
-    # calendars = []
     # only keep attrs common to all datasets
     common_attrs = False
     for ds in datasets:

From 9ecc8c90dad1f942b7757f707917983c50d91167 Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Tue, 7 Jan 2025 14:44:49 -0500
Subject: [PATCH 23/37] pin xarray

---
 environment-dev.yml | 2 +-
 environment.yml     | 2 +-
 pyproject.toml      | 2 +-
 src/xscen/io.py     | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/environment-dev.yml b/environment-dev.yml
index 56345653..06324bd2 100644
--- a/environment-dev.yml
+++ b/environment-dev.yml
@@ -30,7 +30,7 @@ dependencies:
   - shapely >=2.0
   - sparse
   - toolz
-  - xarray >=2023.11.0, !=2024.6.0
+  - xarray >=2023.11.0, !=2024.6.0, <2025.1.0 #FIXME: 2025.1.0 breaks rechunker with zarr
   - xclim >=0.53.2, <0.54
   - xesmf >=0.7, <0.8.8  # FIXME: 0.8.8 currently creates segfaults on ReadTheDocs.
   - zarr >=2.13
diff --git a/environment.yml b/environment.yml
index 126cb271..9b51f166 100644
--- a/environment.yml
+++ b/environment.yml
@@ -30,7 +30,7 @@ dependencies:
   - shapely >=2.0
   - sparse
   - toolz
-  - xarray >=2023.11.0, !=2024.6.0
+  - xarray >=2023.11.0, !=2024.6.0, <2025.1.0 #FIXME: 2025.1.0 breaks rechunker with zarr
   - xclim >=0.53.2, <0.54
   - xesmf >=0.7, <0.8.8  # FIXME: 0.8.8 currently creates segfaults on ReadTheDocs.
   - zarr >=2.13
diff --git a/pyproject.toml b/pyproject.toml
index a46c4883..d134504a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -65,7 +65,7 @@ dependencies = [
   "shapely >=2.0",
   "sparse",
   "toolz",
-  "xarray >=2023.11.0, !=2024.6.0",
+  "xarray >=2023.11.0, !=2024.6.0, <2025.1.0", # FIXME: 2025.1.0 breaks rechunker with zarr
   "xclim >=0.53.2, <0.54",
   "zarr >=2.13"
 ]
diff --git a/src/xscen/io.py b/src/xscen/io.py
index ed7df22c..484e00a4 100644
--- a/src/xscen/io.py
+++ b/src/xscen/io.py
@@ -1053,7 +1053,7 @@ def rechunk(
         raise ValueError(
             "No chunks given. Need to give at `chunks_over_var` or `chunks_over_dim`."
         )
-
+    print(ds, chunks, worker_mem, str(path_out), str(temp_store))
     plan = _rechunk(ds, chunks, worker_mem, str(path_out), temp_store=str(temp_store))
 
     plan.execute()

From 1f5baa584e7be515e43243df819476f966ac981a Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Tue, 7 Jan 2025 14:54:32 -0500
Subject: [PATCH 24/37] pin xarray

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index d134504a..24df481a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -65,7 +65,7 @@ dependencies = [
   "shapely >=2.0",
   "sparse",
   "toolz",
-  "xarray >=2023.11.0, !=2024.6.0, <2025.1.0", # FIXME: 2025.1.0 breaks rechunker with zarr
+  "xarray >=2023.11.0, !=2024.6.0, <2024.11.0", # FIXME: 2025.1.0 breaks rechunker with zarr
   "xclim >=0.53.2, <0.54",
   "zarr >=2.13"
 ]

From 10cd31b2a0f804792579ed131b846d785e65fd8b Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Tue, 7 Jan 2025 15:03:45 -0500
Subject: [PATCH 25/37] pin xarray

---
 environment-dev.yml | 2 +-
 environment.yml     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/environment-dev.yml b/environment-dev.yml
index 06324bd2..afef0910 100644
--- a/environment-dev.yml
+++ b/environment-dev.yml
@@ -30,7 +30,7 @@ dependencies:
   - shapely >=2.0
   - sparse
   - toolz
-  - xarray >=2023.11.0, !=2024.6.0, <2025.1.0 #FIXME: 2025.1.0 breaks rechunker with zarr
+  - xarray >=2023.11.0, !=2024.6.0, <2024.11.0 #FIXME: 2025.1.0 breaks rechunker with zarr
   - xclim >=0.53.2, <0.54
   - xesmf >=0.7, <0.8.8  # FIXME: 0.8.8 currently creates segfaults on ReadTheDocs.
   - zarr >=2.13
diff --git a/environment.yml b/environment.yml
index 9b51f166..5167bbe1 100644
--- a/environment.yml
+++ b/environment.yml
@@ -30,7 +30,7 @@ dependencies:
   - shapely >=2.0
   - sparse
   - toolz
-  - xarray >=2023.11.0, !=2024.6.0, <2025.1.0 #FIXME: 2025.1.0 breaks rechunker with zarr
+  - xarray >=2023.11.0, !=2024.6.0, <2024.11.0 #FIXME: 2025.1.0 breaks rechunker with zarr
   - xclim >=0.53.2, <0.54
   - xesmf >=0.7, <0.8.8  # FIXME: 0.8.8 currently creates segfaults on ReadTheDocs.
   - zarr >=2.13

From 7fe8abd385788acfdb05fc4b0d7b6a6b058169ab Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Tue, 7 Jan 2025 15:46:33 -0500
Subject: [PATCH 26/37] fix doc

---
 docs/notebooks/4_ensembles.ipynb | 16 ++++++++++++----
 environment-dev.yml              |  2 +-
 environment.yml                  |  2 +-
 pyproject.toml                   |  2 +-
 4 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/docs/notebooks/4_ensembles.ipynb b/docs/notebooks/4_ensembles.ipynb
index fa76d1ac..0fbf8855 100644
--- a/docs/notebooks/4_ensembles.ipynb
+++ b/docs/notebooks/4_ensembles.ipynb
@@ -169,19 +169,28 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "is_executing": true
+   },
    "outputs": [],
    "source": [
     "# Get catalog\n",
     "from pathlib import Path\n",
     "\n",
+    "import xclim as xc\n",
+    "\n",
     "output_folder = Path().absolute() / \"_data\"\n",
     "cat = xs.DataCatalog(str(output_folder / \"tutorial-catalog.json\"))\n",
     "\n",
     "# create a dictionnary of datasets wanted for the partition\n",
     "input_dict = cat.search(variable=\"tas\", member=\"r1i1p1f1\").to_dataset_dict(\n",
     "    xarray_open_kwargs={\"engine\": \"h5netcdf\"}\n",
-    ")"
+    ")\n",
+    "datasets = {}\n",
+    "for k, v in input_dict.items():\n",
+    "    ds = xc.atmos.tg_mean(v.tas).to_dataset()\n",
+    "    ds.attrs = v.attrs\n",
+    "    datasets[k] = ds"
    ]
   },
   {
@@ -204,9 +213,8 @@
     "import xclim as xc\n",
     "\n",
     "ds = xs.ensembles.build_partition_data(\n",
-    "    input_dict,\n",
+    "    datasets,\n",
     "    subset_kw=dict(name=\"mtl\", method=\"gridpoint\", lat=[45.5], lon=[-73.6]),\n",
-    "    indicators_kw={\"indicators\": [xc.atmos.tg_mean]},\n",
     ")\n",
     "ds"
    ]
diff --git a/environment-dev.yml b/environment-dev.yml
index afef0910..4af3a0de 100644
--- a/environment-dev.yml
+++ b/environment-dev.yml
@@ -30,7 +30,7 @@ dependencies:
   - shapely >=2.0
   - sparse
   - toolz
-  - xarray >=2023.11.0, !=2024.6.0, <2024.11.0 #FIXME: 2025.1.0 breaks rechunker with zarr
+  - xarray >=2023.11.0, !=2024.6.0, <2024.10.0 #FIXME: 2024.10.0 breaks rechunker with zarr
   - xclim >=0.53.2, <0.54
   - xesmf >=0.7, <0.8.8  # FIXME: 0.8.8 currently creates segfaults on ReadTheDocs.
   - zarr >=2.13
diff --git a/environment.yml b/environment.yml
index 5167bbe1..f162d8a2 100644
--- a/environment.yml
+++ b/environment.yml
@@ -30,7 +30,7 @@ dependencies:
   - shapely >=2.0
   - sparse
   - toolz
-  - xarray >=2023.11.0, !=2024.6.0, <2024.11.0 #FIXME: 2025.1.0 breaks rechunker with zarr
+  - xarray >=2023.11.0, !=2024.6.0, <2024.10.0 #FIXME: 2024.10.0 breaks rechunker with zarr
   - xclim >=0.53.2, <0.54
   - xesmf >=0.7, <0.8.8  # FIXME: 0.8.8 currently creates segfaults on ReadTheDocs.
   - zarr >=2.13
diff --git a/pyproject.toml b/pyproject.toml
index 24df481a..57cc309d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -65,7 +65,7 @@ dependencies = [
   "shapely >=2.0",
   "sparse",
   "toolz",
-  "xarray >=2023.11.0, !=2024.6.0, <2024.11.0", # FIXME: 2025.1.0 breaks rechunker with zarr
+  "xarray >=2023.11.0, !=2024.6.0, <2024.10.0", # FIXME: 2024.10.0 breaks rechunker with zarr
   "xclim >=0.53.2, <0.54",
   "zarr >=2.13"
 ]

From c4aaf0895c0387aa7b0af7db1abe938ae95d4ae8 Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Tue, 7 Jan 2025 17:24:53 -0500
Subject: [PATCH 27/37] changelog

---
 CHANGELOG.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index a3626eb0..483d37d6 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -8,7 +8,7 @@ Contributors to this version: Gabriel Rondeau-Genesse (:user:`RondeauG`), Juliet
 
 New features and enhancements
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-* Improve ``xs.ensemles.build_partition_data``. (:pull:``).
+* Improve ``xs.ensemles.build_partition_data``. (:pull:`504`).
 
 Breaking changes
 ^^^^^^^^^^^^^^^^

From 98aa4ba5d65b5812b63ea4479d01b47f7286101e Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Tue, 7 Jan 2025 17:48:43 -0500
Subject: [PATCH 28/37] update xclim v

---
 environment.yml                        |   2 +-
 src/xscen/data/fr/LC_MESSAGES/xscen.mo | Bin 1015 -> 1015 bytes
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/environment.yml b/environment.yml
index f162d8a2..c2605af5 100644
--- a/environment.yml
+++ b/environment.yml
@@ -31,7 +31,7 @@ dependencies:
   - sparse
   - toolz
   - xarray >=2023.11.0, !=2024.6.0, <2024.10.0 #FIXME: 2024.10.0 breaks rechunker with zarr
-  - xclim >=0.53.2, <0.54
+  - xclim >=0.54, <0.55
   - xesmf >=0.7, <0.8.8  # FIXME: 0.8.8 currently creates segfaults on ReadTheDocs.
   - zarr >=2.13
   # To install from source
diff --git a/src/xscen/data/fr/LC_MESSAGES/xscen.mo b/src/xscen/data/fr/LC_MESSAGES/xscen.mo
index 51b5812af3b98854aae679b7238a00236247820d..a8cfcf2504034a8fc8df704fe70f6671a7322999 100644
GIT binary patch
delta 32
ocmey){+)fpM@C*#T?0d119JsKb1Orm&HotX85zwc%Q4>s0Iy;Rz5oCK

delta 32
ocmey){+)fpM@C)~T|+}%Ln8$PODjW*&HotX85vC`%Q4>s0Iy{UzW@LL


From 0edc0aabd6073788c24c8db06a9afc32a950a09a Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Tue, 7 Jan 2025 17:52:40 -0500
Subject: [PATCH 29/37] update xclim v

---
 environment-dev.yml | 2 +-
 pyproject.toml      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/environment-dev.yml b/environment-dev.yml
index 4af3a0de..a3f2c89e 100644
--- a/environment-dev.yml
+++ b/environment-dev.yml
@@ -31,7 +31,7 @@ dependencies:
   - sparse
   - toolz
   - xarray >=2023.11.0, !=2024.6.0, <2024.10.0 #FIXME: 2024.10.0 breaks rechunker with zarr
-  - xclim >=0.53.2, <0.54
+  - xclim >=0.54, <0.55
   - xesmf >=0.7, <0.8.8  # FIXME: 0.8.8 currently creates segfaults on ReadTheDocs.
   - zarr >=2.13
   # Opt
diff --git a/pyproject.toml b/pyproject.toml
index 57cc309d..d921dbf4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -66,7 +66,7 @@ dependencies = [
   "sparse",
   "toolz",
   "xarray >=2023.11.0, !=2024.6.0, <2024.10.0", # FIXME: 2024.10.0 breaks rechunker with zarr
-  "xclim >=0.53.2, <0.54",
+  "xclim >=0.54, <0.55",
   "zarr >=2.13"
 ]
 

From fa884c004724417119cd36727dab507b11e0546c Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Wed, 8 Jan 2025 10:28:06 -0500
Subject: [PATCH 30/37] fix docs

---
 environment-dev.yml | 3 ++-
 pyproject.toml      | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/environment-dev.yml b/environment-dev.yml
index a3f2c89e..8e234de8 100644
--- a/environment-dev.yml
+++ b/environment-dev.yml
@@ -30,7 +30,7 @@ dependencies:
   - shapely >=2.0
   - sparse
   - toolz
-  - xarray >=2023.11.0, !=2024.6.0, <2024.10.0 #FIXME: 2024.10.0 breaks rechunker with zarr
+  - xarray >=2023.11.0, !=2024.6.0, <2024.10.0 #FIXME: 2024.10.0 breaks rechunker with zarr, https://github.com/pangeo-data/rechunker/issues/154
   - xclim >=0.54, <0.55
   - xesmf >=0.7, <0.8.8  # FIXME: 0.8.8 currently creates segfaults on ReadTheDocs.
   - zarr >=2.13
@@ -56,6 +56,7 @@ dependencies:
   - pandoc
   - pooch
   - pre-commit >=3.5.0
+  - pygments <2.19 #FIXME: temporary fix, https://github.com/felix-hilden/sphinx-codeautolink/issues/153
   - pytest >=8.3.2
   - pytest-cov >=5.0.0
   - pytest-xdist >=3.2.0
diff --git a/pyproject.toml b/pyproject.toml
index d921dbf4..1c9cb828 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -109,7 +109,8 @@ docs = [
   "sphinx-intl",
   "sphinx-mdinclude",
   "sphinx-rtd-theme >=1.0",
-  "sphinxcontrib-napoleon"
+  "sphinxcontrib-napoleon",
+  "pygments <2.19" # FIXME: temporary fix, https://github.com/felix-hilden/sphinx-codeautolink/issues/153
 ]
 extra = [
   "xesmf>=0.7, <0.8.8" # FIXME: 0.8.8 currently creates segfaults on ReadTheDocs.

From 32350ff94986d122a30d2fbfc7e5cf5df0747aad Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Wed, 8 Jan 2025 10:49:38 -0500
Subject: [PATCH 31/37] fix test

---
 tests/test_biasadjust.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tests/test_biasadjust.py b/tests/test_biasadjust.py
index b80ccdb2..d133c715 100644
--- a/tests/test_biasadjust.py
+++ b/tests/test_biasadjust.py
@@ -47,11 +47,10 @@ def test_basic_train(self, var, period):
 
     def test_preprocess(self):
 
-        dref360 = self.dref.convert_calendar("360_day", align_on="year")
-
+        dhist360 = self.dhist.convert_calendar("360_day", align_on="year")
         out = xs.train(
-            dref360,
-            self.dhist,
+            self.dref,
+            dhist360,
             var="tas",
             period=["2001", "2002"],
             adapt_freq={"thresh": "2 K"},

From 4ebc4c3a2e4fff19295634c6bb3a721d0e39a957 Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Wed, 8 Jan 2025 15:27:50 -0500
Subject: [PATCH 32/37] remove test

---
 tests/test_biasadjust.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_biasadjust.py b/tests/test_biasadjust.py
index d133c715..5af316ef 100644
--- a/tests/test_biasadjust.py
+++ b/tests/test_biasadjust.py
@@ -53,14 +53,14 @@ def test_preprocess(self):
             dhist360,
             var="tas",
             period=["2001", "2002"],
-            adapt_freq={"thresh": "2 K"},
+            # adapt_freq={"thresh": "2 K"}, #FIXME: put back the test when xclim 0.55 is released, https://github.com/Ouranosinc/xclim/pull/2038/files
             jitter_over={"upper_bnd": "3 K", "thresh": "2 K"},
             jitter_under={"thresh": "2 K"},
         )
 
         assert out.attrs["train_params"] == {
             "maximal_calendar": "noleap",
-            "adapt_freq": {"thresh": "2 K"},
+            # "adapt_freq": {"thresh": "2 K"},  #FIXME: put back the test when xclim 0.55 is released, https://github.com/Ouranosinc/xclim/pull/2038/files
             "jitter_over": {"upper_bnd": "3 K", "thresh": "2 K"},
             "jitter_under": {"thresh": "2 K"},
             "var": ["tas"],

From afd6437c8aed13dc62e8b2fc5897421c929c0315 Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Wed, 8 Jan 2025 15:45:01 -0500
Subject: [PATCH 33/37] try again

---
 tests/test_biasadjust.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/test_biasadjust.py b/tests/test_biasadjust.py
index 5af316ef..8b86f003 100644
--- a/tests/test_biasadjust.py
+++ b/tests/test_biasadjust.py
@@ -46,21 +46,23 @@ def test_basic_train(self, var, period):
         np.testing.assert_array_equal(out["scaling"], result)
 
     def test_preprocess(self):
+        # FIXME: put back the test when xclim 0.55 is released, https://github.com/Ouranosinc/xclim/pull/2038/files
+        # dhist360 = self.dhist.convert_calendar("360_day", align_on="year")
+        dhist360 = self.dhist.convert_calendar("noleap", align_on="year")
 
-        dhist360 = self.dhist.convert_calendar("360_day", align_on="year")
         out = xs.train(
             self.dref,
             dhist360,
             var="tas",
             period=["2001", "2002"],
-            # adapt_freq={"thresh": "2 K"}, #FIXME: put back the test when xclim 0.55 is released, https://github.com/Ouranosinc/xclim/pull/2038/files
+            adapt_freq={"thresh": "2 K"},
             jitter_over={"upper_bnd": "3 K", "thresh": "2 K"},
             jitter_under={"thresh": "2 K"},
         )
 
         assert out.attrs["train_params"] == {
             "maximal_calendar": "noleap",
-            # "adapt_freq": {"thresh": "2 K"},  #FIXME: put back the test when xclim 0.55 is released, https://github.com/Ouranosinc/xclim/pull/2038/files
+            "adapt_freq": {"thresh": "2 K"},
             "jitter_over": {"upper_bnd": "3 K", "thresh": "2 K"},
             "jitter_under": {"thresh": "2 K"},
             "var": ["tas"],

From e3c8bf55d4b0fa7497f632ccc11ceb5d5f42bb45 Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Wed, 15 Jan 2025 13:41:56 -0500
Subject: [PATCH 34/37] Apply suggestions from code review

Co-authored-by: RondeauG <38501935+RondeauG@users.noreply.github.com>
---
 CHANGELOG.rst          |  2 +-
 src/xscen/ensembles.py | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 483d37d6..86c1c549 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -8,7 +8,7 @@ Contributors to this version: Gabriel Rondeau-Genesse (:user:`RondeauG`), Juliet
 
 New features and enhancements
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-* Improve ``xs.ensemles.build_partition_data``. (:pull:`504`).
+* Improve ``xs.ensembles.build_partition_data``. (:pull:`504`).
 
 Breaking changes
 ^^^^^^^^^^^^^^^^
diff --git a/src/xscen/ensembles.py b/src/xscen/ensembles.py
index 53e801ef..b21fa0f0 100644
--- a/src/xscen/ensembles.py
+++ b/src/xscen/ensembles.py
@@ -787,10 +787,10 @@ def build_partition_data(
 
     Parameters
     ----------
-    datasets : list, dict, DataCatalog
-        List or dictionnary of Datasets or DataCatalog that will be included in the ensemble.
+    datasets : list[xr.Dataset], dict[str, xr.Dataset], DataCatalog
+        Either a list/dictionary of Datasets or a DataCatalog that will be included in the ensemble.
         The datasets should include the necessary ("cat:") attributes to understand their metadata.
-        Tip: A dictionnary can be created with `datasets = pcat.search(**search_dict).to_dataset_dict()`.
+        Tip: A dictionary can be created with `datasets = pcat.search(**search_dict).to_dataset_dict()`.
 
         The use of a DataCatalog is recommended for large ensembles.
         In that case, the ensembles will be loaded separately for each `bias_adjust_project`,
@@ -804,8 +804,8 @@ def build_partition_data(
         Arguments to pass to `xs.spatial.subset()`.
     regrid_kw : dict, optional
         Arguments to pass to `xs.regrid_dataset()`.
-        Note thet regriding is computationnaly expensive. For large datasets,
-        it might be worth it to do do regridding first, outside of this function.
+        Note that regridding is computationally expensive. For large datasets,
+        it might be worth it to do the regridding first, outside of this function.
     rename_dict : dict, optional
         Dictionary to rename the dimensions from xscen names to xclim names.
         The default is {'source': 'model', 'bias_adjust_project': 'downscaling', 'experiment': 'scenario'}.
@@ -840,7 +840,7 @@ def build_partition_data(
 
     else:
         raise ValueError(
-            "datasets should be a list or a dictionary of xarray datasets or a xscen.DataCatalog"
+            "'datasets' should be a list/dictionary of xarray datasets or a xscen.DataCatalog"
         )
 
     rename_dict = rename_dict or {}

From ea1a99a2f8dfb32ffc0d957dbf483ee1fbf05029 Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Wed, 15 Jan 2025 13:48:09 -0500
Subject: [PATCH 35/37] remove print

---
 src/xscen/io.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/xscen/io.py b/src/xscen/io.py
index 484e00a4..e27f0c30 100644
--- a/src/xscen/io.py
+++ b/src/xscen/io.py
@@ -1053,7 +1053,6 @@ def rechunk(
         raise ValueError(
             "No chunks given. Need to give at `chunks_over_var` or `chunks_over_dim`."
         )
-    print(ds, chunks, worker_mem, str(path_out), str(temp_store))
     plan = _rechunk(ds, chunks, worker_mem, str(path_out), temp_store=str(temp_store))
 
     plan.execute()

From 0ca11fdddd1e0c851fb86f2d00aa8acceba84694 Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Wed, 15 Jan 2025 14:06:35 -0500
Subject: [PATCH 36/37] pin zarr

---
 environment-dev.yml    |  2 +-
 environment.yml        |  2 +-
 pyproject.toml         |  2 +-
 src/xscen/ensembles.py | 23 ++++++++++++++++++++---
 4 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/environment-dev.yml b/environment-dev.yml
index 8e234de8..eefbb629 100644
--- a/environment-dev.yml
+++ b/environment-dev.yml
@@ -33,7 +33,7 @@ dependencies:
   - xarray >=2023.11.0, !=2024.6.0, <2024.10.0 #FIXME: 2024.10.0 breaks rechunker with zarr, https://github.com/pangeo-data/rechunker/issues/154
   - xclim >=0.54, <0.55
   - xesmf >=0.7, <0.8.8  # FIXME: 0.8.8 currently creates segfaults on ReadTheDocs.
-  - zarr >=2.13
+  - zarr >=2.13, <3.0 #FIXME: xarray is compatible with zarr 3.0 from 2025.01.1, but we pin xarray below that version
   # Opt
   - nc-time-axis >=1.3.1
   - pyarrow >=10.0.1
diff --git a/environment.yml b/environment.yml
index c2605af5..a74a96b7 100644
--- a/environment.yml
+++ b/environment.yml
@@ -33,7 +33,7 @@ dependencies:
   - xarray >=2023.11.0, !=2024.6.0, <2024.10.0 #FIXME: 2024.10.0 breaks rechunker with zarr
   - xclim >=0.54, <0.55
   - xesmf >=0.7, <0.8.8  # FIXME: 0.8.8 currently creates segfaults on ReadTheDocs.
-  - zarr >=2.13
+  - zarr >=2.13, <3.0 #FIXME: xarray is compatible with zarr 3.0 from 2025.01.1, but we pin xarray below that version
   # To install from source
   - setuptools >=65.0.0
   - setuptools-scm >=8.0.0
diff --git a/pyproject.toml b/pyproject.toml
index 1c9cb828..0fa70967 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -67,7 +67,7 @@ dependencies = [
   "toolz",
   "xarray >=2023.11.0, !=2024.6.0, <2024.10.0", # FIXME: 2024.10.0 breaks rechunker with zarr
   "xclim >=0.54, <0.55",
-  "zarr >=2.13"
+  "zarr >=2.13, <3.0" # FIXME: xarray is compatible with zarr 3.0 from 2025.01.1, but we pin xarray below that version
 ]
 
 [project.optional-dependencies]
diff --git a/src/xscen/ensembles.py b/src/xscen/ensembles.py
index b21fa0f0..dd50faa0 100644
--- a/src/xscen/ensembles.py
+++ b/src/xscen/ensembles.py
@@ -17,7 +17,7 @@
 from .config import parse_config
 from .indicators import compute_indicators
 from .regrid import regrid_dataset
-from .spatial import subset
+from .spatial import get_grid_mapping, subset
 from .utils import clean_up, get_cat_attrs
 
 logger = logging.getLogger(__name__)
@@ -638,8 +638,16 @@ def _partition_from_list(datasets, partition_dim, subset_kw, regrid_kw):
     for ds in datasets:
         if subset_kw:
             ds = subset(ds, **subset_kw)
+            gridmap = get_grid_mapping(ds)
             ds = ds.drop_vars(
-                ["lat", "lon", "rlat", "rlon", "rotated_pole"], errors="ignore"
+                [
+                    ds.cf["longitude"],
+                    ds.cf["latitude"],
+                    ds.cf.axes["X"][0],
+                    ds.cf.axes["Y"][0],
+                    gridmap,
+                ],
+                errors="ignore",
             )
 
         if regrid_kw:
@@ -741,9 +749,18 @@ def _partition_from_catalog(
 
         if subset_kw:
             ds = subset(ds, **subset_kw)
+            gridmap = get_grid_mapping(ds)
             ds = ds.drop_vars(
-                ["lat", "lon", "rlat", "rlon", "rotated_pole"], errors="ignore"
+                [
+                    ds.cf["longitude"],
+                    ds.cf["latitude"],
+                    ds.cf.axes["X"][0],
+                    ds.cf.axes["Y"][0],
+                    gridmap,
+                ],
+                errors="ignore",
             )
+
         if regrid_kw:
             ds = regrid_dataset(ds, **regrid_kw)
 

From d9ab99158ff3bef7a386b3a61f310a5f5f00d6ed Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Wed, 15 Jan 2025 14:30:15 -0500
Subject: [PATCH 37/37] fix name

---
 src/xscen/ensembles.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/xscen/ensembles.py b/src/xscen/ensembles.py
index dd50faa0..15df069c 100644
--- a/src/xscen/ensembles.py
+++ b/src/xscen/ensembles.py
@@ -641,8 +641,8 @@ def _partition_from_list(datasets, partition_dim, subset_kw, regrid_kw):
             gridmap = get_grid_mapping(ds)
             ds = ds.drop_vars(
                 [
-                    ds.cf["longitude"],
-                    ds.cf["latitude"],
+                    ds.cf["longitude"].name,
+                    ds.cf["latitude"].name,
                     ds.cf.axes["X"][0],
                     ds.cf.axes["Y"][0],
                     gridmap,
@@ -752,8 +752,8 @@ def _partition_from_catalog(
             gridmap = get_grid_mapping(ds)
             ds = ds.drop_vars(
                 [
-                    ds.cf["longitude"],
-                    ds.cf["latitude"],
+                    ds.cf["longitude"].name,
+                    ds.cf["latitude"].name,
                     ds.cf.axes["X"][0],
                     ds.cf.axes["Y"][0],
                     gridmap,