From cc6944a6fc4efd1f585f13182da6817f6f7bd8f4 Mon Sep 17 00:00:00 2001 From: jlenh Date: Thu, 9 Jan 2025 16:48:39 +0100 Subject: [PATCH 1/8] Adding preprocessor to extract surface from a 3D atmospheric variable --- esmvalcore/preprocessor/__init__.py | 3 + .../preprocessor/_supplementary_vars.py | 23 ++-- esmvalcore/preprocessor/_volume.py | 113 ++++++++++++++++++ 3 files changed, 132 insertions(+), 7 deletions(-) diff --git a/esmvalcore/preprocessor/__init__.py b/esmvalcore/preprocessor/__init__.py index 2c956aa0ad..4a6cefaa0a 100644 --- a/esmvalcore/preprocessor/__init__.py +++ b/esmvalcore/preprocessor/__init__.py @@ -90,6 +90,7 @@ extract_transect, extract_volume, volume_statistics, + extract_surface_from_atm, ) from ._weighting import weighting_landsea_fraction @@ -122,6 +123,8 @@ "resample_time", # Level extraction "extract_levels", + # Extract surface + "extract_surface_from_atm", # Weighting "weighting_landsea_fraction", # Mask landsea (fx or Natural Earth) diff --git a/esmvalcore/preprocessor/_supplementary_vars.py b/esmvalcore/preprocessor/_supplementary_vars.py index 4096036674..cd0fe430cf 100644 --- a/esmvalcore/preprocessor/_supplementary_vars.py +++ b/esmvalcore/preprocessor/_supplementary_vars.py @@ -5,6 +5,7 @@ import iris.coords import iris.cube +import numpy as np logger = logging.getLogger(__name__) @@ -105,13 +106,21 @@ def add_ancillary_variable(cube, ancillary_cube): var_name=ancillary_cube.var_name, attributes=ancillary_cube.attributes, ) - start_dim = cube.ndim - len(ancillary_var.shape) - cube.add_ancillary_variable(ancillary_var, range(start_dim, cube.ndim)) - logger.debug( - "Added %s as ancillary variable in cube of %s.", - ancillary_cube.var_name, - cube.var_name, - ) + data_dims = [None] * ancillary_cube.ndim + for coord in ancillary_cube.coords(): + for ancillary_dim, cube_dim in zip( + ancillary_cube.coord_dims(coord), cube.coord_dims(coord)): + data_dims[ancillary_dim] = cube_dim + if None in data_dims: + none_dims = ", ".join( + str(i) for i, d in enumerate(data_dims) if d is None) + msg = ( + f"Failed to add {ancillary_cube} to {cube} as ancillary variable. " + f"No coordinate associated with ancillary cube dimensions {none_dims}" + ) + raise ValueError(msg) + + cube.add_ancillary_variable(ancillary_var, data_dims) def add_supplementary_variables( diff --git a/esmvalcore/preprocessor/_volume.py b/esmvalcore/preprocessor/_volume.py index 8d56d7b51a..07d8e8fe90 100644 --- a/esmvalcore/preprocessor/_volume.py +++ b/esmvalcore/preprocessor/_volume.py @@ -16,6 +16,7 @@ from iris.coords import AuxCoord, CellMeasure from iris.cube import Cube from iris.util import broadcast_to_shape +import stratify from ._shared import ( get_iris_aggregator, @@ -603,3 +604,115 @@ def extract_trajectory( points = [("latitude", latitudes), ("longitude", longitudes)] interpolated_cube = interpolate(cube, points) # Very slow! return interpolated_cube + + +def _get_first_unmasked_data(array, axis): + """Get first unmasked value of an array along an axis.""" + mask = da.ma.getmaskarray(array) + numerical_mask = da.where(mask, -1.0, 1.0) + indices_first_positive = da.argmax(numerical_mask, axis=axis) + indices = da.meshgrid( + *[da.arange(array.shape[i]) for i in range(array.ndim) if i != axis], + indexing="ij", + ) + indices = list(indices) + indices.insert(axis, indices_first_positive) + first_unmasked_data = np.array(array)[tuple(indices)] + return first_unmasked_data + + +@register_supplementaries( + variables=['ps'], + required='require_at_least_one' +) +def extract_surface_from_atm( + cube: Cube, + var: str +) -> Cube: + """Extract data from the lowest unmasked height level. + + Parameters + ---------- + cube: + Input cube. + var: + Variable to extract at the surface. + + Returns + ------- + iris.cube.Cube + Collapsed cube. + """ + # Declare required variables + # - 3D atmo variable to extract at the surface + # - surface_air_pressure (ps) + ps_cube = None + try: + ps_cube = cube.ancillary_variable("surface_air_pressure") + except iris.exceptions.AncillaryVariableNotFoundError: + logger.debug( + "Ancillary variable surface air pressure not found in cube. " + "Check file availability." + ) + if ps_cube: + var_cube = cube.extract( + iris.Constraint(name=var) + ) + + # Fill masked data if necessary (interpolation fails with masked data) + (z_axis,) = var_cube.coord_dims( + var_cube.coord(axis="Z", dim_coords=True) + ) + mask = da.ma.getmaskarray(var_cube.core_data()) + if mask.any(): + first_unmasked_data = _get_first_unmasked_data( + var_cube.core_data(), axis=z_axis + ) + dim_map = [dim for dim in range(var_cube.ndim) if dim != z_axis] + first_unmasked_data = iris.util.broadcast_to_shape( + first_unmasked_data, var_cube.shape, dim_map + ) + var_cube.data = da.where( + mask, first_unmasked_data, var_cube.core_data() + ) + + # Interpolation (not supported for dask arrays) + air_pressure_coord = var_cube.coord("air_pressure") + original_levels = iris.util.broadcast_to_shape( + air_pressure_coord.points, + var_cube.shape, + var_cube.coord_dims(air_pressure_coord), + ) + target_levels = np.expand_dims(ps_cube.data, axis=z_axis) + sfc_data = stratify.interpolate( + target_levels, + original_levels, + var_cube.data, + axis=z_axis, + interpolation="linear", + extrapolation="linear", + ) + sfc_data = np.squeeze(sfc_data, axis=z_axis) + + # Construct surface var cube + indices = [slice(None)] * var_cube.ndim + indices[z_axis] = 0 + var_cube = var_cube[tuple(indices)] + var_cube.data = sfc_data + if var_cube.coords("air_pressure"): + var_cube.remove_coord("air_pressure") + ps_coord = iris.coords.AuxCoord( + ps_cube.data, + var_name="plev", + standard_name="air_pressure", + long_name="pressure", + units=ps_cube.units, + ) + var_cube.add_aux_coord(ps_coord, np.arange(var_cube.ndim)) + var_cube.var_name = var + 's' + logger.debug("Extracting surface using surface air pressure.") + + else: + raise ValueError("Surface air pressure could not be found. Stopping.") + + return var_cube From 1e3d8bda1108865f3f2a011d245e8c542732011c Mon Sep 17 00:00:00 2001 From: jlenh Date: Tue, 21 Jan 2025 16:57:19 +0100 Subject: [PATCH 2/8] Remove unused package import --- esmvalcore/preprocessor/_supplementary_vars.py | 1 - 1 file changed, 1 deletion(-) diff --git a/esmvalcore/preprocessor/_supplementary_vars.py b/esmvalcore/preprocessor/_supplementary_vars.py index cd0fe430cf..22640c363a 100644 --- a/esmvalcore/preprocessor/_supplementary_vars.py +++ b/esmvalcore/preprocessor/_supplementary_vars.py @@ -5,7 +5,6 @@ import iris.coords import iris.cube -import numpy as np logger = logging.getLogger(__name__) From b20ecac44b6ec4e8b69d5fdb800a00b463ea9f46 Mon Sep 17 00:00:00 2001 From: jlenh Date: Wed, 22 Jan 2025 11:39:49 +0100 Subject: [PATCH 3/8] Solve minor Codacy issues --- esmvalcore/preprocessor/_supplementary_vars.py | 4 ++-- esmvalcore/preprocessor/_volume.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/esmvalcore/preprocessor/_supplementary_vars.py b/esmvalcore/preprocessor/_supplementary_vars.py index 22640c363a..61cfc10014 100644 --- a/esmvalcore/preprocessor/_supplementary_vars.py +++ b/esmvalcore/preprocessor/_supplementary_vars.py @@ -107,8 +107,8 @@ def add_ancillary_variable(cube, ancillary_cube): ) data_dims = [None] * ancillary_cube.ndim for coord in ancillary_cube.coords(): - for ancillary_dim, cube_dim in zip( - ancillary_cube.coord_dims(coord), cube.coord_dims(coord)): + for ancillary_dim, cube_dim in zip(ancillary_cube.coord_dims(coord), + cube.coord_dims(coord)): data_dims[ancillary_dim] = cube_dim if None in data_dims: none_dims = ", ".join( diff --git a/esmvalcore/preprocessor/_volume.py b/esmvalcore/preprocessor/_volume.py index 07d8e8fe90..f99507a75c 100644 --- a/esmvalcore/preprocessor/_volume.py +++ b/esmvalcore/preprocessor/_volume.py @@ -622,8 +622,8 @@ def _get_first_unmasked_data(array, axis): @register_supplementaries( - variables=['ps'], - required='require_at_least_one' + variables=['ps'], + required='require_at_least_one' ) def extract_surface_from_atm( cube: Cube, From fb4e507d6291674e6792cc4f1140087e2d98379d Mon Sep 17 00:00:00 2001 From: jlenh Date: Thu, 23 Jan 2025 14:37:42 +0100 Subject: [PATCH 4/8] Adding documentation in preprocessor.rst --- doc/recipe/preprocessor.rst | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index 37b1f8675b..13be702183 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -311,6 +311,7 @@ Preprocessor Variable s :ref:`weighting_landsea_fraction` [#f3]_ ``sftlf``, ``sftof`` land_area_fraction, sea_area_fraction :ref:`distance_metric` [#f5]_ ``areacella``, ``areacello`` cell_area :ref:`histogram` [#f5]_ ``areacella``, ``areacello`` cell_area +:ref:`extract_surface_from_atm` [#f3]_ ``ps`` surface_air_pressure ===================================================================== ============================== ===================================== .. [#f3] This preprocessor requires at least one of the mentioned supplementary @@ -2205,6 +2206,7 @@ The ``_volume.py`` module contains the following preprocessor functions: * ``extract_transect``: Extract data along a line of constant latitude or longitude. * ``extract_trajectory``: Extract data along a specified trajectory. +* ``extract_surface_from_atm``: Extract atmospheric data at the surface. ``extract_volume`` @@ -2388,6 +2390,21 @@ Note that this function uses the expensive ``interpolate`` method from See also :func:`esmvalcore.preprocessor.extract_trajectory`. +``extract_surface_from_atm`` +---------------------- + +This function extract data at the surface for an atmospheric variable. + +The function retrieves the first unmasked data points along the pressure levels +and returns the interpolated value at the corresponding surface pressure +given by the surface air pressure ``ps``. + +The required supplementary surface air pressure ``ps`` is attached to +the main dataset as described in :ref:`supplementary_variables`. + +See also :func:`esmvalcore.preprocessor.extract_surface_from_atm`. + + .. _cycles: Cycles From 71ab37e6ce5a3656e5ec167bd51864c706cce389 Mon Sep 17 00:00:00 2001 From: jlenh Date: Thu, 23 Jan 2025 15:12:50 +0100 Subject: [PATCH 5/8] Update preprocessor function --- esmvalcore/preprocessor/_volume.py | 39 ++++++++++++------------------ 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/esmvalcore/preprocessor/_volume.py b/esmvalcore/preprocessor/_volume.py index f99507a75c..1f70946358 100644 --- a/esmvalcore/preprocessor/_volume.py +++ b/esmvalcore/preprocessor/_volume.py @@ -627,16 +627,13 @@ def _get_first_unmasked_data(array, axis): ) def extract_surface_from_atm( cube: Cube, - var: str ) -> Cube: - """Extract data from the lowest unmasked height level. + """Extract surface from 3D atmospheric variable based on surface pressure. Parameters ---------- cube: Input cube. - var: - Variable to extract at the surface. Returns ------- @@ -655,39 +652,35 @@ def extract_surface_from_atm( "Check file availability." ) if ps_cube: - var_cube = cube.extract( - iris.Constraint(name=var) - ) - # Fill masked data if necessary (interpolation fails with masked data) - (z_axis,) = var_cube.coord_dims( - var_cube.coord(axis="Z", dim_coords=True) + (z_axis,) = cube.coord_dims( + cube.coord(axis="Z", dim_coords=True) ) - mask = da.ma.getmaskarray(var_cube.core_data()) + mask = da.ma.getmaskarray(cube.core_data()) if mask.any(): first_unmasked_data = _get_first_unmasked_data( - var_cube.core_data(), axis=z_axis + cube.core_data(), axis=z_axis ) - dim_map = [dim for dim in range(var_cube.ndim) if dim != z_axis] + dim_map = [dim for dim in range(cube.ndim) if dim != z_axis] first_unmasked_data = iris.util.broadcast_to_shape( - first_unmasked_data, var_cube.shape, dim_map + first_unmasked_data, cube.shape, dim_map ) - var_cube.data = da.where( - mask, first_unmasked_data, var_cube.core_data() + cube.data = da.where( + mask, first_unmasked_data, cube.core_data() ) # Interpolation (not supported for dask arrays) - air_pressure_coord = var_cube.coord("air_pressure") + air_pressure_coord = cube.coord("air_pressure") original_levels = iris.util.broadcast_to_shape( air_pressure_coord.points, - var_cube.shape, - var_cube.coord_dims(air_pressure_coord), + cube.shape, + cube.coord_dims(air_pressure_coord), ) target_levels = np.expand_dims(ps_cube.data, axis=z_axis) sfc_data = stratify.interpolate( target_levels, original_levels, - var_cube.data, + cube.data, axis=z_axis, interpolation="linear", extrapolation="linear", @@ -695,9 +688,9 @@ def extract_surface_from_atm( sfc_data = np.squeeze(sfc_data, axis=z_axis) # Construct surface var cube - indices = [slice(None)] * var_cube.ndim + indices = [slice(None)] * cube.ndim indices[z_axis] = 0 - var_cube = var_cube[tuple(indices)] + var_cube = cube[tuple(indices)] var_cube.data = sfc_data if var_cube.coords("air_pressure"): var_cube.remove_coord("air_pressure") @@ -709,7 +702,7 @@ def extract_surface_from_atm( units=ps_cube.units, ) var_cube.add_aux_coord(ps_coord, np.arange(var_cube.ndim)) - var_cube.var_name = var + 's' + var_cube.var_name = cube.var_name + 's' logger.debug("Extracting surface using surface air pressure.") else: From 8abeef13b2206dc625342e25bd85313e6309bafa Mon Sep 17 00:00:00 2001 From: jlenh Date: Tue, 4 Feb 2025 17:23:45 +0100 Subject: [PATCH 6/8] Fix doc entries for extract_surface_from_atm, add author citation mention --- CITATION.cff | 5 +++++ doc/recipe/preprocessor.rst | 6 +++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index c5a170d218..0234ca3b3c 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -223,6 +223,11 @@ authors: family-names: Garcia Perdomo given-names: Karen orcid: "https://orcid.org/0009-0004-2333-3358" + - + affiliation: "SMHI, Sweden" + family-names: Lenhardt + given-names: Julien + orcid: "https://orcid.org/0000-0002-9949-3989" cff-version: 1.2.0 date-released: 2024-11-26 diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index 13be702183..2fc052fbed 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -311,7 +311,7 @@ Preprocessor Variable s :ref:`weighting_landsea_fraction` [#f3]_ ``sftlf``, ``sftof`` land_area_fraction, sea_area_fraction :ref:`distance_metric` [#f5]_ ``areacella``, ``areacello`` cell_area :ref:`histogram` [#f5]_ ``areacella``, ``areacello`` cell_area -:ref:`extract_surface_from_atm` [#f3]_ ``ps`` surface_air_pressure +:ref:`extract_surface_from_atm` [#f3]_ ``ps`` surface_air_pressure ===================================================================== ============================== ===================================== .. [#f3] This preprocessor requires at least one of the mentioned supplementary @@ -2101,7 +2101,7 @@ See also :func:`esmvalcore.preprocessor.extract_location`. ``zonal_statistics`` -------------------- - +preprocessor.rst:315: WARNING: u The function calculates the zonal statistics by applying an operator along the longitude coordinate. @@ -2391,7 +2391,7 @@ See also :func:`esmvalcore.preprocessor.extract_trajectory`. ``extract_surface_from_atm`` ----------------------- +---------------------------- This function extract data at the surface for an atmospheric variable. From 326c84c3191c47c651781b6d7451a2e9086fb352 Mon Sep 17 00:00:00 2001 From: jlenh Date: Tue, 4 Feb 2025 17:43:33 +0100 Subject: [PATCH 7/8] Fix Codacy code analysis for expected object type in cube slicing --- esmvalcore/preprocessor/_volume.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/esmvalcore/preprocessor/_volume.py b/esmvalcore/preprocessor/_volume.py index 1f70946358..a76b4fa3a1 100644 --- a/esmvalcore/preprocessor/_volume.py +++ b/esmvalcore/preprocessor/_volume.py @@ -689,8 +689,9 @@ def extract_surface_from_atm( # Construct surface var cube indices = [slice(None)] * cube.ndim - indices[z_axis] = 0 + indices[z_axis] = slice(0, 1) var_cube = cube[tuple(indices)] + var_cube = iris.util.squeeze(var_cube) var_cube.data = sfc_data if var_cube.coords("air_pressure"): var_cube.remove_coord("air_pressure") From 0e0dd951cf75e1ae66c236263a084b545d0c757c Mon Sep 17 00:00:00 2001 From: jlenh Date: Tue, 4 Feb 2025 18:10:09 +0100 Subject: [PATCH 8/8] Add compatibility of add_ancillary_variable function with previous implementation and tests --- .../preprocessor/_supplementary_vars.py | 47 ++++++++++++++----- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/esmvalcore/preprocessor/_supplementary_vars.py b/esmvalcore/preprocessor/_supplementary_vars.py index 61cfc10014..b90e7df4db 100644 --- a/esmvalcore/preprocessor/_supplementary_vars.py +++ b/esmvalcore/preprocessor/_supplementary_vars.py @@ -105,22 +105,43 @@ def add_ancillary_variable(cube, ancillary_cube): var_name=ancillary_cube.var_name, attributes=ancillary_cube.attributes, ) - data_dims = [None] * ancillary_cube.ndim - for coord in ancillary_cube.coords(): - for ancillary_dim, cube_dim in zip(ancillary_cube.coord_dims(coord), - cube.coord_dims(coord)): - data_dims[ancillary_dim] = cube_dim - if None in data_dims: - none_dims = ", ".join( - str(i) for i, d in enumerate(data_dims) if d is None) - msg = ( - f"Failed to add {ancillary_cube} to {cube} as ancillary variable. " - f"No coordinate associated with ancillary cube dimensions {none_dims}" + if isinstance(ancillary_cube, iris.coords.AncillaryVariable): + start_dim = cube.ndim - len(ancillary_var.shape) + cube.add_ancillary_variable(ancillary_var, range(start_dim, cube.ndim)) + logger.debug( + "Added %s as ancillary variable in cube of %s.", + ancillary_cube.var_name, + cube.var_name, ) + elif isinstance(ancillary_cube, iris.cube.Cube): + data_dims = [None] * ancillary_cube.ndim + for coord in ancillary_cube.coords(): + for ancillary_dim, cube_dim in zip(ancillary_cube.coord_dims(coord), + cube.coord_dims(coord)): + data_dims[ancillary_dim] = cube_dim + if None in data_dims: + none_dims = ", ".join( + str(i) for i, d in enumerate(data_dims) if d is None) + msg = ( + f"Failed to add {ancillary_cube} to {cube} as ancillary var." + f"No coordinate associated with ancillary cube dimensions" + f"{none_dims}" + ) + raise ValueError(msg) + cube.add_ancillary_variable(ancillary_var, data_dims) + logger.debug( + "Added %s as ancillary variable in cube of %s.", + ancillary_cube.var_name, + cube.var_name, + ) + else: + msg = ( + f"Failed to add {ancillary_cube} to {cube} as ancillary var." + f"ancillary_cube should be either an iris.cube.Cube or an " + f"iris.coords.AncillaryVariable object." + ) raise ValueError(msg) - cube.add_ancillary_variable(ancillary_var, data_dims) - def add_supplementary_variables( cube: iris.cube.Cube,