-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathqme_apply.py
136 lines (103 loc) · 6.07 KB
/
qme_apply.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import numpy as np
import xarray as xr
from qme_utils import *
from qme_vars import *
def apply_mean_values(data, mean_values, start_year = 0, chunked = True):
    """
    Applies the generated mean values to the corresponding years of the given data set.
    Inputs:
        data - the data to apply the mean values to
        mean_values - the mean values generated by qme_train.find_means. Pass the argument with a leading minus sign to subtract the values instead
        start_year - the year (relative to the domain, with 0 being the first) at which to start applying the mean values. Every year before is unchanged
        chunked - whether or not time has been chunked continuously (i.e. -1)
    Returns:
        the adjusted data, with a "qme_account_trend_start_yr" attribute recording start_year
    """
    # Convert calendar years into 0-based offsets so they index the
    # "values" dimension of mean_values directly.
    year_values = data.time.dt.year.values
    min_year = year_values.min()
    year_values = year_values - min_year
    if chunked:
        def apply_temp(data_loc, mean_loc):
            # data_loc is the full time series at one point; add the yearly
            # mean to every timestep whose (relative) year >= start_year.
            adjusted = data_loc.copy()
            for i in range(len(data_loc)):
                year = int(year_values[i])
                if year >= start_year:
                    adjusted[i] += mean_loc[year]
            return adjusted
        output = xr.apply_ufunc(apply_temp, data, mean_values, input_core_dims = [["time"], ["values"]],
                                output_core_dims = [["time"]], vectorize = True, keep_attrs = "no_conflicts",
                                output_dtypes = [np.float32], dask = 'parallelized')
    else:
        # workaround so dask doesn't get stuck trying to predict generic object size
        # BUG FIX: this previously read `mdl.assign(...)`, but no `mdl` exists in
        # this function (NameError for chunked=False); the year offsets belong to `data`.
        data_year_values = data.assign(year_values = ("time", year_values)).year_values
        def apply_temp(data_loc, mean_loc, year_value):
            # BUG FIX: honour start_year here too, matching the chunked branch
            # and the documented contract that earlier years are unchanged.
            if year_value >= start_year:
                return data_loc + mean_loc[year_value]
            return data_loc
        output = xr.apply_ufunc(apply_temp, data, mean_values, data_year_values, input_core_dims = [[], ["values"], []],
                                output_core_dims = [[]], vectorize = True, keep_attrs = "no_conflicts",
                                output_dtypes = [np.float32], dask = 'parallelized')
    return output.assign_attrs({"qme_account_trend_start_yr": start_year})
def apply_mean_value_yr(data, mean_values, start_year = 0):
    """
    Applies the generated mean values to a single year's data.
    Inputs:
        data - the data to apply the mean values to, assumed to be a single year and not required to be chunked in a special way.
        mean_values - the mean values generated by qme_train.find_means. Pass the argument with a leading minus sign to subtract the values instead
        start_year - the year (NOT relative) at which to start applying the mean values. Every year before is unchanged
    Returns:
        the (possibly adjusted) data with a "qme_account_trend_start_yr" attribute recording start_year
    """
    # All timesteps are assumed to share one year, so the first is representative.
    current_year = data.time.dt.year.values[0]
    if current_year < start_year:
        # Before the trend window: pass the data through unchanged, but still
        # carry across the metadata from mean_values.
        result = data.assign_attrs(mean_values.attrs)
    else:
        offset = mean_values.sel(values = current_year, drop = True)
        result = xr.apply_ufunc(np.add, data, offset, input_core_dims = [[], []],
                                output_core_dims = [[]], keep_attrs = "no_conflicts",
                                output_dtypes = [np.float32], dask = 'parallelized')
    return result.assign_attrs({"qme_account_trend_start_yr": start_year})
def apply_bc(var, mdl, bc, chunked = True):
    """
    Applies the bias correction factors to the model data.
    Inputs:
        var - the variable being corrected
        mdl - the model data
        bc - the bias correction factors
        chunked - whether or not time has been chunked continuously (i.e. -1)
    Returns:
        the bias-corrected model data; a "qme_account_trend" attribute of
        "Disabled" is added when no trend accounting attribute is present
    """
    var = get_qme_var(var)
    reso = var.bin_count()
    # Months shifted to 0-based so they index the bc "month" dimension.
    month_values = mdl.time.dt.month.values - 1
    # this is kind of a messy way to approach a new version of the function, but I wanted to ensure the previous use case,
    # where the data was assumed to be chunked with time = -1, could still function like it did before - I am worried that
    # the new version may not be as fast if the data is chunked
    if chunked:
        def correct_series(mdl_loc, bc_loc):
            # Full time series at one grid point: map each value into bin
            # space, add the (month, bin) correction, then map back.
            scaled = var.scale_data(var.limit_data(mdl_loc))
            # special rounding function used to correct Numpy rounding towards evens - see comments in qme_utils
            bins = round_half_up(scaled)
            for i, b in enumerate(bins):
                # check for out of bounds in case of funky numbers when dealing with NaNs
                if 0 <= b < reso:
                    scaled[i] += bc_loc[month_values[i]][b]
            return var.unscale_data(scaled)
        output = xr.apply_ufunc(correct_series, mdl, bc,
                                input_core_dims = [["time"], ["month", "values"]],
                                output_core_dims = [["time"]], vectorize = True,
                                keep_attrs = "no_conflicts",
                                output_dtypes = [np.float32], dask = 'parallelized')
    else:
        # workaround so dask doesn't get stuck trying to predict generic object size
        mdl_month_values = mdl.assign(month_values = ("time", month_values)).month_values
        def correct_point(mdl_loc, bc_loc, month_value):
            # Scalar path: NaNs pass straight through untouched.
            if np.isnan(mdl_loc):
                return mdl_loc
            scaled = var.scale_data(var.limit_data(mdl_loc))
            # special rounding function used to correct Numpy rounding towards evens - see comments in qme_utils
            b = round_half_up(scaled)
            if 0 <= b < reso:
                scaled += bc_loc[month_value][b]
            return var.unscale_data(scaled)
        output = xr.apply_ufunc(correct_point, mdl, bc, mdl_month_values,
                                input_core_dims = [[], ["month", "values"], []],
                                output_core_dims = [[]], vectorize = True,
                                keep_attrs = "no_conflicts",
                                output_dtypes = [np.float32], dask = 'parallelized')
    if "qme_account_trend" not in output.attrs:
        output = output.assign_attrs({"qme_account_trend": "Disabled"})
    return output