-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathqme_apply.py
136 lines (103 loc) · 6.07 KB
/
qme_apply.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import numpy as np
import xarray as xr
from qme_utils import *
from qme_vars import *
def apply_mean_values(data, mean_values, start_year = 0, chunked = True):
    """
    Applies the generated mean values to the corresponding years of the given data set.
    Inputs:
        data - the data to apply the mean values to
        mean_values - the mean values generated by qme_train.find_means. Pass the argument with a leading minus sign to subtract the values instead
        start_year - the year (relative to the domain, with 0 being the first) at which to start applying the mean values. Every year before is unchanged
        chunked - whether or not time has been chunked continuously (i.e. -1)
    Returns:
        the adjusted data, with a "qme_account_trend_start_yr" attribute recording start_year
    """
    # Convert calendar years into 0-based offsets so they index the
    # "values" dimension of mean_values directly.
    year_values = data.time.dt.year.values
    min_year = year_values.min()
    year_values = year_values - min_year
    if chunked:
        def apply_temp(data_loc, mean_loc):
            # data_loc is the full time series at one point; add the yearly
            # mean to every timestep whose (relative) year >= start_year.
            adjusted = data_loc.copy()
            for i in range(len(data_loc)):
                year = int(year_values[i])
                if year >= start_year:
                    adjusted[i] += mean_loc[year]
            return adjusted
        output = xr.apply_ufunc(apply_temp, data, mean_values, input_core_dims = [["time"], ["values"]],
                                output_core_dims = [["time"]], vectorize = True, keep_attrs = "no_conflicts",
                                output_dtypes = [np.float32], dask = 'parallelized')
    else:
        # workaround so dask doesn't get stuck trying to predict generic object size
        # BUG FIX: this previously read `mdl.assign(...)`, but no `mdl` exists in
        # this function (NameError for chunked=False); the year offsets belong to `data`.
        data_year_values = data.assign(year_values = ("time", year_values)).year_values
        def apply_temp(data_loc, mean_loc, year_value):
            # BUG FIX: honour start_year here too, matching the chunked branch
            # and the documented contract that earlier years are unchanged.
            if year_value >= start_year:
                return data_loc + mean_loc[year_value]
            return data_loc
        output = xr.apply_ufunc(apply_temp, data, mean_values, data_year_values, input_core_dims = [[], ["values"], []],
                                output_core_dims = [[]], vectorize = True, keep_attrs = "no_conflicts",
                                output_dtypes = [np.float32], dask = 'parallelized')
    return output.assign_attrs({"qme_account_trend_start_yr": start_year})
def apply_mean_value_yr(data, mean_values, start_year = 0):
    """
    Applies the generated mean values to a single year's data.
    Inputs:
        data - the data to apply the mean values to, assumed to be a single year and not required to be chunked in a special way.
        mean_values - the mean values generated by qme_train.find_means. Pass the argument with a leading minus sign to subtract the values instead
        start_year - the year (NOT relative) at which to start applying the mean values. Every year before is unchanged
    Returns:
        the (possibly adjusted) data with a "qme_account_trend_start_yr" attribute recording start_year
    """
    # All timesteps are assumed to share one year, so the first is representative.
    current_year = data.time.dt.year.values[0]
    if current_year < start_year:
        # Before the trend window: pass the data through unchanged, but still
        # carry across the metadata from mean_values.
        result = data.assign_attrs(mean_values.attrs)
    else:
        offset = mean_values.sel(values = current_year, drop = True)
        result = xr.apply_ufunc(np.add, data, offset, input_core_dims = [[], []],
                                output_core_dims = [[]], keep_attrs = "no_conflicts",
                                output_dtypes = [np.float32], dask = 'parallelized')
    return result.assign_attrs({"qme_account_trend_start_yr": start_year})
def apply_bc(var, mdl, bc, chunked = True):
    """
    Applies the bias correction factors to the model data.
    Inputs:
        var - the variable being corrected
        mdl - the model data
        bc - the bias correction factors
        chunked - whether or not time has been chunked continuously (i.e. -1)
    Returns:
        the bias-corrected model data; a "qme_account_trend" attribute of
        "Disabled" is added when no trend accounting attribute is present
    """
    var = get_qme_var(var)
    reso = var.bin_count()
    # Months shifted to 0-based so they index the bc "month" dimension.
    month_values = mdl.time.dt.month.values - 1
    # this is kind of a messy way to approach a new version of the function, but I wanted to ensure the previous use case,
    # where the data was assumed to be chunked with time = -1, could still function like it did before - I am worried that
    # the new version may not be as fast if the data is chunked
    if chunked:
        def correct_series(mdl_loc, bc_loc):
            # Full time series at one grid point: map each value into bin
            # space, add the (month, bin) correction, then map back.
            scaled = var.scale_data(var.limit_data(mdl_loc))
            # special rounding function used to correct Numpy rounding towards evens - see comments in qme_utils
            bins = round_half_up(scaled)
            for i, b in enumerate(bins):
                # check for out of bounds in case of funky numbers when dealing with NaNs
                if 0 <= b < reso:
                    scaled[i] += bc_loc[month_values[i]][b]
            return var.unscale_data(scaled)
        output = xr.apply_ufunc(correct_series, mdl, bc,
                                input_core_dims = [["time"], ["month", "values"]],
                                output_core_dims = [["time"]], vectorize = True,
                                keep_attrs = "no_conflicts",
                                output_dtypes = [np.float32], dask = 'parallelized')
    else:
        # workaround so dask doesn't get stuck trying to predict generic object size
        mdl_month_values = mdl.assign(month_values = ("time", month_values)).month_values
        def correct_point(mdl_loc, bc_loc, month_value):
            # Scalar path: NaNs pass straight through untouched.
            if np.isnan(mdl_loc):
                return mdl_loc
            scaled = var.scale_data(var.limit_data(mdl_loc))
            # special rounding function used to correct Numpy rounding towards evens - see comments in qme_utils
            b = round_half_up(scaled)
            if 0 <= b < reso:
                scaled += bc_loc[month_value][b]
            return var.unscale_data(scaled)
        output = xr.apply_ufunc(correct_point, mdl, bc, mdl_month_values,
                                input_core_dims = [[], ["month", "values"], []],
                                output_core_dims = [[]], vectorize = True,
                                keep_attrs = "no_conflicts",
                                output_dtypes = [np.float32], dask = 'parallelized')
    if "qme_account_trend" not in output.attrs:
        output = output.assign_attrs({"qme_account_trend": "Disabled"})
    return output