-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathE3SM_preproc_limvar.py
203 lines (158 loc) · 6.95 KB
/
E3SM_preproc_limvar.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
import netCDF4 as nc
import os
import datetime
import pandas as pd
import seaborn as sns
import numpy as np
import xarray as xr
from numpy import ma
from tqdm import tqdm
from time import time
import glob
# Variables of interest (one output CSV set is produced per QOI).
QOIs = ['AEROD_v','FLNT','FSDSC','TREFHT','T050']
# QOI = "AOD" # might need to rewrite the clipping function if we do the full vertical temperature profile or SO2, but should be fine for the 2D variables
# do min-max normalization of each QOI before the zonal means? 0 = no, 1 = yes
dn = 0 # 0 for no, 1 for yes
# use mass ensembles? 0 = no, 1 = yes
# NOTE(review): `massens` is not referenced anywhere in this script — confirm
# whether it is dead config or consumed elsewhere.
massens = 1 # 0 for no, 1 for yes
try:
    import clif
except ImportError:
    # clif is not on the default path on this machine; fall back to the
    # known shared install location.  Catching ImportError specifically
    # (rather than a bare `except:`) so Ctrl-C and real errors propagate.
    import sys
    sys.path.append("/gpfs/merbrow/CLDERA/Fingerprinting/") # or wherever you have installed clif
    # from eof import fingerprints
    import clif
def getZonalmeans(weights, dstack, ubnd, lbnd):
    """Area-weighted mean of `dstack` over one latitude band.

    Parameters
    ----------
    weights : xarray.DataArray
        Grid-cell area weights on the same lat/lon grid as `dstack`.
    dstack : xarray.DataArray
        Data to average; must carry 'lat' and 'lon' dimensions.
    ubnd, lbnd : float
        Upper and lower latitude bounds of the band, in degrees
        (pass 90, -90 for the full globe).

    Returns
    -------
    xarray.DataArray
        `dstack` with 'lat' and 'lon' marginalized out via a weighted
        mean restricted to the [lbnd, ubnd] latitude band.
    """
    # One clip transform serves both the weights and the data — the
    # previous version constructed two identical ClipTransform objects.
    clipT = clif.preprocessing.ClipTransform(dims=["lat"], bounds=[(lbnd, ubnd)])
    band_weights = clipT.fit_transform(weights)
    band_data = clipT.fit_transform(dstack)
    # Weighted reduction over the remaining lat/lon grid of the band.
    intoutT = clif.preprocessing.MarginalizeOutTransform(
        dims=["lat", "lon"], lat_lon_weights=band_weights
    )
    return intoutT.fit_transform(band_data)
# # import limvar data
# monthly
# Root of the limvar historical ensemble output tree.
SOURCE_DIR = "/gpfs/cldera/data/e3sm/e3sm-cldera/CLDERA-historical/limvar/"
# Example member directory name: v2.LR.WCYCL20TR.pmcpu.limvar.ens2
# Relative path from a member directory to its daily time-series files.
TO_DATA = "/post/atm/180x360_aave/ts/daily/1yr/"
# Sorted list of ensemble-member directories.
ens_members = sorted(glob.glob(SOURCE_DIR + "*limvar*ens*"))
# Will hold the member-stacked dataset for the QOI being processed.
full_dat = {'comb': {}}
# # Loop over QOIs
def _combine_members(qoi):
    """Open one QOI across every ensemble member and stack along a new
    'member' dimension.  Returns (combined dataset, area weights).
    The weights come from the last member read — assumes all members
    share the same grid (TODO confirm).
    """
    combined = None
    weights = None
    for member_dir in ens_members:
        pattern = member_dir + TO_DATA + qoi + "*.nc"
        member_data = xr.open_mfdataset(pattern)
        if combined is None:
            combined = member_data
        else:
            combined = xr.concat([combined, member_data], dim='member')
        weights = member_data.area
    return combined, weights

def _write_region_csv(col_prefix, file_prefix, daily_data, qoi):
    """Write one latitude band's daily means (one column per ensemble
    member, indexed by position along the leading 'member' axis) to
    '<file_prefix>_limvar_<qoi>_dailymean.csv'.
    """
    out = pd.DataFrame()
    for m, ens in enumerate(tqdm(ens_members)):
        # Ensemble tag from the dot-separated directory name,
        # e.g. '.../v2.LR.WCYCL20TR.pmcpu.limvar.ens2' -> trailing pieces.
        ensname = ''.join(ens.split('.')[5:])
        out[col_prefix + '_' + ensname + '_' + qoi] = daily_data[m, ].values
    out.to_csv(file_prefix + '_limvar_' + qoi + '_dailymean.csv')

for QOI in QOIs:
    # Build the combined member-stacked dataset for this QOI.
    full_dat['comb'], wgts = _combine_members(QOI)
    if dn == 1:
        # min-max normalize over the whole combined record
        qmin = full_dat['comb'][QOI].min().values
        qmax = full_dat['comb'][QOI].max().values
        Qnorm = (full_dat['comb'][QOI] - qmin) / (qmax - qmin)
    else:
        # no normalization (else-branch so Qnorm is always reassigned)
        Qnorm = full_dat['comb'][QOI]
    # Latitude bands: (column prefix, file prefix, upper bound, lower bound).
    # Column and file prefixes differ for some bands (kept for
    # backward-compatible output); 'Tropcial' column typo fixed here.
    bands = [
        ('Globe',    'Global',   90.0,  -90.0),
        ('PolN',     'PolarN',   90.0,   66.5),
        ('PolS',     'PolarS',  -66.5,  -90.0),
        ('SubtropN', 'SubtropN', 35.0,   23.5),
        ('SubtropS', 'SubtropS', -23.5, -35.0),
        ('TempN',    'TempN',    66.5,   35.0),
        ('TempS',    'TempS',   -35.0,  -66.5),
        ('Tropical', 'Tropical', 23.5,  -23.5),
    ]
    for col_prefix, file_prefix, ubnd, lbnd in bands:
        # Area-weighted zonal mean for the band, then daily means over
        # all ensemble members, then dump to CSV.
        zonal = getZonalmeans(wgts, Qnorm, ubnd, lbnd)
        daily = zonal.resample(time="1D").mean()
        _write_region_csv(col_prefix, file_prefix, daily, QOI)