Skip to content

Commit

Permalink
more mods for time series processing
Browse files Browse the repository at this point in the history
  • Loading branch information
BrianOBlanton committed Nov 29, 2023
1 parent 57e63bb commit bec3a62
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 16 deletions.
23 changes: 19 additions & 4 deletions adda/adda.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,10 @@ def main(args):
knockout_dict=None, fort63_style=fort63_style )

# Fetch best resolution and no resampling
data_adc,meta_adc=rpl.fetch_station_product(urls, return_sample_min=args.return_sample_min, fort63_style=fort63_style )
data_adc,meta_adc=rpl.fetch_station_product(urls, return_sample_min=args.return_sample_min, fort63_style=fort63_style )

print(data_adc)
sys.exit(1)

# Revert Harvester filling of nans to -99999 back to nans
data_adc.replace('-99999',np.nan,inplace=True)
Expand Down Expand Up @@ -243,14 +246,26 @@ def main(args):
##
## compute errors
##
comp_err = compute_error_field.compute_error_field(data_obs_smoothed, data_adc, meta_obs, n_pad=0) # All default params

comp_err = compute_error_field.compute_error_field(data_obs_smoothed, data_adc, meta_obs, n_pad=0)

#comp_err.adc.to_csv('check_adc_ispre.csv',float_format='%.3f')
comp_err._intersection_stations()
#comp_err.adc.to_csv('check_adc_ispo.csv',float_format='%.3f')

#comp_err.adc.to_csv('check_adc_itpre.csv',float_format='%.3f')
comp_err._intersection_times()
#comp_err.adc.to_csv('check_adc_itpo.csv',float_format='%.3f')

#comp_err.adc.to_csv('check_adc_ttpre.csv',float_format='%.3f')
comp_err._tidal_transform_data()
#comp_err.adc.to_csv('check_adc_ttpo.csv',float_format='%.3f')

#print(f'COMPERR input times {obs_starttime} and {obs_endtime}')
comp_err._apply_time_bounds((obs_starttime,obs_endtime)) # redundant but here for illustration
comp_err._compute_and_average_errors()
comp_err.obs.to_csv('check_po5.csv',float_format='%.3f')
#comp_err.adc.to_csv('check_adc_po5.csv',float_format='%.3f')
#comp_err.obs.to_csv('check_obs_po5.csv',float_format='%.3f')

# Set up IO env
#utilities.log.info("Product Level Working in {}.".format(os.getcwd()))
Expand Down Expand Up @@ -320,7 +335,7 @@ def main(args):
adc_plot_grid = interpolate_scaled_offset_field.generic_grid()
df_plot_transformed = interpolate_scaled_offset_field.interpolation_model_transform(adc_plot_grid, model=model, input_grid_type='grid',pathpoly=pathpoly)

# Write out the model for posterity
# Write out the model
newfilename = io_utilities.get_full_filename_with_subdirectory_prepended(rootdir, iosubdir, 'interpolate_linear_model.h5')
try:
joblib.dump(model, newfilename)
Expand Down
1 change: 1 addition & 0 deletions harvester/get_adcirc_stations.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ def fetch_station_product(self, urls, return_sample_min=0, fort63_style=False, v
utilities.log.info('Removing knockouts from acquired ADCIRC data')
except Exception as e:
utilities.log.error('TDS: Broad failure {}'.format(e))

return data, meta

## Several scenarios may be used for this example main. For now we choose the ADDA style execution
Expand Down
4 changes: 2 additions & 2 deletions harvester/get_observations_stations.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def remove_missingness_stations(self, df_in, max_nan_percentage_cutoff=100)-> pd

def fetch_station_product(self, time_range, return_sample_min=0, interval=None):
"""
Fetch the desire data. The main information is part of the class (sources, products, etc.). However, one must still specify the return_sample_minutes
Fetch the data. The main information is part of the class (sources, products, etc.). However, one must still specify the return_sample_minutes
to sample the data. This harvesting code will read the raw data for the selected product. Perform an interpolation (it doesn't pad nans), and then
resample the data at the desired freq (in minutes)
Expand All @@ -209,7 +209,7 @@ def fetch_station_product(self, time_range, return_sample_min=0, interval=None):
"""
starttime = time_range[0]
endtime=time_range[1]
utilities.log.info(f'Retrieving data for the time range {starttime}-{endtime}')
utilities.log.info(f'Retrieving data for the time range {starttime} to {endtime}')
template = "An exception of type {0} occurred."

interval=interval
Expand Down
16 changes: 10 additions & 6 deletions processing/compute_error_field.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,19 @@ def interpolate_and_sample( diurnal_range, df_in )-> pd.DataFrame:
df_out: DataFrame. (datetime64 x station) on a semidiurnal time sequence
"""
df_in.to_csv('check_in.csv',float_format='%.3f')
#df_in.to_csv('check_in.csv',float_format='%.3f')
df_x = pd.DataFrame(diurnal_range, columns=['TIME'])
df_x.set_index('TIME',inplace=True)
df_out = df_in.append(df_x) # This merges the two indexes
df_out = df_out.loc[~df_out.index.duplicated(keep='first')] #Keep first because real data is first on the list
df_out.sort_index(inplace=True) # this is sorted with intervening nans that need to be imputed
df_out.to_csv('check_pre.csv',float_format='%.3f')
#df_out.to_csv('check_pre.csv',float_format='%.3f')
#df_out_int = df_out.interpolate(method='linear')
df_out_int = df_out.interpolate(method='values')
df_out_int.to_csv('check_po1.csv',float_format='%.3f')
#df_out_int.to_csv('check_po1.csv',float_format='%.3f')
df_out_int = df_out_int.loc[diurnal_range]
df_out_int.index.name='TIME'
df_out_int.to_csv('check_po2.csv',float_format='%.3f')
#df_out_int.to_csv('check_po2.csv',float_format='%.3f')
return df_out_int

def combine_data_to_dict(in_adc,in_obs,in_err, product='WL')->dict:
Expand Down Expand Up @@ -225,7 +225,9 @@ def _tidal_transform_data(self):
time_step = int(3600*n_hours_per_tide/n_hours_per_period) # Always scale to an hour (3600s)
diurnal_range = pd.date_range(timein, timeout+np.timedelta64(n_pad,'h'), freq=str(time_step)+'S').to_list()

#self.adc.to_csv('check_adc_po3a.csv',float_format='%.3f')
self.adc = interpolate_and_sample( diurnal_range, self.adc )
#self.adc.to_csv('check_adc_po3b.csv',float_format='%.3f')
self.obs = interpolate_and_sample( diurnal_range, self.obs )

# Truncate the time ranges to only retain full tidal periods. Update data inplace.
Expand All @@ -245,7 +247,8 @@ def _tidal_transform_data(self):
else:
utilities.log.debug('Tidal interpolation: Not enough data to perform a tidal interpolation. Skipping.')

self.obs.to_csv('check_po3.csv',float_format='%.3f')
#self.adc.to_csv('check_adc_po3.csv',float_format='%.3f')
#self.obs.to_csv('check_obs_po3.csv',float_format='%.3f')

# MIGHT need something special for Hurricanes ?
def _apply_time_bounds(self, time_range):
Expand Down Expand Up @@ -277,7 +280,8 @@ def _apply_time_bounds(self, time_range):
self._intersection_times() # Should not be needed here but just in case
utilities.log.debug('New adc time lo {}, New adc time hi {}'.format( min(self.adc.index).strftime(dformat), max(self.adc.index.strftime(dformat))))

self.obs.to_csv('check_po4.csv',float_format='%.3f')
#self.adc.to_csv('check_adc_po4.csv',float_format='%.3f')
#self.obs.to_csv('check_obs_po4.csv',float_format='%.3f')

# Statistical stuff
def _remove_station_outliers(self):
Expand Down
14 changes: 10 additions & 4 deletions test/test_adda_in_floodwater.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
#!/bin/bash

fw_dir="/scratch/bblanton/ecflow_output/dev/ec95d_dev_da_idalia/"

#fw_dir="/scratch/bblanton/ecflow_output/dev/ec95d_dev_da_idalia/"
#fw_dir="/home/bblanton/storm_surge/ec95d_dev_da_idalia.test1"
fw_dir="/scratch/sbunya/ecflow_output/ncsc123_gfs_da/"

ADCIRC_DA_CONFIG_FILE="/home/bblanton/ecfc/da/data_assimilation.yaml"
#ADCIRC_DA_CONFIG_FILE="/home/sbunya/ecflow_configs/da/data_assimilation.yaml"

#...Retrieve config variables from yaml
venv=`cat $ADCIRC_DA_CONFIG_FILE | shyaml get-value venv`
Expand All @@ -16,10 +19,13 @@ echo "adda=$path2adda/adda.py"

RUN="conda run --no-capture-output"

current_time="--current_time advisory_017"
#current_time="--current_time advisory_017"
current_time="--current_time 20231118/hour_12"

grid="--gridname ec95d"
meteo="--met nhc"
#grid="--gridname ec95d"
grid="--gridname NCSC_SAB_V1.23"
#meteo="--met nhc"
meteo="--met gfs"
com="$RUN -n $venv $path2adda/adda.py $current_time $meteo --da_config_file $ADCIRC_DA_CONFIG_FILE $grid --fw_arch_dir $fw_dir"
echo $com
$com
Expand Down

0 comments on commit bec3a62

Please sign in to comment.