From e6a1b85b6b4d1172ac6a75939001a9d486f3dedb Mon Sep 17 00:00:00 2001 From: Neptune-Meister <140385333+JurgenZach-NOAA@users.noreply.github.com> Date: Thu, 25 Jul 2024 09:00:49 -0700 Subject: [PATCH 01/29] Deleted file with obsolete routines (#807) --- src/troute-routing/troute/routing/utils.py | 13 ------------- 1 file changed, 13 deletions(-) delete mode 100644 src/troute-routing/troute/routing/utils.py diff --git a/src/troute-routing/troute/routing/utils.py b/src/troute-routing/troute/routing/utils.py deleted file mode 100644 index 0c5a8e10b..000000000 --- a/src/troute-routing/troute/routing/utils.py +++ /dev/null @@ -1,13 +0,0 @@ -import pandas as pd -import numpy as np - - -def writetoFile(file, writeString): - file.write(writeString) - file.write("\n") - - -def constant_qlats(index_dataset, nsteps, qlat): - q = np.full((len(index_dataset.index), nsteps), qlat, dtype="float32") - ql = pd.DataFrame(q, index=index_dataset.index, columns=range(nsteps)) - return ql From 33017508ce2a70e4ce609ee6eb4e453cfbb07c9d Mon Sep 17 00:00:00 2001 From: karnesh Date: Tue, 5 Mar 2024 11:42:33 -0500 Subject: [PATCH 02/29] Added functionality to write flow, velocity and depth to parquet --- .../troute/config/output_parameters.py | 16 +- src/troute-nwm/src/nwm_routing/input.py | 19 ++ src/troute-nwm/src/nwm_routing/output.py | 213 ++++++++++++++---- test/LowerColorado_TX/test_AnA_V4_NHD.yaml | 5 +- 4 files changed, 200 insertions(+), 53 deletions(-) diff --git a/src/troute-config/troute/config/output_parameters.py b/src/troute-config/troute/config/output_parameters.py index 61119c109..9c8ce421b 100644 --- a/src/troute-config/troute/config/output_parameters.py +++ b/src/troute-config/troute/config/output_parameters.py @@ -5,13 +5,16 @@ from typing_extensions import Annotated, Literal from .types import FilePath, DirectoryPath -streamOutput_allowedTypes = Literal['.csv', '.nc', '.pkl'] +streamOutput_allowedTypes = Literal['.csv', '.nc', '.pkl', '.parquet'] + class OutputParameters(BaseModel): chanobs_output: Optional["ChanobsOutput"] = None # NOTE: this appears to be optional. See nwm_routing/input.py ~:477 csv_output: Optional["CsvOutput"] = None # NOTE: this appears to be optional. See nwm_routing/input.py ~:496 + parquet_output: Optional["ParquetOutput"] = None + # NOTE: this appears to be optional. See nwm_routing/input.py ~:563 chrtout_output: Optional["ChrtoutOutput"] = None lite_restart: Optional["LiteRestart"] = None # NOTE: this appears to be optional. See nwm_routing/input.py ~:520 @@ -46,7 +49,13 @@ class CsvOutput(BaseModel): csv_output_segments: Optional[List[str]] = None -class ChrtoutOutput(BaseModel): +class ParquetOutput(BaseModel, extra='forbid'): + # NOTE: required if writing results to parquet + parquet_output_folder: Optional[DirectoryPath] = None + parquet_output_segments: Optional[List[str]] = None + + +class ChrtoutOutput(BaseModel, extra='forbid'): # NOTE: mandatory if writing results to CHRTOUT. 
wrf_hydro_channel_output_source_folder: Optional[DirectoryPath] = None @@ -79,6 +88,7 @@ class WrfHydroParityCheck(BaseModel): class ParityCheckCompareFileSet(BaseModel): validation_files: List[FilePath] + class StreamOutput(BaseModel): # NOTE: required if writing StreamOutput files stream_output_directory: Optional[DirectoryPath] = None @@ -93,7 +103,7 @@ def validate_stream_output_internal_frequency(cls, value, values): if values.get('stream_output_time') != -1 and value / 60 > values['stream_output_time']: raise ValueError("stream_output_internal_frequency should be less than or equal to stream_output_time in minutes.") return value - + OutputParameters.update_forward_refs() WrfHydroParityCheck.update_forward_refs() diff --git a/src/troute-nwm/src/nwm_routing/input.py b/src/troute-nwm/src/nwm_routing/input.py index 8a5f55f24..3e1ba4170 100644 --- a/src/troute-nwm/src/nwm_routing/input.py +++ b/src/troute-nwm/src/nwm_routing/input.py @@ -559,6 +559,25 @@ def check_inputs( else: LOG.debug('No csv output folder specified. Results will NOT be written to csv') + + if output_parameters.get('parquet_output', None): + + if output_parameters['parquet_output'].get('parquet_output_folder', None): + + _does_path_exist( + 'parquet_output_folder', + output_parameters['parquet_output']['parquet_output_folder'] + ) + + output_segs = output_parameters['parquet_output'].get('parquet_output_segments', None) + if not output_segs: + LOG.debug('No parquet output segments specified. Results for all domain segments will be written') + + else: + LOG.debug('No parquet output folder specified. Results will NOT be written in parquet format') + + else: + LOG.debug('No parquet output folder specified. Results will NOT be written in parquet format') if output_parameters.get('chrtout_output', None): diff --git a/src/troute-nwm/src/nwm_routing/output.py b/src/troute-nwm/src/nwm_routing/output.py index 2404e7683..80a1a4e39 100644 --- a/src/troute-nwm/src/nwm_routing/output.py +++ b/src/troute-nwm/src/nwm_routing/output.py @@ -7,9 +7,9 @@ from build_tests import parity_check import logging - LOG = logging.getLogger('') + def _reindex_lake_to_link_id(target_df, crosswalk): ''' Utility function for replacing lake ID index values @@ -29,23 +29,24 @@ def _reindex_lake_to_link_id(target_df, crosswalk): # evaluate intersection of lake ids and target_df index values # i.e. what are the index positions of lake ids that need replacing? 
- lakeids = np.fromiter(crosswalk.keys(), dtype = int) + lakeids = np.fromiter(crosswalk.keys(), dtype=int) idxs = target_df.index.to_numpy() lake_index_intersect = np.intersect1d( - idxs, - lakeids, - return_indices = True + idxs, + lakeids, + return_indices=True ) # replace lake ids with link IDs in the target_df index array - linkids = np.fromiter(crosswalk.values(), dtype = int) + linkids = np.fromiter(crosswalk.values(), dtype=int) idxs[lake_index_intersect[1]] = linkids[lake_index_intersect[2]] # (re) set the target_df index - target_df.set_index(idxs, inplace = True) - + target_df.set_index(idxs, inplace=True) + return target_df +<<<<<<< HEAD def nwm_output_generator( run, results, @@ -66,7 +67,27 @@ def nwm_output_generator( link_lake_crosswalk = None, logFileName='NONE' ): +======= +>>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) +def nwm_output_generator( + run, + results, + supernetwork_parameters, + output_parameters, + parity_parameters, + restart_parameters, + parity_set, + qts_subdivisions, + return_courant, + cpu_pool, + waterbodies_df, + waterbody_types_df, + data_assimilation_parameters=False, + lastobs_df=None, + link_gage_df=None, + link_lake_crosswalk=None, +): dt = run.get("dt") nts = run.get("nts") t0 = run.get("t0") @@ -104,13 +125,15 @@ def nwm_output_generator( ) ################### Output Handling - + start_time = time.time() LOG.info(f"Handling output ...") csv_output = output_parameters.get("csv_output", None) csv_output_folder = None + parquet_output = output_parameters.get("parquet_output", None) + parquet_output_folder = None rsrto = output_parameters.get("hydro_rst_output", None) chrto = output_parameters.get("chrtout_output", None) test = output_parameters.get("test_output", None) @@ -125,8 +148,19 @@ def nwm_output_generator( ) csv_output_segments = csv_output.get("csv_output_segments", None) +<<<<<<< HEAD if csv_output_folder or rsrto or chrto or chano or test or wbdyo or stream_output: +======= + if parquet_output: + parquet_output_folder = output_parameters["parquet_output"].get( + "parquet_output_folder", None + ) + parquet_output_segments = parquet_output.get("parquet_output_segments", None) + + if csv_output_folder or parquet_output_folder or rsrto or chrto or chano or test or wbdyo or stream_output: + +>>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) start = time.time() qvd_columns = pd.MultiIndex.from_product( [range(nts), ["q", "v", "d"]] @@ -138,7 +172,6 @@ def nwm_output_generator( ) if wbdyo and not waterbodies_df.empty: - # create waterbody dataframe for output to netcdf file i_columns = pd.MultiIndex.from_product( [range(nts), ["i"]] @@ -163,20 +196,20 @@ def nwm_output_generator( timestep_index = np.where(((np.array(list(set(list(timestep)))) + 1) * dt) % (dt * qts_subdivisions) == 0) ts_set = set(timestep_index[0].tolist()) flow_df_col_index = [i for i, e in enumerate(timestep) if e in ts_set] - flow_df = flow_df.iloc[:,flow_df_col_index] + flow_df = flow_df.iloc[:, flow_df_col_index] timestep, variable = zip(*wbdy.columns.tolist()) timestep_index = np.where(((np.array(list(set(list(timestep)))) + 1) * dt) % (dt * qts_subdivisions) == 0) ts_set = set(timestep_index[0].tolist()) wbdy_col_index = [i for i, e in enumerate(timestep) if e in ts_set] - i_df = wbdy.iloc[:,wbdy_col_index] - q_df = flow_df.iloc[:,0::3] - d_df = flow_df.iloc[:,2::3] + i_df = wbdy.iloc[:, wbdy_col_index] + q_df = flow_df.iloc[:, 0::3] + d_df = flow_df.iloc[:, 2::3] # replace waterbody lake_ids with 
outlet link ids if (link_lake_crosswalk): flowveldepth = _reindex_lake_to_link_id(flowveldepth, link_lake_crosswalk) - + # todo: create a unit test by saving FVD array to disk and then checking that # it matches FVD array from parent branch or other configurations. # flowveldepth.to_pickle(output_parameters['test_output']) @@ -192,15 +225,14 @@ def nwm_output_generator( ], copy=False, ) - + # replace waterbody lake_ids with outlet link ids if link_lake_crosswalk: - # (re) set the flowveldepth index - courant.set_index(fvdidxs, inplace = True) - + courant.set_index(fvdidxs, inplace=True) + LOG.debug("Constructing the FVD DataFrame took %s seconds." % (time.time() - start)) - + if stream_output: stream_output_directory = stream_output['stream_output_directory'] stream_output_timediff = stream_output['stream_output_time'] @@ -209,6 +241,7 @@ def nwm_output_generator( nudge = np.concatenate([r[8] for r in results]) usgs_positions_id = np.concatenate([r[3][0] for r in results]) +<<<<<<< HEAD nhd_io.write_flowveldepth( Path(stream_output_directory), flowveldepth, @@ -234,11 +267,23 @@ def nwm_output_generator( preRunLog.write("Output of FVD data every "+str(fCalc)+" minutes\n") preRunLog.write("Writing "+str(nTimeBins)+" time bins for "+str(len(flowveldepth.index))+" segments per FVD output file\n") preRunLog.close() +======= + nhd_io.write_flowveldepth_netcdf(Path(stream_output_directory), + flowveldepth, + nudge, + usgs_positions_id, + t0, + int(stream_output_timediff), + stream_output_type, + stream_output_internal_frequency, + cpu_pool=cpu_pool) +>>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) if test: flowveldepth.to_pickle(Path(test)) - + if wbdyo and not waterbodies_df.empty: +<<<<<<< HEAD time_index, tmp_variable = map(list,zip(*i_df.columns.tolist())) if not duplicate_ids_df.empty: @@ -247,10 +292,15 @@ def nwm_output_generator( else: output_waterbodies_df = waterbodies_df output_waterbody_types_df = waterbody_types_df +======= + + time_index, tmp_variable = map(list, zip(*i_df.columns.tolist())) +>>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) LOG.info("- writing t-route flow results to LAKEOUT files") start = time.time() - for i in range(i_df.shape[1]): + for i in range(i_df.shape[1]): nhd_io.write_waterbody_netcdf( +<<<<<<< HEAD wbdyo, i_df.iloc[:,[i]], q_df.iloc[:,[i]], @@ -271,13 +321,27 @@ def nwm_output_generator( preRunLog.write("-----\n") preRunLog.close() +======= + wbdyo, + i_df.iloc[:, [i]], + q_df.iloc[:, [i]], + d_df.iloc[:, [i]], + waterbodies_df, + waterbody_types_df, + t0, + dt, + nts, + time_index[i], + ) + +>>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) LOG.debug("writing LAKEOUT files took a total time of %s seconds." % (time.time() - start)) - + if rsrto: LOG.info("- writing restart files") start = time.time() - + wrf_hydro_restart_dir = rsrto.get( "wrf_hydro_channel_restart_source_directory", None ) @@ -315,24 +379,25 @@ def nwm_output_generator( preRunLog.close() else: - LOG.critical('Did not find any restart files in wrf_hydro_channel_restart_source_directory. Aborting restart write sequence.') - + LOG.critical( + 'Did not find any restart files in wrf_hydro_channel_restart_source_directory. Aborting restart write sequence.') + else: - LOG.critical('wrf_hydro_channel_restart_source_directory not specified in configuration file. 
Aborting restart write sequence.') - + LOG.critical( + 'wrf_hydro_channel_restart_source_directory not specified in configuration file. Aborting restart write sequence.') + LOG.debug("writing restart files took %s seconds." % (time.time() - start)) if chrto: - + LOG.info("- writing t-route flow results to CHRTOUT files") start = time.time() - + chrtout_read_folder = chrto.get( "wrf_hydro_channel_output_source_folder", None ) - - if chrtout_read_folder: + if chrtout_read_folder: chrtout_files = sorted( Path(chrtout_read_folder) / f for f in run["qlat_files"] ) @@ -343,6 +408,7 @@ def nwm_output_generator( qts_subdivisions, cpu_pool, ) +<<<<<<< HEAD if (not logFileName == 'NONE'): with open(logFileName, 'a') as preRunLog: @@ -351,11 +417,13 @@ def nwm_output_generator( preRunLog.write("Output of FVD files into folder: "+str(chrtout_read_folder)+"\n") preRunLog.write("-----\n") preRunLog.close() +======= +>>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) LOG.debug("writing CHRTOUT files took a total time of %s seconds." % (time.time() - start)) - if csv_output_folder: - + if csv_output_folder: + LOG.info("- writing flow, velocity, and depth results to .csv") start = time.time() @@ -363,10 +431,10 @@ def nwm_output_generator( # TO DO: create more descriptive filenames if supernetwork_parameters.get("title_string", None): filename_fvd = ( - "flowveldepth_" + supernetwork_parameters["title_string"] + ".csv" + "flowveldepth_" + supernetwork_parameters["title_string"] + ".csv" ) filename_courant = ( - "courant_" + supernetwork_parameters["title_string"] + ".csv" + "courant_" + supernetwork_parameters["title_string"] + ".csv" ) else: run_time_stamp = datetime.now().isoformat() @@ -378,7 +446,7 @@ def nwm_output_generator( # no csv_output_segments are specified, then write results for all segments if not csv_output_segments: csv_output_segments = flowveldepth.index - + flowveldepth = flowveldepth.sort_index() flowveldepth.loc[csv_output_segments].to_csv(output_path.joinpath(filename_fvd)) @@ -389,12 +457,46 @@ def nwm_output_generator( LOG.debug("writing CSV file took %s seconds." % (time.time() - start)) # usgs_df_filtered = usgs_df[usgs_df.index.isin(csv_output_segments)] # usgs_df_filtered.to_csv(output_path.joinpath("usgs_df.csv")) - + + if parquet_output_folder: + + LOG.info("- writing flow, velocity, and depth results to .parquet") + start = time.time() + + # create filenames + # TO DO: create more descriptive filenames + if supernetwork_parameters.get("title_string", None): + filename_fvd = ( + "flowveldepth_" + supernetwork_parameters["title_string"] + ".parquet" + ) + filename_courant = ( + "courant_" + supernetwork_parameters["title_string"] + ".parquet" + ) + else: + run_time_stamp = datetime.now().isoformat() + filename_fvd = "flowveldepth_" + run_time_stamp + ".parquet" + filename_courant = "courant_" + run_time_stamp + ".parquet" + + output_path = Path(parquet_output_folder).resolve() + + # no csv_output_segments are specified, then write results for all segments + if not parquet_output_segments: + parquet_output_segments = flowveldepth.index + + flowveldepth = flowveldepth.sort_index() + flowveldepth.loc[parquet_output_segments].to_parquet(output_path.joinpath(filename_fvd)) + + if return_courant: + courant = courant.sort_index() + courant.loc[parquet_output_segments].to_parquet(output_path.joinpath(filename_courant)) + + LOG.debug("writing parquet file took %s seconds." 
% (time.time() - start)) + if chano: LOG.info("- writing t-route flow results at gage locations to CHANOBS file") start = time.time() - + # replace waterbody lake_ids with outlet link ids if link_lake_crosswalk: link_gage_df = _reindex_lake_to_link_id(link_gage_df, link_lake_crosswalk) @@ -404,15 +506,16 @@ def nwm_output_generator( chano['chanobs_filepath'] = str(chano['chanobs_filepath']) nhd_io.write_chanobs( - Path(chano['chanobs_output_directory'] + chano['chanobs_filepath']), - flowveldepth, - link_gage_df, - t0, - dt, + Path(chano['chanobs_output_directory'] + chano['chanobs_filepath']), + flowveldepth, + link_gage_df, + t0, + dt, nts, # TODO allow user to pass a list of segments at which they would like to print results # rather than just printing at gages. ) +<<<<<<< HEAD if (not logFileName == 'NONE'): with open(logFileName, 'a') as preRunLog: @@ -423,8 +526,12 @@ def nwm_output_generator( preRunLog.close() LOG.debug("writing flow data to CHANOBS took %s seconds." % (time.time() - start)) +======= +>>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) - if lastobso: + LOG.debug("writing flow data to CHANOBS took %s seconds." % (time.time() - start)) + + if lastobso: # Write out LastObs as netcdf when using main_v04 or troute_model with HYfeature. # This is only needed if 1) streamflow nudging is ON and 2) a lastobs output # folder is provided by the user. @@ -437,8 +544,12 @@ def nwm_output_generator( nudging_true = streamflow_da.get('streamflow_nudging', None) +<<<<<<< HEAD if nudging_true and lastobs_output_folder and not lastobs_df.empty: +======= + if nudging_true and lastobs_output_folder: +>>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) LOG.info("- writing lastobs files") start = time.time() @@ -461,24 +572,28 @@ def nwm_output_generator( LOG.debug("writing lastobs files took %s seconds." % (time.time() - start)) +<<<<<<< HEAD # if 'flowveldepth' in locals(): # LOG.debug(flowveldepth) +======= + if 'flowveldepth' in locals(): + LOG.debug(flowveldepth) +>>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) LOG.debug("output complete in %s seconds." % (time.time() - start_time)) ################### Parity Check if parity_set: - LOG.info( "conducting parity check, comparing WRF Hydro results against t-route results" ) - + start_time = time.time() - + parity_check( parity_set, results, ) - LOG.debug("parity check complete in %s seconds." % (time.time() - start_time)) \ No newline at end of file + LOG.debug("parity check complete in %s seconds." % (time.time() - start_time)) diff --git a/test/LowerColorado_TX/test_AnA_V4_NHD.yaml b/test/LowerColorado_TX/test_AnA_V4_NHD.yaml index 0d09cbc82..f42cf940c 100644 --- a/test/LowerColorado_TX/test_AnA_V4_NHD.yaml +++ b/test/LowerColorado_TX/test_AnA_V4_NHD.yaml @@ -105,7 +105,7 @@ compute_parameters: reservoir_rfc_forecasts_lookback_hours : 48 #-------------------------------------------------------------------------------- -# output_parameters: +output_parameters: # #---------- # test_output: output/lcr_flowveldepth.pkl # lite_restart: @@ -124,5 +124,8 @@ compute_parameters: # stream_output_time: 1 #[hr] # stream_output_type: '.nc' #please select only between netcdf '.nc' or '.csv' or '.pkl' # stream_output_internal_frequency: 30 #[min] it should be order of 5 minutes. 
For instance if you want to output every hour put 60 + parquet_output: + #--------- + parquet_output_folder: output/ \ No newline at end of file From 4e7cb462ca06b1d462fce0f0ce9422dd8bb55180 Mon Sep 17 00:00:00 2001 From: karnesh Date: Mon, 25 Mar 2024 20:48:26 -0400 Subject: [PATCH 03/29] Modified parquet output format to match TEEHR input --- .../troute/config/output_parameters.py | 1 + src/troute-nwm/src/nwm_routing/output.py | 75 ++++++++++++++++++- 2 files changed, 74 insertions(+), 2 deletions(-) diff --git a/src/troute-config/troute/config/output_parameters.py b/src/troute-config/troute/config/output_parameters.py index 9c8ce421b..ff4722d1a 100644 --- a/src/troute-config/troute/config/output_parameters.py +++ b/src/troute-config/troute/config/output_parameters.py @@ -53,6 +53,7 @@ class ParquetOutput(BaseModel, extra='forbid'): # NOTE: required if writing results to parquet parquet_output_folder: Optional[DirectoryPath] = None parquet_output_segments: Optional[List[str]] = None + configuration: Optional[str] = None class ChrtoutOutput(BaseModel, extra='forbid'): diff --git a/src/troute-nwm/src/nwm_routing/output.py b/src/troute-nwm/src/nwm_routing/output.py index 80a1a4e39..11106a870 100644 --- a/src/troute-nwm/src/nwm_routing/output.py +++ b/src/troute-nwm/src/nwm_routing/output.py @@ -70,6 +70,48 @@ def nwm_output_generator( ======= >>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) +def _parquet_output_format_converter(df, start_datetime, dt, configuration): + ''' + Utility function for convert flowveldepth dataframe + to a timeseries and to match parquet input format + of TEEHR + + Arguments: + ---------- + - df (DataFrame): Data frame to be converted + - start_datetime: Date time from restart parameters + - dt: Time step + - configuration: configuration (for instance- short_range, medium_range) + + Returns: + -------- + - timeseries_df (DataFrame): Converted timeseries data frame + ''' + + df.index.name = 'location_id' + df.reset_index(inplace=True) + timeseries_df = df.melt(id_vars=['location_id'], var_name='var') + timeseries_df['var'] = timeseries_df['var'].astype('string') + timeseries_df[['timestep', 'variable']] = timeseries_df['var'].str.strip("()").str.split(",", n=1, expand=True) + timeseries_df['variable'] = timeseries_df['variable'].str.strip().str.replace("'", "") + timeseries_df['timestep'] = timeseries_df['timestep'].astype('int') + timeseries_df['value_time'] = (start_datetime + pd.to_timedelta(timeseries_df['timestep'] * dt, unit='s')) + variable_to_name_map = {"q": "streamflow", "d": "depth", "v": "velocity"} + timeseries_df["variable_name"] = timeseries_df["variable"].map(variable_to_name_map) + timeseries_df.drop(['var', 'timestep', 'variable'], axis=1, inplace=True) + timeseries_df['configuration'] = configuration + variable_to_units_map = {"streamflow": "m3/s", "velocity": "m/s", "depth": "m"} + timeseries_df['units'] = timeseries_df['variable_name'].map(variable_to_units_map) + timeseries_df['reference_time'] = start_datetime.date() + timeseries_df['location_id'] = timeseries_df['location_id'].astype('string') + timeseries_df['location_id'] = 'nex-' + timeseries_df['location_id'] + timeseries_df['value'] = timeseries_df['value'].astype('double') + timeseries_df['reference_time'] = timeseries_df['reference_time'].astype('datetime64[us]') + timeseries_df['value_time'] = timeseries_df['value_time'].astype('datetime64[us]') + + return timeseries_df + + def nwm_output_generator( run, results, @@ -484,11 +526,40 @@ def 
nwm_output_generator( parquet_output_segments = flowveldepth.index flowveldepth = flowveldepth.sort_index() - flowveldepth.loc[parquet_output_segments].to_parquet(output_path.joinpath(filename_fvd)) + timeseries_df = _parquet_output_format_converter(flowveldepth, restart_parameters.get("start_datetime"), dt, + output_parameters["parquet_output"].get("configuration")) + """ + flowveldepth.index.name = 'Location_ID' + flowveldepth.reset_index(inplace=True) + timeseries_df = flowveldepth.melt(id_vars=['Location_ID'], var_name='var') + timeseries_df['var'] = timeseries_df['var'].astype('string') + timeseries_df[['timestep', 'variable']] = timeseries_df['var'].str.strip("()").str.split(",", n=1, expand=True) + # timeseries_df[['timestep', 'variable_name']]= pd.DataFrame([x.strip("()").split(',') for x in list( + # timeseries_df['variable'])]) + timeseries_df['variable'] = timeseries_df['variable'].str.strip().str.replace("'", "") + timeseries_df['timestep'] = timeseries_df['timestep'].astype('int') + timeseries_df['value_time'] = (restart_parameters.get("start_datetime") + + pd.to_timedelta(timeseries_df['timestep'] * dt, unit='s')) + variable_to_name_map = {"q": "streamflow", "d": "depth", "v": "velocity"} + timeseries_df["variable_name"] = timeseries_df["variable"].map(variable_to_name_map) + timeseries_df.drop(['var', 'timestep', 'variable'], axis=1, inplace=True) + timeseries_df['configuration'] = output_parameters["parquet_output"].get("configuration") + variable_to_units_map = {"streamflow": "m3/s", "velocity": "m/s", "depth": "m"} + timeseries_df['units'] = timeseries_df['variable_name'].map(variable_to_units_map) + timeseries_df['reference_time'] = restart_parameters.get("start_datetime").date() + """ + parquet_output_segments_str = ['nex-' + str(segment) for segment in parquet_output_segments] + timeseries_df.loc[timeseries_df['location_id'].isin(parquet_output_segments_str)].to_parquet( + output_path.joinpath(filename_fvd), allow_truncated_timestamps=True) + # flowveldepth.loc[parquet_output_segments].to_parquet(output_path.joinpath(filename_fvd)) if return_courant: courant = courant.sort_index() - courant.loc[parquet_output_segments].to_parquet(output_path.joinpath(filename_courant)) + timeseries_courant = _parquet_output_format_converter(courant, restart_parameters.get("start_datetime"), dt, + output_parameters["parquet_output"].get("configuration")) + timeseries_courant.loc[timeseries_courant['location_id'].isin(parquet_output_segments_str)].to_parquet( + output_path.joinpath(filename_courant), allow_truncated_timestamps=True) + #courant.loc[parquet_output_segments].to_parquet(output_path.joinpath(filename_courant)) LOG.debug("writing parquet file took %s seconds." 
% (time.time() - start)) From 4b2bb010e474f1c8b2ec47ebfd2407813dd5a579 Mon Sep 17 00:00:00 2001 From: karnesh Date: Mon, 1 Apr 2024 10:35:51 -0400 Subject: [PATCH 04/29] cleaned the code --- src/troute-nwm/src/nwm_routing/output.py | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/src/troute-nwm/src/nwm_routing/output.py b/src/troute-nwm/src/nwm_routing/output.py index 11106a870..c20281d67 100644 --- a/src/troute-nwm/src/nwm_routing/output.py +++ b/src/troute-nwm/src/nwm_routing/output.py @@ -528,26 +528,7 @@ def nwm_output_generator( flowveldepth = flowveldepth.sort_index() timeseries_df = _parquet_output_format_converter(flowveldepth, restart_parameters.get("start_datetime"), dt, output_parameters["parquet_output"].get("configuration")) - """ - flowveldepth.index.name = 'Location_ID' - flowveldepth.reset_index(inplace=True) - timeseries_df = flowveldepth.melt(id_vars=['Location_ID'], var_name='var') - timeseries_df['var'] = timeseries_df['var'].astype('string') - timeseries_df[['timestep', 'variable']] = timeseries_df['var'].str.strip("()").str.split(",", n=1, expand=True) - # timeseries_df[['timestep', 'variable_name']]= pd.DataFrame([x.strip("()").split(',') for x in list( - # timeseries_df['variable'])]) - timeseries_df['variable'] = timeseries_df['variable'].str.strip().str.replace("'", "") - timeseries_df['timestep'] = timeseries_df['timestep'].astype('int') - timeseries_df['value_time'] = (restart_parameters.get("start_datetime") + - pd.to_timedelta(timeseries_df['timestep'] * dt, unit='s')) - variable_to_name_map = {"q": "streamflow", "d": "depth", "v": "velocity"} - timeseries_df["variable_name"] = timeseries_df["variable"].map(variable_to_name_map) - timeseries_df.drop(['var', 'timestep', 'variable'], axis=1, inplace=True) - timeseries_df['configuration'] = output_parameters["parquet_output"].get("configuration") - variable_to_units_map = {"streamflow": "m3/s", "velocity": "m/s", "depth": "m"} - timeseries_df['units'] = timeseries_df['variable_name'].map(variable_to_units_map) - timeseries_df['reference_time'] = restart_parameters.get("start_datetime").date() - """ + parquet_output_segments_str = ['nex-' + str(segment) for segment in parquet_output_segments] timeseries_df.loc[timeseries_df['location_id'].isin(parquet_output_segments_str)].to_parquet( output_path.joinpath(filename_fvd), allow_truncated_timestamps=True) From 1bb44f50fc5fa246a2b8dc38a9c9dea85f543142 Mon Sep 17 00:00:00 2001 From: karnesh Date: Mon, 1 Apr 2024 10:37:24 -0400 Subject: [PATCH 05/29] cleaned the code --- src/troute-nwm/src/nwm_routing/output.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/troute-nwm/src/nwm_routing/output.py b/src/troute-nwm/src/nwm_routing/output.py index c20281d67..d19d5c7dc 100644 --- a/src/troute-nwm/src/nwm_routing/output.py +++ b/src/troute-nwm/src/nwm_routing/output.py @@ -521,7 +521,7 @@ def nwm_output_generator( output_path = Path(parquet_output_folder).resolve() - # no csv_output_segments are specified, then write results for all segments + # no parquet_output_segments are specified, then write results for all segments if not parquet_output_segments: parquet_output_segments = flowveldepth.index @@ -532,7 +532,6 @@ def nwm_output_generator( parquet_output_segments_str = ['nex-' + str(segment) for segment in parquet_output_segments] timeseries_df.loc[timeseries_df['location_id'].isin(parquet_output_segments_str)].to_parquet( output_path.joinpath(filename_fvd), allow_truncated_timestamps=True) - # 
flowveldepth.loc[parquet_output_segments].to_parquet(output_path.joinpath(filename_fvd)) if return_courant: courant = courant.sort_index() @@ -540,7 +539,6 @@ def nwm_output_generator( output_parameters["parquet_output"].get("configuration")) timeseries_courant.loc[timeseries_courant['location_id'].isin(parquet_output_segments_str)].to_parquet( output_path.joinpath(filename_courant), allow_truncated_timestamps=True) - #courant.loc[parquet_output_segments].to_parquet(output_path.joinpath(filename_courant)) LOG.debug("writing parquet file took %s seconds." % (time.time() - start)) From 16362ec7e64c300851d8f68a8acfddc253f3a128 Mon Sep 17 00:00:00 2001 From: karnesh Date: Tue, 23 Apr 2024 11:38:27 -0400 Subject: [PATCH 06/29] sample yaml file changes for parquet output --- test/LowerColorado_TX/test_AnA_V4_NHD.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/test/LowerColorado_TX/test_AnA_V4_NHD.yaml b/test/LowerColorado_TX/test_AnA_V4_NHD.yaml index f42cf940c..5ab341fe6 100644 --- a/test/LowerColorado_TX/test_AnA_V4_NHD.yaml +++ b/test/LowerColorado_TX/test_AnA_V4_NHD.yaml @@ -127,5 +127,6 @@ output_parameters: parquet_output: #--------- parquet_output_folder: output/ + configuration: short_range \ No newline at end of file From 29a1bc6ade2f4e4134bf1e3da1782afdfeeec963 Mon Sep 17 00:00:00 2001 From: karnesh Date: Tue, 7 May 2024 13:38:16 -0400 Subject: [PATCH 07/29] added functionality to include user defined prefix for IDs --- .../troute/config/output_parameters.py | 1 + src/troute-nwm/src/nwm_routing/output.py | 95 +++---------------- test/LowerColorado_TX/test_AnA_V4_NHD.yaml | 1 + 3 files changed, 13 insertions(+), 84 deletions(-) diff --git a/src/troute-config/troute/config/output_parameters.py b/src/troute-config/troute/config/output_parameters.py index ff4722d1a..eb19fea3d 100644 --- a/src/troute-config/troute/config/output_parameters.py +++ b/src/troute-config/troute/config/output_parameters.py @@ -54,6 +54,7 @@ class ParquetOutput(BaseModel, extra='forbid'): parquet_output_folder: Optional[DirectoryPath] = None parquet_output_segments: Optional[List[str]] = None configuration: Optional[str] = None + prefix_ids: Optional[str] = None class ChrtoutOutput(BaseModel, extra='forbid'): diff --git a/src/troute-nwm/src/nwm_routing/output.py b/src/troute-nwm/src/nwm_routing/output.py index d19d5c7dc..f54ad8117 100644 --- a/src/troute-nwm/src/nwm_routing/output.py +++ b/src/troute-nwm/src/nwm_routing/output.py @@ -46,31 +46,8 @@ def _reindex_lake_to_link_id(target_df, crosswalk): return target_df -<<<<<<< HEAD -def nwm_output_generator( - run, - results, - supernetwork_parameters, - output_parameters, - parity_parameters, - restart_parameters, - parity_set, - qts_subdivisions, - return_courant, - cpu_pool, - waterbodies_df, - waterbody_types_df, - duplicate_ids_df, - data_assimilation_parameters=False, - lastobs_df = None, - link_gage_df = None, - link_lake_crosswalk = None, - logFileName='NONE' -): -======= ->>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) -def _parquet_output_format_converter(df, start_datetime, dt, configuration): +def _parquet_output_format_converter(df, start_datetime, dt, configuration, prefix_ids): ''' Utility function for convert flowveldepth dataframe to a timeseries and to match parquet input format @@ -104,7 +81,7 @@ def _parquet_output_format_converter(df, start_datetime, dt, configuration): timeseries_df['units'] = timeseries_df['variable_name'].map(variable_to_units_map) timeseries_df['reference_time'] = 
start_datetime.date() timeseries_df['location_id'] = timeseries_df['location_id'].astype('string') - timeseries_df['location_id'] = 'nex-' + timeseries_df['location_id'] + timeseries_df['location_id'] = prefix_ids + '-' + timeseries_df['location_id'] timeseries_df['value'] = timeseries_df['value'].astype('double') timeseries_df['reference_time'] = timeseries_df['reference_time'].astype('datetime64[us]') timeseries_df['value_time'] = timeseries_df['value_time'].astype('datetime64[us]') @@ -129,6 +106,7 @@ def nwm_output_generator( lastobs_df=None, link_gage_df=None, link_lake_crosswalk=None, + logFileName='NONE' ): dt = run.get("dt") nts = run.get("nts") @@ -190,10 +168,6 @@ def nwm_output_generator( ) csv_output_segments = csv_output.get("csv_output_segments", None) -<<<<<<< HEAD - if csv_output_folder or rsrto or chrto or chano or test or wbdyo or stream_output: - -======= if parquet_output: parquet_output_folder = output_parameters["parquet_output"].get( "parquet_output_folder", None @@ -202,7 +176,6 @@ def nwm_output_generator( if csv_output_folder or parquet_output_folder or rsrto or chrto or chano or test or wbdyo or stream_output: ->>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) start = time.time() qvd_columns = pd.MultiIndex.from_product( [range(nts), ["q", "v", "d"]] @@ -283,7 +256,7 @@ def nwm_output_generator( nudge = np.concatenate([r[8] for r in results]) usgs_positions_id = np.concatenate([r[3][0] for r in results]) -<<<<<<< HEAD + nhd_io.write_flowveldepth( Path(stream_output_directory), flowveldepth, @@ -309,23 +282,11 @@ def nwm_output_generator( preRunLog.write("Output of FVD data every "+str(fCalc)+" minutes\n") preRunLog.write("Writing "+str(nTimeBins)+" time bins for "+str(len(flowveldepth.index))+" segments per FVD output file\n") preRunLog.close() -======= - nhd_io.write_flowveldepth_netcdf(Path(stream_output_directory), - flowveldepth, - nudge, - usgs_positions_id, - t0, - int(stream_output_timediff), - stream_output_type, - stream_output_internal_frequency, - cpu_pool=cpu_pool) ->>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) - + if test: flowveldepth.to_pickle(Path(test)) if wbdyo and not waterbodies_df.empty: -<<<<<<< HEAD time_index, tmp_variable = map(list,zip(*i_df.columns.tolist())) if not duplicate_ids_df.empty: @@ -334,15 +295,11 @@ def nwm_output_generator( else: output_waterbodies_df = waterbodies_df output_waterbody_types_df = waterbody_types_df -======= - time_index, tmp_variable = map(list, zip(*i_df.columns.tolist())) ->>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) LOG.info("- writing t-route flow results to LAKEOUT files") start = time.time() for i in range(i_df.shape[1]): nhd_io.write_waterbody_netcdf( -<<<<<<< HEAD wbdyo, i_df.iloc[:,[i]], q_df.iloc[:,[i]], @@ -362,21 +319,7 @@ def nwm_output_generator( preRunLog.write("Output of waterbody files into folder: "+str(wbdyo)+"\n") preRunLog.write("-----\n") preRunLog.close() - -======= - wbdyo, - i_df.iloc[:, [i]], - q_df.iloc[:, [i]], - d_df.iloc[:, [i]], - waterbodies_df, - waterbody_types_df, - t0, - dt, - nts, - time_index[i], - ) ->>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) LOG.debug("writing LAKEOUT files took a total time of %s seconds." 
% (time.time() - start)) if rsrto: @@ -450,7 +393,6 @@ def nwm_output_generator( qts_subdivisions, cpu_pool, ) -<<<<<<< HEAD if (not logFileName == 'NONE'): with open(logFileName, 'a') as preRunLog: @@ -459,8 +401,6 @@ def nwm_output_generator( preRunLog.write("Output of FVD files into folder: "+str(chrtout_read_folder)+"\n") preRunLog.write("-----\n") preRunLog.close() -======= ->>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) LOG.debug("writing CHRTOUT files took a total time of %s seconds." % (time.time() - start)) @@ -526,17 +466,19 @@ def nwm_output_generator( parquet_output_segments = flowveldepth.index flowveldepth = flowveldepth.sort_index() + configuration = output_parameters["parquet_output"].get("configuration") + prefix_ids = output_parameters["parquet_output"].get("prefix_ids") timeseries_df = _parquet_output_format_converter(flowveldepth, restart_parameters.get("start_datetime"), dt, - output_parameters["parquet_output"].get("configuration")) + configuration, prefix_ids) - parquet_output_segments_str = ['nex-' + str(segment) for segment in parquet_output_segments] + parquet_output_segments_str = [prefix_ids + '-' + str(segment) for segment in parquet_output_segments] timeseries_df.loc[timeseries_df['location_id'].isin(parquet_output_segments_str)].to_parquet( output_path.joinpath(filename_fvd), allow_truncated_timestamps=True) if return_courant: courant = courant.sort_index() timeseries_courant = _parquet_output_format_converter(courant, restart_parameters.get("start_datetime"), dt, - output_parameters["parquet_output"].get("configuration")) + configuration, prefix_ids) timeseries_courant.loc[timeseries_courant['location_id'].isin(parquet_output_segments_str)].to_parquet( output_path.joinpath(filename_courant), allow_truncated_timestamps=True) @@ -565,8 +507,7 @@ def nwm_output_generator( # TODO allow user to pass a list of segments at which they would like to print results # rather than just printing at gages. ) -<<<<<<< HEAD - + if (not logFileName == 'NONE'): with open(logFileName, 'a') as preRunLog: preRunLog.write("\n") @@ -576,10 +517,6 @@ def nwm_output_generator( preRunLog.close() LOG.debug("writing flow data to CHANOBS took %s seconds." % (time.time() - start)) -======= ->>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) - - LOG.debug("writing flow data to CHANOBS took %s seconds." % (time.time() - start)) if lastobso: # Write out LastObs as netcdf when using main_v04 or troute_model with HYfeature. @@ -594,12 +531,8 @@ def nwm_output_generator( nudging_true = streamflow_da.get('streamflow_nudging', None) -<<<<<<< HEAD if nudging_true and lastobs_output_folder and not lastobs_df.empty: -======= - if nudging_true and lastobs_output_folder: ->>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) LOG.info("- writing lastobs files") start = time.time() @@ -622,14 +555,8 @@ def nwm_output_generator( LOG.debug("writing lastobs files took %s seconds." % (time.time() - start)) -<<<<<<< HEAD - - # if 'flowveldepth' in locals(): - # LOG.debug(flowveldepth) -======= if 'flowveldepth' in locals(): LOG.debug(flowveldepth) ->>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) LOG.debug("output complete in %s seconds." 
% (time.time() - start_time)) diff --git a/test/LowerColorado_TX/test_AnA_V4_NHD.yaml b/test/LowerColorado_TX/test_AnA_V4_NHD.yaml index 5ab341fe6..b8a906517 100644 --- a/test/LowerColorado_TX/test_AnA_V4_NHD.yaml +++ b/test/LowerColorado_TX/test_AnA_V4_NHD.yaml @@ -128,5 +128,6 @@ output_parameters: #--------- parquet_output_folder: output/ configuration: short_range + prefix_ids: nex \ No newline at end of file From d8310243c0e48817a16d7fd47e977043743d0759 Mon Sep 17 00:00:00 2001 From: karnesh Date: Mon, 20 May 2024 10:23:20 -0400 Subject: [PATCH 08/29] reverted back the formatting changes --- src/troute-nwm/src/nwm_routing/output.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/troute-nwm/src/nwm_routing/output.py b/src/troute-nwm/src/nwm_routing/output.py index f54ad8117..cd50ed17c 100644 --- a/src/troute-nwm/src/nwm_routing/output.py +++ b/src/troute-nwm/src/nwm_routing/output.py @@ -34,7 +34,7 @@ def _reindex_lake_to_link_id(target_df, crosswalk): lake_index_intersect = np.intersect1d( idxs, lakeids, - return_indices=True + return_indices = True ) # replace lake ids with link IDs in the target_df index array @@ -300,7 +300,7 @@ def nwm_output_generator( start = time.time() for i in range(i_df.shape[1]): nhd_io.write_waterbody_netcdf( - wbdyo, + wbdyo, i_df.iloc[:,[i]], q_df.iloc[:,[i]], d_df.iloc[:,[i]], From 4e42b63d2e8486f4e75f4e5c3b4150d2d099ab17 Mon Sep 17 00:00:00 2001 From: karnesh Date: Mon, 20 May 2024 10:31:55 -0400 Subject: [PATCH 09/29] reverted back the formatting changes --- src/troute-nwm/src/nwm_routing/output.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/troute-nwm/src/nwm_routing/output.py b/src/troute-nwm/src/nwm_routing/output.py index cd50ed17c..1ab22d2b0 100644 --- a/src/troute-nwm/src/nwm_routing/output.py +++ b/src/troute-nwm/src/nwm_routing/output.py @@ -572,5 +572,5 @@ def nwm_output_generator( parity_check( parity_set, results, ) - + LOG.debug("parity check complete in %s seconds." 
% (time.time() - start_time)) From 75d798d7946c557bc9d543e69e4131dc8a0beee7 Mon Sep 17 00:00:00 2001 From: karnesh Date: Mon, 20 May 2024 10:34:52 -0400 Subject: [PATCH 10/29] reverted back the formatting changes --- src/troute-config/troute/config/output_parameters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/troute-config/troute/config/output_parameters.py b/src/troute-config/troute/config/output_parameters.py index eb19fea3d..f8710bcf5 100644 --- a/src/troute-config/troute/config/output_parameters.py +++ b/src/troute-config/troute/config/output_parameters.py @@ -105,7 +105,7 @@ def validate_stream_output_internal_frequency(cls, value, values): if values.get('stream_output_time') != -1 and value / 60 > values['stream_output_time']: raise ValueError("stream_output_internal_frequency should be less than or equal to stream_output_time in minutes.") return value - + OutputParameters.update_forward_refs() WrfHydroParityCheck.update_forward_refs() From de4364035efc4962ec34592b4482cbc9cb5ca6bc Mon Sep 17 00:00:00 2001 From: karnesh Date: Mon, 20 May 2024 10:36:17 -0400 Subject: [PATCH 11/29] reverted back the formatting changes --- src/troute-config/troute/config/output_parameters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/troute-config/troute/config/output_parameters.py b/src/troute-config/troute/config/output_parameters.py index f8710bcf5..5b91c6116 100644 --- a/src/troute-config/troute/config/output_parameters.py +++ b/src/troute-config/troute/config/output_parameters.py @@ -105,7 +105,7 @@ def validate_stream_output_internal_frequency(cls, value, values): if values.get('stream_output_time') != -1 and value / 60 > values['stream_output_time']: raise ValueError("stream_output_internal_frequency should be less than or equal to stream_output_time in minutes.") return value - + OutputParameters.update_forward_refs() WrfHydroParityCheck.update_forward_refs() From c959cb37b7d3f37152fd2db1cb59981c504f64d1 Mon Sep 17 00:00:00 2001 From: karnesh Date: Fri, 26 Jul 2024 13:12:54 -0400 Subject: [PATCH 12/29] removed parquet from streamOutput_allowedTypes --- src/troute-config/troute/config/output_parameters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/troute-config/troute/config/output_parameters.py b/src/troute-config/troute/config/output_parameters.py index 5b91c6116..0b5919dfb 100644 --- a/src/troute-config/troute/config/output_parameters.py +++ b/src/troute-config/troute/config/output_parameters.py @@ -5,7 +5,7 @@ from typing_extensions import Annotated, Literal from .types import FilePath, DirectoryPath -streamOutput_allowedTypes = Literal['.csv', '.nc', '.pkl', '.parquet'] +streamOutput_allowedTypes = Literal['.csv', '.nc', '.pkl'] class OutputParameters(BaseModel): From 0a5046c99e6b6b3583227c25e4fc777975d625af Mon Sep 17 00:00:00 2001 From: karnesh Date: Fri, 26 Jul 2024 14:24:35 -0400 Subject: [PATCH 13/29] added missing parameter in nwm_output_generator function and fixed formatting --- .../troute/config/output_parameters.py | 4 +- src/troute-nwm/src/nwm_routing/output.py | 94 +++++++++---------- 2 files changed, 49 insertions(+), 49 deletions(-) diff --git a/src/troute-config/troute/config/output_parameters.py b/src/troute-config/troute/config/output_parameters.py index 0b5919dfb..b367582d2 100644 --- a/src/troute-config/troute/config/output_parameters.py +++ b/src/troute-config/troute/config/output_parameters.py @@ -49,7 +49,7 @@ class CsvOutput(BaseModel): csv_output_segments: Optional[List[str]] = None 
-class ParquetOutput(BaseModel, extra='forbid'): +class ParquetOutput(BaseModel): # NOTE: required if writing results to parquet parquet_output_folder: Optional[DirectoryPath] = None parquet_output_segments: Optional[List[str]] = None @@ -57,7 +57,7 @@ class ParquetOutput(BaseModel, extra='forbid'): prefix_ids: Optional[str] = None -class ChrtoutOutput(BaseModel, extra='forbid'): +class ChrtoutOutput(BaseModel): # NOTE: mandatory if writing results to CHRTOUT. wrf_hydro_channel_output_source_folder: Optional[DirectoryPath] = None diff --git a/src/troute-nwm/src/nwm_routing/output.py b/src/troute-nwm/src/nwm_routing/output.py index 1ab22d2b0..08bef8c78 100644 --- a/src/troute-nwm/src/nwm_routing/output.py +++ b/src/troute-nwm/src/nwm_routing/output.py @@ -7,8 +7,8 @@ from build_tests import parity_check import logging -LOG = logging.getLogger('') +LOG = logging.getLogger('') def _reindex_lake_to_link_id(target_df, crosswalk): ''' @@ -29,20 +29,20 @@ def _reindex_lake_to_link_id(target_df, crosswalk): # evaluate intersection of lake ids and target_df index values # i.e. what are the index positions of lake ids that need replacing? - lakeids = np.fromiter(crosswalk.keys(), dtype=int) + lakeids = np.fromiter(crosswalk.keys(), dtype = int) idxs = target_df.index.to_numpy() lake_index_intersect = np.intersect1d( - idxs, - lakeids, + idxs, + lakeids, return_indices = True ) # replace lake ids with link IDs in the target_df index array - linkids = np.fromiter(crosswalk.values(), dtype=int) + linkids = np.fromiter(crosswalk.values(), dtype = int) idxs[lake_index_intersect[1]] = linkids[lake_index_intersect[2]] # (re) set the target_df index - target_df.set_index(idxs, inplace=True) + target_df.set_index(idxs, inplace = True) return target_df @@ -102,6 +102,7 @@ def nwm_output_generator( cpu_pool, waterbodies_df, waterbody_types_df, + duplicate_ids_df, data_assimilation_parameters=False, lastobs_df=None, link_gage_df=None, @@ -145,7 +146,7 @@ def nwm_output_generator( ) ################### Output Handling - + start_time = time.time() LOG.info(f"Handling output ...") @@ -187,6 +188,7 @@ def nwm_output_generator( ) if wbdyo and not waterbodies_df.empty: + # create waterbody dataframe for output to netcdf file i_columns = pd.MultiIndex.from_product( [range(nts), ["i"]] @@ -211,20 +213,20 @@ def nwm_output_generator( timestep_index = np.where(((np.array(list(set(list(timestep)))) + 1) * dt) % (dt * qts_subdivisions) == 0) ts_set = set(timestep_index[0].tolist()) flow_df_col_index = [i for i, e in enumerate(timestep) if e in ts_set] - flow_df = flow_df.iloc[:, flow_df_col_index] + flow_df = flow_df.iloc[:,flow_df_col_index] timestep, variable = zip(*wbdy.columns.tolist()) timestep_index = np.where(((np.array(list(set(list(timestep)))) + 1) * dt) % (dt * qts_subdivisions) == 0) ts_set = set(timestep_index[0].tolist()) wbdy_col_index = [i for i, e in enumerate(timestep) if e in ts_set] - i_df = wbdy.iloc[:, wbdy_col_index] - q_df = flow_df.iloc[:, 0::3] - d_df = flow_df.iloc[:, 2::3] + i_df = wbdy.iloc[:,wbdy_col_index] + q_df = flow_df.iloc[:,0::3] + d_df = flow_df.iloc[:,2::3] # replace waterbody lake_ids with outlet link ids if (link_lake_crosswalk): flowveldepth = _reindex_lake_to_link_id(flowveldepth, link_lake_crosswalk) - + # todo: create a unit test by saving FVD array to disk and then checking that # it matches FVD array from parent branch or other configurations. 
# flowveldepth.to_pickle(output_parameters['test_output']) @@ -240,14 +242,14 @@ def nwm_output_generator( ], copy=False, ) - + # replace waterbody lake_ids with outlet link ids if link_lake_crosswalk: # (re) set the flowveldepth index - courant.set_index(fvdidxs, inplace=True) - + courant.set_index(fvdidxs, inplace = True) + LOG.debug("Constructing the FVD DataFrame took %s seconds." % (time.time() - start)) - + if stream_output: stream_output_directory = stream_output['stream_output_directory'] stream_output_timediff = stream_output['stream_output_time'] @@ -256,7 +258,6 @@ def nwm_output_generator( nudge = np.concatenate([r[8] for r in results]) usgs_positions_id = np.concatenate([r[3][0] for r in results]) - nhd_io.write_flowveldepth( Path(stream_output_directory), flowveldepth, @@ -282,10 +283,10 @@ def nwm_output_generator( preRunLog.write("Output of FVD data every "+str(fCalc)+" minutes\n") preRunLog.write("Writing "+str(nTimeBins)+" time bins for "+str(len(flowveldepth.index))+" segments per FVD output file\n") preRunLog.close() - + if test: flowveldepth.to_pickle(Path(test)) - + if wbdyo and not waterbodies_df.empty: time_index, tmp_variable = map(list,zip(*i_df.columns.tolist())) @@ -295,12 +296,11 @@ def nwm_output_generator( else: output_waterbodies_df = waterbodies_df output_waterbody_types_df = waterbody_types_df - LOG.info("- writing t-route flow results to LAKEOUT files") start = time.time() - for i in range(i_df.shape[1]): + for i in range(i_df.shape[1]): nhd_io.write_waterbody_netcdf( - wbdyo, + wbdyo, i_df.iloc[:,[i]], q_df.iloc[:,[i]], d_df.iloc[:,[i]], @@ -319,14 +319,14 @@ def nwm_output_generator( preRunLog.write("Output of waterbody files into folder: "+str(wbdyo)+"\n") preRunLog.write("-----\n") preRunLog.close() - + LOG.debug("writing LAKEOUT files took a total time of %s seconds." % (time.time() - start)) - + if rsrto: LOG.info("- writing restart files") start = time.time() - + wrf_hydro_restart_dir = rsrto.get( "wrf_hydro_channel_restart_source_directory", None ) @@ -364,12 +364,10 @@ def nwm_output_generator( preRunLog.close() else: - LOG.critical( - 'Did not find any restart files in wrf_hydro_channel_restart_source_directory. Aborting restart write sequence.') + LOG.critical('Did not find any restart files in wrf_hydro_channel_restart_source_directory. Aborting restart write sequence.') else: - LOG.critical( - 'wrf_hydro_channel_restart_source_directory not specified in configuration file. Aborting restart write sequence.') + LOG.critical('wrf_hydro_channel_restart_source_directory not specified in configuration file. Aborting restart write sequence.') LOG.debug("writing restart files took %s seconds." % (time.time() - start)) @@ -377,12 +375,13 @@ def nwm_output_generator( LOG.info("- writing t-route flow results to CHRTOUT files") start = time.time() - + chrtout_read_folder = chrto.get( "wrf_hydro_channel_output_source_folder", None ) if chrtout_read_folder: + chrtout_files = sorted( Path(chrtout_read_folder) / f for f in run["qlat_files"] ) @@ -404,8 +403,8 @@ def nwm_output_generator( LOG.debug("writing CHRTOUT files took a total time of %s seconds." 
% (time.time() - start)) - if csv_output_folder: - + if csv_output_folder: + LOG.info("- writing flow, velocity, and depth results to .csv") start = time.time() @@ -413,10 +412,10 @@ def nwm_output_generator( # TO DO: create more descriptive filenames if supernetwork_parameters.get("title_string", None): filename_fvd = ( - "flowveldepth_" + supernetwork_parameters["title_string"] + ".csv" + "flowveldepth_" + supernetwork_parameters["title_string"] + ".csv" ) filename_courant = ( - "courant_" + supernetwork_parameters["title_string"] + ".csv" + "courant_" + supernetwork_parameters["title_string"] + ".csv" ) else: run_time_stamp = datetime.now().isoformat() @@ -428,7 +427,7 @@ def nwm_output_generator( # no csv_output_segments are specified, then write results for all segments if not csv_output_segments: csv_output_segments = flowveldepth.index - + flowveldepth = flowveldepth.sort_index() flowveldepth.loc[csv_output_segments].to_csv(output_path.joinpath(filename_fvd)) @@ -488,7 +487,7 @@ def nwm_output_generator( LOG.info("- writing t-route flow results at gage locations to CHANOBS file") start = time.time() - + # replace waterbody lake_ids with outlet link ids if link_lake_crosswalk: link_gage_df = _reindex_lake_to_link_id(link_gage_df, link_lake_crosswalk) @@ -499,15 +498,15 @@ def nwm_output_generator( nhd_io.write_chanobs( Path(chano['chanobs_output_directory'] + chano['chanobs_filepath']), - flowveldepth, - link_gage_df, - t0, - dt, + flowveldepth, + link_gage_df, + t0, + dt, nts, # TODO allow user to pass a list of segments at which they would like to print results # rather than just printing at gages. ) - + if (not logFileName == 'NONE'): with open(logFileName, 'a') as preRunLog: preRunLog.write("\n") @@ -518,7 +517,7 @@ def nwm_output_generator( LOG.debug("writing flow data to CHANOBS took %s seconds." % (time.time() - start)) - if lastobso: + if lastobso: # Write out LastObs as netcdf when using main_v04 or troute_model with HYfeature. # This is only needed if 1) streamflow nudging is ON and 2) a lastobs output # folder is provided by the user. @@ -555,22 +554,23 @@ def nwm_output_generator( LOG.debug("writing lastobs files took %s seconds." % (time.time() - start)) - if 'flowveldepth' in locals(): - LOG.debug(flowveldepth) + # if 'flowveldepth' in locals(): + # LOG.debug(flowveldepth) LOG.debug("output complete in %s seconds." % (time.time() - start_time)) ################### Parity Check if parity_set: + LOG.info( "conducting parity check, comparing WRF Hydro results against t-route results" ) - + start_time = time.time() - + parity_check( parity_set, results, ) - + LOG.debug("parity check complete in %s seconds." 
% (time.time() - start_time)) From 11556c0fcde67476bb58a6313df468a82dcedae8 Mon Sep 17 00:00:00 2001 From: Amin Torabi <140189926+AminTorabi-NOAA@users.noreply.github.com> Date: Fri, 26 Jul 2024 16:37:26 -0400 Subject: [PATCH 14/29] mask output (#806) * mask_output * deleting pdb * error on NHD * change in yaml file * handling some edge cases * add 9999 to record all nex * delete extra pdb * update to run v3 * adding 9999 for wb * speeding up the process --- .../troute/config/output_parameters.py | 1 + src/troute-network/troute/AbstractNetwork.py | 10 +- .../troute/HYFeaturesNetwork.py | 12 ++ src/troute-network/troute/NHDNetwork.py | 3 +- src/troute-network/troute/nhd_io.py | 113 +++++++++++++++++- src/troute-nwm/src/nwm_routing/__main__.py | 16 ++- src/troute-nwm/src/nwm_routing/output.py | 51 ++++---- .../domain/mask_output.yaml | 25 ++++ .../test_AnA_V4_HYFeature.yaml | 1 + 9 files changed, 203 insertions(+), 29 deletions(-) create mode 100644 test/LowerColorado_TX_v4/domain/mask_output.yaml diff --git a/src/troute-config/troute/config/output_parameters.py b/src/troute-config/troute/config/output_parameters.py index b367582d2..983c1d9fb 100644 --- a/src/troute-config/troute/config/output_parameters.py +++ b/src/troute-config/troute/config/output_parameters.py @@ -94,6 +94,7 @@ class ParityCheckCompareFileSet(BaseModel): class StreamOutput(BaseModel): # NOTE: required if writing StreamOutput files stream_output_directory: Optional[DirectoryPath] = None + mask_output: Optional[FilePath] = None stream_output_time: int = 1 stream_output_type:streamOutput_allowedTypes = ".nc" stream_output_internal_frequency: Annotated[int, Field(strict=True, ge=5)] = 5 diff --git a/src/troute-network/troute/AbstractNetwork.py b/src/troute-network/troute/AbstractNetwork.py index 344bd9919..5bbdffa4b 100644 --- a/src/troute-network/troute/AbstractNetwork.py +++ b/src/troute-network/troute/AbstractNetwork.py @@ -34,7 +34,7 @@ class AbstractNetwork(ABC): "supernetwork_parameters", "waterbody_parameters","data_assimilation_parameters", "restart_parameters", "compute_parameters", "forcing_parameters", "hybrid_parameters", "preprocessing_parameters", "output_parameters", - "verbose", "showtiming", "break_points", "_routing"] + "verbose", "showtiming", "break_points", "_routing", "_nexus_dict", "_poi_nex_dict"] def __init__(self, from_files=True, value_dict={}): @@ -371,6 +371,14 @@ def gages(self): @property def dataframe(self): return self._dataframe + + @property + def nexus_dict(self): + return self._nexus_dict + + @property + def poi_nex_dict(self): + return self._poi_nex_dict @property def terminal_codes(self): diff --git a/src/troute-network/troute/HYFeaturesNetwork.py b/src/troute-network/troute/HYFeaturesNetwork.py index b476df709..f5eb91bc7 100644 --- a/src/troute-network/troute/HYFeaturesNetwork.py +++ b/src/troute-network/troute/HYFeaturesNetwork.py @@ -282,6 +282,8 @@ def __init__(self, # Preprocess network objects self.preprocess_network(flowpaths, nexus) + + self.crosswalk_nex_flowpath_poi(flowpaths, nexus) # Preprocess waterbody objects self.preprocess_waterbodies(lakes, nexus) @@ -415,6 +417,16 @@ def preprocess_network(self, flowpaths, nexus): # to the model engine/coastal models self._nexus_latlon = nexus + def crosswalk_nex_flowpath_poi(self, flowpaths, nexus): + + mask_flowpaths = flowpaths['toid'].str.startswith(('nex-', 'tnex-')) + filtered_flowpaths = flowpaths[mask_flowpaths] + self._nexus_dict = filtered_flowpaths.groupby('toid')['id'].apply(list).to_dict() ##{id: toid} + if 'poi_id' in 
nexus.columns: + self._poi_nex_dict = nexus.groupby('poi_id')['id'].apply(list).to_dict() + else: + self._poi_nex_dict = None + def preprocess_waterbodies(self, lakes, nexus): # If waterbodies are being simulated, create waterbody dataframes and dictionaries if not lakes.empty: diff --git a/src/troute-network/troute/NHDNetwork.py b/src/troute-network/troute/NHDNetwork.py index 115280b2c..ee4afcf52 100644 --- a/src/troute-network/troute/NHDNetwork.py +++ b/src/troute-network/troute/NHDNetwork.py @@ -45,7 +45,8 @@ def __init__( self.output_parameters = output_parameters self.verbose = verbose self.showtiming = showtiming - + self._poi_nex_dict = None + self._nexus_dict = None if self.verbose: print("creating supernetwork connections set") if self.showtiming: diff --git a/src/troute-network/troute/nhd_io.py b/src/troute-network/troute/nhd_io.py index 645af585f..de50b0847 100644 --- a/src/troute-network/troute/nhd_io.py +++ b/src/troute-network/troute/nhd_io.py @@ -2007,6 +2007,8 @@ def write_flowveldepth_netcdf(stream_output_directory, file_name, # ============ DIMENSIONS =================== _ = ncfile.createDimension('feature_id', len(flow)) _ = ncfile.createDimension('time', len(timestamps)) + max_str_len = max(len(str(x)) for x in flow.index.get_level_values('Type')) + _ = ncfile.createDimension('type_strlen', max_str_len) #_ = ncfile.createDimension('gage', gage) #_ = ncfile.createDimension('nudge_timestep', nudge_timesteps) # Add dimension for nudge time steps @@ -2048,12 +2050,24 @@ def write_flowveldepth_netcdf(stream_output_directory, file_name, datatype = 'int64', dimensions = ("feature_id",), ) - FEATURE_ID[:] = flow.index + FEATURE_ID[:] = flow.index.get_level_values('featureID') ncfile['feature_id'].setncatts( { 'long_name': 'Segment ID', } ) + # =========== type VARIABLE =============== + TYPE = ncfile.createVariable( + varname="type", + datatype=f'S{max_str_len}', + dimensions=("feature_id",), + ) + TYPE[:] = np.array(flow.index.get_level_values('Type').astype(str).tolist(), dtype=f'S{max_str_len}') + ncfile['type'].setncatts( + { + 'long_name': 'Type', + } + ) # =========== flow VARIABLE =============== flow_var = ncfile.createVariable( @@ -2128,9 +2142,99 @@ def write_flowveldepth_netcdf(stream_output_directory, file_name, 'code_version': '', } ) +def stream_output_mask_reader(stream_output_mask): + if not stream_output_mask: + return {} + with open(stream_output_mask, 'r') as file: + mask_list = yaml.safe_load(file) + + return mask_list + +def mask_find_seg(mask_list, nexus_dict, poi_crosswalk): + seg_id = [] + nex_id = {} + + if not mask_list: + return nex_id, seg_id + if 'wb' in mask_list and mask_list['wb']: + if 9999 not in mask_list['wb']: + seg_id.extend(mask_list['wb']) + else: + seg_id.extend([9999]) + + if 'nex' in mask_list and mask_list['nex']: + if 9999 in mask_list['nex']: + + for key, val in nexus_dict.items(): + nex_key = int(key.split('-')[-1]) + nex_id[nex_key] = [int(v.split('-')[-1]) for v in val] + else: + for key in mask_list['nex']: + item = f'nex-{key}' + nex_id[key] = [int(val.split('-')[-1]) for val in nexus_dict.get(item, [])] + + return nex_id, seg_id + +def updated_flowveldepth(flowveldepth, nex_id, seg_id, mask_list): + flowveldepth.index.name = 'featureID' + flowveldepth['Type'] = 'wb' + flowveldepth.set_index('Type', append=True, inplace=True) + + def create_mask(ids): + if 9999 in ids: + ids = flowveldepth.index.get_level_values('featureID') + + masked = (flowveldepth.index.get_level_values('featureID').isin(ids)) & 
(flowveldepth.index.get_level_values('Type') == 'wb') + return masked + + flowveldepth_seg = flowveldepth[create_mask(seg_id)] if seg_id else pd.DataFrame() + + if nex_id: + nex_df = pd.DataFrame([(nex, wb) for nex, wbs in nex_id.items() for wb in wbs], columns=['nex', 'featureID']) + flowveldepth_reset = flowveldepth.reset_index() + merge_flowveldepth_reset = flowveldepth_reset.merge(nex_df, on='featureID', how='left') + + merge_flowveldepth_reset = merge_flowveldepth_reset.dropna(subset=['nex']) + merge_flowveldepth_reset['nex'] = merge_flowveldepth_reset['nex'].astype(int) + # Define custom aggregation functions + def sum_q_columns(group): + q_columns = [col for col in group.columns if col[1] == 'q'] + return group[q_columns].sum() + + def custom_v(group): + v_columns = [col for col in group.columns if col[1] == 'v'] + v_values = group[v_columns] + if len(v_values) > 1: + return pd.Series({col: np.nan for col in v_values.columns}) + else: + return v_values.iloc[0] + + def avg_d_columns(group): + d_columns = [col for col in group.columns if col[1] == 'd'] + return group[d_columns].mean() + + # Apply the groupby with the custom aggregation functions + def custom_agg(group): + result = pd.concat([sum_q_columns(group), custom_v(group), avg_d_columns(group)]) + return result + + all_nex_data = merge_flowveldepth_reset.groupby('nex').apply(custom_agg) + all_nex_data.index = all_nex_data.index.rename('featureID') + all_nex_data['Type'] = 'nex' + # Set the new 'Type' column as an index + all_nex_data = all_nex_data.set_index('Type', append=True) + + else: + all_nex_data = pd.DataFrame() + + if not flowveldepth_seg.empty or not all_nex_data.empty: + flowveldepth = pd.concat([flowveldepth_seg, all_nex_data]) + + return flowveldepth def write_flowveldepth( stream_output_directory, + stream_output_mask, flowveldepth, nudge, usgs_positions_id, @@ -2140,6 +2244,8 @@ def write_flowveldepth( stream_output_type, stream_output_internal_frequency = 5, cpu_pool = 1, + poi_crosswalk = None, + nexus_dict= None, ): ''' Write the results of flowveldepth and nudge to netcdf- break. 
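A minimal, illustrative sketch of the aggregation rule that `updated_flowveldepth()` above applies when rolling waterbody segments up to a nexus: flows (`q`) are summed across contributing segments, depths (`d`) are averaged, and velocity (`v`) is kept only when a single segment feeds the nexus. The single-timestep columns and feature IDs below are simplified, made-up stand-ins rather than the patched multi-timestep layout.

```python
import pandas as pd

# two flowpath segments draining to one nexus; one timestep of q/v/d
fvd = pd.DataFrame(
    {'q': [10.0, 4.0], 'v': [0.5, 0.3], 'd': [1.0, 0.8]},
    index=pd.Index([2420805, 2420804], name='featureID'),
)
nex_map = pd.Series({2420805: 2420803, 2420804: 2420803}, name='nex')

grouped = fvd.join(nex_map).groupby('nex')
nexus_fvd = pd.DataFrame({
    'q': grouped['q'].sum(),                              # flows add at the junction
    'v': grouped['v'].mean().where(grouped.size() == 1),  # NaN when >1 contributor
    'd': grouped['d'].mean(),                             # depths are averaged
})
print(nexus_fvd)
```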
@@ -2150,6 +2256,11 @@ def write_flowveldepth( nudge (numpy.ndarray) - nudge data with shape (76, 289) usgs_positions_id (array) - Position ids of usgs gages ''' + + mask_list = stream_output_mask_reader(stream_output_mask) + nex_id, seg_id = mask_find_seg(mask_list, nexus_dict, poi_crosswalk) + flowveldepth = updated_flowveldepth(flowveldepth, nex_id, seg_id, mask_list) + # timesteps, variable = zip(*flowveldepth.columns.tolist()) # timesteps = list(timesteps) n_timesteps = flowveldepth.shape[1]//3 diff --git a/src/troute-nwm/src/nwm_routing/__main__.py b/src/troute-nwm/src/nwm_routing/__main__.py index 4b2150240..01dbd4bad 100644 --- a/src/troute-nwm/src/nwm_routing/__main__.py +++ b/src/troute-nwm/src/nwm_routing/__main__.py @@ -109,10 +109,10 @@ def main_v04(argv): network_end_time = time.time() task_times['network_creation_time'] = network_end_time - network_start_time - + # Create run_sets: sets of forcing files for each loop run_sets = network.build_forcing_sets() - + # Create da_sets: sets of TimeSlice files for each loop if "data_assimilation_parameters" in compute_parameters: da_sets = hnu.build_da_sets(data_assimilation_parameters, run_sets, network.t0) @@ -290,9 +290,13 @@ def main_v04(argv): forcing_end_time = time.time() task_times['forcing_time'] += forcing_end_time - route_end_time - - output_start_time = time.time() + if network.poi_nex_dict: + poi_crosswalk = network.poi_nex_dict + else: + poi_crosswalk = dict() + output_start_time = time.time() + #TODO Update this to work with either network type... nwm_output_generator( run, @@ -311,7 +315,9 @@ def main_v04(argv): data_assimilation_parameters, data_assimilation.lastobs_df, network.link_gage_df, - network.link_lake_crosswalk, + network.link_lake_crosswalk, + network.nexus_dict, + poi_crosswalk, logFileName ) diff --git a/src/troute-nwm/src/nwm_routing/output.py b/src/troute-nwm/src/nwm_routing/output.py index 08bef8c78..38a6e6547 100644 --- a/src/troute-nwm/src/nwm_routing/output.py +++ b/src/troute-nwm/src/nwm_routing/output.py @@ -90,25 +90,28 @@ def _parquet_output_format_converter(df, start_datetime, dt, configuration, pref def nwm_output_generator( - run, - results, - supernetwork_parameters, - output_parameters, - parity_parameters, - restart_parameters, - parity_set, - qts_subdivisions, - return_courant, - cpu_pool, - waterbodies_df, - waterbody_types_df, - duplicate_ids_df, - data_assimilation_parameters=False, - lastobs_df=None, - link_gage_df=None, - link_lake_crosswalk=None, - logFileName='NONE' + run, + results, + supernetwork_parameters, + output_parameters, + parity_parameters, + restart_parameters, + parity_set, + qts_subdivisions, + return_courant, + cpu_pool, + waterbodies_df, + waterbody_types_df, + duplicate_ids_df, + data_assimilation_parameters=False, + lastobs_df = None, + link_gage_df = None, + link_lake_crosswalk = None, + nexus_dict = None, + poi_crosswalk = None, + logFileName='NONE' ): + dt = run.get("dt") nts = run.get("nts") t0 = run.get("t0") @@ -252,14 +255,18 @@ def nwm_output_generator( if stream_output: stream_output_directory = stream_output['stream_output_directory'] + stream_output_mask = stream_output.get('mask_output',) stream_output_timediff = stream_output['stream_output_time'] stream_output_type = stream_output['stream_output_type'] stream_output_internal_frequency = stream_output['stream_output_internal_frequency'] - + if stream_output_mask: + stream_output_mask = Path(stream_output_mask) + nudge = np.concatenate([r[8] for r in results]) usgs_positions_id = 
np.concatenate([r[3][0] for r in results]) nhd_io.write_flowveldepth( - Path(stream_output_directory), + Path(stream_output_directory), + stream_output_mask, flowveldepth, nudge, usgs_positions_id, @@ -268,7 +275,9 @@ def nwm_output_generator( int(stream_output_timediff), stream_output_type, stream_output_internal_frequency, - cpu_pool = cpu_pool + cpu_pool = cpu_pool, + poi_crosswalk = poi_crosswalk, + nexus_dict= nexus_dict, ) if (not logFileName == 'NONE'): diff --git a/test/LowerColorado_TX_v4/domain/mask_output.yaml b/test/LowerColorado_TX_v4/domain/mask_output.yaml new file mode 100644 index 000000000..f5abd1588 --- /dev/null +++ b/test/LowerColorado_TX_v4/domain/mask_output.yaml @@ -0,0 +1,25 @@ +##### Description ##### +# If you want to mask flowpaths, make the list below. +# please provide in either POI, NEXUS or flowpath ID. +# by putting 9999 in nex or wb, it will record all the nexus or wb points +####################### +# mask: +wb: + # - 9999 + # - 2427667 + # - 2427668 + # - 2427671 + - 2420805 + - 2420804 + - 2426394 + +nex: + # - 9999 + # - 2427668 + # - 2427669 + - 2420805 + - 2420803 + # - 2420970 + # - 2420863 + + diff --git a/test/LowerColorado_TX_v4/test_AnA_V4_HYFeature.yaml b/test/LowerColorado_TX_v4/test_AnA_V4_HYFeature.yaml index 7f1990c8e..0e00ef5cd 100644 --- a/test/LowerColorado_TX_v4/test_AnA_V4_HYFeature.yaml +++ b/test/LowerColorado_TX_v4/test_AnA_V4_HYFeature.yaml @@ -131,6 +131,7 @@ compute_parameters: # lastobs_output: lastobs/ # stream_output : # stream_output_directory: output/ +# mask_output: domain/mask_output.yaml # stream_output_time: 1 #[hr] ** Consider `stream_output_time = -1` means save everything in one file ** # stream_output_type: '.nc' #please select only between netcdf '.nc' or '.csv' or '.pkl' # stream_output_internal_frequency: 60 #[min] it should be order of 5 minutes. For instance if you want to output every hour put 60 From 0e6095943715283040b603c3b90f26c45106a9dd Mon Sep 17 00:00:00 2001 From: Neptune-Meister <140385333+JurgenZach-NOAA@users.noreply.github.com> Date: Mon, 29 Jul 2024 07:13:39 -0700 Subject: [PATCH 15/29] Included pip & venv based instructions for WSL2 (#812) (#813) * Included pip & venv based instructions for WSL2 (#812) Possible Cython compile errors in T-route caused by PR784 (https://github.com/NOAA-OWP/t-route/pull/784) have resulted in a recommendation to compile in "no-editable' mode (PR 801, https://github.com/NOAA-OWP/t-route/pull/801). The no-e option, in turn, has resulted in the necessity to recompile T-route after each edit of python code. The instructions provided here showcase how T-route can be installed in a manner compatible with PR 784 while avoiding the non-editable compile option. It is based on pip and venv, avoiding conda altogether. It has been implemented on WSL, but should be readily adaptable to the same recent long-term stable Ubuntu distros (22.04 and 20.04). * Updated shell formatting * Update readme.md * Update readme.md * Only clone single branch * Included warning about mixing pip and conda --- readme.md | 114 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/readme.md b/readme.md index 1e68b5ed3..a0bc3fe45 100644 --- a/readme.md +++ b/readme.md @@ -111,6 +111,120 @@ python3 -m nwm_routing -f -V4 test_AnA_V4_HYFeature.yaml **Note**: The following instructions are for setting up T-Route on a Linux environment (standalone, no MPI). If you are using Windows, please install WSL (Windows Subsystem for Linux) before proceeding. 
+### T-Route Setup and Testing Guide for Windows Users WITHOUT conda [based on pip and venv - only widely available dependencies]. +### WARNING: INSTALLATION WITHIN EXISTING MINICONDA/CONDA VIRTUAL ENVIRONMENT NOT RECOMMENDED, PIP AND CONDA DO NOT MIX WELL, AND YOU MAY BREAK YOUR CONDA ENVIRONMENT! + +1. **Install Recommended distro:** + - Download and install WSL2 for your Windows OS + - We recommend long-term stable (LTS) Ubuntu distribution 22.04 or 20.04 (24.04 not recommended yet) + - Open Windows Power Shell and issue + ``` shell + wsl --install Ubuntu-22.04 + ``` + - Enter (root) username and password (2x) of your choice + +2. **Set up venv-based virtual environment:** + - From root (the username you created under 1): + - Update Linux distro: + ```shell + sudo apt update + ``` + - Install pip (package manager): + ```shell + sudo apt install python3-pip + ``` + - Install venv: + ```shell + sudo apt install python3.10-venv + ``` + - Create a virtual environment for T-route (named here 'troute-env1'): + ```shell + python3 -m venv troute_env1 + ``` + - Activate your shiny new virtual environment: + ```shell + source troute_env1/bin/activate + ``` + - Now, the command prompts in the Shell window should start with (troute-env1) + +3. **Clone T-route:** + - Go to a folder of your choice (here, your home folder) and create a T-route directory + ```shell + mkdir troute1 + cd troute1 + ``` + - Clone a T-route repository (the current main branch is used as an example): + ```shell + git clone --progress --single-branch --branch master http://github.com/NOAA-OWP/t-route.git + cd troute1 + ``` + - Install python packages per requirements file + ```shell + pip install -r requirements.txt + ``` + +4. **Download & build netcdf fortran libraries from UCAR:** + - Go to a folder of your choice (here, your home folder) and download the source code: + ```shell + cd ~ + wget https://downloads.unidata.ucar.edu/netcdf-fortran/4.6.1/netcdf-fortran-4.6.1.tar.gz + ``` + - Unzip it: + ```shell + tar xvf netcdf-fortran-4.6.1.tar.gz + ``` + - Enter the directory: + ```shell + cd netcdf-fortran-4.6.1/ + ``` + - Install some prerequisites (Fortran compiler, build essentials, standard C-netcdf library): + ```shell + sudo apt install gfortran + sudo apt install build-essential + sudo apt-get install libnetcdf-dev + ``` + - Configure the fortran-netcdf libraries: + ```shell + ./configure + ``` + - There should be no error message, and the output log should end up with something like: + ![image](https://github.com/user-attachments/assets/48268212-0b74-4f75-9d52-97f68e6c80d0) + - Check the installation (running two sets of examples): + ```shell + make check + ``` + - Again, there should be no error message (expect some warnings, though), and output should end with "passing" of two sets: + ![image](https://github.com/user-attachments/assets/83745989-9f14-4c1b-a2a1-675aa94e5181) + - Finally, install the libraries: + ```shell + sudo make install + ``` + - Output should be something like: + ![image](https://github.com/user-attachments/assets/57e48501-18f4-4004-9b10-5a9245186e38) + +5. 
**Build and test T-route:** + - Go to your T-route folder: + ```shell + cd ~/troute1 + ``` + - Compile T-route (may take a few minutes, depending on the machine): + ```shell + ./compiler.sh + ``` + - Set path to runtime netcdf-Fortran library [recommend including this in the .bashrc file or your equivalent]: + ```shell + export LD_LIBRARY_PATH=/usr/local/lib/ + ``` + - Run one of the demo examples provided: + ```shell + cd test/LowerColorado_TX + python -m nwm_routing -f -V3 test_AnA.yaml + ``` + - The latter is a hybrid (MC + diffusive) routing example that should run within a few minutes at most + + +### T-Route Setup Instructions and Troubleshooting Guide for Windows Users - Legacy Conda Version [may have to be built with compiler.sh no-e option] + 1. **Install Required Components:** - Open the WSL terminal. - Install Miniconda, Python, Pip, and Git. From a7260b78531f6dfb39638b07189310affdafb179 Mon Sep 17 00:00:00 2001 From: shorvath-noaa <103054653+shorvath-noaa@users.noreply.github.com> Date: Wed, 31 Jul 2024 10:49:10 -0600 Subject: [PATCH 16/29] Great Lakes Data Assimilation (#808) * update creation of great lakes climatology * pass great lakes DA info through to compute * update properties handling great lakes info * set default empty df for great lakes info * preprocess great lakes DA info, create 'update_after_compute()' function * create new get_timeslice_obs function specific for great lakes * add option for great lakes DA * function for performing great lakes DA * finalize great_lakes.update_after_compute() function * define after compute functions for great lakes DA * update _prep_reservoir_da_dataframes() to include great lakes * refine passing objects to great lakes da module * finalize da function for great lakes * update creation of great lakes climatology * pass great lakes DA info through to compute * update properties handling great lakes info * set default empty df for great lakes info * preprocess great lakes DA info, create 'update_after_compute()' function * create new get_timeslice_obs function specific for great lakes * add option for great lakes DA * function for performing great lakes DA * finalize great_lakes.update_after_compute() function * define after compute functions for great lakes DA * update _prep_reservoir_da_dataframes() to include great lakes * refine passing objects to great lakes da module * finalize da function for great lakes * add default empty dataframe for gl_climatology_df_sub * add column names for empty data frame gl_df_sub * set default empty data frames for great lakes dfs * move _canadian_gage_df slot from HYFeatures to AbstractNetwork * check if list of new great_lakes_param_dfs is empty before trying to concat * set default _canadian_gage_link_df in NHDNetwork * update DA settings for test cases * add empty placeholders in diffusive results for great lakes DA --- src/troute-network/troute/AbstractNetwork.py | 12 +- src/troute-network/troute/DataAssimilation.py | 302 ++++++++++---- .../troute/HYFeaturesNetwork.py | 14 +- src/troute-network/troute/NHDNetwork.py | 2 + src/troute-network/troute/nhd_io.py | 90 +++++ .../troute/rfc_lake_gage_crosswalk.py | 17 +- src/troute-nwm/src/nwm_routing/__main__.py | 12 + src/troute-routing/troute/routing/compute.py | 107 ++++- .../troute/routing/fast_reach/mc_reach.pyx | 376 ++++++++++-------- .../routing/fast_reach/reservoir_GL_da.py | 131 ++++++ test/LowerColorado_TX/test_AnA_V4_NHD.yaml | 6 +- .../test_AnA_V4_HYFeature.yaml | 2 +- 12 files changed, 818 insertions(+), 253 deletions(-) create mode 
100644 src/troute-routing/troute/routing/fast_reach/reservoir_GL_da.py diff --git a/src/troute-network/troute/AbstractNetwork.py b/src/troute-network/troute/AbstractNetwork.py index 5bbdffa4b..830be578e 100644 --- a/src/troute-network/troute/AbstractNetwork.py +++ b/src/troute-network/troute/AbstractNetwork.py @@ -27,6 +27,7 @@ class AbstractNetwork(ABC): __slots__ = ["_dataframe", "_waterbody_connections", "_gages", "_terminal_codes", "_connections", "_waterbody_df", "_waterbody_types_df", "_waterbody_type_specified", "_link_gage_df", + "_canadian_gage_link_df", "_independent_networks", "_reaches_by_tw", "_flowpath_dict", "_reverse_network", "_q0", "_t0", "_link_lake_crosswalk", "_usgs_lake_gage_crosswalk", "_usace_lake_gage_crosswalk", "_rfc_lake_gage_crosswalk", @@ -34,7 +35,8 @@ class AbstractNetwork(ABC): "supernetwork_parameters", "waterbody_parameters","data_assimilation_parameters", "restart_parameters", "compute_parameters", "forcing_parameters", "hybrid_parameters", "preprocessing_parameters", "output_parameters", - "verbose", "showtiming", "break_points", "_routing", "_nexus_dict", "_poi_nex_dict"] + "verbose", "showtiming", "break_points", "_routing", "_gl_climatology_df", "_nexus_dict", "_poi_nex_dict"] + def __init__(self, from_files=True, value_dict={}): @@ -410,6 +412,14 @@ def refactored_reaches(self): @property def unrefactored_topobathy_df(self): return self._routing.unrefactored_topobathy_df + + @property + def great_lakes_climatology_df(self): + return self._gl_climatology_df + + @property + def canadian_gage_df(self): + return self._canadian_gage_link_df def set_synthetic_wb_segments(self, synthetic_wb_segments, synthetic_wb_id_offset): diff --git a/src/troute-network/troute/DataAssimilation.py b/src/troute-network/troute/DataAssimilation.py index 7dd89a8bd..0c7971616 100644 --- a/src/troute-network/troute/DataAssimilation.py +++ b/src/troute-network/troute/DataAssimilation.py @@ -35,7 +35,7 @@ class AbstractDA(ABC): "_reservoir_usgs_df", "_reservoir_usgs_param_df", "_reservoir_usace_df", "_reservoir_usace_param_df", "_reservoir_rfc_df", "_reservoir_rfc_synthetic", - "_reservoir_rfc_param_df", + "_reservoir_rfc_param_df", "_great_lakes_df", "_great_lakes_param_df", "_dateNull", "_datesSecondsArray_usgs", "_nDates_usgs", "_stationArray_usgs", "_stationStringLengthArray_usgs", "_nStations_usgs", @@ -213,7 +213,7 @@ def __init__(self, network, from_files, value_dict, da_run=[]): self._last_obs_df = _reindex_link_to_lake_id(self._last_obs_df, network.link_lake_crosswalk) self._usgs_df = _create_usgs_df(data_assimilation_parameters, streamflow_da_parameters, run_parameters, network, da_run) - if 'canada_timeslice_files' in da_run: + if ('canada_timeslice_files' in da_run) & (not network.canadian_gage_df.empty): self._canada_df = _create_canada_df(data_assimilation_parameters, streamflow_da_parameters, run_parameters, network, da_run) self._canada_is_created = True LOG.debug("NudgingDA class is completed in %s seconds." 
% (time.time() - main_start_time)) @@ -272,7 +272,7 @@ def update_for_next_loop(self, network, da_run,): if streamflow_da_parameters.get('streamflow_nudging', False): self._usgs_df = _create_usgs_df(data_assimilation_parameters, streamflow_da_parameters, run_parameters, network, da_run) - if 'canada_timeslice_files' in da_run: + if ('canada_timeslice_files' in da_run) & (not network.canadian_gage_df.empty): self._canada_df = _create_canada_df(data_assimilation_parameters, streamflow_da_parameters, run_parameters, network, da_run) else: self._canada_df = pd.DataFrame() @@ -543,6 +543,7 @@ def __init__(self, network, from_files, value_dict, da_run=[]): if not self._usgs_df.empty: self._usgs_df = self._usgs_df.loc[:,network.t0:] LOG.debug("PersistenceDA class is completed in %s seconds." % (time.time() - PersistenceDA_start_time)) + def update_after_compute(self, run_results,): ''' Function to update data assimilation object after running routing module. @@ -592,12 +593,12 @@ def update_for_next_loop(self, network, da_run,): run_parameters = self._run_parameters # update usgs_df if it is not empty - streamflow_da_parameters = data_assimilation_parameters.get('streamflow_da', None) - reservoir_da_parameters = data_assimilation_parameters.get('reservoir_da', None) + streamflow_da_parameters = data_assimilation_parameters.get('streamflow_da', {}) + reservoir_da_parameters = data_assimilation_parameters.get('reservoir_da', {}) - if not self._usgs_df.empty: - - if reservoir_da_parameters.get('reservoir_persistence_da').get('reservoir_persistence_usgs', False): + if not self.usgs_df.empty: + + if reservoir_da_parameters.get('reservoir_persistence_da',{}).get('reservoir_persistence_usgs', False): gage_lake_df = ( network.usgs_lake_gage_crosswalk. @@ -630,17 +631,16 @@ def update_for_next_loop(self, network, da_run,): # subset and re-index `usgs_df`, using the segID <> lakeID crosswalk self._reservoir_usgs_df = ( - usgs_df_15min.join(link_lake_df, how = 'inner'). - reset_index(). - set_index('usgs_lake_id'). - drop(['link'], axis = 1) - ) - + usgs_df_15min.join(link_lake_df, how = 'inner'). + reset_index(drop=True). + set_index('usgs_lake_id') + ) + # replace link ids with lake ids, for gages at waterbody outlets, # otherwise, gage data will not be assimilated at waterbody outlet # segments. if network.link_lake_crosswalk: - self._usgs_df = _reindex_link_to_lake_id(self._usgs_df, network.link_lake_crosswalk) + self._usgs_df = _reindex_link_to_lake_id(self.usgs_df, network.link_lake_crosswalk) elif reservoir_da_parameters.get('reservoir_persistence_usgs', False): ( @@ -655,6 +655,12 @@ def update_for_next_loop(self, network, da_run,): da_run, lake_gage_crosswalk = network.usgs_lake_gage_crosswalk, res_source = 'usgs') + + # replace link ids with lake ids, for gages at waterbody outlets, + # otherwise, gage data will not be assimilated at waterbody outlet + # segments. 
+ if network.link_lake_crosswalk: + usgs_df = _reindex_link_to_lake_id(usgs_df, network.link_lake_crosswalk) # USACE if reservoir_da_parameters.get('reservoir_persistence_da').get('reservoir_persistence_usace', False): @@ -717,62 +723,114 @@ def __init__(self, network, from_files, value_dict, da_run): run_parameters = self._run_parameters reservoir_persistence_da = data_assimilation_parameters.get('reservoir_da', {}).get('reservoir_persistence_da', {}) + self._great_lakes_df = pd.DataFrame() + self._great_lakes_param_df = pd.DataFrame() + if reservoir_persistence_da: greatLake = reservoir_persistence_da.get('reservoir_persistence_greatLake', False) if greatLake: - streamflow_da_parameters = data_assimilation_parameters.get('streamflow_da', {}) + GL_crosswalk_df = pd.DataFrame( + { + 'link': [4800002,4800004,4800006], + 'gages': ['04127885','04159130','02HA013'] + } + ).set_index('link') - if not self._canada_is_created and ('canada_timeslice_files' in da_run): - self._canada_df = _create_canada_df(data_assimilation_parameters, streamflow_da_parameters, run_parameters, network, da_run) - self._canada_is_created = True - - if 'LakeOntario_outflow' in da_run: - self._lake_ontario_df = _create_LakeOntario_df(run_parameters, network, da_run) - else: - self._lake_ontario_df = pd.DataFrame() - - lake_ontario_df = self._lake_ontario_df - canada_df = self._canada_df - - ids_to_check = {'4800002': '04127885', '4800004': '13196034'} - # the segment corresponding to 04127885 gages isn't exist as of now. Should be replaced in future - # Initialize an empty DataFrame with the same columns as the usgs DataFrame - if self._usgs_df.empty: - self._usgs_df = _create_usgs_df(data_assimilation_parameters, streamflow_da_parameters, run_parameters, network, da_run) - - usgs_df_GL = pd.DataFrame(columns=self._usgs_df.columns) - - # Check if any ids are present in the index - for key, value in ids_to_check.items(): - if value in self._usgs_df.index: - temp_df = (self._usgs_df.loc[[value]] - .transpose() - .resample('15min') - .asfreq() - .transpose()) - temp_df.index = [key] - else: - temp_df = pd.DataFrame(index=[key], columns=self._usgs_df.columns) - - usgs_df_GL = pd.concat([usgs_df_GL, temp_df], axis=0) + self._great_lakes_df, self._great_lakes_param_df = _create_GL_dfs( + GL_crosswalk_df, + data_assimilation_parameters, + run_parameters, + da_run, + network.t0, + ) - if not lake_ontario_df.empty: - lake_ontario_df = lake_ontario_df.T.reset_index().drop('index', axis = 1) - else: - lake_ontario_df = pd.DataFrame(columns=self._usgs_df.columns) - lake_ontario_df['link'] = 4800007 - lake_ontario_df.set_index('link', inplace=True) + LOG.debug("great_lake class is completed in %s seconds." % (time.time() - great_lake_start_time)) + + def update_after_compute(self, run_results, time_increment): + ''' + Function to update data assimilation object after running routing module. + + Arguments: + ---------- + - run_results (list): output from the compute kernel sequence, + organized (because that is how it comes + out of the kernel) by network. + For each item in the result, there are + 10 elements, the 9th of which are lists of + four elements containing: + 1) a list of the segments ids where data + assimilation was performed (if any) in that network; + 2) a list of the previously persisted outflow; + 3) a list of the previously assimilated observation times; + 4) a list of the update time. 
+ + Returns: + -------- + - data_assimilation (Object): Object containing all data assimilation information + - _great_lakes_param_df (DataFrame): Great Lakes reservoir DA parameters + ''' + # get reservoir DA initial parameters for next loop iteration + great_lakes_param_df = pd.DataFrame() + tmp_list = [] + for r in run_results: - if canada_df.empty: - canada_df = pd.DataFrame(columns=self._usgs_df.columns, index=pd.Index([4800006], name='link')) + if len(r[9][0]) > 0: + tmp_df = pd.DataFrame(data = r[9][0], columns = ['lake_id']) + tmp_df['previous_assimilated_outflows'] = r[9][1] + tmp_df['previous_assimilated_time'] = r[9][2] + tmp_df['update_time'] = r[9][3] + tmp_list.append(tmp_df) + + if tmp_list: + great_lakes_param_df = pd.concat(tmp_list) + great_lakes_param_df['previous_assimilated_time'] = great_lakes_param_df['previous_assimilated_time'] - time_increment + great_lakes_param_df['update_time'] = great_lakes_param_df['update_time'] - time_increment + + self._great_lakes_param_df = great_lakes_param_df + + def update_for_next_loop(self, network, da_run,): + ''' + Function to update data assimilation object for the next loop iteration. + + Arguments: + ---------- + - network (Object): network object created from abstract class + - da_run (list): list of data assimilation files separated + by for loop chunks + + Returns: + -------- + - data_assimilation (Object): Object containing all data assimilation information + - reservoir_usgs_df (DataFrame): USGS reservoir observations + - reservoir_usace_df (DataFrame): USACE reservoir observations + ''' + greatLake = False + data_assimilation_parameters = self._data_assimilation_parameters + run_parameters = self._run_parameters + reservoir_persistence_da = data_assimilation_parameters.get('reservoir_da', {}).get('reservoir_persistence_da', {}) - # List of DataFrames - dfs = [lake_ontario_df, canada_df, usgs_df_GL] + if reservoir_persistence_da: + greatLake = reservoir_persistence_da.get('reservoir_persistence_greatLake', False) + + if greatLake: + + GL_crosswalk_df = pd.DataFrame( + { + 'link': [4800002,4800004,4800006], + 'gages': ['04127885','04159130','02HA013'] + } + ).set_index('link') + + self._great_lakes_df, _ = _create_GL_dfs( + GL_crosswalk_df, + data_assimilation_parameters, + run_parameters, + da_run, + network.t0, + ) - self.great_lake_all = pd.concat(dfs, axis=0, join='outer', ignore_index=False) - LOG.debug("great_lake class is completed in %s seconds." % (time.time() - great_lake_start_time)) class RFCDA(AbstractDA): """ @@ -877,6 +935,7 @@ def __init__(self, network, from_files, value_dict): self._reservoir_rfc_df = pd.DataFrame() self._reservoir_rfc_param_df = pd.DataFrame() LOG.debug("RFCDA class is completed in %s seconds." % (time.time() - RFCDA_start_time)) + def update_after_compute(self, run_results): ''' Function to update data assimilation object after running routing module. 
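For orientation, here is a small, self-contained sketch of the bookkeeping done in `great_lake.update_after_compute()` above: element `r[9]` of each network's result tuple carries (lake ids, previously assimilated outflows, observation times, update times), which are concatenated and then shifted back by the length of the loop that just finished. The padded tuple shape and all numbers below are invented for illustration only.

```python
import pandas as pd

# one network's result tuple; only index 9 matters for Great Lakes DA bookkeeping
run_results = [
    (None,) * 9 + (([4800002, 4800004], [2105.0, 5410.0], [3600, 3600], [7200, 7200]),),
]
time_increment = 3600  # seconds simulated by the loop that just finished

frames = []
for r in run_results:
    lake_ids, prev_q, prev_t, update_t = r[9]
    if len(lake_ids) > 0:
        frames.append(pd.DataFrame({
            'lake_id': lake_ids,
            'previous_assimilated_outflows': prev_q,
            'previous_assimilated_time': prev_t,
            'update_time': update_t,
        }))

great_lakes_param_df = pd.concat(frames) if frames else pd.DataFrame()
if not great_lakes_param_df.empty:
    # express times relative to the start of the next loop iteration
    great_lakes_param_df[['previous_assimilated_time', 'update_time']] -= time_increment
print(great_lakes_param_df)
```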
@@ -939,6 +998,7 @@ def update_after_compute(self, run_results, time_increment): NudgingDA.update_after_compute(self, run_results, time_increment) PersistenceDA.update_after_compute(self, run_results) RFCDA.update_after_compute(self, run_results) + great_lake.update_after_compute(self, run_results, time_increment) def update_for_next_loop(self, network, da_run,): ''' @@ -947,6 +1007,7 @@ def update_for_next_loop(self, network, da_run,): NudgingDA.update_for_next_loop(self, network, da_run) PersistenceDA.update_for_next_loop(self, network, da_run) RFCDA.update_for_next_loop(self) + great_lake.update_for_next_loop(self, network, da_run) @property @@ -984,6 +1045,14 @@ def reservoir_rfc_df(self): @property def reservoir_rfc_param_df(self): return self._reservoir_rfc_param_df + + @property + def great_lakes_df(self): + return self._great_lakes_df + + @property + def great_lakes_param_df(self): + return self._great_lakes_param_df # -------------------------------------------------------------- @@ -1083,19 +1152,13 @@ def _create_usgs_df(data_assimilation_parameters, streamflow_da_parameters, run_ LOG.debug("Reading and preprocessing usgs timeslice files is completed in %s seconds." % (time.time() - usgs_df_start_time)) return usgs_df -def _create_LakeOntario_df(run_parameters, network, da_run): +def _create_LakeOntario_df(run_parameters, t0, da_run): LOG.info("Creating Lake Ontario dataframe is started.") LakeOntario_df_start_time = time.time() - t0 = network.t0 + start_time = t0 - pd.Timedelta(weeks = 10) nts = run_parameters.get('nts') dt = run_parameters.get('dt') - end_time = pd.to_datetime(t0) + pd.Timedelta(hours = nts/(3600/dt)) - time_total = [] - t = t0 - while t < end_time: - time_total.append(t) - t += pd.Timedelta(minutes=15) - + end_time = t0 + pd.Timedelta(hours = nts/(3600/dt)) lake_ontario_df = pd.read_csv(da_run.get('LakeOntario_outflow')) @@ -1113,17 +1176,22 @@ def _create_LakeOntario_df(run_parameters, network, da_run): lake_ontario_df = lake_ontario_df.drop_duplicates() lake_ontario_df = lake_ontario_df.drop(['Date', 'Hour'], axis=1) - # Filter DataFrame based on extracted times - time_total_df = pd.DataFrame(time_total, columns=['Datetime']) - time_total_df['Outflow(m3/s)'] = None # Initialize with None or NaN - time_total_df = time_total_df.set_index('Datetime') + # Rename outflow column to discharge + lake_ontario_df = lake_ontario_df.rename(columns={'Outflow(m3/s)': 'Discharge'}) + # Filter for needed time stamps + lake_ontario_df = lake_ontario_df[(lake_ontario_df.index>=start_time) & (lake_ontario_df.index<=end_time)] + + # Add 'link' column with waterbody ID + lake_ontario_df['link'] = 4800007 + + # Reset index and convert Datetimes to strings + lake_ontario_df.reset_index(inplace=True) + lake_ontario_df['Datetime'] = lake_ontario_df['Datetime'].dt.strftime('%Y-%m-%d_%H:%M:%S') - filtered_df = lake_ontario_df.loc[(lake_ontario_df.index >= t0) & (lake_ontario_df.index < end_time)] - total_df = pd.merge(time_total_df, filtered_df, left_index=True, right_index=True, how='left') - total_df = total_df.rename(columns={'Outflow(m3/s)_y': 'Outflow(m3/s)'}).drop(columns='Outflow(m3/s)_x') LOG.debug("Creating Lake Ontario dataframe is completed in %s seconds." 
% (time.time() - LakeOntario_df_start_time)) - return total_df + + return lake_ontario_df def _create_canada_df(data_assimilation_parameters, streamflow_da_parameters, run_parameters, network, da_run): ''' @@ -1141,7 +1209,7 @@ def _create_canada_df(data_assimilation_parameters, streamflow_da_parameters, ru Returns: -------- - - usgs_df (DataFrame): dataframe of USGS gage observations + - canada_df (DataFrame): dataframe of Canadian gage observations ''' canada_timeslices_folder = data_assimilation_parameters.get("canada_timeslices_folder", None) #lastobs_file = streamflow_da_parameters.get("wrf_hydro_lastobs_file", None) @@ -1159,11 +1227,10 @@ def _create_canada_df(data_assimilation_parameters, streamflow_da_parameters, ru canada_df_start_time = time.time() canada_files = [canada_timeslices_folder.joinpath(f) for f in da_run['canada_timeslice_files']] - if canada_files: canada_df = ( nhd_io.get_obs_from_timeslices( - network.link_gage_df, + network.canadian_gage_df, crosswalk_gage_field, crosswalk_segID_field, canada_files, @@ -1173,7 +1240,7 @@ def _create_canada_df(data_assimilation_parameters, streamflow_da_parameters, ru network.t0, run_parameters.get("cpu_pool", None) ). - loc[network.link_gage_df.index] + loc[network.canadian_gage_df.index] ) else: @@ -1982,3 +2049,74 @@ def _read_lastobs_file( LOG.debug(f"Reading last observation file is completed in %s seconds." % (time.time() - read_lastobs_start_time)) return lastobs_df +def _create_GL_dfs(GL_crosswalk_df, data_assimilation_parameters, run_parameters, + da_run, t0): + + # USGS gages: + usgs_timeslices_folder = data_assimilation_parameters.get("usgs_timeslices_folder", None) + + # TODO: join timeslice folder and files into complete path upstream + usgs_timeslices_folder = pathlib.Path(usgs_timeslices_folder) + usgs_files = [usgs_timeslices_folder.joinpath(f) for f in + da_run['usgs_timeslice_files']] + + usgs_GL_crosswalk_df = GL_crosswalk_df[GL_crosswalk_df.index.isin([4800002,4800004])] + if usgs_files: + usgs_GL_df = ( + nhd_io.get_GL_obs_from_timeslices( + usgs_GL_crosswalk_df, + usgs_files, + cpu_pool=run_parameters.get("cpu_pool", 1), + ) + ) + usgs_GL_df = pd.melt(usgs_GL_df, + var_name='Datetime', + value_name='Discharge', + ignore_index=False).dropna().reset_index() + + else: + usgs_GL_df = pd.DataFrame() + + # Canadian gages: + canadian_timeslices_folder = data_assimilation_parameters.get("canada_timeslices_folder", None) + canadian_files = [canadian_timeslices_folder.joinpath(f) for f in + da_run['canada_timeslice_files']] + + canadian_GL_crosswalk_df = GL_crosswalk_df.loc[[4800006]] + + if canadian_files: + canadian_GL_df = ( + nhd_io.get_GL_obs_from_timeslices( + canadian_GL_crosswalk_df, + canadian_files, + cpu_pool=run_parameters.get("cpu_pool", 1), + ) + ) + canadian_GL_df = pd.melt(canadian_GL_df, + var_name='Datetime', + value_name='Discharge', + ignore_index=False).dropna().reset_index() + + else: + canadian_GL_df = pd.DataFrame() + + # Lake Ontario data: + if 'LakeOntario_outflow' in da_run: + lake_ontario_df = _create_LakeOntario_df(run_parameters, t0, da_run) + else: + lake_ontario_df = pd.DataFrame() + + great_lakes_df = pd.concat( + [usgs_GL_df, canadian_GL_df, lake_ontario_df] + ).rename(columns={'link': 'lake_id'}).sort_values(by=['lake_id','Datetime']) + + great_lakes_df['time'] = pd.to_datetime(great_lakes_df['Datetime'], format='%Y-%m-%d_%H:%M:%S') - t0 + great_lakes_df['time'] = great_lakes_df.time.dt.total_seconds().astype(int) + great_lakes_df.drop('Datetime', axis=1, inplace=True) + + 
great_lakes_param_df = pd.DataFrame(great_lakes_df.lake_id.unique(), columns=['lake_id']).sort_values('lake_id') + great_lakes_param_df['previous_assimilated_outflows'] = np.nan + great_lakes_param_df['previous_assimilated_time'] = 0 + great_lakes_param_df['update_time'] = 0 + + return great_lakes_df, great_lakes_param_df \ No newline at end of file diff --git a/src/troute-network/troute/HYFeaturesNetwork.py b/src/troute-network/troute/HYFeaturesNetwork.py index f5eb91bc7..85644f3d1 100644 --- a/src/troute-network/troute/HYFeaturesNetwork.py +++ b/src/troute-network/troute/HYFeaturesNetwork.py @@ -216,7 +216,7 @@ class HYFeaturesNetwork(AbstractNetwork): """ """ - __slots__ = ["_upstream_terminal", "_nexus_latlon", "_duplicate_ids_df"] + __slots__ = ["_upstream_terminal", "_nexus_latlon", "_duplicate_ids_df",] def __init__(self, supernetwork_parameters, @@ -340,6 +340,7 @@ def gages(self): def waterbody_null(self): return np.nan #pd.NA + def preprocess_network(self, flowpaths, nexus): self._dataframe = flowpaths @@ -541,9 +542,13 @@ def preprocess_waterbodies(self, lakes, nexus): self.waterbody_dataframe, gl_wbody_df ] - ) + ).sort_index() + + self._gl_climatology_df = get_great_lakes_climatology() + else: gl_dict = {} + self._gl_climatology_df = pd.DataFrame() self._waterbody_types_df = pd.DataFrame( data = 1, @@ -569,6 +574,7 @@ def preprocess_waterbodies(self, lakes, nexus): self._waterbody_type_specified = False self._link_lake_crosswalk = None self._duplicate_ids_df = pd.DataFrame() + self._gl_climatology_df = pd.DataFrame() self._dataframe = self.dataframe.drop('waterbody', axis=1).drop_duplicates() @@ -605,6 +611,10 @@ def preprocess_data_assimilation(self, network): .rename_axis(None, axis=0).to_dict() ) + #FIXME: temporary solution, add canadian gage crosswalk dataframe. This should come from + # the hydrofabric. + self._canadian_gage_link_df = pd.DataFrame(columns=['gages','link']).set_index('link') + # Find furthest downstream gage and create our lake_gage_df to make crosswalk dataframes. lake_gage_hydroseq_df = gages_df[~gages_df['lake_id'].isnull()][['lake_id', 'value', 'hydroseq']].rename(columns={'value': 'gages'}) lake_gage_hydroseq_df['lake_id'] = lake_gage_hydroseq_df['lake_id'].astype(int) diff --git a/src/troute-network/troute/NHDNetwork.py b/src/troute-network/troute/NHDNetwork.py index ee4afcf52..189393200 100644 --- a/src/troute-network/troute/NHDNetwork.py +++ b/src/troute-network/troute/NHDNetwork.py @@ -64,6 +64,8 @@ def __init__( print("... in %s seconds." % (time.time() - start_time)) self._flowpath_dict = {} + self._gl_climatology_df = pd.DataFrame() + self._canadian_gage_link_df = pd.DataFrame(columns=['gages','link']).set_index('link') super().__init__() diff --git a/src/troute-network/troute/nhd_io.py b/src/troute-network/troute/nhd_io.py index de50b0847..539e661aa 100644 --- a/src/troute-network/troute/nhd_io.py +++ b/src/troute-network/troute/nhd_io.py @@ -1249,6 +1249,96 @@ def get_obs_from_timeslices( return observation_df_new +def get_GL_obs_from_timeslices( + crosswalk_df, + timeslice_files, + crosswalk_gage_field='gages', + crosswalk_dest_field='link', + qc_threshold=1, + cpu_pool=1, +): + """ + Read observations from TimeSlice files and organize into a Pandas DataFrame. + This is specific to observations needed for Great Lakes data assimilation, + so no interpolation happens. 
+ + Aguments + -------- + - crosswalk_gage_field (str): fieldname of gage ID data in crosswalk dataframe + - crosswalk_dest_field (str): fieldname of destination data in link_gage_df. + For streamflow DA, this is the field + containing segment IDs. For reservoir DA, + this is the field containing waterbody IDs. + - timeslice_files (list of PosixPath): Full paths to existing TimeSlice files + - qc_threshold (int): Numerical observation quality + threshold. Observations with quality + flags less than this value will be + removed and replaced witn nan. + - cpu_pool (int): Number of CPUs used for parallel + TimeSlice reading and interolation + + Returns + ------- + - observation_df_new (Pandas DataFrame): + + Notes + ----- + """ + # open TimeSlce files, organize data into dataframes + with Parallel(n_jobs=cpu_pool) as parallel: + jobs = [] + for f in timeslice_files: + jobs.append(delayed(_read_timeslice_file)(f)) + timeslice_dataframes = parallel(jobs) + + all_empty = all(df.empty for tuple in timeslice_dataframes for df in tuple) + if all_empty: + LOG.debug(f'{crosswalk_gage_field} DataFrames is empty, check timeslice files.') + return pd.DataFrame() + + # create lists of observations and obs quality dataframes returned + # from _read_timeslice_file + timeslice_obs_frames = [] + timeslice_qual_frames = [] + for d in timeslice_dataframes: + timeslice_obs_frames.append(d[0]) # TimeSlice gage observation + timeslice_qual_frames.append(d[1]) # TimeSlice observation qual + + # concatenate dataframes + timeslice_obs_df = pd.concat(timeslice_obs_frames, axis = 1) + timeslice_qual_df = pd.concat(timeslice_qual_frames, axis = 1) + + # Link <> gage crosswalk data + df = crosswalk_df.reset_index() + df = df.set_index(crosswalk_gage_field) + df.index = df.index.str.strip() + # join crosswalk data with timeslice data, indexed on crosswalk destination field + observation_df = (df.join(timeslice_obs_df). + reset_index(). + set_index(crosswalk_dest_field). + select_dtypes(include='number')) + + observation_qual_df = (df.join(timeslice_qual_df). + reset_index(). + set_index(crosswalk_dest_field). + select_dtypes(include='number')) + + # ---- Laugh testing ------ + # screen-out erroneous qc flags + observation_qual_df = (observation_qual_df. + mask(observation_qual_df < 0, np.nan). + mask(observation_qual_df > 1, np.nan) + ) + + # screen-out poor quality flow observations + observation_df = (observation_df. + mask(observation_qual_df < qc_threshold, np.nan). 
+ mask(observation_df <= 0, np.nan) + ) + + return observation_df + + def get_param_str(target_file, param): # TODO: remove this duplicate function return get_attribute(target_file, param) diff --git a/src/troute-network/troute/rfc_lake_gage_crosswalk.py b/src/troute-network/troute/rfc_lake_gage_crosswalk.py index 082b03d3a..7d4b002ef 100644 --- a/src/troute-network/troute/rfc_lake_gage_crosswalk.py +++ b/src/troute-network/troute/rfc_lake_gage_crosswalk.py @@ -1,4 +1,5 @@ import pandas as pd +import numpy as np def get_rfc_lake_gage_crosswalk(): """ @@ -77,14 +78,14 @@ def get_rfc_lake_gage_crosswalk(): def get_great_lakes_climatology(): - df = pd.DataFrame( - { - 'Month': ["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"], - 'SMR': [1946, 1907, 1879, 1929, 2105, 2202, 2306, 2378, 2343, 2282, 2235, 2046], - 'SCR': [4623, 4518, 4946, 5217, 5410, 5494, 5546, 5555, 5519, 5475, 5424, 5253], - 'NIR': [5630, 5523, 5673, 5921, 6179, 6172, 6089, 5977, 5839, 5751, 5757, 5771], - 'SLR': [6380, 6561, 6875, 7159, 7418, 7547, 7500, 7360, 7161, 6954, 6852, 6725] - } + outflow_data = np.array([[1946, 1907, 1879, 1929, 2105, 2202, 2306, 2378, 2343, 2282, 2235, 2046], + [4623, 4518, 4946, 5217, 5410, 5494, 5546, 5555, 5519, 5475, 5424, 5253], + [5630, 5523, 5673, 5921, 6179, 6172, 6089, 5977, 5839, 5751, 5757, 5771], + [6380, 6561, 6875, 7159, 7418, 7547, 7500, 7360, 7161, 6954, 6852, 6725]]) + df = ( + pd.DataFrame(data=outflow_data, index=[4800002,4800004,4800006,4800007], + columns=["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"]) + .astype('float32') ) return df \ No newline at end of file diff --git a/src/troute-nwm/src/nwm_routing/__main__.py b/src/troute-nwm/src/nwm_routing/__main__.py index 01dbd4bad..3cc6bba73 100644 --- a/src/troute-nwm/src/nwm_routing/__main__.py +++ b/src/troute-nwm/src/nwm_routing/__main__.py @@ -230,6 +230,9 @@ def main_v04(argv): data_assimilation.reservoir_usace_param_df, data_assimilation.reservoir_rfc_df, data_assimilation.reservoir_rfc_param_df, + data_assimilation.great_lakes_df, + data_assimilation.great_lakes_param_df, + network.great_lakes_climatology_df, data_assimilation.assimilation_parameters, assume_short_ts, return_courant, @@ -1141,6 +1144,9 @@ def nwm_route( reservoir_usace_param_df, reservoir_rfc_df, reservoir_rfc_param_df, + great_lakes_df, + great_lakes_param_df, + great_lakes_climatology_df, da_parameter_dict, assume_short_ts, return_courant, @@ -1231,6 +1237,9 @@ def nwm_route( reservoir_usace_param_df, reservoir_rfc_df, reservoir_rfc_param_df, + great_lakes_df, + great_lakes_param_df, + great_lakes_climatology_df, da_parameter_dict, assume_short_ts, return_courant, @@ -1675,6 +1684,9 @@ def main_v03(argv): reservoir_usace_param_df, pd.DataFrame(), #empty dataframe for RFC data...not needed unless running via BMI pd.DataFrame(), #empty dataframe for RFC param data...not needed unless running via BMI + pd.DataFrame(), #empty dataframe for great lakes data... + pd.DataFrame(), #empty dataframe for great lakes param data... + pd.DataFrame(), #empty dataframe for great lakes climatology data... 
da_parameter_dict, assume_short_ts, return_courant, diff --git a/src/troute-routing/troute/routing/compute.py b/src/troute-routing/troute/routing/compute.py index e96aedf3b..55f529596 100644 --- a/src/troute-routing/troute/routing/compute.py +++ b/src/troute-routing/troute/routing/compute.py @@ -145,6 +145,9 @@ def _prep_reservoir_da_dataframes(reservoir_usgs_df, reservoir_usace_param_df, reservoir_rfc_df, reservoir_rfc_param_df, + great_lakes_df, + great_lakes_param_df, + great_lakes_climatology_df, waterbody_types_df_sub, t0, from_files, @@ -258,11 +261,36 @@ def _prep_reservoir_da_dataframes(reservoir_usgs_df, if not from_files: if not waterbody_types_df_sub.empty: waterbody_types_df_sub.loc[waterbody_types_df_sub['reservoir_type'] == 4] = 1 + + # Great Lakes + if not great_lakes_df.empty: + gl_wbodies_sub = waterbody_types_df_sub[ + waterbody_types_df_sub['reservoir_type']==6 + ].index + if exclude_segments: + gl_wbodies_sub = list(set(gl_wbodies_sub).difference(set(exclude_segments))) + gl_df_sub = great_lakes_df[great_lakes_df['lake_id'].isin(gl_wbodies_sub)] + gl_climatology_df_sub = great_lakes_climatology_df.loc[gl_wbodies_sub] + gl_param_df_sub = great_lakes_param_df[great_lakes_param_df['lake_id'].isin(gl_wbodies_sub)] + gl_parm_lake_id_sub = gl_param_df_sub.lake_id.to_numpy() + gl_param_flows_sub = gl_param_df_sub.previous_assimilated_outflows.to_numpy() + gl_param_time_sub = gl_param_df_sub.previous_assimilated_time.to_numpy() + gl_param_update_time_sub = gl_param_df_sub.update_time.to_numpy() + else: + gl_df_sub = pd.DataFrame(columns=['lake_id','time','Discharge']) + gl_climatology_df_sub = pd.DataFrame() + gl_parm_lake_id_sub = pd.DataFrame().to_numpy().reshape(0,) + gl_param_flows_sub = pd.DataFrame().to_numpy().reshape(0,) + gl_param_time_sub = pd.DataFrame().to_numpy().reshape(0,) + gl_param_update_time_sub = pd.DataFrame().to_numpy().reshape(0,) + if not waterbody_types_df_sub.empty: + waterbody_types_df_sub.loc[waterbody_types_df_sub['reservoir_type'] == 6] = 1 return ( reservoir_usgs_df_sub, reservoir_usgs_df_time, reservoir_usgs_update_time, reservoir_usgs_prev_persisted_flow, reservoir_usgs_persistence_update_time, reservoir_usgs_persistence_index, reservoir_usace_df_sub, reservoir_usace_df_time, reservoir_usace_update_time, reservoir_usace_prev_persisted_flow, reservoir_usace_persistence_update_time, reservoir_usace_persistence_index, reservoir_rfc_df_sub, reservoir_rfc_totalCounts, reservoir_rfc_file, reservoir_rfc_use_forecast, reservoir_rfc_timeseries_idx, reservoir_rfc_update_time, reservoir_rfc_da_timestep, reservoir_rfc_persist_days, + gl_df_sub, gl_parm_lake_id_sub, gl_param_flows_sub, gl_param_time_sub, gl_param_update_time_sub, gl_climatology_df_sub, waterbody_types_df_sub ) @@ -501,6 +529,9 @@ def compute_nhd_routing_v02( reservoir_usace_param_df, reservoir_rfc_df, reservoir_rfc_param_df, + great_lakes_df, + great_lakes_param_df, + great_lakes_climatology_df, da_parameter_dict, assume_short_ts, return_courant, @@ -744,6 +775,12 @@ def compute_nhd_routing_v02( reservoir_rfc_update_time, reservoir_rfc_da_timestep, reservoir_rfc_persist_days, + gl_df_sub, + gl_parm_lake_id_sub, + gl_param_flows_sub, + gl_param_time_sub, + gl_param_update_time_sub, + gl_climatology_df_sub, waterbody_types_df_sub, ) = _prep_reservoir_da_dataframes( reservoir_usgs_df, @@ -752,6 +789,9 @@ def compute_nhd_routing_v02( reservoir_usace_param_df, reservoir_rfc_df, reservoir_rfc_param_df, + great_lakes_df, + great_lakes_param_df, + great_lakes_climatology_df, waterbody_types_df_sub, 
t0, from_files, @@ -818,6 +858,15 @@ def compute_nhd_routing_v02( reservoir_rfc_update_time.astype("float32"), reservoir_rfc_da_timestep.astype("int32"), reservoir_rfc_persist_days.astype("int32"), + # Great Lakes DA data + gl_df_sub.lake_id.values.astype("int32"), + gl_df_sub.time.values.astype("int32"), + gl_df_sub.Discharge.values.astype("float32"), + gl_parm_lake_id_sub.astype("int32"), + gl_param_flows_sub.astype("float32"), + gl_param_time_sub.astype("int32"), + gl_param_update_time_sub.astype("int32"), + gl_climatology_df_sub.values.astype("float32"), { us: fvd for us, fvd in flowveldepth_interorder.items() @@ -1032,6 +1081,12 @@ def compute_nhd_routing_v02( reservoir_rfc_update_time, reservoir_rfc_da_timestep, reservoir_rfc_persist_days, + gl_df_sub, + gl_parm_lake_id_sub, + gl_param_flows_sub, + gl_param_time_sub, + gl_param_update_time_sub, + gl_climatology_df_sub, waterbody_types_df_sub, ) = _prep_reservoir_da_dataframes( reservoir_usgs_df, @@ -1040,6 +1095,9 @@ def compute_nhd_routing_v02( reservoir_usace_param_df, reservoir_rfc_df, reservoir_rfc_param_df, + great_lakes_df, + great_lakes_param_df, + great_lakes_climatology_df, waterbody_types_df_sub, t0, from_files, @@ -1104,6 +1162,15 @@ def compute_nhd_routing_v02( reservoir_rfc_update_time.astype("float32"), reservoir_rfc_da_timestep.astype("int32"), reservoir_rfc_persist_days.astype("int32"), + # Great Lakes DA data + gl_df_sub.lake_id.values.astype("int32"), + gl_df_sub.time.values.astype("int32"), + gl_df_sub.Discharge.values.astype("float32"), + gl_parm_lake_id_sub.astype("int32"), + gl_param_flows_sub.astype("float32"), + gl_param_time_sub.astype("int32"), + gl_param_update_time_sub.astype("int32"), + gl_climatology_df_sub.values.astype("float32"), { us: fvd for us, fvd in flowveldepth_interorder.items() @@ -1235,6 +1302,12 @@ def compute_nhd_routing_v02( reservoir_rfc_update_time, reservoir_rfc_da_timestep, reservoir_rfc_persist_days, + gl_df_sub, + gl_parm_lake_id_sub, + gl_param_flows_sub, + gl_param_time_sub, + gl_param_update_time_sub, + gl_climatology_df_sub, waterbody_types_df_sub, ) = _prep_reservoir_da_dataframes( reservoir_usgs_df, @@ -1243,6 +1316,9 @@ def compute_nhd_routing_v02( reservoir_usace_param_df, reservoir_rfc_df, reservoir_rfc_param_df, + great_lakes_df, + great_lakes_param_df, + great_lakes_climatology_df, waterbody_types_df_sub, t0, from_files, @@ -1300,6 +1376,15 @@ def compute_nhd_routing_v02( reservoir_rfc_update_time.astype("float32"), reservoir_rfc_da_timestep.astype("int32"), reservoir_rfc_persist_days.astype("int32"), + # Great Lakes DA data + gl_df_sub.lake_id.values.astype("int32"), + gl_df_sub.time.values.astype("int32"), + gl_df_sub.Discharge.values.astype("float32"), + gl_parm_lake_id_sub.astype("int32"), + gl_param_flows_sub.astype("float32"), + gl_param_time_sub.astype("int32"), + gl_param_update_time_sub.astype("int32"), + gl_climatology_df_sub.values.astype("float32"), {}, assume_short_ts, return_courant, @@ -1402,6 +1487,12 @@ def compute_nhd_routing_v02( reservoir_rfc_update_time, reservoir_rfc_da_timestep, reservoir_rfc_persist_days, + gl_df_sub, + gl_parm_lake_id_sub, + gl_param_flows_sub, + gl_param_time_sub, + gl_param_update_time_sub, + gl_climatology_df_sub, waterbody_types_df_sub, ) = _prep_reservoir_da_dataframes( reservoir_usgs_df, @@ -1410,6 +1501,9 @@ def compute_nhd_routing_v02( reservoir_usace_param_df, reservoir_rfc_df, reservoir_rfc_param_df, + great_lakes_df, + great_lakes_param_df, + great_lakes_climatology_df, waterbody_types_df_sub, t0, from_files, @@ -1466,6 
+1560,15 @@ def compute_nhd_routing_v02( reservoir_rfc_update_time.astype("float32"), reservoir_rfc_da_timestep.astype("int32"), reservoir_rfc_persist_days.astype("int32"), + # Great Lakes DA data + gl_df_sub.lake_id.values.astype("int32"), + gl_df_sub.time.values.astype("int32"), + gl_df_sub.Discharge.values.astype("float32"), + gl_parm_lake_id_sub.astype("int32"), + gl_param_flows_sub.astype("float32"), + gl_param_time_sub.astype("int32"), + gl_param_update_time_sub.astype("int32"), + gl_climatology_df_sub.values.astype("float32"), {}, assume_short_ts, return_courant, @@ -1764,7 +1867,7 @@ def compute_diffusive_routing( rch_list[~x], dat_all[~x,3:], 0, # place-holder for streamflow DA parameters (np.asarray([]), np.asarray([]), np.asarray([])), - # place-holder for reservoir DA paramters + # place-holder for reservoir DA parameters (np.asarray([]), np.asarray([]), np.asarray([]), np.asarray([]), np.asarray([])), (np.asarray([]), np.asarray([]), np.asarray([]), np.asarray([]), np.asarray([])), # place holder for reservoir inflows @@ -1773,6 +1876,8 @@ def compute_diffusive_routing( (np.asarray([]), np.asarray([]), np.asarray([])), # place-holder for nudge values (np.empty(shape=(0, nts + 1), dtype='float32')), + # place-holder for great lakes DA values/parameters + (np.asarray([]), np.asarray([]), np.asarray([]), np.asarray([])), ) ) diff --git a/src/troute-routing/troute/routing/fast_reach/mc_reach.pyx b/src/troute-routing/troute/routing/fast_reach/mc_reach.pyx index 5d37e3f70..0d205d98e 100644 --- a/src/troute-routing/troute/routing/fast_reach/mc_reach.pyx +++ b/src/troute-routing/troute/routing/fast_reach/mc_reach.pyx @@ -19,6 +19,7 @@ from troute.network.reservoirs.levelpool.levelpool cimport MC_Levelpool, run_lp_ from troute.network.reservoirs.rfc.rfc cimport MC_RFC, run_rfc_c from troute.routing.fast_reach.reservoir_hybrid_da import reservoir_hybrid_da from troute.routing.fast_reach.reservoir_RFC_da import reservoir_RFC_da +from troute.routing.fast_reach.reservoir_GL_da import great_lakes_da from cython.parallel import prange #import cProfile @@ -207,6 +208,14 @@ cpdef object compute_network_structured( const float[:] reservoir_rfc_update_time, const int[:] reservoir_rfc_da_timestep, const int[:] reservoir_rfc_persist_days, + const int[:] great_lakes_idx, + const int[:] great_lakes_times, + const float[:] great_lakes_discharge, + const int[:] great_lakes_param_idx, + const float[:] great_lakes_param_prev_assim_flow, + const int[:] great_lakes_param_prev_assim_times, + const int[:] great_lakes_param_update_times, + const float[:,:] great_lakes_climatology, dict upstream_results={}, bint assume_short_ts=False, bint return_courant=False, @@ -425,6 +434,17 @@ cpdef object compute_network_structured( cdef np.ndarray[float, ndim=1] usace_prev_persistence_index = np.asarray(reservoir_usace_persistence_index) cdef np.ndarray[int, ndim=1] rfc_timeseries_idx = np.asarray(reservoir_rfc_timeseries_idx) + # great lakes arrays + cdef np.ndarray[int, ndim=1] gl_idx = np.asarray(great_lakes_idx) + cdef np.ndarray[float, ndim=1] gl_obs = np.asarray(great_lakes_discharge) + cdef np.ndarray[int, ndim=1] gl_times = np.asarray(great_lakes_times) + cdef np.ndarray[int, ndim=1] gl_param_idx = np.asarray(great_lakes_param_idx) + cdef np.ndarray[int, ndim=1] gl_update_time = np.asarray(great_lakes_param_update_times) + cdef np.ndarray[float, ndim=1] gl_prev_assim_ouflow = np.asarray(great_lakes_param_prev_assim_flow) + cdef np.ndarray[int, ndim=1] gl_prev_assim_timestamp = 
np.asarray(great_lakes_param_prev_assim_times) + cdef np.ndarray[float, ndim=2] gl_climatology = np.asarray(great_lakes_climatology) + + #--------------------------------------------------------------------------------------------- #--------------------------------------------------------------------------------------------- @@ -486,168 +506,208 @@ cpdef object compute_network_structured( if r.type == compute_type.RESERVOIR_LP: - # water elevation before levelpool calculation - initial_water_elevation = r.reach.lp.water_elevation - - # levelpool reservoir storage/outflow calculation - run_lp_c(r, upstream_flows, 0.0, routing_period, &reservoir_outflow, &reservoir_water_elevation) - - # USGS reservoir hybrid DA inputs - if r.reach.lp.wbody_type_code == 2: - # find index location of waterbody in reservoir_usgs_obs - # and reservoir_usgs_time - res_idx = np.where(usgs_idx == r.reach.lp.lake_number) - wbody_gage_obs = reservoir_usgs_obs[res_idx[0][0],:] - wbody_gage_time = reservoir_usgs_time - prev_persisted_outflow = usgs_prev_persisted_ouflow[res_idx[0][0]] - persistence_update_time = usgs_persistence_update_time[res_idx[0][0]] - persistence_index = usgs_prev_persistence_index[res_idx[0][0]] - update_time = usgs_update_time[res_idx[0][0]] - - # USACE reservoir hybrid DA inputs - if r.reach.lp.wbody_type_code == 3: - # find index location of waterbody in reservoir_usgs_obs - # and reservoir_usgs_time - res_idx = np.where(usace_idx == r.reach.lp.lake_number) - wbody_gage_obs = reservoir_usace_obs[res_idx[0][0],:] - wbody_gage_time = reservoir_usace_time - prev_persisted_outflow = usace_prev_persisted_ouflow[res_idx[0][0]] - persistence_update_time = usace_persistence_update_time[res_idx[0][0]] - persistence_index = usace_prev_persistence_index[res_idx[0][0]] - update_time = usace_update_time[res_idx[0][0]] - - # Execute reservoir DA - both USGS(2) and USACE(3) types - if r.reach.lp.wbody_type_code == 2 or r.reach.lp.wbody_type_code == 3: - - #print('***********************************************************') - #print('calling reservoir DA code for lake_id:', r.reach.lp.lake_number) - #print('before DA, simulated outflow = ', reservoir_outflow) - #print('before DA, simulated water elevation = ', r.reach.lp.water_elevation) - + # Great Lake waterbody: doesn't actually route anything, default outflows + # are from climatology. 
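+                        # Observation, parameter, and climatology arrays for the lake are looked up by
+                        # lake_number and handed to great_lakes_da, which persists the most recent gage
+                        # observation while it remains within the persistence limit and otherwise falls
+                        # back to monthly climatology; the updated DA state is written back to the gl_*
+                        # arrays, and velocity and depth are reported as zero for these waterbodies.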
+ if r.reach.lp.wbody_type_code == 6: + # find index location of waterbody in great_lakes_df + # and great_lakes_param_df + res_idx = np.where(gl_idx == r.reach.lp.lake_number) + wbody_gage_obs = gl_obs[res_idx[0]] + wbody_gage_time = gl_times[res_idx[0]] + res_param_idx = np.where(gl_param_idx == r.reach.lp.lake_number) + param_prev_assim_flow = gl_prev_assim_ouflow[res_param_idx[0][0]] + param_prev_assim_timestamp = gl_prev_assim_timestamp[res_param_idx[0][0]] + param_update_time = gl_update_time[res_param_idx[0][0]] + climatology = gl_climatology[res_param_idx[0][0],:] + (new_outflow, - new_persisted_outflow, - new_water_elevation, - new_update_time, - new_persistence_index, - new_persistence_update_time - ) = reservoir_hybrid_da( - r.reach.lp.lake_number, # lake identification number - wbody_gage_obs, # gage observation values (cms) - wbody_gage_time, # gage observation times (sec) - dt * timestep, # model time (sec) - prev_persisted_outflow, # previously persisted outflow (cms) - persistence_update_time, - persistence_index, # number of sequentially persisted update cycles - reservoir_outflow, # levelpool simulated outflow (cms) - upstream_flows, # waterbody inflow (cms) - dt, # model timestep (sec) - r.reach.lp.area, # waterbody surface area (km2) - r.reach.lp.max_depth, # max waterbody depth (m) - r.reach.lp.orifice_elevation, # orifice elevation (m) - initial_water_elevation, # water surface el., previous timestep (m) - 48.0, # gage lookback hours (hrs) - update_time # waterbody update time (sec) + new_assimilated_outflow, + new_assimilated_timestamp, + new_update_time + ) = great_lakes_da( + wbody_gage_obs, # gage observations (cms) + wbody_gage_time, # timestamps of gage observations (datetime str) + param_prev_assim_flow, # last used observation (cms) + param_prev_assim_timestamp, # timestamp of last used observation (datetime str) + param_update_time, # timestamp to determine whether to look for new observation (datetime str) + model_start_time, # model initilaization time (datetime str) + dt * timestep, # model time (sec) + climatology, # climatology outflows (cms) ) + + gl_update_time[res_param_idx[0][0]] = new_update_time + gl_prev_assim_ouflow[res_param_idx[0][0]] = new_assimilated_outflow + gl_prev_assim_timestamp[res_param_idx[0][0]] = new_assimilated_timestamp + + # populate flowveldepth array with levelpool or hybrid DA results + flowveldepth[r.id, timestep, 0] = new_outflow + flowveldepth[r.id, timestep, 1] = 0.0 + flowveldepth[r.id, timestep, 2] = 0.0 + upstream_array[r.id, timestep, 0] = upstream_flows + + else: + # water elevation before levelpool calculation + initial_water_elevation = r.reach.lp.water_elevation - #print('After DA, outflow = ', new_outflow) - #print('After DA, water elevation =', new_water_elevation) - - # update levelpool water elevation state - update_lp_c(r, new_water_elevation, &reservoir_water_elevation) - - # change reservoir_outflow - reservoir_outflow = new_outflow - - #print('confirming DA elevation replacement:', reservoir_water_elevation) - #print('===========================================================') - - # update USGS DA reservoir state arrays - if r.reach.lp.wbody_type_code == 2: - usgs_update_time[res_idx[0][0]] = new_update_time - usgs_prev_persisted_ouflow[res_idx[0][0]] = new_persisted_outflow - usgs_prev_persistence_index[res_idx[0][0]] = new_persistence_index - usgs_persistence_update_time[res_idx[0][0]] = new_persistence_update_time - - # update USACE DA reservoir state arrays - if r.reach.lp.wbody_type_code == 3: - 
usace_update_time[res_idx[0][0]] = new_update_time - usace_prev_persisted_ouflow[res_idx[0][0]] = new_persisted_outflow - usace_prev_persistence_index[res_idx[0][0]] = new_persistence_index - usace_persistence_update_time[res_idx[0][0]] = new_persistence_update_time - - - # RFC reservoir hybrid DA inputs - if r.reach.lp.wbody_type_code == 4: - # find index location of waterbody in reservoir_rfc_obs - # and reservoir_rfc_time - res_idx = np.where(rfc_idx == r.reach.lp.lake_number) - wbody_gage_obs = reservoir_rfc_obs[res_idx[0][0],:] - totalCounts = reservoir_rfc_totalCounts[res_idx[0][0]] - rfc_file = reservoir_rfc_file[res_idx[0][0]] - use_RFC = reservoir_rfc_use_forecast[res_idx[0][0]] - current_timeseries_idx = rfc_timeseries_idx[res_idx[0][0]] - update_time = rfc_update_time[res_idx[0][0]] - rfc_timestep = reservoir_rfc_da_timestep[res_idx[0][0]] - rfc_persist_days = reservoir_rfc_persist_days[res_idx[0][0]] - - # Execute RFC reservoir DA - both RFC(4) and Glacially Dammed Lake(5) types - if r.reach.lp.wbody_type_code == 4 or r.reach.lp.wbody_type_code == 5: + # levelpool reservoir storage/outflow calculation + run_lp_c(r, upstream_flows, 0.0, routing_period, &reservoir_outflow, &reservoir_water_elevation) - #print('***********************************************************') - #print('calling reservoir DA code for lake_id:', r.reach.lp.lake_number) - #print('before DA, simulated outflow = ', reservoir_outflow) - #print('before DA, simulated water elevation = ', r.reach.lp.water_elevation) + # USGS reservoir hybrid DA inputs + if r.reach.lp.wbody_type_code == 2: + # find index location of waterbody in reservoir_usgs_obs + # and reservoir_usgs_time + res_idx = np.where(usgs_idx == r.reach.lp.lake_number) + wbody_gage_obs = reservoir_usgs_obs[res_idx[0][0],:] + wbody_gage_time = reservoir_usgs_time + prev_persisted_outflow = usgs_prev_persisted_ouflow[res_idx[0][0]] + persistence_update_time = usgs_persistence_update_time[res_idx[0][0]] + persistence_index = usgs_prev_persistence_index[res_idx[0][0]] + update_time = usgs_update_time[res_idx[0][0]] - ( - new_outflow, + # USACE reservoir hybrid DA inputs + if r.reach.lp.wbody_type_code == 3: + # find index location of waterbody in reservoir_usgs_obs + # and reservoir_usgs_time + res_idx = np.where(usace_idx == r.reach.lp.lake_number) + wbody_gage_obs = reservoir_usace_obs[res_idx[0][0],:] + wbody_gage_time = reservoir_usace_time + prev_persisted_outflow = usace_prev_persisted_ouflow[res_idx[0][0]] + persistence_update_time = usace_persistence_update_time[res_idx[0][0]] + persistence_index = usace_prev_persistence_index[res_idx[0][0]] + update_time = usace_update_time[res_idx[0][0]] + + # Execute reservoir DA - both USGS(2) and USACE(3) types + if r.reach.lp.wbody_type_code == 2 or r.reach.lp.wbody_type_code == 3: + + #print('***********************************************************') + #print('calling reservoir DA code for lake_id:', r.reach.lp.lake_number) + #print('before DA, simulated outflow = ', reservoir_outflow) + #print('before DA, simulated water elevation = ', r.reach.lp.water_elevation) + + (new_outflow, + new_persisted_outflow, new_water_elevation, - new_update_time, - new_timeseries_idx, - dynamic_reservoir_type, - assimilated_value, - assimilated_source_file, - ) = reservoir_RFC_da( - use_RFC, # boolean whether to use RFC values or not - wbody_gage_obs, # gage observation values (cms) - current_timeseries_idx, # index of for current time series observation - totalCounts, # total number of observations in RFC timeseries - 
routing_period, # routing period (sec) - dt * timestep, # model time (sec) - update_time, # time to advance to next time series index - rfc_timestep, # frequency of DA observations (sec) - rfc_persist_days*24*60*60, # max seconds RFC forecasts will be used/persisted (days -> seconds) - r.reach.lp.wbody_type_code, # reservoir type - upstream_flows, # waterbody inflow (cms) - initial_water_elevation, # water surface el., previous timestep (m) - reservoir_outflow, # levelpool simulated outflow (cms) - reservoir_water_elevation, # levelpool simulated water elevation (m) - r.reach.lp.area*1.0e6, # waterbody surface area (km2 -> m2) - r.reach.lp.max_depth, # max waterbody depth (m) - rfc_file, # RFC file name - ) + new_update_time, + new_persistence_index, + new_persistence_update_time + ) = reservoir_hybrid_da( + r.reach.lp.lake_number, # lake identification number + wbody_gage_obs, # gage observation values (cms) + wbody_gage_time, # gage observation times (sec) + dt * timestep, # model time (sec) + prev_persisted_outflow, # previously persisted outflow (cms) + persistence_update_time, + persistence_index, # number of sequentially persisted update cycles + reservoir_outflow, # levelpool simulated outflow (cms) + upstream_flows, # waterbody inflow (cms) + dt, # model timestep (sec) + r.reach.lp.area, # waterbody surface area (km2) + r.reach.lp.max_depth, # max waterbody depth (m) + r.reach.lp.orifice_elevation, # orifice elevation (m) + initial_water_elevation, # water surface el., previous timestep (m) + 48.0, # gage lookback hours (hrs) + update_time # waterbody update time (sec) + ) + + #print('After DA, outflow = ', new_outflow) + #print('After DA, water elevation =', new_water_elevation) + + # update levelpool water elevation state + update_lp_c(r, new_water_elevation, &reservoir_water_elevation) + + # change reservoir_outflow + reservoir_outflow = new_outflow + + #print('confirming DA elevation replacement:', reservoir_water_elevation) + #print('===========================================================') + + # update USGS DA reservoir state arrays + if r.reach.lp.wbody_type_code == 2: + usgs_update_time[res_idx[0][0]] = new_update_time + usgs_prev_persisted_ouflow[res_idx[0][0]] = new_persisted_outflow + usgs_prev_persistence_index[res_idx[0][0]] = new_persistence_index + usgs_persistence_update_time[res_idx[0][0]] = new_persistence_update_time + + # update USACE DA reservoir state arrays + if r.reach.lp.wbody_type_code == 3: + usace_update_time[res_idx[0][0]] = new_update_time + usace_prev_persisted_ouflow[res_idx[0][0]] = new_persisted_outflow + usace_prev_persistence_index[res_idx[0][0]] = new_persistence_index + usace_persistence_update_time[res_idx[0][0]] = new_persistence_update_time + + + # RFC reservoir hybrid DA inputs + if r.reach.lp.wbody_type_code == 4: + # find index location of waterbody in reservoir_rfc_obs + # and reservoir_rfc_time + res_idx = np.where(rfc_idx == r.reach.lp.lake_number) + wbody_gage_obs = reservoir_rfc_obs[res_idx[0][0],:] + totalCounts = reservoir_rfc_totalCounts[res_idx[0][0]] + rfc_file = reservoir_rfc_file[res_idx[0][0]] + use_RFC = reservoir_rfc_use_forecast[res_idx[0][0]] + current_timeseries_idx = rfc_timeseries_idx[res_idx[0][0]] + update_time = rfc_update_time[res_idx[0][0]] + rfc_timestep = reservoir_rfc_da_timestep[res_idx[0][0]] + rfc_persist_days = reservoir_rfc_persist_days[res_idx[0][0]] + + # Execute RFC reservoir DA - both RFC(4) and Glacially Dammed Lake(5) types + if r.reach.lp.wbody_type_code == 4 or r.reach.lp.wbody_type_code == 5: 
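+                            # reservoir_RFC_da substitutes the RFC forecast timeseries value for the
+                            # levelpool outflow while forecasts are enabled and within the persistence
+                            # window; the levelpool elevation state is then updated to the DA result.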
+ + #print('***********************************************************') + #print('calling reservoir DA code for lake_id:', r.reach.lp.lake_number) + #print('before DA, simulated outflow = ', reservoir_outflow) + #print('before DA, simulated water elevation = ', r.reach.lp.water_elevation) + + ( + new_outflow, + new_water_elevation, + new_update_time, + new_timeseries_idx, + dynamic_reservoir_type, + assimilated_value, + assimilated_source_file, + ) = reservoir_RFC_da( + use_RFC, # boolean whether to use RFC values or not + wbody_gage_obs, # gage observation values (cms) + current_timeseries_idx, # index of for current time series observation + totalCounts, # total number of observations in RFC timeseries + routing_period, # routing period (sec) + dt * timestep, # model time (sec) + update_time, # time to advance to next time series index + rfc_timestep, # frequency of DA observations (sec) + rfc_persist_days*24*60*60, # max seconds RFC forecasts will be used/persisted (days -> seconds) + r.reach.lp.wbody_type_code, # reservoir type + upstream_flows, # waterbody inflow (cms) + initial_water_elevation, # water surface el., previous timestep (m) + reservoir_outflow, # levelpool simulated outflow (cms) + reservoir_water_elevation, # levelpool simulated water elevation (m) + r.reach.lp.area*1.0e6, # waterbody surface area (km2 -> m2) + r.reach.lp.max_depth, # max waterbody depth (m) + rfc_file, # RFC file name + ) - #print('After DA, outflow = ', new_outflow) - #print('After DA, water elevation =', new_water_elevation) - - # update levelpool water elevation state - update_lp_c(r, new_water_elevation, &reservoir_water_elevation) - - # change reservoir_outflow - reservoir_outflow = new_outflow - - #print('confirming DA elevation replacement:', reservoir_water_elevation) - #print('===========================================================') - - # update RFC DA reservoir state arrays - rfc_update_time[res_idx[0][0]] = new_update_time - rfc_timeseries_idx[res_idx[0][0]] = new_timeseries_idx + #print('After DA, outflow = ', new_outflow) + #print('After DA, water elevation =', new_water_elevation) + + # update levelpool water elevation state + update_lp_c(r, new_water_elevation, &reservoir_water_elevation) + + # change reservoir_outflow + reservoir_outflow = new_outflow + + #print('confirming DA elevation replacement:', reservoir_water_elevation) + #print('===========================================================') + + # update RFC DA reservoir state arrays + rfc_update_time[res_idx[0][0]] = new_update_time + rfc_timeseries_idx[res_idx[0][0]] = new_timeseries_idx + - - # populate flowveldepth array with levelpool or hybrid DA results - flowveldepth[r.id, timestep, 0] = reservoir_outflow - flowveldepth[r.id, timestep, 1] = 0.0 - flowveldepth[r.id, timestep, 2] = reservoir_water_elevation - upstream_array[r.id, timestep, 0] = upstream_flows + # populate flowveldepth array with levelpool or hybrid DA results + flowveldepth[r.id, timestep, 0] = reservoir_outflow + flowveldepth[r.id, timestep, 1] = 0.0 + flowveldepth[r.id, timestep, 2] = reservoir_water_elevation + upstream_array[r.id, timestep, 0] = upstream_flows elif r.type == compute_type.RESERVOIR_RFC: run_rfc_c(r, upstream_flows, 0.0, routing_period, &reservoir_outflow, &reservoir_water_elevation) @@ -775,5 +835,11 @@ cpdef object compute_network_structured( rfc_idx, rfc_update_time-((timestep-1)*dt), rfc_timeseries_idx ), - np.asarray(nudge) + np.asarray(nudge), + ( + gl_param_idx, + gl_prev_assim_ouflow, + gl_prev_assim_timestamp, + 
gl_update_time
+        )
     )
\ No newline at end of file
diff --git a/src/troute-routing/troute/routing/fast_reach/reservoir_GL_da.py b/src/troute-routing/troute/routing/fast_reach/reservoir_GL_da.py
new file mode 100644
index 000000000..6683e9ee3
--- /dev/null
+++ b/src/troute-routing/troute/routing/fast_reach/reservoir_GL_da.py
@@ -0,0 +1,131 @@
+import numpy as np
+import logging
+from datetime import datetime, timedelta
+LOG = logging.getLogger('')
+
+def great_lakes_da(
+    gage_obs,
+    gage_time,
+    previous_assimilated_outflow,
+    previous_assimilated_time,
+    update_time,
+    t0,
+    now,
+    climatology_outflows,
+    update_time_interval = 3600,
+    persistence_limit = 11,
+):
+
+    """
+    Perform persistence reservoir data assimilation for the Great Lakes.
+
+    Arguments
+    ---------
+    - gage_obs                     (memoryview slice): array of gage observations at the gage
+                                                       associated with this waterbody (cms)
+    - gage_time                    (memoryview slice): array of observation times (secs) relative
+                                                       to the model initialization time (t0)
+    - previous_assimilated_outflow (numpy.float32):    observation assimilated at the previous
+                                                       update (cms)
+    - previous_assimilated_time    (int):              time of the previously assimilated
+                                                       observation, seconds since t0
+    - update_time                  (numpy.float32):    time to search for new observations,
+                                                       seconds since model initialization time (t0)
+    - t0                           (str):              model initialization time
+    - now                          (float):            current model time, seconds since t0
+    - climatology_outflows         (array):            monthly climatological outflows (cms),
+                                                       indexed January through December
+    - update_time_interval         (float):            time interval from current to next update
+                                                       time (secs)
+    - persistence_limit            (float):            maximum age, in days, of an observation
+                                                       before outflow defaults to climatology
+
+    Returns
+    -------
+    - outflow                 (float): assimilated, persisted, or climatological outflow rate (m3/sec)
+    - new_assimilated_outflow (float): observation most recently used for assimilation (cms)
+    - new_assimilated_time    (int):   time of that observation, seconds since t0
+    - new_update_time         (float): next time to search for new observations, seconds since t0
+    """
+
+    # LOG.debug('Great Lakes data assimilation for lake_id: %s at time %s from run start' % (lake_number, now))
+
+    # set new values as old values to start:
+    new_assimilated_outflow = previous_assimilated_outflow
+    new_assimilated_time = previous_assimilated_time
+    new_update_time = update_time
+
+    # determine which climatology value to use based on model time
+    t0_datetime = datetime.strptime(t0, '%Y-%m-%d_%H:%M:%S')
+    now_datetime = t0_datetime + timedelta(seconds=now)
+    month_idx = now_datetime.month - 1 # subtract 1 for python indexing
+    climatology_outflow = climatology_outflows[month_idx]
+
+    if np.isnan(previous_assimilated_outflow):
+        previous_assimilated_outflow = climatology_outflow
+
+    if now >= update_time:
+        # LOG.debug(
+        #     'Looking for observation to assimilate...'
+        # )
+
+        # initialize variable to store assimilated observations. We initialize
+        # as np.nan, so that if no good quality observations are found, we can
+        # easily catch it.
+        obs = np.nan
+
+        # identify location of gage_time that is nearest to, but not greater
+        # than the update time
+        t_idxs = np.nonzero(((now - gage_time) >= 0))[0]
+        if len(t_idxs)>0:
+            t_idx = t_idxs[-1]
+
+            # record good observation to obs
+            obs = gage_obs[t_idx]
+
+            # determine how many seconds prior to the update_time the
+            # observation was taken
+            t_obs = gage_time[t_idx]
+            gage_lookback_seconds = now - t_obs
+
+        if np.isnan(obs):
+            '''
+            - If no good observation is found, then we do not update the
+            update time. Consequently we will continue to search for a good
+            observation at each model timestep, before updating the update
+            time.
+ ''' + # LOG.debug( + # 'No good observation found, persisting previously assimilated flow' + # ) + outflow = previous_assimilated_outflow + + elif gage_lookback_seconds > (persistence_limit*60*60*24): + ''' + If a good observation was found, but the time difference between + the current model time and the observation timestamp is greater than + the persistence limit, default to climatology. + ''' + outflow = climatology_outflow + + else: + ''' + A good observation is found and it is within the persistence limit. + ''' + outflow = obs + new_assimilated_outflow = obs + new_assimilated_time = t_obs + new_update_time = update_time + update_time_interval + + else: + outflow = previous_assimilated_outflow + + if (now - previous_assimilated_time) > (persistence_limit*60*60*24): + ''' + If the time difference between the current model time and the + observation timestamp is greater than + the persistence limit, default to climatology. + ''' + outflow = climatology_outflow + + return outflow, new_assimilated_outflow, new_assimilated_time, new_update_time \ No newline at end of file diff --git a/test/LowerColorado_TX/test_AnA_V4_NHD.yaml b/test/LowerColorado_TX/test_AnA_V4_NHD.yaml index b8a906517..c53cd8412 100644 --- a/test/LowerColorado_TX/test_AnA_V4_NHD.yaml +++ b/test/LowerColorado_TX/test_AnA_V4_NHD.yaml @@ -95,9 +95,9 @@ compute_parameters: reservoir_parameter_file : domain/reservoir_index_AnA.nc reservoir_persistence_da: #---------- - reservoir_persistence_usgs : False - reservoir_persistence_usace : False - reservoir_persistence_greatLake : True + reservoir_persistence_usgs : True + reservoir_persistence_usace : True + reservoir_persistence_greatLake : False reservoir_rfc_da: #---------- reservoir_rfc_forecasts : False diff --git a/test/LowerColorado_TX_v4/test_AnA_V4_HYFeature.yaml b/test/LowerColorado_TX_v4/test_AnA_V4_HYFeature.yaml index 0e00ef5cd..6105673af 100644 --- a/test/LowerColorado_TX_v4/test_AnA_V4_HYFeature.yaml +++ b/test/LowerColorado_TX_v4/test_AnA_V4_HYFeature.yaml @@ -113,7 +113,7 @@ compute_parameters: #---------- reservoir_persistence_usgs : True reservoir_persistence_usace : True - reservoir_persistence_greatLake : True + reservoir_persistence_greatLake : False reservoir_rfc_da: #---------- reservoir_rfc_forecasts : True From c2cd1b4f61773942f80f04b860d38406345fe4fc Mon Sep 17 00:00:00 2001 From: Amin Torabi <140189926+AminTorabi-NOAA@users.noreply.github.com> Date: Thu, 1 Aug 2024 18:19:11 -0400 Subject: [PATCH 17/29] speeding up Parquet_output function by vectorizing (#816) * speeding up Parquet_output function by vectorizing * updating the index * delete pdb --- src/troute-nwm/src/nwm_routing/output.py | 63 +++++++++++++++++------- 1 file changed, 44 insertions(+), 19 deletions(-) diff --git a/src/troute-nwm/src/nwm_routing/output.py b/src/troute-nwm/src/nwm_routing/output.py index 38a6e6547..893825015 100644 --- a/src/troute-nwm/src/nwm_routing/output.py +++ b/src/troute-nwm/src/nwm_routing/output.py @@ -64,28 +64,53 @@ def _parquet_output_format_converter(df, start_datetime, dt, configuration, pref -------- - timeseries_df (DataFrame): Converted timeseries data frame ''' - - df.index.name = 'location_id' - df.reset_index(inplace=True) - timeseries_df = df.melt(id_vars=['location_id'], var_name='var') - timeseries_df['var'] = timeseries_df['var'].astype('string') - timeseries_df[['timestep', 'variable']] = timeseries_df['var'].str.strip("()").str.split(",", n=1, expand=True) - timeseries_df['variable'] = timeseries_df['variable'].str.strip().str.replace("'", "") 
- timeseries_df['timestep'] = timeseries_df['timestep'].astype('int') - timeseries_df['value_time'] = (start_datetime + pd.to_timedelta(timeseries_df['timestep'] * dt, unit='s')) variable_to_name_map = {"q": "streamflow", "d": "depth", "v": "velocity"} - timeseries_df["variable_name"] = timeseries_df["variable"].map(variable_to_name_map) - timeseries_df.drop(['var', 'timestep', 'variable'], axis=1, inplace=True) - timeseries_df['configuration'] = configuration variable_to_units_map = {"streamflow": "m3/s", "velocity": "m/s", "depth": "m"} - timeseries_df['units'] = timeseries_df['variable_name'].map(variable_to_units_map) - timeseries_df['reference_time'] = start_datetime.date() - timeseries_df['location_id'] = timeseries_df['location_id'].astype('string') - timeseries_df['location_id'] = prefix_ids + '-' + timeseries_df['location_id'] - timeseries_df['value'] = timeseries_df['value'].astype('double') - timeseries_df['reference_time'] = timeseries_df['reference_time'].astype('datetime64[us]') - timeseries_df['value_time'] = timeseries_df['value_time'].astype('datetime64[us]') + # Prepare the location_id with prefix + df.index.name = 'location_id' + df.reset_index(inplace=True) + location_ids = prefix_ids + '-' + df['location_id'].astype(str) + + # Flatten the dataframe using NumPy + num_locations = df.shape[0] + num_time_variables = df.shape[1] - 1 + num_records = num_locations * num_time_variables + + # Prepare timestep and variable arrays + times = df.columns[1:] + timesteps = np.array([t[0] for t in times], dtype=int) + variables = np.array([t[1] for t in times]) + + # Preallocate arrays + location_ids_repeated = np.tile(location_ids, num_time_variables) + value_time = np.empty(num_records, dtype='datetime64[us]') + variable_names = np.empty(num_records, dtype=object) + units = np.empty(num_records, dtype=object) + values = np.empty(num_records, dtype=float) + + # Calculate value_time, variable_names, units, and values in a vectorized manner + for i in range(num_time_variables): + start_idx = i * num_locations + end_idx = start_idx + num_locations + value_time[start_idx:end_idx] = start_datetime + pd.to_timedelta(timesteps[i] * dt, unit='s') + variable_name = variable_to_name_map[variables[i]] + unit = variable_to_units_map[variable_name] + variable_names[start_idx:end_idx] = variable_name + units[start_idx:end_idx] = unit + values[start_idx:end_idx] = df.iloc[:, i + 1].values + + # Create the resulting DataFrame + timeseries_df = pd.DataFrame({ + 'location_id': location_ids_repeated, + 'value': values, + 'value_time': value_time, + 'variable_name': variable_names, + 'units': units, + 'reference_time': start_datetime.date(), + 'configuration': configuration + }) + return timeseries_df From 5865910b73865bedf281cd79d7de4463675a2834 Mon Sep 17 00:00:00 2001 From: Amin Torabi <140189926+AminTorabi-NOAA@users.noreply.github.com> Date: Tue, 6 Aug 2024 08:28:59 -0400 Subject: [PATCH 18/29] updating time on csv output (#818) --- src/troute-network/troute/nhd_io.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/troute-network/troute/nhd_io.py b/src/troute-network/troute/nhd_io.py index 539e661aa..b948b379f 100644 --- a/src/troute-network/troute/nhd_io.py +++ b/src/troute-network/troute/nhd_io.py @@ -2076,6 +2076,9 @@ def write_flowveldepth_csv_pkl(stream_output_directory, file_name, # Concatenate all temporary DataFrames vertically df = pd.concat(df_list) df.index.name = 'feature_id' + + df['current_time'] = pd.to_datetime(df['t0']) + pd.to_timedelta(df['time']) + df = 
df[['current_time', 'flow', 'velocity', 'depth', 'nudge']] # File format handling file_format = file_name.split('.')[-1] From f8e9f4521156456b44483d9216d37c488043da74 Mon Sep 17 00:00:00 2001 From: shorvath-noaa <103054653+shorvath-noaa@users.noreply.github.com> Date: Wed, 7 Aug 2024 14:08:46 -0600 Subject: [PATCH 19/29] update default values for parquet output (#821) * update default values for parquet output * remove 'Optional' from parameters --- src/troute-config/troute/config/output_parameters.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/troute-config/troute/config/output_parameters.py b/src/troute-config/troute/config/output_parameters.py index 983c1d9fb..f9f35a600 100644 --- a/src/troute-config/troute/config/output_parameters.py +++ b/src/troute-config/troute/config/output_parameters.py @@ -53,8 +53,8 @@ class ParquetOutput(BaseModel): # NOTE: required if writing results to parquet parquet_output_folder: Optional[DirectoryPath] = None parquet_output_segments: Optional[List[str]] = None - configuration: Optional[str] = None - prefix_ids: Optional[str] = None + configuration: str = 'None' + prefix_ids: str = 'wb-' class ChrtoutOutput(BaseModel): From 70a2d0c68f270cc22ad59d7635e7d4fa0cb21170 Mon Sep 17 00:00:00 2001 From: Austin Raney Date: Mon, 12 Aug 2024 13:33:13 -0400 Subject: [PATCH 20/29] fix: parquet prefix_ids defaults to 'wb' --- src/troute-config/troute/config/output_parameters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/troute-config/troute/config/output_parameters.py b/src/troute-config/troute/config/output_parameters.py index f9f35a600..f56c89f6a 100644 --- a/src/troute-config/troute/config/output_parameters.py +++ b/src/troute-config/troute/config/output_parameters.py @@ -54,7 +54,7 @@ class ParquetOutput(BaseModel): parquet_output_folder: Optional[DirectoryPath] = None parquet_output_segments: Optional[List[str]] = None configuration: str = 'None' - prefix_ids: str = 'wb-' + prefix_ids: str = 'wb' class ChrtoutOutput(BaseModel): From 231e0f70b5289a1fe881c8ab260cc3e079165202 Mon Sep 17 00:00:00 2001 From: AminTorabi-NOAA Date: Wed, 7 Aug 2024 12:42:24 -0400 Subject: [PATCH 21/29] Optimizing read_geopkg function --- .../troute/HYFeaturesNetwork.py | 50 +++++++------------ 1 file changed, 18 insertions(+), 32 deletions(-) diff --git a/src/troute-network/troute/HYFeaturesNetwork.py b/src/troute-network/troute/HYFeaturesNetwork.py index 85644f3d1..a4b0927ea 100644 --- a/src/troute-network/troute/HYFeaturesNetwork.py +++ b/src/troute-network/troute/HYFeaturesNetwork.py @@ -27,8 +27,7 @@ def read_geopkg(file_path, compute_parameters, waterbody_parameters, cpu_pool): # If waterbodies are being simulated, read lakes table if waterbody_parameters.get('break_network_at_waterbodies',False): - layers.append('lakes') - layers.append('nexus') + layers.extend(['lakes', 'nexus']) # If any DA is activated, read network table as well for gage information data_assimilation_parameters = compute_parameters.get('data_assimilation_parameters',{}) @@ -44,40 +43,27 @@ def read_geopkg(file_path, compute_parameters, waterbody_parameters, cpu_pool): hybrid_routing = hybrid_parameters.get('run_hybrid_routing', False) if hybrid_routing & ('nexus' not in layers): layers.append('nexus') - + + # Define a function to read a layer from the geopackage + def read_layer(layer): + try: + return gpd.read_file(file_path, layer=layer) + except Exception as e: + return pd.DataFrame() + # Retrieve geopackage information: if cpu_pool > 1: - with 
Parallel(n_jobs=len(layers)) as parallel: - jobs = [] - for layer in layers: - jobs.append( - delayed(gpd.read_file) - #(f, qlat_file_value_col, gw_bucket_col, terrain_ro_col) - #delayed(nhd_io.get_ql_from_csv) - (filename=file_path, layer=layer) - ) - gpkg_list = parallel(jobs) + with Parallel(n_jobs=min(cpu_pool, len(layers))) as parallel: + gpkg_list = parallel(delayed(read_layer)(layer) for layer in layers) + table_dict = {layers[i]: gpkg_list[i] for i in range(len(layers))} - flowpaths = pd.merge(table_dict.get('flowpaths'), table_dict.get('flowpath_attributes'), on='id') - lakes = table_dict.get('lakes', pd.DataFrame()) - network = table_dict.get('network', pd.DataFrame()) - nexus = table_dict.get('nexus', pd.DataFrame()) else: - flowpaths = gpd.read_file(file_path, layer='flowpaths') - flowpath_attributes = gpd.read_file(file_path, layer='flowpath_attributes') - flowpaths = pd.merge(flowpaths, flowpath_attributes, on='id') - # If waterbodies are being simulated, read lakes table - lakes = pd.DataFrame() - if 'lakes' in layers: - lakes = gpd.read_file(file_path, layer='lakes') - # If any DA is activated, read network table as well for gage information - network = pd.DataFrame() - if 'network' in layers: - network = gpd.read_file(file_path, layer='network') - # If diffusive is activated, read nexus table for lat/lon information - nexus = pd.DataFrame() - if 'nexus' in layers: - nexus = gpd.read_file(file_path, layer='nexus') + table_dict = {layer: read_layer(layer) for layer in layers} + + flowpaths = pd.merge(table_dict.get('flowpaths', pd.DataFrame()), table_dict.get('flowpath_attributes', pd.DataFrame()), on='id', how='inner') + lakes = table_dict.get('lakes', pd.DataFrame()) + network = table_dict.get('network', pd.DataFrame()) + nexus = table_dict.get('nexus', pd.DataFrame()) return flowpaths, lakes, network, nexus From 999a456bb09514fca5e29fe99cb837d1288b272e Mon Sep 17 00:00:00 2001 From: AminTorabi-NOAA Date: Wed, 14 Aug 2024 12:48:43 -0400 Subject: [PATCH 22/29] reading new geopackage and renaming with regex --- .../troute/HYFeaturesNetwork.py | 113 ++++++++++++------ 1 file changed, 76 insertions(+), 37 deletions(-) diff --git a/src/troute-network/troute/HYFeaturesNetwork.py b/src/troute-network/troute/HYFeaturesNetwork.py index a4b0927ea..4466afcd8 100644 --- a/src/troute-network/troute/HYFeaturesNetwork.py +++ b/src/troute-network/troute/HYFeaturesNetwork.py @@ -13,54 +13,93 @@ from datetime import datetime from pprint import pformat import os - +import fiona import troute.nhd_io as nhd_io #FIXME from troute.nhd_network import reverse_dict, extract_connections, reverse_network, reachable from .rfc_lake_gage_crosswalk import get_rfc_lake_gage_crosswalk, get_great_lakes_climatology - +import re __verbose__ = False __showtiming__ = False +def find_layer_name(layers, pattern): + """ + Find a layer name in the list of layers that matches the regex pattern. + """ + for layer in layers: + if re.search(pattern, layer, re.IGNORECASE): + return layer + return None + def read_geopkg(file_path, compute_parameters, waterbody_parameters, cpu_pool): - # Establish which layers we will read. 
We'll always need the flowpath tables - layers = ['flowpaths','flowpath_attributes'] - - # If waterbodies are being simulated, read lakes table - if waterbody_parameters.get('break_network_at_waterbodies',False): - layers.extend(['lakes', 'nexus']) - - # If any DA is activated, read network table as well for gage information - data_assimilation_parameters = compute_parameters.get('data_assimilation_parameters',{}) - streamflow_nudging = data_assimilation_parameters.get('streamflow_da',{}).get('streamflow_nudging',False) - usgs_da = data_assimilation_parameters.get('reservoir_da',{}).get('reservoir_persistence_usgs',False) - usace_da = data_assimilation_parameters.get('reservoir_da',{}).get('reservoir_persistence_usace',False) - rfc_da = data_assimilation_parameters.get('reservoir_da',{}).get('reservoir_rfc_da',{}).get('reservoir_rfc_forecasts',False) - if any([streamflow_nudging, usgs_da, usace_da, rfc_da]): - layers.append('network') + # Retrieve available layers from the GeoPackage + available_layers = fiona.listlayers(file_path) + + # patterns for the layers we want to find + layer_patterns = { + 'flowpaths': r'flow[-_]?paths?', + 'flowpath_attributes': r'flow[-_]?path[-_]?attributes?', + 'lakes': r'lakes?', + 'nexus': r'nexus?', + 'network': r'network' + } + + # Match available layers to the patterns + matched_layers = {key: find_layer_name(available_layers, pattern) + for key, pattern in layer_patterns.items()} - # If diffusive is activated, read nexus table for lat/lon information - hybrid_parameters = compute_parameters.get('hybrid_parameters',{}) - hybrid_routing = hybrid_parameters.get('run_hybrid_routing', False) - if hybrid_routing & ('nexus' not in layers): - layers.append('nexus') - - # Define a function to read a layer from the geopackage - def read_layer(layer): - try: - return gpd.read_file(file_path, layer=layer) - except Exception as e: - return pd.DataFrame() - - # Retrieve geopackage information: + layers_to_read = ['flowpaths', 'flowpath_attributes'] + + if waterbody_parameters.get('break_network_at_waterbodies', False): + layers_to_read.extend(['lakes', 'nexus']) + + data_assimilation_parameters = compute_parameters.get('data_assimilation_parameters', {}) + if any([ + data_assimilation_parameters.get('streamflow_da', {}).get('streamflow_nudging', False), + data_assimilation_parameters.get('reservoir_da', {}).get('reservoir_persistence_usgs', False), + data_assimilation_parameters.get('reservoir_da', {}).get('reservoir_persistence_usace', False), + data_assimilation_parameters.get('reservoir_da', {}).get('reservoir_rfc_da', {}).get('reservoir_rfc_forecasts', False) + ]): + layers_to_read.append('network') + + hybrid_parameters = compute_parameters.get('hybrid_parameters', {}) + if hybrid_parameters.get('run_hybrid_routing', False) and 'nexus' not in layers_to_read: + layers_to_read.append('nexus') + + # Function that read a layer from the geopackage + def read_layer(layer_name): + if layer_name: + try: + return gpd.read_file(file_path, layer=layer_name) + except Exception as e: + print(f"Error reading {layer_name}: {e}") + return pd.DataFrame() + return pd.DataFrame() + + # Retrieve geopackage information using matched layer names if cpu_pool > 1: - with Parallel(n_jobs=min(cpu_pool, len(layers))) as parallel: - gpkg_list = parallel(delayed(read_layer)(layer) for layer in layers) - - table_dict = {layers[i]: gpkg_list[i] for i in range(len(layers))} + with Parallel(n_jobs=min(cpu_pool, len(layers_to_read))) as parallel: + gpkg_list = 
parallel(delayed(read_layer)(matched_layers[layer]) for layer in layers_to_read) + + table_dict = {layers_to_read[i]: gpkg_list[i] for i in range(len(layers_to_read))} else: - table_dict = {layer: read_layer(layer) for layer in layers} + table_dict = {layer: read_layer(matched_layers[layer]) for layer in layers_to_read} + + # Handle different key column names between flowpaths and flowpath_attributes + flowpaths_df = table_dict.get('flowpaths', pd.DataFrame()) + flowpath_attributes_df = table_dict.get('flowpath_attributes', pd.DataFrame()) + + # Check if 'link' column exists and rename it to 'id' + if 'link' in flowpath_attributes_df.columns: + flowpath_attributes_df.rename(columns={'link': 'id'}, inplace=True) + + # Merge flowpaths and flowpath_attributes + flowpaths = pd.merge( + flowpaths_df, + flowpath_attributes_df, + on='id', + how='inner' + ) - flowpaths = pd.merge(table_dict.get('flowpaths', pd.DataFrame()), table_dict.get('flowpath_attributes', pd.DataFrame()), on='id', how='inner') lakes = table_dict.get('lakes', pd.DataFrame()) network = table_dict.get('network', pd.DataFrame()) nexus = table_dict.get('nexus', pd.DataFrame()) From 007d0732726a2f762106d4004e3ffedb5da85187 Mon Sep 17 00:00:00 2001 From: AminTorabi-NOAA Date: Wed, 14 Aug 2024 13:51:27 -0400 Subject: [PATCH 23/29] update for reading flowlines instead of flowpaths --- src/troute-network/troute/HYFeaturesNetwork.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/troute-network/troute/HYFeaturesNetwork.py b/src/troute-network/troute/HYFeaturesNetwork.py index 4466afcd8..dbaf20253 100644 --- a/src/troute-network/troute/HYFeaturesNetwork.py +++ b/src/troute-network/troute/HYFeaturesNetwork.py @@ -36,7 +36,7 @@ def read_geopkg(file_path, compute_parameters, waterbody_parameters, cpu_pool): # patterns for the layers we want to find layer_patterns = { - 'flowpaths': r'flow[-_]?paths?', + 'flowpaths': r'flow[-_]?paths?|flow[-_]?lines?', 'flowpath_attributes': r'flow[-_]?path[-_]?attributes?', 'lakes': r'lakes?', 'nexus': r'nexus?', From 7a6e25d835762b78d634925482ac6653b6cd948e Mon Sep 17 00:00:00 2001 From: AminTorabi-NOAA Date: Wed, 14 Aug 2024 15:22:00 -0400 Subject: [PATCH 24/29] update for reading flowline_attribute instead of flowpath_attribute --- src/troute-network/troute/HYFeaturesNetwork.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/troute-network/troute/HYFeaturesNetwork.py b/src/troute-network/troute/HYFeaturesNetwork.py index dbaf20253..fb8a19225 100644 --- a/src/troute-network/troute/HYFeaturesNetwork.py +++ b/src/troute-network/troute/HYFeaturesNetwork.py @@ -37,7 +37,7 @@ def read_geopkg(file_path, compute_parameters, waterbody_parameters, cpu_pool): # patterns for the layers we want to find layer_patterns = { 'flowpaths': r'flow[-_]?paths?|flow[-_]?lines?', - 'flowpath_attributes': r'flow[-_]?path[-_]?attributes?', + 'flowpath_attributes': r'flow[-_]?path[-_]?attributes?|flow[-_]?line[-_]?attributes?', 'lakes': r'lakes?', 'nexus': r'nexus?', 'network': r'network' From bb7d6ec3c6ba0d7ba0333106c5b5e82150ab7f3d Mon Sep 17 00:00:00 2001 From: "Justin Singh-M. 
- NOAA" Date: Wed, 14 Aug 2024 13:52:31 -0700 Subject: [PATCH 25/29] fix(troute.network): handle `fiona` dependency for `geopandas>=1.0.0` --- src/troute-network/pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/src/troute-network/pyproject.toml b/src/troute-network/pyproject.toml index bb80a35ef..77ad761c0 100644 --- a/src/troute-network/pyproject.toml +++ b/src/troute-network/pyproject.toml @@ -8,6 +8,7 @@ version = "0.0.0" dependencies = [ "deprecated", "geopandas", + "fiona", "joblib", "netcdf4", "numpy", From 452a35b6e4096279a27730a3605964658fd560e7 Mon Sep 17 00:00:00 2001 From: Amin Torabi <140189926+AminTorabi-NOAA@users.noreply.github.com> Date: Thu, 15 Aug 2024 12:36:47 -0400 Subject: [PATCH 26/29] update for csv_output (#828) --- src/troute-network/troute/nhd_io.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/troute-network/troute/nhd_io.py b/src/troute-network/troute/nhd_io.py index b948b379f..a30f3d1cf 100644 --- a/src/troute-network/troute/nhd_io.py +++ b/src/troute-network/troute/nhd_io.py @@ -2070,12 +2070,11 @@ def write_flowveldepth_csv_pkl(stream_output_directory, file_name, 'velocity': velocity.iloc[:, i], 'depth': depth.iloc[:, i], 'nudge': nudge_df.iloc[:, i] - }) + }, index=flow.index) df_list.append(df_temp) # Concatenate all temporary DataFrames vertically df = pd.concat(df_list) - df.index.name = 'feature_id' df['current_time'] = pd.to_datetime(df['t0']) + pd.to_timedelta(df['time']) df = df[['current_time', 'flow', 'velocity', 'depth', 'nudge']] @@ -2269,6 +2268,7 @@ def mask_find_seg(mask_list, nexus_dict, poi_crosswalk): return nex_id, seg_id def updated_flowveldepth(flowveldepth, nex_id, seg_id, mask_list): + flowveldepth = flowveldepth.copy(deep=True) flowveldepth.index.name = 'featureID' flowveldepth['Type'] = 'wb' flowveldepth.set_index('Type', append=True, inplace=True) From dfeb91571a37bb8f9f9718b30ea6cff8eb23d9fa Mon Sep 17 00:00:00 2001 From: Amin Torabi <140189926+AminTorabi-NOAA@users.noreply.github.com> Date: Thu, 15 Aug 2024 13:33:30 -0400 Subject: [PATCH 27/29] issue 826 (#832) --- src/troute-nwm/src/nwm_routing/output.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/troute-nwm/src/nwm_routing/output.py b/src/troute-nwm/src/nwm_routing/output.py index 893825015..824e79129 100644 --- a/src/troute-nwm/src/nwm_routing/output.py +++ b/src/troute-nwm/src/nwm_routing/output.py @@ -93,7 +93,7 @@ def _parquet_output_format_converter(df, start_datetime, dt, configuration, pref for i in range(num_time_variables): start_idx = i * num_locations end_idx = start_idx + num_locations - value_time[start_idx:end_idx] = start_datetime + pd.to_timedelta(timesteps[i] * dt, unit='s') + value_time[start_idx:end_idx] = start_datetime + pd.to_timedelta((timesteps[i] + 1) * dt, unit='s') variable_name = variable_to_name_map[variables[i]] unit = variable_to_units_map[variable_name] variable_names[start_idx:end_idx] = variable_name From 72e6e9b271bd803eba063bd7722df026bf592f56 Mon Sep 17 00:00:00 2001 From: AminTorabi-NOAA Date: Thu, 15 Aug 2024 12:45:09 -0400 Subject: [PATCH 28/29] fix to issue 829_ --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 3cfb7d2e9..6ae078abe 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ pip>=22.1.0 setuptools>=64.0,<69.0 wheel -numpy +numpy>=1.21.0,<2.0.0 Cython>3,!=3.0.4 pandas >=1.1.5, <=2.2.0 xarray From 9d9d711435c553633bedc6056c9827dfb9e7843e Mon Sep 17 00:00:00 2001 From: 
AminTorabi-NOAA
Date: Thu, 15 Aug 2024 14:15:06 -0400
Subject: [PATCH 29/29] numpy fix

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 6ae078abe..6963bc407 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 pip>=22.1.0
 setuptools>=64.0,<69.0
 wheel
-numpy>=1.21.0,<2.0.0
+numpy~=1.0
 Cython>3,!=3.0.4
 pandas >=1.1.5, <=2.2.0
 xarray
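
A note on the final pin: numpy~=1.0 is a compatible-release specifier, equivalent to numpy>=1.0,<2.0, so it keeps NumPy on the 1.x series while excluding the 2.x releases that broke the build. A minimal sketch for checking a candidate version against that pin, assuming the third-party packaging library is available (it is not itself a t-route dependency), is:

    from packaging.specifiers import SpecifierSet

    spec = SpecifierSet("~=1.0")   # compatible release: >=1.0, <2.0
    print("1.26.4" in spec)        # True  - any NumPy 1.x satisfies the pin
    print("2.0.0" in spec)         # False - NumPy 2.x is excluded

Dependency resolvers apply the same rule, so installing from requirements.txt will select the newest available NumPy 1.x release.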