From e6a1b85b6b4d1172ac6a75939001a9d486f3dedb Mon Sep 17 00:00:00 2001 From: Neptune-Meister <140385333+JurgenZach-NOAA@users.noreply.github.com> Date: Thu, 25 Jul 2024 09:00:49 -0700 Subject: [PATCH 01/29] Deleted file with obsolete routines (#807) --- src/troute-routing/troute/routing/utils.py | 13 ------------- 1 file changed, 13 deletions(-) delete mode 100644 src/troute-routing/troute/routing/utils.py diff --git a/src/troute-routing/troute/routing/utils.py b/src/troute-routing/troute/routing/utils.py deleted file mode 100644 index 0c5a8e10b..000000000 --- a/src/troute-routing/troute/routing/utils.py +++ /dev/null @@ -1,13 +0,0 @@ -import pandas as pd -import numpy as np - - -def writetoFile(file, writeString): - file.write(writeString) - file.write("\n") - - -def constant_qlats(index_dataset, nsteps, qlat): - q = np.full((len(index_dataset.index), nsteps), qlat, dtype="float32") - ql = pd.DataFrame(q, index=index_dataset.index, columns=range(nsteps)) - return ql From 33017508ce2a70e4ce609ee6eb4e453cfbb07c9d Mon Sep 17 00:00:00 2001 From: karnesh Date: Tue, 5 Mar 2024 11:42:33 -0500 Subject: [PATCH 02/29] Added functionality to write flow, velocity and depth to parquet --- .../troute/config/output_parameters.py | 16 +- src/troute-nwm/src/nwm_routing/input.py | 19 ++ src/troute-nwm/src/nwm_routing/output.py | 213 ++++++++++++++---- test/LowerColorado_TX/test_AnA_V4_NHD.yaml | 5 +- 4 files changed, 200 insertions(+), 53 deletions(-) diff --git a/src/troute-config/troute/config/output_parameters.py b/src/troute-config/troute/config/output_parameters.py index 61119c109..9c8ce421b 100644 --- a/src/troute-config/troute/config/output_parameters.py +++ b/src/troute-config/troute/config/output_parameters.py @@ -5,13 +5,16 @@ from typing_extensions import Annotated, Literal from .types import FilePath, DirectoryPath -streamOutput_allowedTypes = Literal['.csv', '.nc', '.pkl'] +streamOutput_allowedTypes = Literal['.csv', '.nc', '.pkl', '.parquet'] + class OutputParameters(BaseModel): chanobs_output: Optional["ChanobsOutput"] = None # NOTE: this appears to be optional. See nwm_routing/input.py ~:477 csv_output: Optional["CsvOutput"] = None # NOTE: this appears to be optional. See nwm_routing/input.py ~:496 + parquet_output: Optional["ParquetOutput"] = None + # NOTE: this appears to be optional. See nwm_routing/input.py ~:563 chrtout_output: Optional["ChrtoutOutput"] = None lite_restart: Optional["LiteRestart"] = None # NOTE: this appears to be optional. See nwm_routing/input.py ~:520 @@ -46,7 +49,13 @@ class CsvOutput(BaseModel): csv_output_segments: Optional[List[str]] = None -class ChrtoutOutput(BaseModel): +class ParquetOutput(BaseModel, extra='forbid'): + # NOTE: required if writing results to parquet + parquet_output_folder: Optional[DirectoryPath] = None + parquet_output_segments: Optional[List[str]] = None + + +class ChrtoutOutput(BaseModel, extra='forbid'): # NOTE: mandatory if writing results to CHRTOUT. 
wrf_hydro_channel_output_source_folder: Optional[DirectoryPath] = None @@ -79,6 +88,7 @@ class WrfHydroParityCheck(BaseModel): class ParityCheckCompareFileSet(BaseModel): validation_files: List[FilePath] + class StreamOutput(BaseModel): # NOTE: required if writing StreamOutput files stream_output_directory: Optional[DirectoryPath] = None @@ -93,7 +103,7 @@ def validate_stream_output_internal_frequency(cls, value, values): if values.get('stream_output_time') != -1 and value / 60 > values['stream_output_time']: raise ValueError("stream_output_internal_frequency should be less than or equal to stream_output_time in minutes.") return value - + OutputParameters.update_forward_refs() WrfHydroParityCheck.update_forward_refs() diff --git a/src/troute-nwm/src/nwm_routing/input.py b/src/troute-nwm/src/nwm_routing/input.py index 8a5f55f24..3e1ba4170 100644 --- a/src/troute-nwm/src/nwm_routing/input.py +++ b/src/troute-nwm/src/nwm_routing/input.py @@ -559,6 +559,25 @@ def check_inputs( else: LOG.debug('No csv output folder specified. Results will NOT be written to csv') + + if output_parameters.get('parquet_output', None): + + if output_parameters['parquet_output'].get('parquet_output_folder', None): + + _does_path_exist( + 'parquet_output_folder', + output_parameters['parquet_output']['parquet_output_folder'] + ) + + output_segs = output_parameters['parquet_output'].get('parquet_output_segments', None) + if not output_segs: + LOG.debug('No parquet output segments specified. Results for all domain segments will be written') + + else: + LOG.debug('No parquet output folder specified. Results will NOT be written in parquet format') + + else: + LOG.debug('No parquet output folder specified. Results will NOT be written in parquet format') if output_parameters.get('chrtout_output', None): diff --git a/src/troute-nwm/src/nwm_routing/output.py b/src/troute-nwm/src/nwm_routing/output.py index 2404e7683..80a1a4e39 100644 --- a/src/troute-nwm/src/nwm_routing/output.py +++ b/src/troute-nwm/src/nwm_routing/output.py @@ -7,9 +7,9 @@ from build_tests import parity_check import logging - LOG = logging.getLogger('') + def _reindex_lake_to_link_id(target_df, crosswalk): ''' Utility function for replacing lake ID index values @@ -29,23 +29,24 @@ def _reindex_lake_to_link_id(target_df, crosswalk): # evaluate intersection of lake ids and target_df index values # i.e. what are the index positions of lake ids that need replacing? 
- lakeids = np.fromiter(crosswalk.keys(), dtype = int) + lakeids = np.fromiter(crosswalk.keys(), dtype=int) idxs = target_df.index.to_numpy() lake_index_intersect = np.intersect1d( - idxs, - lakeids, - return_indices = True + idxs, + lakeids, + return_indices=True ) # replace lake ids with link IDs in the target_df index array - linkids = np.fromiter(crosswalk.values(), dtype = int) + linkids = np.fromiter(crosswalk.values(), dtype=int) idxs[lake_index_intersect[1]] = linkids[lake_index_intersect[2]] # (re) set the target_df index - target_df.set_index(idxs, inplace = True) - + target_df.set_index(idxs, inplace=True) + return target_df +<<<<<<< HEAD def nwm_output_generator( run, results, @@ -66,7 +67,27 @@ def nwm_output_generator( link_lake_crosswalk = None, logFileName='NONE' ): +======= +>>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) +def nwm_output_generator( + run, + results, + supernetwork_parameters, + output_parameters, + parity_parameters, + restart_parameters, + parity_set, + qts_subdivisions, + return_courant, + cpu_pool, + waterbodies_df, + waterbody_types_df, + data_assimilation_parameters=False, + lastobs_df=None, + link_gage_df=None, + link_lake_crosswalk=None, +): dt = run.get("dt") nts = run.get("nts") t0 = run.get("t0") @@ -104,13 +125,15 @@ def nwm_output_generator( ) ################### Output Handling - + start_time = time.time() LOG.info(f"Handling output ...") csv_output = output_parameters.get("csv_output", None) csv_output_folder = None + parquet_output = output_parameters.get("parquet_output", None) + parquet_output_folder = None rsrto = output_parameters.get("hydro_rst_output", None) chrto = output_parameters.get("chrtout_output", None) test = output_parameters.get("test_output", None) @@ -125,8 +148,19 @@ def nwm_output_generator( ) csv_output_segments = csv_output.get("csv_output_segments", None) +<<<<<<< HEAD if csv_output_folder or rsrto or chrto or chano or test or wbdyo or stream_output: +======= + if parquet_output: + parquet_output_folder = output_parameters["parquet_output"].get( + "parquet_output_folder", None + ) + parquet_output_segments = parquet_output.get("parquet_output_segments", None) + + if csv_output_folder or parquet_output_folder or rsrto or chrto or chano or test or wbdyo or stream_output: + +>>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) start = time.time() qvd_columns = pd.MultiIndex.from_product( [range(nts), ["q", "v", "d"]] @@ -138,7 +172,6 @@ def nwm_output_generator( ) if wbdyo and not waterbodies_df.empty: - # create waterbody dataframe for output to netcdf file i_columns = pd.MultiIndex.from_product( [range(nts), ["i"]] @@ -163,20 +196,20 @@ def nwm_output_generator( timestep_index = np.where(((np.array(list(set(list(timestep)))) + 1) * dt) % (dt * qts_subdivisions) == 0) ts_set = set(timestep_index[0].tolist()) flow_df_col_index = [i for i, e in enumerate(timestep) if e in ts_set] - flow_df = flow_df.iloc[:,flow_df_col_index] + flow_df = flow_df.iloc[:, flow_df_col_index] timestep, variable = zip(*wbdy.columns.tolist()) timestep_index = np.where(((np.array(list(set(list(timestep)))) + 1) * dt) % (dt * qts_subdivisions) == 0) ts_set = set(timestep_index[0].tolist()) wbdy_col_index = [i for i, e in enumerate(timestep) if e in ts_set] - i_df = wbdy.iloc[:,wbdy_col_index] - q_df = flow_df.iloc[:,0::3] - d_df = flow_df.iloc[:,2::3] + i_df = wbdy.iloc[:, wbdy_col_index] + q_df = flow_df.iloc[:, 0::3] + d_df = flow_df.iloc[:, 2::3] # replace waterbody lake_ids with 
outlet link ids if (link_lake_crosswalk): flowveldepth = _reindex_lake_to_link_id(flowveldepth, link_lake_crosswalk) - + # todo: create a unit test by saving FVD array to disk and then checking that # it matches FVD array from parent branch or other configurations. # flowveldepth.to_pickle(output_parameters['test_output']) @@ -192,15 +225,14 @@ def nwm_output_generator( ], copy=False, ) - + # replace waterbody lake_ids with outlet link ids if link_lake_crosswalk: - # (re) set the flowveldepth index - courant.set_index(fvdidxs, inplace = True) - + courant.set_index(fvdidxs, inplace=True) + LOG.debug("Constructing the FVD DataFrame took %s seconds." % (time.time() - start)) - + if stream_output: stream_output_directory = stream_output['stream_output_directory'] stream_output_timediff = stream_output['stream_output_time'] @@ -209,6 +241,7 @@ def nwm_output_generator( nudge = np.concatenate([r[8] for r in results]) usgs_positions_id = np.concatenate([r[3][0] for r in results]) +<<<<<<< HEAD nhd_io.write_flowveldepth( Path(stream_output_directory), flowveldepth, @@ -234,11 +267,23 @@ def nwm_output_generator( preRunLog.write("Output of FVD data every "+str(fCalc)+" minutes\n") preRunLog.write("Writing "+str(nTimeBins)+" time bins for "+str(len(flowveldepth.index))+" segments per FVD output file\n") preRunLog.close() +======= + nhd_io.write_flowveldepth_netcdf(Path(stream_output_directory), + flowveldepth, + nudge, + usgs_positions_id, + t0, + int(stream_output_timediff), + stream_output_type, + stream_output_internal_frequency, + cpu_pool=cpu_pool) +>>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) if test: flowveldepth.to_pickle(Path(test)) - + if wbdyo and not waterbodies_df.empty: +<<<<<<< HEAD time_index, tmp_variable = map(list,zip(*i_df.columns.tolist())) if not duplicate_ids_df.empty: @@ -247,10 +292,15 @@ def nwm_output_generator( else: output_waterbodies_df = waterbodies_df output_waterbody_types_df = waterbody_types_df +======= + + time_index, tmp_variable = map(list, zip(*i_df.columns.tolist())) +>>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) LOG.info("- writing t-route flow results to LAKEOUT files") start = time.time() - for i in range(i_df.shape[1]): + for i in range(i_df.shape[1]): nhd_io.write_waterbody_netcdf( +<<<<<<< HEAD wbdyo, i_df.iloc[:,[i]], q_df.iloc[:,[i]], @@ -271,13 +321,27 @@ def nwm_output_generator( preRunLog.write("-----\n") preRunLog.close() +======= + wbdyo, + i_df.iloc[:, [i]], + q_df.iloc[:, [i]], + d_df.iloc[:, [i]], + waterbodies_df, + waterbody_types_df, + t0, + dt, + nts, + time_index[i], + ) + +>>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) LOG.debug("writing LAKEOUT files took a total time of %s seconds." % (time.time() - start)) - + if rsrto: LOG.info("- writing restart files") start = time.time() - + wrf_hydro_restart_dir = rsrto.get( "wrf_hydro_channel_restart_source_directory", None ) @@ -315,24 +379,25 @@ def nwm_output_generator( preRunLog.close() else: - LOG.critical('Did not find any restart files in wrf_hydro_channel_restart_source_directory. Aborting restart write sequence.') - + LOG.critical( + 'Did not find any restart files in wrf_hydro_channel_restart_source_directory. Aborting restart write sequence.') + else: - LOG.critical('wrf_hydro_channel_restart_source_directory not specified in configuration file. 
Aborting restart write sequence.') - + LOG.critical( + 'wrf_hydro_channel_restart_source_directory not specified in configuration file. Aborting restart write sequence.') + LOG.debug("writing restart files took %s seconds." % (time.time() - start)) if chrto: - + LOG.info("- writing t-route flow results to CHRTOUT files") start = time.time() - + chrtout_read_folder = chrto.get( "wrf_hydro_channel_output_source_folder", None ) - - if chrtout_read_folder: + if chrtout_read_folder: chrtout_files = sorted( Path(chrtout_read_folder) / f for f in run["qlat_files"] ) @@ -343,6 +408,7 @@ def nwm_output_generator( qts_subdivisions, cpu_pool, ) +<<<<<<< HEAD if (not logFileName == 'NONE'): with open(logFileName, 'a') as preRunLog: @@ -351,11 +417,13 @@ def nwm_output_generator( preRunLog.write("Output of FVD files into folder: "+str(chrtout_read_folder)+"\n") preRunLog.write("-----\n") preRunLog.close() +======= +>>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) LOG.debug("writing CHRTOUT files took a total time of %s seconds." % (time.time() - start)) - if csv_output_folder: - + if csv_output_folder: + LOG.info("- writing flow, velocity, and depth results to .csv") start = time.time() @@ -363,10 +431,10 @@ def nwm_output_generator( # TO DO: create more descriptive filenames if supernetwork_parameters.get("title_string", None): filename_fvd = ( - "flowveldepth_" + supernetwork_parameters["title_string"] + ".csv" + "flowveldepth_" + supernetwork_parameters["title_string"] + ".csv" ) filename_courant = ( - "courant_" + supernetwork_parameters["title_string"] + ".csv" + "courant_" + supernetwork_parameters["title_string"] + ".csv" ) else: run_time_stamp = datetime.now().isoformat() @@ -378,7 +446,7 @@ def nwm_output_generator( # no csv_output_segments are specified, then write results for all segments if not csv_output_segments: csv_output_segments = flowveldepth.index - + flowveldepth = flowveldepth.sort_index() flowveldepth.loc[csv_output_segments].to_csv(output_path.joinpath(filename_fvd)) @@ -389,12 +457,46 @@ def nwm_output_generator( LOG.debug("writing CSV file took %s seconds." % (time.time() - start)) # usgs_df_filtered = usgs_df[usgs_df.index.isin(csv_output_segments)] # usgs_df_filtered.to_csv(output_path.joinpath("usgs_df.csv")) - + + if parquet_output_folder: + + LOG.info("- writing flow, velocity, and depth results to .parquet") + start = time.time() + + # create filenames + # TO DO: create more descriptive filenames + if supernetwork_parameters.get("title_string", None): + filename_fvd = ( + "flowveldepth_" + supernetwork_parameters["title_string"] + ".parquet" + ) + filename_courant = ( + "courant_" + supernetwork_parameters["title_string"] + ".parquet" + ) + else: + run_time_stamp = datetime.now().isoformat() + filename_fvd = "flowveldepth_" + run_time_stamp + ".parquet" + filename_courant = "courant_" + run_time_stamp + ".parquet" + + output_path = Path(parquet_output_folder).resolve() + + # no csv_output_segments are specified, then write results for all segments + if not parquet_output_segments: + parquet_output_segments = flowveldepth.index + + flowveldepth = flowveldepth.sort_index() + flowveldepth.loc[parquet_output_segments].to_parquet(output_path.joinpath(filename_fvd)) + + if return_courant: + courant = courant.sort_index() + courant.loc[parquet_output_segments].to_parquet(output_path.joinpath(filename_courant)) + + LOG.debug("writing parquet file took %s seconds." 
% (time.time() - start)) + if chano: LOG.info("- writing t-route flow results at gage locations to CHANOBS file") start = time.time() - + # replace waterbody lake_ids with outlet link ids if link_lake_crosswalk: link_gage_df = _reindex_lake_to_link_id(link_gage_df, link_lake_crosswalk) @@ -404,15 +506,16 @@ def nwm_output_generator( chano['chanobs_filepath'] = str(chano['chanobs_filepath']) nhd_io.write_chanobs( - Path(chano['chanobs_output_directory'] + chano['chanobs_filepath']), - flowveldepth, - link_gage_df, - t0, - dt, + Path(chano['chanobs_output_directory'] + chano['chanobs_filepath']), + flowveldepth, + link_gage_df, + t0, + dt, nts, # TODO allow user to pass a list of segments at which they would like to print results # rather than just printing at gages. ) +<<<<<<< HEAD if (not logFileName == 'NONE'): with open(logFileName, 'a') as preRunLog: @@ -423,8 +526,12 @@ def nwm_output_generator( preRunLog.close() LOG.debug("writing flow data to CHANOBS took %s seconds." % (time.time() - start)) +======= +>>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) - if lastobso: + LOG.debug("writing flow data to CHANOBS took %s seconds." % (time.time() - start)) + + if lastobso: # Write out LastObs as netcdf when using main_v04 or troute_model with HYfeature. # This is only needed if 1) streamflow nudging is ON and 2) a lastobs output # folder is provided by the user. @@ -437,8 +544,12 @@ def nwm_output_generator( nudging_true = streamflow_da.get('streamflow_nudging', None) +<<<<<<< HEAD if nudging_true and lastobs_output_folder and not lastobs_df.empty: +======= + if nudging_true and lastobs_output_folder: +>>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) LOG.info("- writing lastobs files") start = time.time() @@ -461,24 +572,28 @@ def nwm_output_generator( LOG.debug("writing lastobs files took %s seconds." % (time.time() - start)) +<<<<<<< HEAD # if 'flowveldepth' in locals(): # LOG.debug(flowveldepth) +======= + if 'flowveldepth' in locals(): + LOG.debug(flowveldepth) +>>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) LOG.debug("output complete in %s seconds." % (time.time() - start_time)) ################### Parity Check if parity_set: - LOG.info( "conducting parity check, comparing WRF Hydro results against t-route results" ) - + start_time = time.time() - + parity_check( parity_set, results, ) - LOG.debug("parity check complete in %s seconds." % (time.time() - start_time)) \ No newline at end of file + LOG.debug("parity check complete in %s seconds." % (time.time() - start_time)) diff --git a/test/LowerColorado_TX/test_AnA_V4_NHD.yaml b/test/LowerColorado_TX/test_AnA_V4_NHD.yaml index 0d09cbc82..f42cf940c 100644 --- a/test/LowerColorado_TX/test_AnA_V4_NHD.yaml +++ b/test/LowerColorado_TX/test_AnA_V4_NHD.yaml @@ -105,7 +105,7 @@ compute_parameters: reservoir_rfc_forecasts_lookback_hours : 48 #-------------------------------------------------------------------------------- -# output_parameters: +output_parameters: # #---------- # test_output: output/lcr_flowveldepth.pkl # lite_restart: @@ -124,5 +124,8 @@ compute_parameters: # stream_output_time: 1 #[hr] # stream_output_type: '.nc' #please select only between netcdf '.nc' or '.csv' or '.pkl' # stream_output_internal_frequency: 30 #[min] it should be order of 5 minutes. 
For instance if you want to output every hour put 60 + parquet_output: + #--------- + parquet_output_folder: output/ \ No newline at end of file From 4e7cb462ca06b1d462fce0f0ce9422dd8bb55180 Mon Sep 17 00:00:00 2001 From: karnesh Date: Mon, 25 Mar 2024 20:48:26 -0400 Subject: [PATCH 03/29] Modified parquet output format to match TEEHR input --- .../troute/config/output_parameters.py | 1 + src/troute-nwm/src/nwm_routing/output.py | 75 ++++++++++++++++++- 2 files changed, 74 insertions(+), 2 deletions(-) diff --git a/src/troute-config/troute/config/output_parameters.py b/src/troute-config/troute/config/output_parameters.py index 9c8ce421b..ff4722d1a 100644 --- a/src/troute-config/troute/config/output_parameters.py +++ b/src/troute-config/troute/config/output_parameters.py @@ -53,6 +53,7 @@ class ParquetOutput(BaseModel, extra='forbid'): # NOTE: required if writing results to parquet parquet_output_folder: Optional[DirectoryPath] = None parquet_output_segments: Optional[List[str]] = None + configuration: Optional[str] = None class ChrtoutOutput(BaseModel, extra='forbid'): diff --git a/src/troute-nwm/src/nwm_routing/output.py b/src/troute-nwm/src/nwm_routing/output.py index 80a1a4e39..11106a870 100644 --- a/src/troute-nwm/src/nwm_routing/output.py +++ b/src/troute-nwm/src/nwm_routing/output.py @@ -70,6 +70,48 @@ def nwm_output_generator( ======= >>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) +def _parquet_output_format_converter(df, start_datetime, dt, configuration): + ''' + Utility function for convert flowveldepth dataframe + to a timeseries and to match parquet input format + of TEEHR + + Arguments: + ---------- + - df (DataFrame): Data frame to be converted + - start_datetime: Date time from restart parameters + - dt: Time step + - configuration: configuration (for instance- short_range, medium_range) + + Returns: + -------- + - timeseries_df (DataFrame): Converted timeseries data frame + ''' + + df.index.name = 'location_id' + df.reset_index(inplace=True) + timeseries_df = df.melt(id_vars=['location_id'], var_name='var') + timeseries_df['var'] = timeseries_df['var'].astype('string') + timeseries_df[['timestep', 'variable']] = timeseries_df['var'].str.strip("()").str.split(",", n=1, expand=True) + timeseries_df['variable'] = timeseries_df['variable'].str.strip().str.replace("'", "") + timeseries_df['timestep'] = timeseries_df['timestep'].astype('int') + timeseries_df['value_time'] = (start_datetime + pd.to_timedelta(timeseries_df['timestep'] * dt, unit='s')) + variable_to_name_map = {"q": "streamflow", "d": "depth", "v": "velocity"} + timeseries_df["variable_name"] = timeseries_df["variable"].map(variable_to_name_map) + timeseries_df.drop(['var', 'timestep', 'variable'], axis=1, inplace=True) + timeseries_df['configuration'] = configuration + variable_to_units_map = {"streamflow": "m3/s", "velocity": "m/s", "depth": "m"} + timeseries_df['units'] = timeseries_df['variable_name'].map(variable_to_units_map) + timeseries_df['reference_time'] = start_datetime.date() + timeseries_df['location_id'] = timeseries_df['location_id'].astype('string') + timeseries_df['location_id'] = 'nex-' + timeseries_df['location_id'] + timeseries_df['value'] = timeseries_df['value'].astype('double') + timeseries_df['reference_time'] = timeseries_df['reference_time'].astype('datetime64[us]') + timeseries_df['value_time'] = timeseries_df['value_time'].astype('datetime64[us]') + + return timeseries_df + + def nwm_output_generator( run, results, @@ -484,11 +526,40 @@ def 
nwm_output_generator( parquet_output_segments = flowveldepth.index flowveldepth = flowveldepth.sort_index() - flowveldepth.loc[parquet_output_segments].to_parquet(output_path.joinpath(filename_fvd)) + timeseries_df = _parquet_output_format_converter(flowveldepth, restart_parameters.get("start_datetime"), dt, + output_parameters["parquet_output"].get("configuration")) + """ + flowveldepth.index.name = 'Location_ID' + flowveldepth.reset_index(inplace=True) + timeseries_df = flowveldepth.melt(id_vars=['Location_ID'], var_name='var') + timeseries_df['var'] = timeseries_df['var'].astype('string') + timeseries_df[['timestep', 'variable']] = timeseries_df['var'].str.strip("()").str.split(",", n=1, expand=True) + # timeseries_df[['timestep', 'variable_name']]= pd.DataFrame([x.strip("()").split(',') for x in list( + # timeseries_df['variable'])]) + timeseries_df['variable'] = timeseries_df['variable'].str.strip().str.replace("'", "") + timeseries_df['timestep'] = timeseries_df['timestep'].astype('int') + timeseries_df['value_time'] = (restart_parameters.get("start_datetime") + + pd.to_timedelta(timeseries_df['timestep'] * dt, unit='s')) + variable_to_name_map = {"q": "streamflow", "d": "depth", "v": "velocity"} + timeseries_df["variable_name"] = timeseries_df["variable"].map(variable_to_name_map) + timeseries_df.drop(['var', 'timestep', 'variable'], axis=1, inplace=True) + timeseries_df['configuration'] = output_parameters["parquet_output"].get("configuration") + variable_to_units_map = {"streamflow": "m3/s", "velocity": "m/s", "depth": "m"} + timeseries_df['units'] = timeseries_df['variable_name'].map(variable_to_units_map) + timeseries_df['reference_time'] = restart_parameters.get("start_datetime").date() + """ + parquet_output_segments_str = ['nex-' + str(segment) for segment in parquet_output_segments] + timeseries_df.loc[timeseries_df['location_id'].isin(parquet_output_segments_str)].to_parquet( + output_path.joinpath(filename_fvd), allow_truncated_timestamps=True) + # flowveldepth.loc[parquet_output_segments].to_parquet(output_path.joinpath(filename_fvd)) if return_courant: courant = courant.sort_index() - courant.loc[parquet_output_segments].to_parquet(output_path.joinpath(filename_courant)) + timeseries_courant = _parquet_output_format_converter(courant, restart_parameters.get("start_datetime"), dt, + output_parameters["parquet_output"].get("configuration")) + timeseries_courant.loc[timeseries_courant['location_id'].isin(parquet_output_segments_str)].to_parquet( + output_path.joinpath(filename_courant), allow_truncated_timestamps=True) + #courant.loc[parquet_output_segments].to_parquet(output_path.joinpath(filename_courant)) LOG.debug("writing parquet file took %s seconds." 
% (time.time() - start)) From 4b2bb010e474f1c8b2ec47ebfd2407813dd5a579 Mon Sep 17 00:00:00 2001 From: karnesh Date: Mon, 1 Apr 2024 10:35:51 -0400 Subject: [PATCH 04/29] cleaned the code --- src/troute-nwm/src/nwm_routing/output.py | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/src/troute-nwm/src/nwm_routing/output.py b/src/troute-nwm/src/nwm_routing/output.py index 11106a870..c20281d67 100644 --- a/src/troute-nwm/src/nwm_routing/output.py +++ b/src/troute-nwm/src/nwm_routing/output.py @@ -528,26 +528,7 @@ def nwm_output_generator( flowveldepth = flowveldepth.sort_index() timeseries_df = _parquet_output_format_converter(flowveldepth, restart_parameters.get("start_datetime"), dt, output_parameters["parquet_output"].get("configuration")) - """ - flowveldepth.index.name = 'Location_ID' - flowveldepth.reset_index(inplace=True) - timeseries_df = flowveldepth.melt(id_vars=['Location_ID'], var_name='var') - timeseries_df['var'] = timeseries_df['var'].astype('string') - timeseries_df[['timestep', 'variable']] = timeseries_df['var'].str.strip("()").str.split(",", n=1, expand=True) - # timeseries_df[['timestep', 'variable_name']]= pd.DataFrame([x.strip("()").split(',') for x in list( - # timeseries_df['variable'])]) - timeseries_df['variable'] = timeseries_df['variable'].str.strip().str.replace("'", "") - timeseries_df['timestep'] = timeseries_df['timestep'].astype('int') - timeseries_df['value_time'] = (restart_parameters.get("start_datetime") + - pd.to_timedelta(timeseries_df['timestep'] * dt, unit='s')) - variable_to_name_map = {"q": "streamflow", "d": "depth", "v": "velocity"} - timeseries_df["variable_name"] = timeseries_df["variable"].map(variable_to_name_map) - timeseries_df.drop(['var', 'timestep', 'variable'], axis=1, inplace=True) - timeseries_df['configuration'] = output_parameters["parquet_output"].get("configuration") - variable_to_units_map = {"streamflow": "m3/s", "velocity": "m/s", "depth": "m"} - timeseries_df['units'] = timeseries_df['variable_name'].map(variable_to_units_map) - timeseries_df['reference_time'] = restart_parameters.get("start_datetime").date() - """ + parquet_output_segments_str = ['nex-' + str(segment) for segment in parquet_output_segments] timeseries_df.loc[timeseries_df['location_id'].isin(parquet_output_segments_str)].to_parquet( output_path.joinpath(filename_fvd), allow_truncated_timestamps=True) From 1bb44f50fc5fa246a2b8dc38a9c9dea85f543142 Mon Sep 17 00:00:00 2001 From: karnesh Date: Mon, 1 Apr 2024 10:37:24 -0400 Subject: [PATCH 05/29] cleaned the code --- src/troute-nwm/src/nwm_routing/output.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/troute-nwm/src/nwm_routing/output.py b/src/troute-nwm/src/nwm_routing/output.py index c20281d67..d19d5c7dc 100644 --- a/src/troute-nwm/src/nwm_routing/output.py +++ b/src/troute-nwm/src/nwm_routing/output.py @@ -521,7 +521,7 @@ def nwm_output_generator( output_path = Path(parquet_output_folder).resolve() - # no csv_output_segments are specified, then write results for all segments + # no parquet_output_segments are specified, then write results for all segments if not parquet_output_segments: parquet_output_segments = flowveldepth.index @@ -532,7 +532,6 @@ def nwm_output_generator( parquet_output_segments_str = ['nex-' + str(segment) for segment in parquet_output_segments] timeseries_df.loc[timeseries_df['location_id'].isin(parquet_output_segments_str)].to_parquet( output_path.joinpath(filename_fvd), allow_truncated_timestamps=True) - # 
flowveldepth.loc[parquet_output_segments].to_parquet(output_path.joinpath(filename_fvd)) if return_courant: courant = courant.sort_index() @@ -540,7 +539,6 @@ def nwm_output_generator( output_parameters["parquet_output"].get("configuration")) timeseries_courant.loc[timeseries_courant['location_id'].isin(parquet_output_segments_str)].to_parquet( output_path.joinpath(filename_courant), allow_truncated_timestamps=True) - #courant.loc[parquet_output_segments].to_parquet(output_path.joinpath(filename_courant)) LOG.debug("writing parquet file took %s seconds." % (time.time() - start)) From 16362ec7e64c300851d8f68a8acfddc253f3a128 Mon Sep 17 00:00:00 2001 From: karnesh Date: Tue, 23 Apr 2024 11:38:27 -0400 Subject: [PATCH 06/29] sample yaml file changes for parquet output --- test/LowerColorado_TX/test_AnA_V4_NHD.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/test/LowerColorado_TX/test_AnA_V4_NHD.yaml b/test/LowerColorado_TX/test_AnA_V4_NHD.yaml index f42cf940c..5ab341fe6 100644 --- a/test/LowerColorado_TX/test_AnA_V4_NHD.yaml +++ b/test/LowerColorado_TX/test_AnA_V4_NHD.yaml @@ -127,5 +127,6 @@ output_parameters: parquet_output: #--------- parquet_output_folder: output/ + configuration: short_range \ No newline at end of file From 29a1bc6ade2f4e4134bf1e3da1782afdfeeec963 Mon Sep 17 00:00:00 2001 From: karnesh Date: Tue, 7 May 2024 13:38:16 -0400 Subject: [PATCH 07/29] added functionality to include user defined prefix for IDs --- .../troute/config/output_parameters.py | 1 + src/troute-nwm/src/nwm_routing/output.py | 95 +++---------------- test/LowerColorado_TX/test_AnA_V4_NHD.yaml | 1 + 3 files changed, 13 insertions(+), 84 deletions(-) diff --git a/src/troute-config/troute/config/output_parameters.py b/src/troute-config/troute/config/output_parameters.py index ff4722d1a..eb19fea3d 100644 --- a/src/troute-config/troute/config/output_parameters.py +++ b/src/troute-config/troute/config/output_parameters.py @@ -54,6 +54,7 @@ class ParquetOutput(BaseModel, extra='forbid'): parquet_output_folder: Optional[DirectoryPath] = None parquet_output_segments: Optional[List[str]] = None configuration: Optional[str] = None + prefix_ids: Optional[str] = None class ChrtoutOutput(BaseModel, extra='forbid'): diff --git a/src/troute-nwm/src/nwm_routing/output.py b/src/troute-nwm/src/nwm_routing/output.py index d19d5c7dc..f54ad8117 100644 --- a/src/troute-nwm/src/nwm_routing/output.py +++ b/src/troute-nwm/src/nwm_routing/output.py @@ -46,31 +46,8 @@ def _reindex_lake_to_link_id(target_df, crosswalk): return target_df -<<<<<<< HEAD -def nwm_output_generator( - run, - results, - supernetwork_parameters, - output_parameters, - parity_parameters, - restart_parameters, - parity_set, - qts_subdivisions, - return_courant, - cpu_pool, - waterbodies_df, - waterbody_types_df, - duplicate_ids_df, - data_assimilation_parameters=False, - lastobs_df = None, - link_gage_df = None, - link_lake_crosswalk = None, - logFileName='NONE' -): -======= ->>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) -def _parquet_output_format_converter(df, start_datetime, dt, configuration): +def _parquet_output_format_converter(df, start_datetime, dt, configuration, prefix_ids): ''' Utility function for convert flowveldepth dataframe to a timeseries and to match parquet input format @@ -104,7 +81,7 @@ def _parquet_output_format_converter(df, start_datetime, dt, configuration): timeseries_df['units'] = timeseries_df['variable_name'].map(variable_to_units_map) timeseries_df['reference_time'] = 
start_datetime.date() timeseries_df['location_id'] = timeseries_df['location_id'].astype('string') - timeseries_df['location_id'] = 'nex-' + timeseries_df['location_id'] + timeseries_df['location_id'] = prefix_ids + '-' + timeseries_df['location_id'] timeseries_df['value'] = timeseries_df['value'].astype('double') timeseries_df['reference_time'] = timeseries_df['reference_time'].astype('datetime64[us]') timeseries_df['value_time'] = timeseries_df['value_time'].astype('datetime64[us]') @@ -129,6 +106,7 @@ def nwm_output_generator( lastobs_df=None, link_gage_df=None, link_lake_crosswalk=None, + logFileName='NONE' ): dt = run.get("dt") nts = run.get("nts") @@ -190,10 +168,6 @@ def nwm_output_generator( ) csv_output_segments = csv_output.get("csv_output_segments", None) -<<<<<<< HEAD - if csv_output_folder or rsrto or chrto or chano or test or wbdyo or stream_output: - -======= if parquet_output: parquet_output_folder = output_parameters["parquet_output"].get( "parquet_output_folder", None @@ -202,7 +176,6 @@ def nwm_output_generator( if csv_output_folder or parquet_output_folder or rsrto or chrto or chano or test or wbdyo or stream_output: ->>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) start = time.time() qvd_columns = pd.MultiIndex.from_product( [range(nts), ["q", "v", "d"]] @@ -283,7 +256,7 @@ def nwm_output_generator( nudge = np.concatenate([r[8] for r in results]) usgs_positions_id = np.concatenate([r[3][0] for r in results]) -<<<<<<< HEAD + nhd_io.write_flowveldepth( Path(stream_output_directory), flowveldepth, @@ -309,23 +282,11 @@ def nwm_output_generator( preRunLog.write("Output of FVD data every "+str(fCalc)+" minutes\n") preRunLog.write("Writing "+str(nTimeBins)+" time bins for "+str(len(flowveldepth.index))+" segments per FVD output file\n") preRunLog.close() -======= - nhd_io.write_flowveldepth_netcdf(Path(stream_output_directory), - flowveldepth, - nudge, - usgs_positions_id, - t0, - int(stream_output_timediff), - stream_output_type, - stream_output_internal_frequency, - cpu_pool=cpu_pool) ->>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) - + if test: flowveldepth.to_pickle(Path(test)) if wbdyo and not waterbodies_df.empty: -<<<<<<< HEAD time_index, tmp_variable = map(list,zip(*i_df.columns.tolist())) if not duplicate_ids_df.empty: @@ -334,15 +295,11 @@ def nwm_output_generator( else: output_waterbodies_df = waterbodies_df output_waterbody_types_df = waterbody_types_df -======= - time_index, tmp_variable = map(list, zip(*i_df.columns.tolist())) ->>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) LOG.info("- writing t-route flow results to LAKEOUT files") start = time.time() for i in range(i_df.shape[1]): nhd_io.write_waterbody_netcdf( -<<<<<<< HEAD wbdyo, i_df.iloc[:,[i]], q_df.iloc[:,[i]], @@ -362,21 +319,7 @@ def nwm_output_generator( preRunLog.write("Output of waterbody files into folder: "+str(wbdyo)+"\n") preRunLog.write("-----\n") preRunLog.close() - -======= - wbdyo, - i_df.iloc[:, [i]], - q_df.iloc[:, [i]], - d_df.iloc[:, [i]], - waterbodies_df, - waterbody_types_df, - t0, - dt, - nts, - time_index[i], - ) ->>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) LOG.debug("writing LAKEOUT files took a total time of %s seconds." 
% (time.time() - start)) if rsrto: @@ -450,7 +393,6 @@ def nwm_output_generator( qts_subdivisions, cpu_pool, ) -<<<<<<< HEAD if (not logFileName == 'NONE'): with open(logFileName, 'a') as preRunLog: @@ -459,8 +401,6 @@ def nwm_output_generator( preRunLog.write("Output of FVD files into folder: "+str(chrtout_read_folder)+"\n") preRunLog.write("-----\n") preRunLog.close() -======= ->>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) LOG.debug("writing CHRTOUT files took a total time of %s seconds." % (time.time() - start)) @@ -526,17 +466,19 @@ def nwm_output_generator( parquet_output_segments = flowveldepth.index flowveldepth = flowveldepth.sort_index() + configuration = output_parameters["parquet_output"].get("configuration") + prefix_ids = output_parameters["parquet_output"].get("prefix_ids") timeseries_df = _parquet_output_format_converter(flowveldepth, restart_parameters.get("start_datetime"), dt, - output_parameters["parquet_output"].get("configuration")) + configuration, prefix_ids) - parquet_output_segments_str = ['nex-' + str(segment) for segment in parquet_output_segments] + parquet_output_segments_str = [prefix_ids + '-' + str(segment) for segment in parquet_output_segments] timeseries_df.loc[timeseries_df['location_id'].isin(parquet_output_segments_str)].to_parquet( output_path.joinpath(filename_fvd), allow_truncated_timestamps=True) if return_courant: courant = courant.sort_index() timeseries_courant = _parquet_output_format_converter(courant, restart_parameters.get("start_datetime"), dt, - output_parameters["parquet_output"].get("configuration")) + configuration, prefix_ids) timeseries_courant.loc[timeseries_courant['location_id'].isin(parquet_output_segments_str)].to_parquet( output_path.joinpath(filename_courant), allow_truncated_timestamps=True) @@ -565,8 +507,7 @@ def nwm_output_generator( # TODO allow user to pass a list of segments at which they would like to print results # rather than just printing at gages. ) -<<<<<<< HEAD - + if (not logFileName == 'NONE'): with open(logFileName, 'a') as preRunLog: preRunLog.write("\n") @@ -576,10 +517,6 @@ def nwm_output_generator( preRunLog.close() LOG.debug("writing flow data to CHANOBS took %s seconds." % (time.time() - start)) -======= ->>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) - - LOG.debug("writing flow data to CHANOBS took %s seconds." % (time.time() - start)) if lastobso: # Write out LastObs as netcdf when using main_v04 or troute_model with HYfeature. @@ -594,12 +531,8 @@ def nwm_output_generator( nudging_true = streamflow_da.get('streamflow_nudging', None) -<<<<<<< HEAD if nudging_true and lastobs_output_folder and not lastobs_df.empty: -======= - if nudging_true and lastobs_output_folder: ->>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) LOG.info("- writing lastobs files") start = time.time() @@ -622,14 +555,8 @@ def nwm_output_generator( LOG.debug("writing lastobs files took %s seconds." % (time.time() - start)) -<<<<<<< HEAD - - # if 'flowveldepth' in locals(): - # LOG.debug(flowveldepth) -======= if 'flowveldepth' in locals(): LOG.debug(flowveldepth) ->>>>>>> 184493d (Added functionality to write flow, velocity and depth to parquet) LOG.debug("output complete in %s seconds." 
% (time.time() - start_time)) diff --git a/test/LowerColorado_TX/test_AnA_V4_NHD.yaml b/test/LowerColorado_TX/test_AnA_V4_NHD.yaml index 5ab341fe6..b8a906517 100644 --- a/test/LowerColorado_TX/test_AnA_V4_NHD.yaml +++ b/test/LowerColorado_TX/test_AnA_V4_NHD.yaml @@ -128,5 +128,6 @@ output_parameters: #--------- parquet_output_folder: output/ configuration: short_range + prefix_ids: nex \ No newline at end of file From d8310243c0e48817a16d7fd47e977043743d0759 Mon Sep 17 00:00:00 2001 From: karnesh Date: Mon, 20 May 2024 10:23:20 -0400 Subject: [PATCH 08/29] reverted back the formatting changes --- src/troute-nwm/src/nwm_routing/output.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/troute-nwm/src/nwm_routing/output.py b/src/troute-nwm/src/nwm_routing/output.py index f54ad8117..cd50ed17c 100644 --- a/src/troute-nwm/src/nwm_routing/output.py +++ b/src/troute-nwm/src/nwm_routing/output.py @@ -34,7 +34,7 @@ def _reindex_lake_to_link_id(target_df, crosswalk): lake_index_intersect = np.intersect1d( idxs, lakeids, - return_indices=True + return_indices = True ) # replace lake ids with link IDs in the target_df index array @@ -300,7 +300,7 @@ def nwm_output_generator( start = time.time() for i in range(i_df.shape[1]): nhd_io.write_waterbody_netcdf( - wbdyo, + wbdyo, i_df.iloc[:,[i]], q_df.iloc[:,[i]], d_df.iloc[:,[i]], From 4e42b63d2e8486f4e75f4e5c3b4150d2d099ab17 Mon Sep 17 00:00:00 2001 From: karnesh Date: Mon, 20 May 2024 10:31:55 -0400 Subject: [PATCH 09/29] reverted back the formatting changes --- src/troute-nwm/src/nwm_routing/output.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/troute-nwm/src/nwm_routing/output.py b/src/troute-nwm/src/nwm_routing/output.py index cd50ed17c..1ab22d2b0 100644 --- a/src/troute-nwm/src/nwm_routing/output.py +++ b/src/troute-nwm/src/nwm_routing/output.py @@ -572,5 +572,5 @@ def nwm_output_generator( parity_check( parity_set, results, ) - + LOG.debug("parity check complete in %s seconds." 
% (time.time() - start_time)) From 75d798d7946c557bc9d543e69e4131dc8a0beee7 Mon Sep 17 00:00:00 2001 From: karnesh Date: Mon, 20 May 2024 10:34:52 -0400 Subject: [PATCH 10/29] reverted back the formatting changes --- src/troute-config/troute/config/output_parameters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/troute-config/troute/config/output_parameters.py b/src/troute-config/troute/config/output_parameters.py index eb19fea3d..f8710bcf5 100644 --- a/src/troute-config/troute/config/output_parameters.py +++ b/src/troute-config/troute/config/output_parameters.py @@ -105,7 +105,7 @@ def validate_stream_output_internal_frequency(cls, value, values): if values.get('stream_output_time') != -1 and value / 60 > values['stream_output_time']: raise ValueError("stream_output_internal_frequency should be less than or equal to stream_output_time in minutes.") return value - + OutputParameters.update_forward_refs() WrfHydroParityCheck.update_forward_refs() From de4364035efc4962ec34592b4482cbc9cb5ca6bc Mon Sep 17 00:00:00 2001 From: karnesh Date: Mon, 20 May 2024 10:36:17 -0400 Subject: [PATCH 11/29] reverted back the formatting changes --- src/troute-config/troute/config/output_parameters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/troute-config/troute/config/output_parameters.py b/src/troute-config/troute/config/output_parameters.py index f8710bcf5..5b91c6116 100644 --- a/src/troute-config/troute/config/output_parameters.py +++ b/src/troute-config/troute/config/output_parameters.py @@ -105,7 +105,7 @@ def validate_stream_output_internal_frequency(cls, value, values): if values.get('stream_output_time') != -1 and value / 60 > values['stream_output_time']: raise ValueError("stream_output_internal_frequency should be less than or equal to stream_output_time in minutes.") return value - + OutputParameters.update_forward_refs() WrfHydroParityCheck.update_forward_refs() From c959cb37b7d3f37152fd2db1cb59981c504f64d1 Mon Sep 17 00:00:00 2001 From: karnesh Date: Fri, 26 Jul 2024 13:12:54 -0400 Subject: [PATCH 12/29] removed parquet from streamOutput_allowedTypes --- src/troute-config/troute/config/output_parameters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/troute-config/troute/config/output_parameters.py b/src/troute-config/troute/config/output_parameters.py index 5b91c6116..0b5919dfb 100644 --- a/src/troute-config/troute/config/output_parameters.py +++ b/src/troute-config/troute/config/output_parameters.py @@ -5,7 +5,7 @@ from typing_extensions import Annotated, Literal from .types import FilePath, DirectoryPath -streamOutput_allowedTypes = Literal['.csv', '.nc', '.pkl', '.parquet'] +streamOutput_allowedTypes = Literal['.csv', '.nc', '.pkl'] class OutputParameters(BaseModel): From 0a5046c99e6b6b3583227c25e4fc777975d625af Mon Sep 17 00:00:00 2001 From: karnesh Date: Fri, 26 Jul 2024 14:24:35 -0400 Subject: [PATCH 13/29] added missing parameter in nwm_output_generator function and fixed formatting --- .../troute/config/output_parameters.py | 4 +- src/troute-nwm/src/nwm_routing/output.py | 94 +++++++++---------- 2 files changed, 49 insertions(+), 49 deletions(-) diff --git a/src/troute-config/troute/config/output_parameters.py b/src/troute-config/troute/config/output_parameters.py index 0b5919dfb..b367582d2 100644 --- a/src/troute-config/troute/config/output_parameters.py +++ b/src/troute-config/troute/config/output_parameters.py @@ -49,7 +49,7 @@ class CsvOutput(BaseModel): csv_output_segments: Optional[List[str]] = None 
-class ParquetOutput(BaseModel, extra='forbid'): +class ParquetOutput(BaseModel): # NOTE: required if writing results to parquet parquet_output_folder: Optional[DirectoryPath] = None parquet_output_segments: Optional[List[str]] = None @@ -57,7 +57,7 @@ class ParquetOutput(BaseModel, extra='forbid'): prefix_ids: Optional[str] = None -class ChrtoutOutput(BaseModel, extra='forbid'): +class ChrtoutOutput(BaseModel): # NOTE: mandatory if writing results to CHRTOUT. wrf_hydro_channel_output_source_folder: Optional[DirectoryPath] = None diff --git a/src/troute-nwm/src/nwm_routing/output.py b/src/troute-nwm/src/nwm_routing/output.py index 1ab22d2b0..08bef8c78 100644 --- a/src/troute-nwm/src/nwm_routing/output.py +++ b/src/troute-nwm/src/nwm_routing/output.py @@ -7,8 +7,8 @@ from build_tests import parity_check import logging -LOG = logging.getLogger('') +LOG = logging.getLogger('') def _reindex_lake_to_link_id(target_df, crosswalk): ''' @@ -29,20 +29,20 @@ def _reindex_lake_to_link_id(target_df, crosswalk): # evaluate intersection of lake ids and target_df index values # i.e. what are the index positions of lake ids that need replacing? - lakeids = np.fromiter(crosswalk.keys(), dtype=int) + lakeids = np.fromiter(crosswalk.keys(), dtype = int) idxs = target_df.index.to_numpy() lake_index_intersect = np.intersect1d( - idxs, - lakeids, + idxs, + lakeids, return_indices = True ) # replace lake ids with link IDs in the target_df index array - linkids = np.fromiter(crosswalk.values(), dtype=int) + linkids = np.fromiter(crosswalk.values(), dtype = int) idxs[lake_index_intersect[1]] = linkids[lake_index_intersect[2]] # (re) set the target_df index - target_df.set_index(idxs, inplace=True) + target_df.set_index(idxs, inplace = True) return target_df @@ -102,6 +102,7 @@ def nwm_output_generator( cpu_pool, waterbodies_df, waterbody_types_df, + duplicate_ids_df, data_assimilation_parameters=False, lastobs_df=None, link_gage_df=None, @@ -145,7 +146,7 @@ def nwm_output_generator( ) ################### Output Handling - + start_time = time.time() LOG.info(f"Handling output ...") @@ -187,6 +188,7 @@ def nwm_output_generator( ) if wbdyo and not waterbodies_df.empty: + # create waterbody dataframe for output to netcdf file i_columns = pd.MultiIndex.from_product( [range(nts), ["i"]] @@ -211,20 +213,20 @@ def nwm_output_generator( timestep_index = np.where(((np.array(list(set(list(timestep)))) + 1) * dt) % (dt * qts_subdivisions) == 0) ts_set = set(timestep_index[0].tolist()) flow_df_col_index = [i for i, e in enumerate(timestep) if e in ts_set] - flow_df = flow_df.iloc[:, flow_df_col_index] + flow_df = flow_df.iloc[:,flow_df_col_index] timestep, variable = zip(*wbdy.columns.tolist()) timestep_index = np.where(((np.array(list(set(list(timestep)))) + 1) * dt) % (dt * qts_subdivisions) == 0) ts_set = set(timestep_index[0].tolist()) wbdy_col_index = [i for i, e in enumerate(timestep) if e in ts_set] - i_df = wbdy.iloc[:, wbdy_col_index] - q_df = flow_df.iloc[:, 0::3] - d_df = flow_df.iloc[:, 2::3] + i_df = wbdy.iloc[:,wbdy_col_index] + q_df = flow_df.iloc[:,0::3] + d_df = flow_df.iloc[:,2::3] # replace waterbody lake_ids with outlet link ids if (link_lake_crosswalk): flowveldepth = _reindex_lake_to_link_id(flowveldepth, link_lake_crosswalk) - + # todo: create a unit test by saving FVD array to disk and then checking that # it matches FVD array from parent branch or other configurations. 
# flowveldepth.to_pickle(output_parameters['test_output']) @@ -240,14 +242,14 @@ def nwm_output_generator( ], copy=False, ) - + # replace waterbody lake_ids with outlet link ids if link_lake_crosswalk: # (re) set the flowveldepth index - courant.set_index(fvdidxs, inplace=True) - + courant.set_index(fvdidxs, inplace = True) + LOG.debug("Constructing the FVD DataFrame took %s seconds." % (time.time() - start)) - + if stream_output: stream_output_directory = stream_output['stream_output_directory'] stream_output_timediff = stream_output['stream_output_time'] @@ -256,7 +258,6 @@ def nwm_output_generator( nudge = np.concatenate([r[8] for r in results]) usgs_positions_id = np.concatenate([r[3][0] for r in results]) - nhd_io.write_flowveldepth( Path(stream_output_directory), flowveldepth, @@ -282,10 +283,10 @@ def nwm_output_generator( preRunLog.write("Output of FVD data every "+str(fCalc)+" minutes\n") preRunLog.write("Writing "+str(nTimeBins)+" time bins for "+str(len(flowveldepth.index))+" segments per FVD output file\n") preRunLog.close() - + if test: flowveldepth.to_pickle(Path(test)) - + if wbdyo and not waterbodies_df.empty: time_index, tmp_variable = map(list,zip(*i_df.columns.tolist())) @@ -295,12 +296,11 @@ def nwm_output_generator( else: output_waterbodies_df = waterbodies_df output_waterbody_types_df = waterbody_types_df - LOG.info("- writing t-route flow results to LAKEOUT files") start = time.time() - for i in range(i_df.shape[1]): + for i in range(i_df.shape[1]): nhd_io.write_waterbody_netcdf( - wbdyo, + wbdyo, i_df.iloc[:,[i]], q_df.iloc[:,[i]], d_df.iloc[:,[i]], @@ -319,14 +319,14 @@ def nwm_output_generator( preRunLog.write("Output of waterbody files into folder: "+str(wbdyo)+"\n") preRunLog.write("-----\n") preRunLog.close() - + LOG.debug("writing LAKEOUT files took a total time of %s seconds." % (time.time() - start)) - + if rsrto: LOG.info("- writing restart files") start = time.time() - + wrf_hydro_restart_dir = rsrto.get( "wrf_hydro_channel_restart_source_directory", None ) @@ -364,12 +364,10 @@ def nwm_output_generator( preRunLog.close() else: - LOG.critical( - 'Did not find any restart files in wrf_hydro_channel_restart_source_directory. Aborting restart write sequence.') + LOG.critical('Did not find any restart files in wrf_hydro_channel_restart_source_directory. Aborting restart write sequence.') else: - LOG.critical( - 'wrf_hydro_channel_restart_source_directory not specified in configuration file. Aborting restart write sequence.') + LOG.critical('wrf_hydro_channel_restart_source_directory not specified in configuration file. Aborting restart write sequence.') LOG.debug("writing restart files took %s seconds." % (time.time() - start)) @@ -377,12 +375,13 @@ def nwm_output_generator( LOG.info("- writing t-route flow results to CHRTOUT files") start = time.time() - + chrtout_read_folder = chrto.get( "wrf_hydro_channel_output_source_folder", None ) if chrtout_read_folder: + chrtout_files = sorted( Path(chrtout_read_folder) / f for f in run["qlat_files"] ) @@ -404,8 +403,8 @@ def nwm_output_generator( LOG.debug("writing CHRTOUT files took a total time of %s seconds." 
% (time.time() - start)) - if csv_output_folder: - + if csv_output_folder: + LOG.info("- writing flow, velocity, and depth results to .csv") start = time.time() @@ -413,10 +412,10 @@ def nwm_output_generator( # TO DO: create more descriptive filenames if supernetwork_parameters.get("title_string", None): filename_fvd = ( - "flowveldepth_" + supernetwork_parameters["title_string"] + ".csv" + "flowveldepth_" + supernetwork_parameters["title_string"] + ".csv" ) filename_courant = ( - "courant_" + supernetwork_parameters["title_string"] + ".csv" + "courant_" + supernetwork_parameters["title_string"] + ".csv" ) else: run_time_stamp = datetime.now().isoformat() @@ -428,7 +427,7 @@ def nwm_output_generator( # no csv_output_segments are specified, then write results for all segments if not csv_output_segments: csv_output_segments = flowveldepth.index - + flowveldepth = flowveldepth.sort_index() flowveldepth.loc[csv_output_segments].to_csv(output_path.joinpath(filename_fvd)) @@ -488,7 +487,7 @@ def nwm_output_generator( LOG.info("- writing t-route flow results at gage locations to CHANOBS file") start = time.time() - + # replace waterbody lake_ids with outlet link ids if link_lake_crosswalk: link_gage_df = _reindex_lake_to_link_id(link_gage_df, link_lake_crosswalk) @@ -499,15 +498,15 @@ def nwm_output_generator( nhd_io.write_chanobs( Path(chano['chanobs_output_directory'] + chano['chanobs_filepath']), - flowveldepth, - link_gage_df, - t0, - dt, + flowveldepth, + link_gage_df, + t0, + dt, nts, # TODO allow user to pass a list of segments at which they would like to print results # rather than just printing at gages. ) - + if (not logFileName == 'NONE'): with open(logFileName, 'a') as preRunLog: preRunLog.write("\n") @@ -518,7 +517,7 @@ def nwm_output_generator( LOG.debug("writing flow data to CHANOBS took %s seconds." % (time.time() - start)) - if lastobso: + if lastobso: # Write out LastObs as netcdf when using main_v04 or troute_model with HYfeature. # This is only needed if 1) streamflow nudging is ON and 2) a lastobs output # folder is provided by the user. @@ -555,22 +554,23 @@ def nwm_output_generator( LOG.debug("writing lastobs files took %s seconds." % (time.time() - start)) - if 'flowveldepth' in locals(): - LOG.debug(flowveldepth) + # if 'flowveldepth' in locals(): + # LOG.debug(flowveldepth) LOG.debug("output complete in %s seconds." % (time.time() - start_time)) ################### Parity Check if parity_set: + LOG.info( "conducting parity check, comparing WRF Hydro results against t-route results" ) - + start_time = time.time() - + parity_check( parity_set, results, ) - + LOG.debug("parity check complete in %s seconds." 
% (time.time() - start_time)) From 11556c0fcde67476bb58a6313df468a82dcedae8 Mon Sep 17 00:00:00 2001 From: Amin Torabi <140189926+AminTorabi-NOAA@users.noreply.github.com> Date: Fri, 26 Jul 2024 16:37:26 -0400 Subject: [PATCH 14/29] mask output (#806) * mask_output * deleting pdb * error on NHD * change in yaml file * handling some edge cases * add 9999 to record all nex * delete extra pdb * update to run v3 * adding 9999 for wb * speeding up the process --- .../troute/config/output_parameters.py | 1 + src/troute-network/troute/AbstractNetwork.py | 10 +- .../troute/HYFeaturesNetwork.py | 12 ++ src/troute-network/troute/NHDNetwork.py | 3 +- src/troute-network/troute/nhd_io.py | 113 +++++++++++++++++- src/troute-nwm/src/nwm_routing/__main__.py | 16 ++- src/troute-nwm/src/nwm_routing/output.py | 51 ++++---- .../domain/mask_output.yaml | 25 ++++ .../test_AnA_V4_HYFeature.yaml | 1 + 9 files changed, 203 insertions(+), 29 deletions(-) create mode 100644 test/LowerColorado_TX_v4/domain/mask_output.yaml diff --git a/src/troute-config/troute/config/output_parameters.py b/src/troute-config/troute/config/output_parameters.py index b367582d2..983c1d9fb 100644 --- a/src/troute-config/troute/config/output_parameters.py +++ b/src/troute-config/troute/config/output_parameters.py @@ -94,6 +94,7 @@ class ParityCheckCompareFileSet(BaseModel): class StreamOutput(BaseModel): # NOTE: required if writing StreamOutput files stream_output_directory: Optional[DirectoryPath] = None + mask_output: Optional[FilePath] = None stream_output_time: int = 1 stream_output_type:streamOutput_allowedTypes = ".nc" stream_output_internal_frequency: Annotated[int, Field(strict=True, ge=5)] = 5 diff --git a/src/troute-network/troute/AbstractNetwork.py b/src/troute-network/troute/AbstractNetwork.py index 344bd9919..5bbdffa4b 100644 --- a/src/troute-network/troute/AbstractNetwork.py +++ b/src/troute-network/troute/AbstractNetwork.py @@ -34,7 +34,7 @@ class AbstractNetwork(ABC): "supernetwork_parameters", "waterbody_parameters","data_assimilation_parameters", "restart_parameters", "compute_parameters", "forcing_parameters", "hybrid_parameters", "preprocessing_parameters", "output_parameters", - "verbose", "showtiming", "break_points", "_routing"] + "verbose", "showtiming", "break_points", "_routing", "_nexus_dict", "_poi_nex_dict"] def __init__(self, from_files=True, value_dict={}): @@ -371,6 +371,14 @@ def gages(self): @property def dataframe(self): return self._dataframe + + @property + def nexus_dict(self): + return self._nexus_dict + + @property + def poi_nex_dict(self): + return self._poi_nex_dict @property def terminal_codes(self): diff --git a/src/troute-network/troute/HYFeaturesNetwork.py b/src/troute-network/troute/HYFeaturesNetwork.py index b476df709..f5eb91bc7 100644 --- a/src/troute-network/troute/HYFeaturesNetwork.py +++ b/src/troute-network/troute/HYFeaturesNetwork.py @@ -282,6 +282,8 @@ def __init__(self, # Preprocess network objects self.preprocess_network(flowpaths, nexus) + + self.crosswalk_nex_flowpath_poi(flowpaths, nexus) # Preprocess waterbody objects self.preprocess_waterbodies(lakes, nexus) @@ -415,6 +417,16 @@ def preprocess_network(self, flowpaths, nexus): # to the model engine/coastal models self._nexus_latlon = nexus + def crosswalk_nex_flowpath_poi(self, flowpaths, nexus): + + mask_flowpaths = flowpaths['toid'].str.startswith(('nex-', 'tnex-')) + filtered_flowpaths = flowpaths[mask_flowpaths] + self._nexus_dict = filtered_flowpaths.groupby('toid')['id'].apply(list).to_dict() ##{id: toid} + if 'poi_id' in 
nexus.columns: + self._poi_nex_dict = nexus.groupby('poi_id')['id'].apply(list).to_dict() + else: + self._poi_nex_dict = None + def preprocess_waterbodies(self, lakes, nexus): # If waterbodies are being simulated, create waterbody dataframes and dictionaries if not lakes.empty: diff --git a/src/troute-network/troute/NHDNetwork.py b/src/troute-network/troute/NHDNetwork.py index 115280b2c..ee4afcf52 100644 --- a/src/troute-network/troute/NHDNetwork.py +++ b/src/troute-network/troute/NHDNetwork.py @@ -45,7 +45,8 @@ def __init__( self.output_parameters = output_parameters self.verbose = verbose self.showtiming = showtiming - + self._poi_nex_dict = None + self._nexus_dict = None if self.verbose: print("creating supernetwork connections set") if self.showtiming: diff --git a/src/troute-network/troute/nhd_io.py b/src/troute-network/troute/nhd_io.py index 645af585f..de50b0847 100644 --- a/src/troute-network/troute/nhd_io.py +++ b/src/troute-network/troute/nhd_io.py @@ -2007,6 +2007,8 @@ def write_flowveldepth_netcdf(stream_output_directory, file_name, # ============ DIMENSIONS =================== _ = ncfile.createDimension('feature_id', len(flow)) _ = ncfile.createDimension('time', len(timestamps)) + max_str_len = max(len(str(x)) for x in flow.index.get_level_values('Type')) + _ = ncfile.createDimension('type_strlen', max_str_len) #_ = ncfile.createDimension('gage', gage) #_ = ncfile.createDimension('nudge_timestep', nudge_timesteps) # Add dimension for nudge time steps @@ -2048,12 +2050,24 @@ def write_flowveldepth_netcdf(stream_output_directory, file_name, datatype = 'int64', dimensions = ("feature_id",), ) - FEATURE_ID[:] = flow.index + FEATURE_ID[:] = flow.index.get_level_values('featureID') ncfile['feature_id'].setncatts( { 'long_name': 'Segment ID', } ) + # =========== type VARIABLE =============== + TYPE = ncfile.createVariable( + varname="type", + datatype=f'S{max_str_len}', + dimensions=("feature_id",), + ) + TYPE[:] = np.array(flow.index.get_level_values('Type').astype(str).tolist(), dtype=f'S{max_str_len}') + ncfile['type'].setncatts( + { + 'long_name': 'Type', + } + ) # =========== flow VARIABLE =============== flow_var = ncfile.createVariable( @@ -2128,9 +2142,99 @@ def write_flowveldepth_netcdf(stream_output_directory, file_name, 'code_version': '', } ) +def stream_output_mask_reader(stream_output_mask): + if not stream_output_mask: + return {} + with open(stream_output_mask, 'r') as file: + mask_list = yaml.safe_load(file) + + return mask_list + +def mask_find_seg(mask_list, nexus_dict, poi_crosswalk): + seg_id = [] + nex_id = {} + + if not mask_list: + return nex_id, seg_id + if 'wb' in mask_list and mask_list['wb']: + if 9999 not in mask_list['wb']: + seg_id.extend(mask_list['wb']) + else: + seg_id.extend([9999]) + + if 'nex' in mask_list and mask_list['nex']: + if 9999 in mask_list['nex']: + + for key, val in nexus_dict.items(): + nex_key = int(key.split('-')[-1]) + nex_id[nex_key] = [int(v.split('-')[-1]) for v in val] + else: + for key in mask_list['nex']: + item = f'nex-{key}' + nex_id[key] = [int(val.split('-')[-1]) for val in nexus_dict.get(item, [])] + + return nex_id, seg_id + +def updated_flowveldepth(flowveldepth, nex_id, seg_id, mask_list): + flowveldepth.index.name = 'featureID' + flowveldepth['Type'] = 'wb' + flowveldepth.set_index('Type', append=True, inplace=True) + + def create_mask(ids): + if 9999 in ids: + ids = flowveldepth.index.get_level_values('featureID') + + masked = (flowveldepth.index.get_level_values('featureID').isin(ids)) & 
(flowveldepth.index.get_level_values('Type') == 'wb') + return masked + + flowveldepth_seg = flowveldepth[create_mask(seg_id)] if seg_id else pd.DataFrame() + + if nex_id: + nex_df = pd.DataFrame([(nex, wb) for nex, wbs in nex_id.items() for wb in wbs], columns=['nex', 'featureID']) + flowveldepth_reset = flowveldepth.reset_index() + merge_flowveldepth_reset = flowveldepth_reset.merge(nex_df, on='featureID', how='left') + + merge_flowveldepth_reset = merge_flowveldepth_reset.dropna(subset=['nex']) + merge_flowveldepth_reset['nex'] = merge_flowveldepth_reset['nex'].astype(int) + # Define custom aggregation functions + def sum_q_columns(group): + q_columns = [col for col in group.columns if col[1] == 'q'] + return group[q_columns].sum() + + def custom_v(group): + v_columns = [col for col in group.columns if col[1] == 'v'] + v_values = group[v_columns] + if len(v_values) > 1: + return pd.Series({col: np.nan for col in v_values.columns}) + else: + return v_values.iloc[0] + + def avg_d_columns(group): + d_columns = [col for col in group.columns if col[1] == 'd'] + return group[d_columns].mean() + + # Apply the groupby with the custom aggregation functions + def custom_agg(group): + result = pd.concat([sum_q_columns(group), custom_v(group), avg_d_columns(group)]) + return result + + all_nex_data = merge_flowveldepth_reset.groupby('nex').apply(custom_agg) + all_nex_data.index = all_nex_data.index.rename('featureID') + all_nex_data['Type'] = 'nex' + # Set the new 'Type' column as an index + all_nex_data = all_nex_data.set_index('Type', append=True) + + else: + all_nex_data = pd.DataFrame() + + if not flowveldepth_seg.empty or not all_nex_data.empty: + flowveldepth = pd.concat([flowveldepth_seg, all_nex_data]) + + return flowveldepth def write_flowveldepth( stream_output_directory, + stream_output_mask, flowveldepth, nudge, usgs_positions_id, @@ -2140,6 +2244,8 @@ def write_flowveldepth( stream_output_type, stream_output_internal_frequency = 5, cpu_pool = 1, + poi_crosswalk = None, + nexus_dict= None, ): ''' Write the results of flowveldepth and nudge to netcdf- break. 
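A minimal, illustrative sketch of the aggregation rule that `updated_flowveldepth()` above applies when rolling waterbody segments up to a nexus: flows (`q`) are summed across contributing segments, depths (`d`) are averaged, and velocity (`v`) is kept only when a single segment feeds the nexus. The single-timestep columns and feature IDs below are simplified, made-up stand-ins rather than the patched multi-timestep layout.

```python
import pandas as pd

# two flowpath segments draining to one nexus; one timestep of q/v/d
fvd = pd.DataFrame(
    {'q': [10.0, 4.0], 'v': [0.5, 0.3], 'd': [1.0, 0.8]},
    index=pd.Index([2420805, 2420804], name='featureID'),
)
nex_map = pd.Series({2420805: 2420803, 2420804: 2420803}, name='nex')

grouped = fvd.join(nex_map).groupby('nex')
nexus_fvd = pd.DataFrame({
    'q': grouped['q'].sum(),                              # flows add at the junction
    'v': grouped['v'].mean().where(grouped.size() == 1),  # NaN when >1 contributor
    'd': grouped['d'].mean(),                             # depths are averaged
})
print(nexus_fvd)
```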
@@ -2150,6 +2256,11 @@ def write_flowveldepth( nudge (numpy.ndarray) - nudge data with shape (76, 289) usgs_positions_id (array) - Position ids of usgs gages ''' + + mask_list = stream_output_mask_reader(stream_output_mask) + nex_id, seg_id = mask_find_seg(mask_list, nexus_dict, poi_crosswalk) + flowveldepth = updated_flowveldepth(flowveldepth, nex_id, seg_id, mask_list) + # timesteps, variable = zip(*flowveldepth.columns.tolist()) # timesteps = list(timesteps) n_timesteps = flowveldepth.shape[1]//3 diff --git a/src/troute-nwm/src/nwm_routing/__main__.py b/src/troute-nwm/src/nwm_routing/__main__.py index 4b2150240..01dbd4bad 100644 --- a/src/troute-nwm/src/nwm_routing/__main__.py +++ b/src/troute-nwm/src/nwm_routing/__main__.py @@ -109,10 +109,10 @@ def main_v04(argv): network_end_time = time.time() task_times['network_creation_time'] = network_end_time - network_start_time - + # Create run_sets: sets of forcing files for each loop run_sets = network.build_forcing_sets() - + # Create da_sets: sets of TimeSlice files for each loop if "data_assimilation_parameters" in compute_parameters: da_sets = hnu.build_da_sets(data_assimilation_parameters, run_sets, network.t0) @@ -290,9 +290,13 @@ def main_v04(argv): forcing_end_time = time.time() task_times['forcing_time'] += forcing_end_time - route_end_time - - output_start_time = time.time() + if network.poi_nex_dict: + poi_crosswalk = network.poi_nex_dict + else: + poi_crosswalk = dict() + output_start_time = time.time() + #TODO Update this to work with either network type... nwm_output_generator( run, @@ -311,7 +315,9 @@ def main_v04(argv): data_assimilation_parameters, data_assimilation.lastobs_df, network.link_gage_df, - network.link_lake_crosswalk, + network.link_lake_crosswalk, + network.nexus_dict, + poi_crosswalk, logFileName ) diff --git a/src/troute-nwm/src/nwm_routing/output.py b/src/troute-nwm/src/nwm_routing/output.py index 08bef8c78..38a6e6547 100644 --- a/src/troute-nwm/src/nwm_routing/output.py +++ b/src/troute-nwm/src/nwm_routing/output.py @@ -90,25 +90,28 @@ def _parquet_output_format_converter(df, start_datetime, dt, configuration, pref def nwm_output_generator( - run, - results, - supernetwork_parameters, - output_parameters, - parity_parameters, - restart_parameters, - parity_set, - qts_subdivisions, - return_courant, - cpu_pool, - waterbodies_df, - waterbody_types_df, - duplicate_ids_df, - data_assimilation_parameters=False, - lastobs_df=None, - link_gage_df=None, - link_lake_crosswalk=None, - logFileName='NONE' + run, + results, + supernetwork_parameters, + output_parameters, + parity_parameters, + restart_parameters, + parity_set, + qts_subdivisions, + return_courant, + cpu_pool, + waterbodies_df, + waterbody_types_df, + duplicate_ids_df, + data_assimilation_parameters=False, + lastobs_df = None, + link_gage_df = None, + link_lake_crosswalk = None, + nexus_dict = None, + poi_crosswalk = None, + logFileName='NONE' ): + dt = run.get("dt") nts = run.get("nts") t0 = run.get("t0") @@ -252,14 +255,18 @@ def nwm_output_generator( if stream_output: stream_output_directory = stream_output['stream_output_directory'] + stream_output_mask = stream_output.get('mask_output',) stream_output_timediff = stream_output['stream_output_time'] stream_output_type = stream_output['stream_output_type'] stream_output_internal_frequency = stream_output['stream_output_internal_frequency'] - + if stream_output_mask: + stream_output_mask = Path(stream_output_mask) + nudge = np.concatenate([r[8] for r in results]) usgs_positions_id = 
np.concatenate([r[3][0] for r in results]) nhd_io.write_flowveldepth( - Path(stream_output_directory), + Path(stream_output_directory), + stream_output_mask, flowveldepth, nudge, usgs_positions_id, @@ -268,7 +275,9 @@ def nwm_output_generator( int(stream_output_timediff), stream_output_type, stream_output_internal_frequency, - cpu_pool = cpu_pool + cpu_pool = cpu_pool, + poi_crosswalk = poi_crosswalk, + nexus_dict= nexus_dict, ) if (not logFileName == 'NONE'): diff --git a/test/LowerColorado_TX_v4/domain/mask_output.yaml b/test/LowerColorado_TX_v4/domain/mask_output.yaml new file mode 100644 index 000000000..f5abd1588 --- /dev/null +++ b/test/LowerColorado_TX_v4/domain/mask_output.yaml @@ -0,0 +1,25 @@ +##### Description ##### +# If you want to mask flowpaths, make the list below. +# please provide in either POI, NEXUS or flowpath ID. +# by putting 9999 in nex or wb, it will record all the nexus or wb points +####################### +# mask: +wb: + # - 9999 + # - 2427667 + # - 2427668 + # - 2427671 + - 2420805 + - 2420804 + - 2426394 + +nex: + # - 9999 + # - 2427668 + # - 2427669 + - 2420805 + - 2420803 + # - 2420970 + # - 2420863 + + diff --git a/test/LowerColorado_TX_v4/test_AnA_V4_HYFeature.yaml b/test/LowerColorado_TX_v4/test_AnA_V4_HYFeature.yaml index 7f1990c8e..0e00ef5cd 100644 --- a/test/LowerColorado_TX_v4/test_AnA_V4_HYFeature.yaml +++ b/test/LowerColorado_TX_v4/test_AnA_V4_HYFeature.yaml @@ -131,6 +131,7 @@ compute_parameters: # lastobs_output: lastobs/ # stream_output : # stream_output_directory: output/ +# mask_output: domain/mask_output.yaml # stream_output_time: 1 #[hr] ** Consider `stream_output_time = -1` means save everything in one file ** # stream_output_type: '.nc' #please select only between netcdf '.nc' or '.csv' or '.pkl' # stream_output_internal_frequency: 60 #[min] it should be order of 5 minutes. For instance if you want to output every hour put 60 From 0e6095943715283040b603c3b90f26c45106a9dd Mon Sep 17 00:00:00 2001 From: Neptune-Meister <140385333+JurgenZach-NOAA@users.noreply.github.com> Date: Mon, 29 Jul 2024 07:13:39 -0700 Subject: [PATCH 15/29] Included pip & venv based instructions for WSL2 (#812) (#813) * Included pip & venv based instructions for WSL2 (#812) Possible Cython compile errors in T-route caused by PR784 (https://github.com/NOAA-OWP/t-route/pull/784) have resulted in a recommendation to compile in "no-editable' mode (PR 801, https://github.com/NOAA-OWP/t-route/pull/801). The no-e option, in turn, has resulted in the necessity to recompile T-route after each edit of python code. The instructions provided here showcase how T-route can be installed in a manner compatible with PR 784 while avoiding the non-editable compile option. It is based on pip and venv, avoiding conda altogether. It has been implemented on WSL, but should be readily adaptable to the same recent long-term stable Ubuntu distros (22.04 and 20.04). * Updated shell formatting * Update readme.md * Update readme.md * Only clone single branch * Included warning about mixing pip and conda --- readme.md | 114 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/readme.md b/readme.md index 1e68b5ed3..a0bc3fe45 100644 --- a/readme.md +++ b/readme.md @@ -111,6 +111,120 @@ python3 -m nwm_routing -f -V4 test_AnA_V4_HYFeature.yaml **Note**: The following instructions are for setting up T-Route on a Linux environment (standalone, no MPI). If you are using Windows, please install WSL (Windows Subsystem for Linux) before proceeding. 
+### T-Route Setup and Testing Guide for Windows Users WITHOUT conda [based on pip and venv - only widely available dependencies]. +### WARNING: INSTALLATION WITHIN EXISTING MINICONDA/CONDA VIRTUAL ENVIRONMENT NOT RECOMMENDED, PIP AND CONDA DO NOT MIX WELL, AND YOU MAY BREAK YOUR CONDA ENVIRONMENT! + +1. **Install Recommended distro:** + - Download and install WSL2 for your Windows OS + - We recommend long-term stable (LTS) Ubuntu distribution 22.04 or 20.04 (24.04 not recommended yet) + - Open Windows Power Shell and issue + ``` shell + wsl --install Ubuntu-22.04 + ``` + - Enter (root) username and password (2x) of your choice + +2. **Set up venv-based virtual environment:** + - From root (the username you created under 1): + - Update Linux distro: + ```shell + sudo apt update + ``` + - Install pip (package manager): + ```shell + sudo apt install python3-pip + ``` + - Install venv: + ```shell + sudo apt install python3.10-venv + ``` + - Create a virtual environment for T-route (named here 'troute-env1'): + ```shell + python3 -m venv troute_env1 + ``` + - Activate your shiny new virtual environment: + ```shell + source troute_env1/bin/activate + ``` + - Now, the command prompts in the Shell window should start with (troute-env1) + +3. **Clone T-route:** + - Go to a folder of your choice (here, your home folder) and create a T-route directory + ```shell + mkdir troute1 + cd troute1 + ``` + - Clone a T-route repository (the current main branch is used as an example): + ```shell + git clone --progress --single-branch --branch master http://github.com/NOAA-OWP/t-route.git + cd troute1 + ``` + - Install python packages per requirements file + ```shell + pip install -r requirements.txt + ``` + +4. **Download & build netcdf fortran libraries from UCAR:** + - Go to a folder of your choice (here, your home folder) and download the source code: + ```shell + cd ~ + wget https://downloads.unidata.ucar.edu/netcdf-fortran/4.6.1/netcdf-fortran-4.6.1.tar.gz + ``` + - Unzip it: + ```shell + tar xvf netcdf-fortran-4.6.1.tar.gz + ``` + - Enter the directory: + ```shell + cd netcdf-fortran-4.6.1/ + ``` + - Install some prerequisites (Fortran compiler, build essentials, standard C-netcdf library): + ```shell + sudo apt install gfortran + sudo apt install build-essential + sudo apt-get install libnetcdf-dev + ``` + - Configure the fortran-netcdf libraries: + ```shell + ./configure + ``` + - There should be no error message, and the output log should end up with something like: + ![image](https://github.com/user-attachments/assets/48268212-0b74-4f75-9d52-97f68e6c80d0) + - Check the installation (running two sets of examples): + ```shell + make check + ``` + - Again, there should be no error message (expect some warnings, though), and output should end with "passing" of two sets: + ![image](https://github.com/user-attachments/assets/83745989-9f14-4c1b-a2a1-675aa94e5181) + - Finally, install the libraries: + ```shell + sudo make install + ``` + - Output should be something like: + ![image](https://github.com/user-attachments/assets/57e48501-18f4-4004-9b10-5a9245186e38) + +5. 
**Build and test T-route:** + - Go to your T-route folder: + ```shell + cd ~/troute1 + ``` + - Compile T-route (may take a few minutes, depending on the machine): + ```shell + ./compiler.sh + ``` + - Set path to runtime netcdf-Fortran library [recommend including this in the .bashrc file or your equivalent]: + ```shell + export LD_LIBRARY_PATH=/usr/local/lib/ + ``` + - Run one of the demo examples provided: + ```shell + cd test/LowerColorado_TX + python -m nwm_routing -f -V3 test_AnA.yaml + ``` + - The latter is a hybrid (MC + diffusive) routing example that should run within a few minutes at most + + +### T-Route Setup Instructions and Troubleshooting Guide for Windows Users - Legacy Conda Version [may have to be built with compiler.sh no-e option] + 1. **Install Required Components:** - Open the WSL terminal. - Install Miniconda, Python, Pip, and Git. From a7260b78531f6dfb39638b07189310affdafb179 Mon Sep 17 00:00:00 2001 From: shorvath-noaa <103054653+shorvath-noaa@users.noreply.github.com> Date: Wed, 31 Jul 2024 10:49:10 -0600 Subject: [PATCH 16/29] Great Lakes Data Assimilation (#808) * update creation of great lakes climatology * pass great lakes DA info through to compute * update properties handling great lakes info * set default empty df for great lakes info * preprocess great lakes DA info, create 'update_after_compute()' function * create new get_timeslice_obs function specific for great lakes * add option for great lakes DA * function for performing great lakes DA * finalize great_lakes.update_after_compute() function * define after compute functions for great lakes DA * update _prep_reservoir_da_dataframes() to include great lakes * refine passing objects to great lakes da module * finalize da function for great lakes * update creation of great lakes climatology * pass great lakes DA info through to compute * update properties handling great lakes info * set default empty df for great lakes info * preprocess great lakes DA info, create 'update_after_compute()' function * create new get_timeslice_obs function specific for great lakes * add option for great lakes DA * function for performing great lakes DA * finalize great_lakes.update_after_compute() function * define after compute functions for great lakes DA * update _prep_reservoir_da_dataframes() to include great lakes * refine passing objects to great lakes da module * finalize da function for great lakes * add default empty dataframe for gl_climatology_df_sub * add column names for empty data frame gl_df_sub * set default empty data frames for great lakes dfs * move _canadian_gage_df slot from HYFeatures to AbstractNetwork * check if list of new great_lakes_param_dfs is empty before trying to concat * set default _canadian_gage_link_df in NHDNetwork * update DA settings for test cases * add empty placeholders in diffusive results for great lakes DA --- src/troute-network/troute/AbstractNetwork.py | 12 +- src/troute-network/troute/DataAssimilation.py | 302 ++++++++++---- .../troute/HYFeaturesNetwork.py | 14 +- src/troute-network/troute/NHDNetwork.py | 2 + src/troute-network/troute/nhd_io.py | 90 +++++ .../troute/rfc_lake_gage_crosswalk.py | 17 +- src/troute-nwm/src/nwm_routing/__main__.py | 12 + src/troute-routing/troute/routing/compute.py | 107 ++++- .../troute/routing/fast_reach/mc_reach.pyx | 376 ++++++++++-------- .../routing/fast_reach/reservoir_GL_da.py | 131 ++++++ test/LowerColorado_TX/test_AnA_V4_NHD.yaml | 6 +- .../test_AnA_V4_HYFeature.yaml | 2 +- 12 files changed, 818 insertions(+), 253 deletions(-) create mode 
100644 src/troute-routing/troute/routing/fast_reach/reservoir_GL_da.py diff --git a/src/troute-network/troute/AbstractNetwork.py b/src/troute-network/troute/AbstractNetwork.py index 5bbdffa4b..830be578e 100644 --- a/src/troute-network/troute/AbstractNetwork.py +++ b/src/troute-network/troute/AbstractNetwork.py @@ -27,6 +27,7 @@ class AbstractNetwork(ABC): __slots__ = ["_dataframe", "_waterbody_connections", "_gages", "_terminal_codes", "_connections", "_waterbody_df", "_waterbody_types_df", "_waterbody_type_specified", "_link_gage_df", + "_canadian_gage_link_df", "_independent_networks", "_reaches_by_tw", "_flowpath_dict", "_reverse_network", "_q0", "_t0", "_link_lake_crosswalk", "_usgs_lake_gage_crosswalk", "_usace_lake_gage_crosswalk", "_rfc_lake_gage_crosswalk", @@ -34,7 +35,8 @@ class AbstractNetwork(ABC): "supernetwork_parameters", "waterbody_parameters","data_assimilation_parameters", "restart_parameters", "compute_parameters", "forcing_parameters", "hybrid_parameters", "preprocessing_parameters", "output_parameters", - "verbose", "showtiming", "break_points", "_routing", "_nexus_dict", "_poi_nex_dict"] + "verbose", "showtiming", "break_points", "_routing", "_gl_climatology_df", "_nexus_dict", "_poi_nex_dict"] + def __init__(self, from_files=True, value_dict={}): @@ -410,6 +412,14 @@ def refactored_reaches(self): @property def unrefactored_topobathy_df(self): return self._routing.unrefactored_topobathy_df + + @property + def great_lakes_climatology_df(self): + return self._gl_climatology_df + + @property + def canadian_gage_df(self): + return self._canadian_gage_link_df def set_synthetic_wb_segments(self, synthetic_wb_segments, synthetic_wb_id_offset): diff --git a/src/troute-network/troute/DataAssimilation.py b/src/troute-network/troute/DataAssimilation.py index 7dd89a8bd..0c7971616 100644 --- a/src/troute-network/troute/DataAssimilation.py +++ b/src/troute-network/troute/DataAssimilation.py @@ -35,7 +35,7 @@ class AbstractDA(ABC): "_reservoir_usgs_df", "_reservoir_usgs_param_df", "_reservoir_usace_df", "_reservoir_usace_param_df", "_reservoir_rfc_df", "_reservoir_rfc_synthetic", - "_reservoir_rfc_param_df", + "_reservoir_rfc_param_df", "_great_lakes_df", "_great_lakes_param_df", "_dateNull", "_datesSecondsArray_usgs", "_nDates_usgs", "_stationArray_usgs", "_stationStringLengthArray_usgs", "_nStations_usgs", @@ -213,7 +213,7 @@ def __init__(self, network, from_files, value_dict, da_run=[]): self._last_obs_df = _reindex_link_to_lake_id(self._last_obs_df, network.link_lake_crosswalk) self._usgs_df = _create_usgs_df(data_assimilation_parameters, streamflow_da_parameters, run_parameters, network, da_run) - if 'canada_timeslice_files' in da_run: + if ('canada_timeslice_files' in da_run) & (not network.canadian_gage_df.empty): self._canada_df = _create_canada_df(data_assimilation_parameters, streamflow_da_parameters, run_parameters, network, da_run) self._canada_is_created = True LOG.debug("NudgingDA class is completed in %s seconds." 
% (time.time() - main_start_time)) @@ -272,7 +272,7 @@ def update_for_next_loop(self, network, da_run,): if streamflow_da_parameters.get('streamflow_nudging', False): self._usgs_df = _create_usgs_df(data_assimilation_parameters, streamflow_da_parameters, run_parameters, network, da_run) - if 'canada_timeslice_files' in da_run: + if ('canada_timeslice_files' in da_run) & (not network.canadian_gage_df.empty): self._canada_df = _create_canada_df(data_assimilation_parameters, streamflow_da_parameters, run_parameters, network, da_run) else: self._canada_df = pd.DataFrame() @@ -543,6 +543,7 @@ def __init__(self, network, from_files, value_dict, da_run=[]): if not self._usgs_df.empty: self._usgs_df = self._usgs_df.loc[:,network.t0:] LOG.debug("PersistenceDA class is completed in %s seconds." % (time.time() - PersistenceDA_start_time)) + def update_after_compute(self, run_results,): ''' Function to update data assimilation object after running routing module. @@ -592,12 +593,12 @@ def update_for_next_loop(self, network, da_run,): run_parameters = self._run_parameters # update usgs_df if it is not empty - streamflow_da_parameters = data_assimilation_parameters.get('streamflow_da', None) - reservoir_da_parameters = data_assimilation_parameters.get('reservoir_da', None) + streamflow_da_parameters = data_assimilation_parameters.get('streamflow_da', {}) + reservoir_da_parameters = data_assimilation_parameters.get('reservoir_da', {}) - if not self._usgs_df.empty: - - if reservoir_da_parameters.get('reservoir_persistence_da').get('reservoir_persistence_usgs', False): + if not self.usgs_df.empty: + + if reservoir_da_parameters.get('reservoir_persistence_da',{}).get('reservoir_persistence_usgs', False): gage_lake_df = ( network.usgs_lake_gage_crosswalk. @@ -630,17 +631,16 @@ def update_for_next_loop(self, network, da_run,): # subset and re-index `usgs_df`, using the segID <> lakeID crosswalk self._reservoir_usgs_df = ( - usgs_df_15min.join(link_lake_df, how = 'inner'). - reset_index(). - set_index('usgs_lake_id'). - drop(['link'], axis = 1) - ) - + usgs_df_15min.join(link_lake_df, how = 'inner'). + reset_index(drop=True). + set_index('usgs_lake_id') + ) + # replace link ids with lake ids, for gages at waterbody outlets, # otherwise, gage data will not be assimilated at waterbody outlet # segments. if network.link_lake_crosswalk: - self._usgs_df = _reindex_link_to_lake_id(self._usgs_df, network.link_lake_crosswalk) + self._usgs_df = _reindex_link_to_lake_id(self.usgs_df, network.link_lake_crosswalk) elif reservoir_da_parameters.get('reservoir_persistence_usgs', False): ( @@ -655,6 +655,12 @@ def update_for_next_loop(self, network, da_run,): da_run, lake_gage_crosswalk = network.usgs_lake_gage_crosswalk, res_source = 'usgs') + + # replace link ids with lake ids, for gages at waterbody outlets, + # otherwise, gage data will not be assimilated at waterbody outlet + # segments. 
+ if network.link_lake_crosswalk: + usgs_df = _reindex_link_to_lake_id(usgs_df, network.link_lake_crosswalk) # USACE if reservoir_da_parameters.get('reservoir_persistence_da').get('reservoir_persistence_usace', False): @@ -717,62 +723,114 @@ def __init__(self, network, from_files, value_dict, da_run): run_parameters = self._run_parameters reservoir_persistence_da = data_assimilation_parameters.get('reservoir_da', {}).get('reservoir_persistence_da', {}) + self._great_lakes_df = pd.DataFrame() + self._great_lakes_param_df = pd.DataFrame() + if reservoir_persistence_da: greatLake = reservoir_persistence_da.get('reservoir_persistence_greatLake', False) if greatLake: - streamflow_da_parameters = data_assimilation_parameters.get('streamflow_da', {}) + GL_crosswalk_df = pd.DataFrame( + { + 'link': [4800002,4800004,4800006], + 'gages': ['04127885','04159130','02HA013'] + } + ).set_index('link') - if not self._canada_is_created and ('canada_timeslice_files' in da_run): - self._canada_df = _create_canada_df(data_assimilation_parameters, streamflow_da_parameters, run_parameters, network, da_run) - self._canada_is_created = True - - if 'LakeOntario_outflow' in da_run: - self._lake_ontario_df = _create_LakeOntario_df(run_parameters, network, da_run) - else: - self._lake_ontario_df = pd.DataFrame() - - lake_ontario_df = self._lake_ontario_df - canada_df = self._canada_df - - ids_to_check = {'4800002': '04127885', '4800004': '13196034'} - # the segment corresponding to 04127885 gages isn't exist as of now. Should be replaced in future - # Initialize an empty DataFrame with the same columns as the usgs DataFrame - if self._usgs_df.empty: - self._usgs_df = _create_usgs_df(data_assimilation_parameters, streamflow_da_parameters, run_parameters, network, da_run) - - usgs_df_GL = pd.DataFrame(columns=self._usgs_df.columns) - - # Check if any ids are present in the index - for key, value in ids_to_check.items(): - if value in self._usgs_df.index: - temp_df = (self._usgs_df.loc[[value]] - .transpose() - .resample('15min') - .asfreq() - .transpose()) - temp_df.index = [key] - else: - temp_df = pd.DataFrame(index=[key], columns=self._usgs_df.columns) - - usgs_df_GL = pd.concat([usgs_df_GL, temp_df], axis=0) + self._great_lakes_df, self._great_lakes_param_df = _create_GL_dfs( + GL_crosswalk_df, + data_assimilation_parameters, + run_parameters, + da_run, + network.t0, + ) - if not lake_ontario_df.empty: - lake_ontario_df = lake_ontario_df.T.reset_index().drop('index', axis = 1) - else: - lake_ontario_df = pd.DataFrame(columns=self._usgs_df.columns) - lake_ontario_df['link'] = 4800007 - lake_ontario_df.set_index('link', inplace=True) + LOG.debug("great_lake class is completed in %s seconds." % (time.time() - great_lake_start_time)) + + def update_after_compute(self, run_results, time_increment): + ''' + Function to update data assimilation object after running routing module. + + Arguments: + ---------- + - run_results (list): output from the compute kernel sequence, + organized (because that is how it comes + out of the kernel) by network. + For each item in the result, there are + 10 elements, the 9th of which are lists of + four elements containing: + 1) a list of the segments ids where data + assimilation was performed (if any) in that network; + 2) a list of the previously persisted outflow; + 3) a list of the previously assimilated observation times; + 4) a list of the update time. 
+ + Returns: + -------- + - data_assimilation (Object): Object containing all data assimilation information + - _great_lakes_param_df (DataFrame): Great Lakes reservoir DA parameters + ''' + # get reservoir DA initial parameters for next loop iteration + great_lakes_param_df = pd.DataFrame() + tmp_list = [] + for r in run_results: - if canada_df.empty: - canada_df = pd.DataFrame(columns=self._usgs_df.columns, index=pd.Index([4800006], name='link')) + if len(r[9][0]) > 0: + tmp_df = pd.DataFrame(data = r[9][0], columns = ['lake_id']) + tmp_df['previous_assimilated_outflows'] = r[9][1] + tmp_df['previous_assimilated_time'] = r[9][2] + tmp_df['update_time'] = r[9][3] + tmp_list.append(tmp_df) + + if tmp_list: + great_lakes_param_df = pd.concat(tmp_list) + great_lakes_param_df['previous_assimilated_time'] = great_lakes_param_df['previous_assimilated_time'] - time_increment + great_lakes_param_df['update_time'] = great_lakes_param_df['update_time'] - time_increment + + self._great_lakes_param_df = great_lakes_param_df + + def update_for_next_loop(self, network, da_run,): + ''' + Function to update data assimilation object for the next loop iteration. + + Arguments: + ---------- + - network (Object): network object created from abstract class + - da_run (list): list of data assimilation files separated + by for loop chunks + + Returns: + -------- + - data_assimilation (Object): Object containing all data assimilation information + - reservoir_usgs_df (DataFrame): USGS reservoir observations + - reservoir_usace_df (DataFrame): USACE reservoir observations + ''' + greatLake = False + data_assimilation_parameters = self._data_assimilation_parameters + run_parameters = self._run_parameters + reservoir_persistence_da = data_assimilation_parameters.get('reservoir_da', {}).get('reservoir_persistence_da', {}) - # List of DataFrames - dfs = [lake_ontario_df, canada_df, usgs_df_GL] + if reservoir_persistence_da: + greatLake = reservoir_persistence_da.get('reservoir_persistence_greatLake', False) + + if greatLake: + + GL_crosswalk_df = pd.DataFrame( + { + 'link': [4800002,4800004,4800006], + 'gages': ['04127885','04159130','02HA013'] + } + ).set_index('link') + + self._great_lakes_df, _ = _create_GL_dfs( + GL_crosswalk_df, + data_assimilation_parameters, + run_parameters, + da_run, + network.t0, + ) - self.great_lake_all = pd.concat(dfs, axis=0, join='outer', ignore_index=False) - LOG.debug("great_lake class is completed in %s seconds." % (time.time() - great_lake_start_time)) class RFCDA(AbstractDA): """ @@ -877,6 +935,7 @@ def __init__(self, network, from_files, value_dict): self._reservoir_rfc_df = pd.DataFrame() self._reservoir_rfc_param_df = pd.DataFrame() LOG.debug("RFCDA class is completed in %s seconds." % (time.time() - RFCDA_start_time)) + def update_after_compute(self, run_results): ''' Function to update data assimilation object after running routing module. 
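For orientation, here is a small, self-contained sketch of the bookkeeping done in `great_lake.update_after_compute()` above: element `r[9]` of each network's result tuple carries (lake ids, previously assimilated outflows, observation times, update times), which are concatenated and then shifted back by the length of the loop that just finished. The padded tuple shape and all numbers below are invented for illustration only.

```python
import pandas as pd

# one network's result tuple; only index 9 matters for Great Lakes DA bookkeeping
run_results = [
    (None,) * 9 + (([4800002, 4800004], [2105.0, 5410.0], [3600, 3600], [7200, 7200]),),
]
time_increment = 3600  # seconds simulated by the loop that just finished

frames = []
for r in run_results:
    lake_ids, prev_q, prev_t, update_t = r[9]
    if len(lake_ids) > 0:
        frames.append(pd.DataFrame({
            'lake_id': lake_ids,
            'previous_assimilated_outflows': prev_q,
            'previous_assimilated_time': prev_t,
            'update_time': update_t,
        }))

great_lakes_param_df = pd.concat(frames) if frames else pd.DataFrame()
if not great_lakes_param_df.empty:
    # express times relative to the start of the next loop iteration
    great_lakes_param_df[['previous_assimilated_time', 'update_time']] -= time_increment
print(great_lakes_param_df)
```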
@@ -939,6 +998,7 @@ def update_after_compute(self, run_results, time_increment): NudgingDA.update_after_compute(self, run_results, time_increment) PersistenceDA.update_after_compute(self, run_results) RFCDA.update_after_compute(self, run_results) + great_lake.update_after_compute(self, run_results, time_increment) def update_for_next_loop(self, network, da_run,): ''' @@ -947,6 +1007,7 @@ def update_for_next_loop(self, network, da_run,): NudgingDA.update_for_next_loop(self, network, da_run) PersistenceDA.update_for_next_loop(self, network, da_run) RFCDA.update_for_next_loop(self) + great_lake.update_for_next_loop(self, network, da_run) @property @@ -984,6 +1045,14 @@ def reservoir_rfc_df(self): @property def reservoir_rfc_param_df(self): return self._reservoir_rfc_param_df + + @property + def great_lakes_df(self): + return self._great_lakes_df + + @property + def great_lakes_param_df(self): + return self._great_lakes_param_df # -------------------------------------------------------------- @@ -1083,19 +1152,13 @@ def _create_usgs_df(data_assimilation_parameters, streamflow_da_parameters, run_ LOG.debug("Reading and preprocessing usgs timeslice files is completed in %s seconds." % (time.time() - usgs_df_start_time)) return usgs_df -def _create_LakeOntario_df(run_parameters, network, da_run): +def _create_LakeOntario_df(run_parameters, t0, da_run): LOG.info("Creating Lake Ontario dataframe is started.") LakeOntario_df_start_time = time.time() - t0 = network.t0 + start_time = t0 - pd.Timedelta(weeks = 10) nts = run_parameters.get('nts') dt = run_parameters.get('dt') - end_time = pd.to_datetime(t0) + pd.Timedelta(hours = nts/(3600/dt)) - time_total = [] - t = t0 - while t < end_time: - time_total.append(t) - t += pd.Timedelta(minutes=15) - + end_time = t0 + pd.Timedelta(hours = nts/(3600/dt)) lake_ontario_df = pd.read_csv(da_run.get('LakeOntario_outflow')) @@ -1113,17 +1176,22 @@ def _create_LakeOntario_df(run_parameters, network, da_run): lake_ontario_df = lake_ontario_df.drop_duplicates() lake_ontario_df = lake_ontario_df.drop(['Date', 'Hour'], axis=1) - # Filter DataFrame based on extracted times - time_total_df = pd.DataFrame(time_total, columns=['Datetime']) - time_total_df['Outflow(m3/s)'] = None # Initialize with None or NaN - time_total_df = time_total_df.set_index('Datetime') + # Rename outflow column to discharge + lake_ontario_df = lake_ontario_df.rename(columns={'Outflow(m3/s)': 'Discharge'}) + # Filter for needed time stamps + lake_ontario_df = lake_ontario_df[(lake_ontario_df.index>=start_time) & (lake_ontario_df.index<=end_time)] + + # Add 'link' column with waterbody ID + lake_ontario_df['link'] = 4800007 + + # Reset index and convert Datetimes to strings + lake_ontario_df.reset_index(inplace=True) + lake_ontario_df['Datetime'] = lake_ontario_df['Datetime'].dt.strftime('%Y-%m-%d_%H:%M:%S') - filtered_df = lake_ontario_df.loc[(lake_ontario_df.index >= t0) & (lake_ontario_df.index < end_time)] - total_df = pd.merge(time_total_df, filtered_df, left_index=True, right_index=True, how='left') - total_df = total_df.rename(columns={'Outflow(m3/s)_y': 'Outflow(m3/s)'}).drop(columns='Outflow(m3/s)_x') LOG.debug("Creating Lake Ontario dataframe is completed in %s seconds." 
% (time.time() - LakeOntario_df_start_time)) - return total_df + + return lake_ontario_df def _create_canada_df(data_assimilation_parameters, streamflow_da_parameters, run_parameters, network, da_run): ''' @@ -1141,7 +1209,7 @@ def _create_canada_df(data_assimilation_parameters, streamflow_da_parameters, ru Returns: -------- - - usgs_df (DataFrame): dataframe of USGS gage observations + - canada_df (DataFrame): dataframe of Canadian gage observations ''' canada_timeslices_folder = data_assimilation_parameters.get("canada_timeslices_folder", None) #lastobs_file = streamflow_da_parameters.get("wrf_hydro_lastobs_file", None) @@ -1159,11 +1227,10 @@ def _create_canada_df(data_assimilation_parameters, streamflow_da_parameters, ru canada_df_start_time = time.time() canada_files = [canada_timeslices_folder.joinpath(f) for f in da_run['canada_timeslice_files']] - if canada_files: canada_df = ( nhd_io.get_obs_from_timeslices( - network.link_gage_df, + network.canadian_gage_df, crosswalk_gage_field, crosswalk_segID_field, canada_files, @@ -1173,7 +1240,7 @@ def _create_canada_df(data_assimilation_parameters, streamflow_da_parameters, ru network.t0, run_parameters.get("cpu_pool", None) ). - loc[network.link_gage_df.index] + loc[network.canadian_gage_df.index] ) else: @@ -1982,3 +2049,74 @@ def _read_lastobs_file( LOG.debug(f"Reading last observation file is completed in %s seconds." % (time.time() - read_lastobs_start_time)) return lastobs_df +def _create_GL_dfs(GL_crosswalk_df, data_assimilation_parameters, run_parameters, + da_run, t0): + + # USGS gages: + usgs_timeslices_folder = data_assimilation_parameters.get("usgs_timeslices_folder", None) + + # TODO: join timeslice folder and files into complete path upstream + usgs_timeslices_folder = pathlib.Path(usgs_timeslices_folder) + usgs_files = [usgs_timeslices_folder.joinpath(f) for f in + da_run['usgs_timeslice_files']] + + usgs_GL_crosswalk_df = GL_crosswalk_df[GL_crosswalk_df.index.isin([4800002,4800004])] + if usgs_files: + usgs_GL_df = ( + nhd_io.get_GL_obs_from_timeslices( + usgs_GL_crosswalk_df, + usgs_files, + cpu_pool=run_parameters.get("cpu_pool", 1), + ) + ) + usgs_GL_df = pd.melt(usgs_GL_df, + var_name='Datetime', + value_name='Discharge', + ignore_index=False).dropna().reset_index() + + else: + usgs_GL_df = pd.DataFrame() + + # Canadian gages: + canadian_timeslices_folder = data_assimilation_parameters.get("canada_timeslices_folder", None) + canadian_files = [canadian_timeslices_folder.joinpath(f) for f in + da_run['canada_timeslice_files']] + + canadian_GL_crosswalk_df = GL_crosswalk_df.loc[[4800006]] + + if canadian_files: + canadian_GL_df = ( + nhd_io.get_GL_obs_from_timeslices( + canadian_GL_crosswalk_df, + canadian_files, + cpu_pool=run_parameters.get("cpu_pool", 1), + ) + ) + canadian_GL_df = pd.melt(canadian_GL_df, + var_name='Datetime', + value_name='Discharge', + ignore_index=False).dropna().reset_index() + + else: + canadian_GL_df = pd.DataFrame() + + # Lake Ontario data: + if 'LakeOntario_outflow' in da_run: + lake_ontario_df = _create_LakeOntario_df(run_parameters, t0, da_run) + else: + lake_ontario_df = pd.DataFrame() + + great_lakes_df = pd.concat( + [usgs_GL_df, canadian_GL_df, lake_ontario_df] + ).rename(columns={'link': 'lake_id'}).sort_values(by=['lake_id','Datetime']) + + great_lakes_df['time'] = pd.to_datetime(great_lakes_df['Datetime'], format='%Y-%m-%d_%H:%M:%S') - t0 + great_lakes_df['time'] = great_lakes_df.time.dt.total_seconds().astype(int) + great_lakes_df.drop('Datetime', axis=1, inplace=True) + + 
great_lakes_param_df = pd.DataFrame(great_lakes_df.lake_id.unique(), columns=['lake_id']).sort_values('lake_id') + great_lakes_param_df['previous_assimilated_outflows'] = np.nan + great_lakes_param_df['previous_assimilated_time'] = 0 + great_lakes_param_df['update_time'] = 0 + + return great_lakes_df, great_lakes_param_df \ No newline at end of file diff --git a/src/troute-network/troute/HYFeaturesNetwork.py b/src/troute-network/troute/HYFeaturesNetwork.py index f5eb91bc7..85644f3d1 100644 --- a/src/troute-network/troute/HYFeaturesNetwork.py +++ b/src/troute-network/troute/HYFeaturesNetwork.py @@ -216,7 +216,7 @@ class HYFeaturesNetwork(AbstractNetwork): """ """ - __slots__ = ["_upstream_terminal", "_nexus_latlon", "_duplicate_ids_df"] + __slots__ = ["_upstream_terminal", "_nexus_latlon", "_duplicate_ids_df",] def __init__(self, supernetwork_parameters, @@ -340,6 +340,7 @@ def gages(self): def waterbody_null(self): return np.nan #pd.NA + def preprocess_network(self, flowpaths, nexus): self._dataframe = flowpaths @@ -541,9 +542,13 @@ def preprocess_waterbodies(self, lakes, nexus): self.waterbody_dataframe, gl_wbody_df ] - ) + ).sort_index() + + self._gl_climatology_df = get_great_lakes_climatology() + else: gl_dict = {} + self._gl_climatology_df = pd.DataFrame() self._waterbody_types_df = pd.DataFrame( data = 1, @@ -569,6 +574,7 @@ def preprocess_waterbodies(self, lakes, nexus): self._waterbody_type_specified = False self._link_lake_crosswalk = None self._duplicate_ids_df = pd.DataFrame() + self._gl_climatology_df = pd.DataFrame() self._dataframe = self.dataframe.drop('waterbody', axis=1).drop_duplicates() @@ -605,6 +611,10 @@ def preprocess_data_assimilation(self, network): .rename_axis(None, axis=0).to_dict() ) + #FIXME: temporary solution, add canadian gage crosswalk dataframe. This should come from + # the hydrofabric. + self._canadian_gage_link_df = pd.DataFrame(columns=['gages','link']).set_index('link') + # Find furthest downstream gage and create our lake_gage_df to make crosswalk dataframes. lake_gage_hydroseq_df = gages_df[~gages_df['lake_id'].isnull()][['lake_id', 'value', 'hydroseq']].rename(columns={'value': 'gages'}) lake_gage_hydroseq_df['lake_id'] = lake_gage_hydroseq_df['lake_id'].astype(int) diff --git a/src/troute-network/troute/NHDNetwork.py b/src/troute-network/troute/NHDNetwork.py index ee4afcf52..189393200 100644 --- a/src/troute-network/troute/NHDNetwork.py +++ b/src/troute-network/troute/NHDNetwork.py @@ -64,6 +64,8 @@ def __init__( print("... in %s seconds." % (time.time() - start_time)) self._flowpath_dict = {} + self._gl_climatology_df = pd.DataFrame() + self._canadian_gage_link_df = pd.DataFrame(columns=['gages','link']).set_index('link') super().__init__() diff --git a/src/troute-network/troute/nhd_io.py b/src/troute-network/troute/nhd_io.py index de50b0847..539e661aa 100644 --- a/src/troute-network/troute/nhd_io.py +++ b/src/troute-network/troute/nhd_io.py @@ -1249,6 +1249,96 @@ def get_obs_from_timeslices( return observation_df_new +def get_GL_obs_from_timeslices( + crosswalk_df, + timeslice_files, + crosswalk_gage_field='gages', + crosswalk_dest_field='link', + qc_threshold=1, + cpu_pool=1, +): + """ + Read observations from TimeSlice files and organize into a Pandas DataFrame. + This is specific to observations needed for Great Lakes data assimilation, + so no interpolation happens. 
+ + Aguments + -------- + - crosswalk_gage_field (str): fieldname of gage ID data in crosswalk dataframe + - crosswalk_dest_field (str): fieldname of destination data in link_gage_df. + For streamflow DA, this is the field + containing segment IDs. For reservoir DA, + this is the field containing waterbody IDs. + - timeslice_files (list of PosixPath): Full paths to existing TimeSlice files + - qc_threshold (int): Numerical observation quality + threshold. Observations with quality + flags less than this value will be + removed and replaced witn nan. + - cpu_pool (int): Number of CPUs used for parallel + TimeSlice reading and interolation + + Returns + ------- + - observation_df_new (Pandas DataFrame): + + Notes + ----- + """ + # open TimeSlce files, organize data into dataframes + with Parallel(n_jobs=cpu_pool) as parallel: + jobs = [] + for f in timeslice_files: + jobs.append(delayed(_read_timeslice_file)(f)) + timeslice_dataframes = parallel(jobs) + + all_empty = all(df.empty for tuple in timeslice_dataframes for df in tuple) + if all_empty: + LOG.debug(f'{crosswalk_gage_field} DataFrames is empty, check timeslice files.') + return pd.DataFrame() + + # create lists of observations and obs quality dataframes returned + # from _read_timeslice_file + timeslice_obs_frames = [] + timeslice_qual_frames = [] + for d in timeslice_dataframes: + timeslice_obs_frames.append(d[0]) # TimeSlice gage observation + timeslice_qual_frames.append(d[1]) # TimeSlice observation qual + + # concatenate dataframes + timeslice_obs_df = pd.concat(timeslice_obs_frames, axis = 1) + timeslice_qual_df = pd.concat(timeslice_qual_frames, axis = 1) + + # Link <> gage crosswalk data + df = crosswalk_df.reset_index() + df = df.set_index(crosswalk_gage_field) + df.index = df.index.str.strip() + # join crosswalk data with timeslice data, indexed on crosswalk destination field + observation_df = (df.join(timeslice_obs_df). + reset_index(). + set_index(crosswalk_dest_field). + select_dtypes(include='number')) + + observation_qual_df = (df.join(timeslice_qual_df). + reset_index(). + set_index(crosswalk_dest_field). + select_dtypes(include='number')) + + # ---- Laugh testing ------ + # screen-out erroneous qc flags + observation_qual_df = (observation_qual_df. + mask(observation_qual_df < 0, np.nan). + mask(observation_qual_df > 1, np.nan) + ) + + # screen-out poor quality flow observations + observation_df = (observation_df. + mask(observation_qual_df < qc_threshold, np.nan). 
+ mask(observation_df <= 0, np.nan) + ) + + return observation_df + + def get_param_str(target_file, param): # TODO: remove this duplicate function return get_attribute(target_file, param) diff --git a/src/troute-network/troute/rfc_lake_gage_crosswalk.py b/src/troute-network/troute/rfc_lake_gage_crosswalk.py index 082b03d3a..7d4b002ef 100644 --- a/src/troute-network/troute/rfc_lake_gage_crosswalk.py +++ b/src/troute-network/troute/rfc_lake_gage_crosswalk.py @@ -1,4 +1,5 @@ import pandas as pd +import numpy as np def get_rfc_lake_gage_crosswalk(): """ @@ -77,14 +78,14 @@ def get_rfc_lake_gage_crosswalk(): def get_great_lakes_climatology(): - df = pd.DataFrame( - { - 'Month': ["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"], - 'SMR': [1946, 1907, 1879, 1929, 2105, 2202, 2306, 2378, 2343, 2282, 2235, 2046], - 'SCR': [4623, 4518, 4946, 5217, 5410, 5494, 5546, 5555, 5519, 5475, 5424, 5253], - 'NIR': [5630, 5523, 5673, 5921, 6179, 6172, 6089, 5977, 5839, 5751, 5757, 5771], - 'SLR': [6380, 6561, 6875, 7159, 7418, 7547, 7500, 7360, 7161, 6954, 6852, 6725] - } + outflow_data = np.array([[1946, 1907, 1879, 1929, 2105, 2202, 2306, 2378, 2343, 2282, 2235, 2046], + [4623, 4518, 4946, 5217, 5410, 5494, 5546, 5555, 5519, 5475, 5424, 5253], + [5630, 5523, 5673, 5921, 6179, 6172, 6089, 5977, 5839, 5751, 5757, 5771], + [6380, 6561, 6875, 7159, 7418, 7547, 7500, 7360, 7161, 6954, 6852, 6725]]) + df = ( + pd.DataFrame(data=outflow_data, index=[4800002,4800004,4800006,4800007], + columns=["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"]) + .astype('float32') ) return df \ No newline at end of file diff --git a/src/troute-nwm/src/nwm_routing/__main__.py b/src/troute-nwm/src/nwm_routing/__main__.py index 01dbd4bad..3cc6bba73 100644 --- a/src/troute-nwm/src/nwm_routing/__main__.py +++ b/src/troute-nwm/src/nwm_routing/__main__.py @@ -230,6 +230,9 @@ def main_v04(argv): data_assimilation.reservoir_usace_param_df, data_assimilation.reservoir_rfc_df, data_assimilation.reservoir_rfc_param_df, + data_assimilation.great_lakes_df, + data_assimilation.great_lakes_param_df, + network.great_lakes_climatology_df, data_assimilation.assimilation_parameters, assume_short_ts, return_courant, @@ -1141,6 +1144,9 @@ def nwm_route( reservoir_usace_param_df, reservoir_rfc_df, reservoir_rfc_param_df, + great_lakes_df, + great_lakes_param_df, + great_lakes_climatology_df, da_parameter_dict, assume_short_ts, return_courant, @@ -1231,6 +1237,9 @@ def nwm_route( reservoir_usace_param_df, reservoir_rfc_df, reservoir_rfc_param_df, + great_lakes_df, + great_lakes_param_df, + great_lakes_climatology_df, da_parameter_dict, assume_short_ts, return_courant, @@ -1675,6 +1684,9 @@ def main_v03(argv): reservoir_usace_param_df, pd.DataFrame(), #empty dataframe for RFC data...not needed unless running via BMI pd.DataFrame(), #empty dataframe for RFC param data...not needed unless running via BMI + pd.DataFrame(), #empty dataframe for great lakes data... + pd.DataFrame(), #empty dataframe for great lakes param data... + pd.DataFrame(), #empty dataframe for great lakes climatology data... 
da_parameter_dict, assume_short_ts, return_courant, diff --git a/src/troute-routing/troute/routing/compute.py b/src/troute-routing/troute/routing/compute.py index e96aedf3b..55f529596 100644 --- a/src/troute-routing/troute/routing/compute.py +++ b/src/troute-routing/troute/routing/compute.py @@ -145,6 +145,9 @@ def _prep_reservoir_da_dataframes(reservoir_usgs_df, reservoir_usace_param_df, reservoir_rfc_df, reservoir_rfc_param_df, + great_lakes_df, + great_lakes_param_df, + great_lakes_climatology_df, waterbody_types_df_sub, t0, from_files, @@ -258,11 +261,36 @@ def _prep_reservoir_da_dataframes(reservoir_usgs_df, if not from_files: if not waterbody_types_df_sub.empty: waterbody_types_df_sub.loc[waterbody_types_df_sub['reservoir_type'] == 4] = 1 + + # Great Lakes + if not great_lakes_df.empty: + gl_wbodies_sub = waterbody_types_df_sub[ + waterbody_types_df_sub['reservoir_type']==6 + ].index + if exclude_segments: + gl_wbodies_sub = list(set(gl_wbodies_sub).difference(set(exclude_segments))) + gl_df_sub = great_lakes_df[great_lakes_df['lake_id'].isin(gl_wbodies_sub)] + gl_climatology_df_sub = great_lakes_climatology_df.loc[gl_wbodies_sub] + gl_param_df_sub = great_lakes_param_df[great_lakes_param_df['lake_id'].isin(gl_wbodies_sub)] + gl_parm_lake_id_sub = gl_param_df_sub.lake_id.to_numpy() + gl_param_flows_sub = gl_param_df_sub.previous_assimilated_outflows.to_numpy() + gl_param_time_sub = gl_param_df_sub.previous_assimilated_time.to_numpy() + gl_param_update_time_sub = gl_param_df_sub.update_time.to_numpy() + else: + gl_df_sub = pd.DataFrame(columns=['lake_id','time','Discharge']) + gl_climatology_df_sub = pd.DataFrame() + gl_parm_lake_id_sub = pd.DataFrame().to_numpy().reshape(0,) + gl_param_flows_sub = pd.DataFrame().to_numpy().reshape(0,) + gl_param_time_sub = pd.DataFrame().to_numpy().reshape(0,) + gl_param_update_time_sub = pd.DataFrame().to_numpy().reshape(0,) + if not waterbody_types_df_sub.empty: + waterbody_types_df_sub.loc[waterbody_types_df_sub['reservoir_type'] == 6] = 1 return ( reservoir_usgs_df_sub, reservoir_usgs_df_time, reservoir_usgs_update_time, reservoir_usgs_prev_persisted_flow, reservoir_usgs_persistence_update_time, reservoir_usgs_persistence_index, reservoir_usace_df_sub, reservoir_usace_df_time, reservoir_usace_update_time, reservoir_usace_prev_persisted_flow, reservoir_usace_persistence_update_time, reservoir_usace_persistence_index, reservoir_rfc_df_sub, reservoir_rfc_totalCounts, reservoir_rfc_file, reservoir_rfc_use_forecast, reservoir_rfc_timeseries_idx, reservoir_rfc_update_time, reservoir_rfc_da_timestep, reservoir_rfc_persist_days, + gl_df_sub, gl_parm_lake_id_sub, gl_param_flows_sub, gl_param_time_sub, gl_param_update_time_sub, gl_climatology_df_sub, waterbody_types_df_sub ) @@ -501,6 +529,9 @@ def compute_nhd_routing_v02( reservoir_usace_param_df, reservoir_rfc_df, reservoir_rfc_param_df, + great_lakes_df, + great_lakes_param_df, + great_lakes_climatology_df, da_parameter_dict, assume_short_ts, return_courant, @@ -744,6 +775,12 @@ def compute_nhd_routing_v02( reservoir_rfc_update_time, reservoir_rfc_da_timestep, reservoir_rfc_persist_days, + gl_df_sub, + gl_parm_lake_id_sub, + gl_param_flows_sub, + gl_param_time_sub, + gl_param_update_time_sub, + gl_climatology_df_sub, waterbody_types_df_sub, ) = _prep_reservoir_da_dataframes( reservoir_usgs_df, @@ -752,6 +789,9 @@ def compute_nhd_routing_v02( reservoir_usace_param_df, reservoir_rfc_df, reservoir_rfc_param_df, + great_lakes_df, + great_lakes_param_df, + great_lakes_climatology_df, waterbody_types_df_sub, 
t0, from_files, @@ -818,6 +858,15 @@ def compute_nhd_routing_v02( reservoir_rfc_update_time.astype("float32"), reservoir_rfc_da_timestep.astype("int32"), reservoir_rfc_persist_days.astype("int32"), + # Great Lakes DA data + gl_df_sub.lake_id.values.astype("int32"), + gl_df_sub.time.values.astype("int32"), + gl_df_sub.Discharge.values.astype("float32"), + gl_parm_lake_id_sub.astype("int32"), + gl_param_flows_sub.astype("float32"), + gl_param_time_sub.astype("int32"), + gl_param_update_time_sub.astype("int32"), + gl_climatology_df_sub.values.astype("float32"), { us: fvd for us, fvd in flowveldepth_interorder.items() @@ -1032,6 +1081,12 @@ def compute_nhd_routing_v02( reservoir_rfc_update_time, reservoir_rfc_da_timestep, reservoir_rfc_persist_days, + gl_df_sub, + gl_parm_lake_id_sub, + gl_param_flows_sub, + gl_param_time_sub, + gl_param_update_time_sub, + gl_climatology_df_sub, waterbody_types_df_sub, ) = _prep_reservoir_da_dataframes( reservoir_usgs_df, @@ -1040,6 +1095,9 @@ def compute_nhd_routing_v02( reservoir_usace_param_df, reservoir_rfc_df, reservoir_rfc_param_df, + great_lakes_df, + great_lakes_param_df, + great_lakes_climatology_df, waterbody_types_df_sub, t0, from_files, @@ -1104,6 +1162,15 @@ def compute_nhd_routing_v02( reservoir_rfc_update_time.astype("float32"), reservoir_rfc_da_timestep.astype("int32"), reservoir_rfc_persist_days.astype("int32"), + # Great Lakes DA data + gl_df_sub.lake_id.values.astype("int32"), + gl_df_sub.time.values.astype("int32"), + gl_df_sub.Discharge.values.astype("float32"), + gl_parm_lake_id_sub.astype("int32"), + gl_param_flows_sub.astype("float32"), + gl_param_time_sub.astype("int32"), + gl_param_update_time_sub.astype("int32"), + gl_climatology_df_sub.values.astype("float32"), { us: fvd for us, fvd in flowveldepth_interorder.items() @@ -1235,6 +1302,12 @@ def compute_nhd_routing_v02( reservoir_rfc_update_time, reservoir_rfc_da_timestep, reservoir_rfc_persist_days, + gl_df_sub, + gl_parm_lake_id_sub, + gl_param_flows_sub, + gl_param_time_sub, + gl_param_update_time_sub, + gl_climatology_df_sub, waterbody_types_df_sub, ) = _prep_reservoir_da_dataframes( reservoir_usgs_df, @@ -1243,6 +1316,9 @@ def compute_nhd_routing_v02( reservoir_usace_param_df, reservoir_rfc_df, reservoir_rfc_param_df, + great_lakes_df, + great_lakes_param_df, + great_lakes_climatology_df, waterbody_types_df_sub, t0, from_files, @@ -1300,6 +1376,15 @@ def compute_nhd_routing_v02( reservoir_rfc_update_time.astype("float32"), reservoir_rfc_da_timestep.astype("int32"), reservoir_rfc_persist_days.astype("int32"), + # Great Lakes DA data + gl_df_sub.lake_id.values.astype("int32"), + gl_df_sub.time.values.astype("int32"), + gl_df_sub.Discharge.values.astype("float32"), + gl_parm_lake_id_sub.astype("int32"), + gl_param_flows_sub.astype("float32"), + gl_param_time_sub.astype("int32"), + gl_param_update_time_sub.astype("int32"), + gl_climatology_df_sub.values.astype("float32"), {}, assume_short_ts, return_courant, @@ -1402,6 +1487,12 @@ def compute_nhd_routing_v02( reservoir_rfc_update_time, reservoir_rfc_da_timestep, reservoir_rfc_persist_days, + gl_df_sub, + gl_parm_lake_id_sub, + gl_param_flows_sub, + gl_param_time_sub, + gl_param_update_time_sub, + gl_climatology_df_sub, waterbody_types_df_sub, ) = _prep_reservoir_da_dataframes( reservoir_usgs_df, @@ -1410,6 +1501,9 @@ def compute_nhd_routing_v02( reservoir_usace_param_df, reservoir_rfc_df, reservoir_rfc_param_df, + great_lakes_df, + great_lakes_param_df, + great_lakes_climatology_df, waterbody_types_df_sub, t0, from_files, @@ -1466,6 
+1560,15 @@ def compute_nhd_routing_v02( reservoir_rfc_update_time.astype("float32"), reservoir_rfc_da_timestep.astype("int32"), reservoir_rfc_persist_days.astype("int32"), + # Great Lakes DA data + gl_df_sub.lake_id.values.astype("int32"), + gl_df_sub.time.values.astype("int32"), + gl_df_sub.Discharge.values.astype("float32"), + gl_parm_lake_id_sub.astype("int32"), + gl_param_flows_sub.astype("float32"), + gl_param_time_sub.astype("int32"), + gl_param_update_time_sub.astype("int32"), + gl_climatology_df_sub.values.astype("float32"), {}, assume_short_ts, return_courant, @@ -1764,7 +1867,7 @@ def compute_diffusive_routing( rch_list[~x], dat_all[~x,3:], 0, # place-holder for streamflow DA parameters (np.asarray([]), np.asarray([]), np.asarray([])), - # place-holder for reservoir DA paramters + # place-holder for reservoir DA parameters (np.asarray([]), np.asarray([]), np.asarray([]), np.asarray([]), np.asarray([])), (np.asarray([]), np.asarray([]), np.asarray([]), np.asarray([]), np.asarray([])), # place holder for reservoir inflows @@ -1773,6 +1876,8 @@ def compute_diffusive_routing( (np.asarray([]), np.asarray([]), np.asarray([])), # place-holder for nudge values (np.empty(shape=(0, nts + 1), dtype='float32')), + # place-holder for great lakes DA values/parameters + (np.asarray([]), np.asarray([]), np.asarray([]), np.asarray([])), ) ) diff --git a/src/troute-routing/troute/routing/fast_reach/mc_reach.pyx b/src/troute-routing/troute/routing/fast_reach/mc_reach.pyx index 5d37e3f70..0d205d98e 100644 --- a/src/troute-routing/troute/routing/fast_reach/mc_reach.pyx +++ b/src/troute-routing/troute/routing/fast_reach/mc_reach.pyx @@ -19,6 +19,7 @@ from troute.network.reservoirs.levelpool.levelpool cimport MC_Levelpool, run_lp_ from troute.network.reservoirs.rfc.rfc cimport MC_RFC, run_rfc_c from troute.routing.fast_reach.reservoir_hybrid_da import reservoir_hybrid_da from troute.routing.fast_reach.reservoir_RFC_da import reservoir_RFC_da +from troute.routing.fast_reach.reservoir_GL_da import great_lakes_da from cython.parallel import prange #import cProfile @@ -207,6 +208,14 @@ cpdef object compute_network_structured( const float[:] reservoir_rfc_update_time, const int[:] reservoir_rfc_da_timestep, const int[:] reservoir_rfc_persist_days, + const int[:] great_lakes_idx, + const int[:] great_lakes_times, + const float[:] great_lakes_discharge, + const int[:] great_lakes_param_idx, + const float[:] great_lakes_param_prev_assim_flow, + const int[:] great_lakes_param_prev_assim_times, + const int[:] great_lakes_param_update_times, + const float[:,:] great_lakes_climatology, dict upstream_results={}, bint assume_short_ts=False, bint return_courant=False, @@ -425,6 +434,17 @@ cpdef object compute_network_structured( cdef np.ndarray[float, ndim=1] usace_prev_persistence_index = np.asarray(reservoir_usace_persistence_index) cdef np.ndarray[int, ndim=1] rfc_timeseries_idx = np.asarray(reservoir_rfc_timeseries_idx) + # great lakes arrays + cdef np.ndarray[int, ndim=1] gl_idx = np.asarray(great_lakes_idx) + cdef np.ndarray[float, ndim=1] gl_obs = np.asarray(great_lakes_discharge) + cdef np.ndarray[int, ndim=1] gl_times = np.asarray(great_lakes_times) + cdef np.ndarray[int, ndim=1] gl_param_idx = np.asarray(great_lakes_param_idx) + cdef np.ndarray[int, ndim=1] gl_update_time = np.asarray(great_lakes_param_update_times) + cdef np.ndarray[float, ndim=1] gl_prev_assim_ouflow = np.asarray(great_lakes_param_prev_assim_flow) + cdef np.ndarray[int, ndim=1] gl_prev_assim_timestamp = 
np.asarray(great_lakes_param_prev_assim_times) + cdef np.ndarray[float, ndim=2] gl_climatology = np.asarray(great_lakes_climatology) + + #--------------------------------------------------------------------------------------------- #--------------------------------------------------------------------------------------------- @@ -486,168 +506,208 @@ cpdef object compute_network_structured( if r.type == compute_type.RESERVOIR_LP: - # water elevation before levelpool calculation - initial_water_elevation = r.reach.lp.water_elevation - - # levelpool reservoir storage/outflow calculation - run_lp_c(r, upstream_flows, 0.0, routing_period, &reservoir_outflow, &reservoir_water_elevation) - - # USGS reservoir hybrid DA inputs - if r.reach.lp.wbody_type_code == 2: - # find index location of waterbody in reservoir_usgs_obs - # and reservoir_usgs_time - res_idx = np.where(usgs_idx == r.reach.lp.lake_number) - wbody_gage_obs = reservoir_usgs_obs[res_idx[0][0],:] - wbody_gage_time = reservoir_usgs_time - prev_persisted_outflow = usgs_prev_persisted_ouflow[res_idx[0][0]] - persistence_update_time = usgs_persistence_update_time[res_idx[0][0]] - persistence_index = usgs_prev_persistence_index[res_idx[0][0]] - update_time = usgs_update_time[res_idx[0][0]] - - # USACE reservoir hybrid DA inputs - if r.reach.lp.wbody_type_code == 3: - # find index location of waterbody in reservoir_usgs_obs - # and reservoir_usgs_time - res_idx = np.where(usace_idx == r.reach.lp.lake_number) - wbody_gage_obs = reservoir_usace_obs[res_idx[0][0],:] - wbody_gage_time = reservoir_usace_time - prev_persisted_outflow = usace_prev_persisted_ouflow[res_idx[0][0]] - persistence_update_time = usace_persistence_update_time[res_idx[0][0]] - persistence_index = usace_prev_persistence_index[res_idx[0][0]] - update_time = usace_update_time[res_idx[0][0]] - - # Execute reservoir DA - both USGS(2) and USACE(3) types - if r.reach.lp.wbody_type_code == 2 or r.reach.lp.wbody_type_code == 3: - - #print('***********************************************************') - #print('calling reservoir DA code for lake_id:', r.reach.lp.lake_number) - #print('before DA, simulated outflow = ', reservoir_outflow) - #print('before DA, simulated water elevation = ', r.reach.lp.water_elevation) - + # Great Lake waterbody: doesn't actually route anything, default outflows + # are from climatology. 
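+                        # Observation, parameter, and climatology arrays for the lake are looked up by
+                        # lake_number and handed to great_lakes_da, which persists the most recent gage
+                        # observation while it remains within the persistence limit and otherwise falls
+                        # back to monthly climatology; the updated DA state is written back to the gl_*
+                        # arrays, and velocity and depth are reported as zero for these waterbodies.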
+ if r.reach.lp.wbody_type_code == 6: + # find index location of waterbody in great_lakes_df + # and great_lakes_param_df + res_idx = np.where(gl_idx == r.reach.lp.lake_number) + wbody_gage_obs = gl_obs[res_idx[0]] + wbody_gage_time = gl_times[res_idx[0]] + res_param_idx = np.where(gl_param_idx == r.reach.lp.lake_number) + param_prev_assim_flow = gl_prev_assim_ouflow[res_param_idx[0][0]] + param_prev_assim_timestamp = gl_prev_assim_timestamp[res_param_idx[0][0]] + param_update_time = gl_update_time[res_param_idx[0][0]] + climatology = gl_climatology[res_param_idx[0][0],:] + (new_outflow, - new_persisted_outflow, - new_water_elevation, - new_update_time, - new_persistence_index, - new_persistence_update_time - ) = reservoir_hybrid_da( - r.reach.lp.lake_number, # lake identification number - wbody_gage_obs, # gage observation values (cms) - wbody_gage_time, # gage observation times (sec) - dt * timestep, # model time (sec) - prev_persisted_outflow, # previously persisted outflow (cms) - persistence_update_time, - persistence_index, # number of sequentially persisted update cycles - reservoir_outflow, # levelpool simulated outflow (cms) - upstream_flows, # waterbody inflow (cms) - dt, # model timestep (sec) - r.reach.lp.area, # waterbody surface area (km2) - r.reach.lp.max_depth, # max waterbody depth (m) - r.reach.lp.orifice_elevation, # orifice elevation (m) - initial_water_elevation, # water surface el., previous timestep (m) - 48.0, # gage lookback hours (hrs) - update_time # waterbody update time (sec) + new_assimilated_outflow, + new_assimilated_timestamp, + new_update_time + ) = great_lakes_da( + wbody_gage_obs, # gage observations (cms) + wbody_gage_time, # timestamps of gage observations (datetime str) + param_prev_assim_flow, # last used observation (cms) + param_prev_assim_timestamp, # timestamp of last used observation (datetime str) + param_update_time, # timestamp to determine whether to look for new observation (datetime str) + model_start_time, # model initilaization time (datetime str) + dt * timestep, # model time (sec) + climatology, # climatology outflows (cms) ) + + gl_update_time[res_param_idx[0][0]] = new_update_time + gl_prev_assim_ouflow[res_param_idx[0][0]] = new_assimilated_outflow + gl_prev_assim_timestamp[res_param_idx[0][0]] = new_assimilated_timestamp + + # populate flowveldepth array with levelpool or hybrid DA results + flowveldepth[r.id, timestep, 0] = new_outflow + flowveldepth[r.id, timestep, 1] = 0.0 + flowveldepth[r.id, timestep, 2] = 0.0 + upstream_array[r.id, timestep, 0] = upstream_flows + + else: + # water elevation before levelpool calculation + initial_water_elevation = r.reach.lp.water_elevation - #print('After DA, outflow = ', new_outflow) - #print('After DA, water elevation =', new_water_elevation) - - # update levelpool water elevation state - update_lp_c(r, new_water_elevation, &reservoir_water_elevation) - - # change reservoir_outflow - reservoir_outflow = new_outflow - - #print('confirming DA elevation replacement:', reservoir_water_elevation) - #print('===========================================================') - - # update USGS DA reservoir state arrays - if r.reach.lp.wbody_type_code == 2: - usgs_update_time[res_idx[0][0]] = new_update_time - usgs_prev_persisted_ouflow[res_idx[0][0]] = new_persisted_outflow - usgs_prev_persistence_index[res_idx[0][0]] = new_persistence_index - usgs_persistence_update_time[res_idx[0][0]] = new_persistence_update_time - - # update USACE DA reservoir state arrays - if r.reach.lp.wbody_type_code == 3: - 
usace_update_time[res_idx[0][0]] = new_update_time - usace_prev_persisted_ouflow[res_idx[0][0]] = new_persisted_outflow - usace_prev_persistence_index[res_idx[0][0]] = new_persistence_index - usace_persistence_update_time[res_idx[0][0]] = new_persistence_update_time - - - # RFC reservoir hybrid DA inputs - if r.reach.lp.wbody_type_code == 4: - # find index location of waterbody in reservoir_rfc_obs - # and reservoir_rfc_time - res_idx = np.where(rfc_idx == r.reach.lp.lake_number) - wbody_gage_obs = reservoir_rfc_obs[res_idx[0][0],:] - totalCounts = reservoir_rfc_totalCounts[res_idx[0][0]] - rfc_file = reservoir_rfc_file[res_idx[0][0]] - use_RFC = reservoir_rfc_use_forecast[res_idx[0][0]] - current_timeseries_idx = rfc_timeseries_idx[res_idx[0][0]] - update_time = rfc_update_time[res_idx[0][0]] - rfc_timestep = reservoir_rfc_da_timestep[res_idx[0][0]] - rfc_persist_days = reservoir_rfc_persist_days[res_idx[0][0]] - - # Execute RFC reservoir DA - both RFC(4) and Glacially Dammed Lake(5) types - if r.reach.lp.wbody_type_code == 4 or r.reach.lp.wbody_type_code == 5: + # levelpool reservoir storage/outflow calculation + run_lp_c(r, upstream_flows, 0.0, routing_period, &reservoir_outflow, &reservoir_water_elevation) - #print('***********************************************************') - #print('calling reservoir DA code for lake_id:', r.reach.lp.lake_number) - #print('before DA, simulated outflow = ', reservoir_outflow) - #print('before DA, simulated water elevation = ', r.reach.lp.water_elevation) + # USGS reservoir hybrid DA inputs + if r.reach.lp.wbody_type_code == 2: + # find index location of waterbody in reservoir_usgs_obs + # and reservoir_usgs_time + res_idx = np.where(usgs_idx == r.reach.lp.lake_number) + wbody_gage_obs = reservoir_usgs_obs[res_idx[0][0],:] + wbody_gage_time = reservoir_usgs_time + prev_persisted_outflow = usgs_prev_persisted_ouflow[res_idx[0][0]] + persistence_update_time = usgs_persistence_update_time[res_idx[0][0]] + persistence_index = usgs_prev_persistence_index[res_idx[0][0]] + update_time = usgs_update_time[res_idx[0][0]] - ( - new_outflow, + # USACE reservoir hybrid DA inputs + if r.reach.lp.wbody_type_code == 3: + # find index location of waterbody in reservoir_usgs_obs + # and reservoir_usgs_time + res_idx = np.where(usace_idx == r.reach.lp.lake_number) + wbody_gage_obs = reservoir_usace_obs[res_idx[0][0],:] + wbody_gage_time = reservoir_usace_time + prev_persisted_outflow = usace_prev_persisted_ouflow[res_idx[0][0]] + persistence_update_time = usace_persistence_update_time[res_idx[0][0]] + persistence_index = usace_prev_persistence_index[res_idx[0][0]] + update_time = usace_update_time[res_idx[0][0]] + + # Execute reservoir DA - both USGS(2) and USACE(3) types + if r.reach.lp.wbody_type_code == 2 or r.reach.lp.wbody_type_code == 3: + + #print('***********************************************************') + #print('calling reservoir DA code for lake_id:', r.reach.lp.lake_number) + #print('before DA, simulated outflow = ', reservoir_outflow) + #print('before DA, simulated water elevation = ', r.reach.lp.water_elevation) + + (new_outflow, + new_persisted_outflow, new_water_elevation, - new_update_time, - new_timeseries_idx, - dynamic_reservoir_type, - assimilated_value, - assimilated_source_file, - ) = reservoir_RFC_da( - use_RFC, # boolean whether to use RFC values or not - wbody_gage_obs, # gage observation values (cms) - current_timeseries_idx, # index of for current time series observation - totalCounts, # total number of observations in RFC timeseries - 
routing_period, # routing period (sec) - dt * timestep, # model time (sec) - update_time, # time to advance to next time series index - rfc_timestep, # frequency of DA observations (sec) - rfc_persist_days*24*60*60, # max seconds RFC forecasts will be used/persisted (days -> seconds) - r.reach.lp.wbody_type_code, # reservoir type - upstream_flows, # waterbody inflow (cms) - initial_water_elevation, # water surface el., previous timestep (m) - reservoir_outflow, # levelpool simulated outflow (cms) - reservoir_water_elevation, # levelpool simulated water elevation (m) - r.reach.lp.area*1.0e6, # waterbody surface area (km2 -> m2) - r.reach.lp.max_depth, # max waterbody depth (m) - rfc_file, # RFC file name - ) + new_update_time, + new_persistence_index, + new_persistence_update_time + ) = reservoir_hybrid_da( + r.reach.lp.lake_number, # lake identification number + wbody_gage_obs, # gage observation values (cms) + wbody_gage_time, # gage observation times (sec) + dt * timestep, # model time (sec) + prev_persisted_outflow, # previously persisted outflow (cms) + persistence_update_time, + persistence_index, # number of sequentially persisted update cycles + reservoir_outflow, # levelpool simulated outflow (cms) + upstream_flows, # waterbody inflow (cms) + dt, # model timestep (sec) + r.reach.lp.area, # waterbody surface area (km2) + r.reach.lp.max_depth, # max waterbody depth (m) + r.reach.lp.orifice_elevation, # orifice elevation (m) + initial_water_elevation, # water surface el., previous timestep (m) + 48.0, # gage lookback hours (hrs) + update_time # waterbody update time (sec) + ) + + #print('After DA, outflow = ', new_outflow) + #print('After DA, water elevation =', new_water_elevation) + + # update levelpool water elevation state + update_lp_c(r, new_water_elevation, &reservoir_water_elevation) + + # change reservoir_outflow + reservoir_outflow = new_outflow + + #print('confirming DA elevation replacement:', reservoir_water_elevation) + #print('===========================================================') + + # update USGS DA reservoir state arrays + if r.reach.lp.wbody_type_code == 2: + usgs_update_time[res_idx[0][0]] = new_update_time + usgs_prev_persisted_ouflow[res_idx[0][0]] = new_persisted_outflow + usgs_prev_persistence_index[res_idx[0][0]] = new_persistence_index + usgs_persistence_update_time[res_idx[0][0]] = new_persistence_update_time + + # update USACE DA reservoir state arrays + if r.reach.lp.wbody_type_code == 3: + usace_update_time[res_idx[0][0]] = new_update_time + usace_prev_persisted_ouflow[res_idx[0][0]] = new_persisted_outflow + usace_prev_persistence_index[res_idx[0][0]] = new_persistence_index + usace_persistence_update_time[res_idx[0][0]] = new_persistence_update_time + + + # RFC reservoir hybrid DA inputs + if r.reach.lp.wbody_type_code == 4: + # find index location of waterbody in reservoir_rfc_obs + # and reservoir_rfc_time + res_idx = np.where(rfc_idx == r.reach.lp.lake_number) + wbody_gage_obs = reservoir_rfc_obs[res_idx[0][0],:] + totalCounts = reservoir_rfc_totalCounts[res_idx[0][0]] + rfc_file = reservoir_rfc_file[res_idx[0][0]] + use_RFC = reservoir_rfc_use_forecast[res_idx[0][0]] + current_timeseries_idx = rfc_timeseries_idx[res_idx[0][0]] + update_time = rfc_update_time[res_idx[0][0]] + rfc_timestep = reservoir_rfc_da_timestep[res_idx[0][0]] + rfc_persist_days = reservoir_rfc_persist_days[res_idx[0][0]] + + # Execute RFC reservoir DA - both RFC(4) and Glacially Dammed Lake(5) types + if r.reach.lp.wbody_type_code == 4 or r.reach.lp.wbody_type_code == 5: 
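+                            # reservoir_RFC_da substitutes the RFC forecast timeseries value for the
+                            # levelpool outflow while forecasts are enabled and within the persistence
+                            # window; the levelpool elevation state is then updated to the DA result.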
+ + #print('***********************************************************') + #print('calling reservoir DA code for lake_id:', r.reach.lp.lake_number) + #print('before DA, simulated outflow = ', reservoir_outflow) + #print('before DA, simulated water elevation = ', r.reach.lp.water_elevation) + + ( + new_outflow, + new_water_elevation, + new_update_time, + new_timeseries_idx, + dynamic_reservoir_type, + assimilated_value, + assimilated_source_file, + ) = reservoir_RFC_da( + use_RFC, # boolean whether to use RFC values or not + wbody_gage_obs, # gage observation values (cms) + current_timeseries_idx, # index of for current time series observation + totalCounts, # total number of observations in RFC timeseries + routing_period, # routing period (sec) + dt * timestep, # model time (sec) + update_time, # time to advance to next time series index + rfc_timestep, # frequency of DA observations (sec) + rfc_persist_days*24*60*60, # max seconds RFC forecasts will be used/persisted (days -> seconds) + r.reach.lp.wbody_type_code, # reservoir type + upstream_flows, # waterbody inflow (cms) + initial_water_elevation, # water surface el., previous timestep (m) + reservoir_outflow, # levelpool simulated outflow (cms) + reservoir_water_elevation, # levelpool simulated water elevation (m) + r.reach.lp.area*1.0e6, # waterbody surface area (km2 -> m2) + r.reach.lp.max_depth, # max waterbody depth (m) + rfc_file, # RFC file name + ) - #print('After DA, outflow = ', new_outflow) - #print('After DA, water elevation =', new_water_elevation) - - # update levelpool water elevation state - update_lp_c(r, new_water_elevation, &reservoir_water_elevation) - - # change reservoir_outflow - reservoir_outflow = new_outflow - - #print('confirming DA elevation replacement:', reservoir_water_elevation) - #print('===========================================================') - - # update RFC DA reservoir state arrays - rfc_update_time[res_idx[0][0]] = new_update_time - rfc_timeseries_idx[res_idx[0][0]] = new_timeseries_idx + #print('After DA, outflow = ', new_outflow) + #print('After DA, water elevation =', new_water_elevation) + + # update levelpool water elevation state + update_lp_c(r, new_water_elevation, &reservoir_water_elevation) + + # change reservoir_outflow + reservoir_outflow = new_outflow + + #print('confirming DA elevation replacement:', reservoir_water_elevation) + #print('===========================================================') + + # update RFC DA reservoir state arrays + rfc_update_time[res_idx[0][0]] = new_update_time + rfc_timeseries_idx[res_idx[0][0]] = new_timeseries_idx + - - # populate flowveldepth array with levelpool or hybrid DA results - flowveldepth[r.id, timestep, 0] = reservoir_outflow - flowveldepth[r.id, timestep, 1] = 0.0 - flowveldepth[r.id, timestep, 2] = reservoir_water_elevation - upstream_array[r.id, timestep, 0] = upstream_flows + # populate flowveldepth array with levelpool or hybrid DA results + flowveldepth[r.id, timestep, 0] = reservoir_outflow + flowveldepth[r.id, timestep, 1] = 0.0 + flowveldepth[r.id, timestep, 2] = reservoir_water_elevation + upstream_array[r.id, timestep, 0] = upstream_flows elif r.type == compute_type.RESERVOIR_RFC: run_rfc_c(r, upstream_flows, 0.0, routing_period, &reservoir_outflow, &reservoir_water_elevation) @@ -775,5 +835,11 @@ cpdef object compute_network_structured( rfc_idx, rfc_update_time-((timestep-1)*dt), rfc_timeseries_idx ), - np.asarray(nudge) + np.asarray(nudge), + ( + gl_param_idx, + gl_prev_assim_ouflow, + gl_prev_assim_timestamp, + 
gl_update_time
+        )
     )
\ No newline at end of file
diff --git a/src/troute-routing/troute/routing/fast_reach/reservoir_GL_da.py b/src/troute-routing/troute/routing/fast_reach/reservoir_GL_da.py
new file mode 100644
index 000000000..6683e9ee3
--- /dev/null
+++ b/src/troute-routing/troute/routing/fast_reach/reservoir_GL_da.py
@@ -0,0 +1,131 @@
+import numpy as np
+import logging
+from datetime import datetime, timedelta
+LOG = logging.getLogger('')
+
+def great_lakes_da(
+    gage_obs,
+    gage_time,
+    previous_assimilated_outflow,
+    previous_assimilated_time,
+    update_time,
+    t0,
+    now,
+    climatology_outflows,
+    update_time_interval = 3600,
+    persistence_limit = 11,
+):
+
+    """
+    Perform persistence reservoir data assimilation for the Great Lakes.
+
+    Arguments
+    ---------
+    - gage_obs                     (memoryview slice): array of gage observations at the gage
+                                                       associated with this waterbody (cms)
+    - gage_time                    (memoryview slice): array of observation times (secs) relative
+                                                       to the model initialization time (t0)
+    - previous_assimilated_outflow (numpy.float32):    observation assimilated at the previous
+                                                       update (cms)
+    - previous_assimilated_time    (int):              time of the previously assimilated
+                                                       observation, seconds since t0
+    - update_time                  (numpy.float32):    time to search for new observations,
+                                                       seconds since model initialization time (t0)
+    - t0                           (str):              model initialization time
+    - now                          (float):            current model time, seconds since t0
+    - climatology_outflows         (array):            monthly climatological outflows (cms),
+                                                       indexed January through December
+    - update_time_interval         (float):            time interval from current to next update
+                                                       time (secs)
+    - persistence_limit            (float):            maximum age, in days, of an observation
+                                                       before outflow defaults to climatology
+
+    Returns
+    -------
+    - outflow                 (float): assimilated, persisted, or climatological outflow rate (m3/sec)
+    - new_assimilated_outflow (float): observation most recently used for assimilation (cms)
+    - new_assimilated_time    (int):   time of that observation, seconds since t0
+    - new_update_time         (float): next time to search for new observations, seconds since t0
+    """
+
+    # LOG.debug('Great Lakes data assimilation for lake_id: %s at time %s from run start' % (lake_number, now))
+
+    # set new values as old values to start:
+    new_assimilated_outflow = previous_assimilated_outflow
+    new_assimilated_time = previous_assimilated_time
+    new_update_time = update_time
+
+    # determine which climatology value to use based on model time
+    t0_datetime = datetime.strptime(t0, '%Y-%m-%d_%H:%M:%S')
+    now_datetime = t0_datetime + timedelta(seconds=now)
+    month_idx = now_datetime.month - 1 # subtract 1 for python indexing
+    climatology_outflow = climatology_outflows[month_idx]
+
+    if np.isnan(previous_assimilated_outflow):
+        previous_assimilated_outflow = climatology_outflow
+
+    if now >= update_time:
+        # LOG.debug(
+        #     'Looking for observation to assimilate...'
+        # )
+
+        # initialize variable to store assimilated observations. We initialize
+        # as np.nan, so that if no good quality observations are found, we can
+        # easily catch it.
+        obs = np.nan
+
+        # identify location of gage_time that is nearest to, but not greater
+        # than the update time
+        t_idxs = np.nonzero(((now - gage_time) >= 0))[0]
+        if len(t_idxs)>0:
+            t_idx = t_idxs[-1]
+
+            # record good observation to obs
+            obs = gage_obs[t_idx]
+
+            # determine how many seconds prior to the update_time the
+            # observation was taken
+            t_obs = gage_time[t_idx]
+            gage_lookback_seconds = now - t_obs
+
+        if np.isnan(obs):
+            '''
+            - If no good observation is found, then we do not update the
+            update time. Consequently we will continue to search for a good
+            observation at each model timestep, before updating the update
+            time.
+ ''' + # LOG.debug( + # 'No good observation found, persisting previously assimilated flow' + # ) + outflow = previous_assimilated_outflow + + elif gage_lookback_seconds > (persistence_limit*60*60*24): + ''' + If a good observation was found, but the time difference between + the current model time and the observation timestamp is greater than + the persistence limit, default to climatology. + ''' + outflow = climatology_outflow + + else: + ''' + A good observation is found and it is within the persistence limit. + ''' + outflow = obs + new_assimilated_outflow = obs + new_assimilated_time = t_obs + new_update_time = update_time + update_time_interval + + else: + outflow = previous_assimilated_outflow + + if (now - previous_assimilated_time) > (persistence_limit*60*60*24): + ''' + If the time difference between the current model time and the + observation timestamp is greater than + the persistence limit, default to climatology. + ''' + outflow = climatology_outflow + + return outflow, new_assimilated_outflow, new_assimilated_time, new_update_time \ No newline at end of file diff --git a/test/LowerColorado_TX/test_AnA_V4_NHD.yaml b/test/LowerColorado_TX/test_AnA_V4_NHD.yaml index b8a906517..c53cd8412 100644 --- a/test/LowerColorado_TX/test_AnA_V4_NHD.yaml +++ b/test/LowerColorado_TX/test_AnA_V4_NHD.yaml @@ -95,9 +95,9 @@ compute_parameters: reservoir_parameter_file : domain/reservoir_index_AnA.nc reservoir_persistence_da: #---------- - reservoir_persistence_usgs : False - reservoir_persistence_usace : False - reservoir_persistence_greatLake : True + reservoir_persistence_usgs : True + reservoir_persistence_usace : True + reservoir_persistence_greatLake : False reservoir_rfc_da: #---------- reservoir_rfc_forecasts : False diff --git a/test/LowerColorado_TX_v4/test_AnA_V4_HYFeature.yaml b/test/LowerColorado_TX_v4/test_AnA_V4_HYFeature.yaml index 0e00ef5cd..6105673af 100644 --- a/test/LowerColorado_TX_v4/test_AnA_V4_HYFeature.yaml +++ b/test/LowerColorado_TX_v4/test_AnA_V4_HYFeature.yaml @@ -113,7 +113,7 @@ compute_parameters: #---------- reservoir_persistence_usgs : True reservoir_persistence_usace : True - reservoir_persistence_greatLake : True + reservoir_persistence_greatLake : False reservoir_rfc_da: #---------- reservoir_rfc_forecasts : True From c2cd1b4f61773942f80f04b860d38406345fe4fc Mon Sep 17 00:00:00 2001 From: Amin Torabi <140189926+AminTorabi-NOAA@users.noreply.github.com> Date: Thu, 1 Aug 2024 18:19:11 -0400 Subject: [PATCH 17/29] speeding up Parquet_output function by vectorizing (#816) * speeding up Parquet_output function by vectorizing * updating the index * delete pdb --- src/troute-nwm/src/nwm_routing/output.py | 63 +++++++++++++++++------- 1 file changed, 44 insertions(+), 19 deletions(-) diff --git a/src/troute-nwm/src/nwm_routing/output.py b/src/troute-nwm/src/nwm_routing/output.py index 38a6e6547..893825015 100644 --- a/src/troute-nwm/src/nwm_routing/output.py +++ b/src/troute-nwm/src/nwm_routing/output.py @@ -64,28 +64,53 @@ def _parquet_output_format_converter(df, start_datetime, dt, configuration, pref -------- - timeseries_df (DataFrame): Converted timeseries data frame ''' - - df.index.name = 'location_id' - df.reset_index(inplace=True) - timeseries_df = df.melt(id_vars=['location_id'], var_name='var') - timeseries_df['var'] = timeseries_df['var'].astype('string') - timeseries_df[['timestep', 'variable']] = timeseries_df['var'].str.strip("()").str.split(",", n=1, expand=True) - timeseries_df['variable'] = timeseries_df['variable'].str.strip().str.replace("'", "") 
- timeseries_df['timestep'] = timeseries_df['timestep'].astype('int') - timeseries_df['value_time'] = (start_datetime + pd.to_timedelta(timeseries_df['timestep'] * dt, unit='s')) variable_to_name_map = {"q": "streamflow", "d": "depth", "v": "velocity"} - timeseries_df["variable_name"] = timeseries_df["variable"].map(variable_to_name_map) - timeseries_df.drop(['var', 'timestep', 'variable'], axis=1, inplace=True) - timeseries_df['configuration'] = configuration variable_to_units_map = {"streamflow": "m3/s", "velocity": "m/s", "depth": "m"} - timeseries_df['units'] = timeseries_df['variable_name'].map(variable_to_units_map) - timeseries_df['reference_time'] = start_datetime.date() - timeseries_df['location_id'] = timeseries_df['location_id'].astype('string') - timeseries_df['location_id'] = prefix_ids + '-' + timeseries_df['location_id'] - timeseries_df['value'] = timeseries_df['value'].astype('double') - timeseries_df['reference_time'] = timeseries_df['reference_time'].astype('datetime64[us]') - timeseries_df['value_time'] = timeseries_df['value_time'].astype('datetime64[us]') + # Prepare the location_id with prefix + df.index.name = 'location_id' + df.reset_index(inplace=True) + location_ids = prefix_ids + '-' + df['location_id'].astype(str) + + # Flatten the dataframe using NumPy + num_locations = df.shape[0] + num_time_variables = df.shape[1] - 1 + num_records = num_locations * num_time_variables + + # Prepare timestep and variable arrays + times = df.columns[1:] + timesteps = np.array([t[0] for t in times], dtype=int) + variables = np.array([t[1] for t in times]) + + # Preallocate arrays + location_ids_repeated = np.tile(location_ids, num_time_variables) + value_time = np.empty(num_records, dtype='datetime64[us]') + variable_names = np.empty(num_records, dtype=object) + units = np.empty(num_records, dtype=object) + values = np.empty(num_records, dtype=float) + + # Calculate value_time, variable_names, units, and values in a vectorized manner + for i in range(num_time_variables): + start_idx = i * num_locations + end_idx = start_idx + num_locations + value_time[start_idx:end_idx] = start_datetime + pd.to_timedelta(timesteps[i] * dt, unit='s') + variable_name = variable_to_name_map[variables[i]] + unit = variable_to_units_map[variable_name] + variable_names[start_idx:end_idx] = variable_name + units[start_idx:end_idx] = unit + values[start_idx:end_idx] = df.iloc[:, i + 1].values + + # Create the resulting DataFrame + timeseries_df = pd.DataFrame({ + 'location_id': location_ids_repeated, + 'value': values, + 'value_time': value_time, + 'variable_name': variable_names, + 'units': units, + 'reference_time': start_datetime.date(), + 'configuration': configuration + }) + return timeseries_df From 5865910b73865bedf281cd79d7de4463675a2834 Mon Sep 17 00:00:00 2001 From: Amin Torabi <140189926+AminTorabi-NOAA@users.noreply.github.com> Date: Tue, 6 Aug 2024 08:28:59 -0400 Subject: [PATCH 18/29] updating time on csv output (#818) --- src/troute-network/troute/nhd_io.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/troute-network/troute/nhd_io.py b/src/troute-network/troute/nhd_io.py index 539e661aa..b948b379f 100644 --- a/src/troute-network/troute/nhd_io.py +++ b/src/troute-network/troute/nhd_io.py @@ -2076,6 +2076,9 @@ def write_flowveldepth_csv_pkl(stream_output_directory, file_name, # Concatenate all temporary DataFrames vertically df = pd.concat(df_list) df.index.name = 'feature_id' + + df['current_time'] = pd.to_datetime(df['t0']) + pd.to_timedelta(df['time']) + df = 
df[['current_time', 'flow', 'velocity', 'depth', 'nudge']] # File format handling file_format = file_name.split('.')[-1] From f8e9f4521156456b44483d9216d37c488043da74 Mon Sep 17 00:00:00 2001 From: shorvath-noaa <103054653+shorvath-noaa@users.noreply.github.com> Date: Wed, 7 Aug 2024 14:08:46 -0600 Subject: [PATCH 19/29] update default values for parquet output (#821) * update default values for parquet output * remove 'Optional' from parameters --- src/troute-config/troute/config/output_parameters.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/troute-config/troute/config/output_parameters.py b/src/troute-config/troute/config/output_parameters.py index 983c1d9fb..f9f35a600 100644 --- a/src/troute-config/troute/config/output_parameters.py +++ b/src/troute-config/troute/config/output_parameters.py @@ -53,8 +53,8 @@ class ParquetOutput(BaseModel): # NOTE: required if writing results to parquet parquet_output_folder: Optional[DirectoryPath] = None parquet_output_segments: Optional[List[str]] = None - configuration: Optional[str] = None - prefix_ids: Optional[str] = None + configuration: str = 'None' + prefix_ids: str = 'wb-' class ChrtoutOutput(BaseModel): From 70a2d0c68f270cc22ad59d7635e7d4fa0cb21170 Mon Sep 17 00:00:00 2001 From: Austin Raney Date: Mon, 12 Aug 2024 13:33:13 -0400 Subject: [PATCH 20/29] fix: parquet prefix_ids defaults to 'wb' --- src/troute-config/troute/config/output_parameters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/troute-config/troute/config/output_parameters.py b/src/troute-config/troute/config/output_parameters.py index f9f35a600..f56c89f6a 100644 --- a/src/troute-config/troute/config/output_parameters.py +++ b/src/troute-config/troute/config/output_parameters.py @@ -54,7 +54,7 @@ class ParquetOutput(BaseModel): parquet_output_folder: Optional[DirectoryPath] = None parquet_output_segments: Optional[List[str]] = None configuration: str = 'None' - prefix_ids: str = 'wb-' + prefix_ids: str = 'wb' class ChrtoutOutput(BaseModel): From 231e0f70b5289a1fe881c8ab260cc3e079165202 Mon Sep 17 00:00:00 2001 From: AminTorabi-NOAA Date: Wed, 7 Aug 2024 12:42:24 -0400 Subject: [PATCH 21/29] Optimizing read_geopkg function --- .../troute/HYFeaturesNetwork.py | 50 +++++++------------ 1 file changed, 18 insertions(+), 32 deletions(-) diff --git a/src/troute-network/troute/HYFeaturesNetwork.py b/src/troute-network/troute/HYFeaturesNetwork.py index 85644f3d1..a4b0927ea 100644 --- a/src/troute-network/troute/HYFeaturesNetwork.py +++ b/src/troute-network/troute/HYFeaturesNetwork.py @@ -27,8 +27,7 @@ def read_geopkg(file_path, compute_parameters, waterbody_parameters, cpu_pool): # If waterbodies are being simulated, read lakes table if waterbody_parameters.get('break_network_at_waterbodies',False): - layers.append('lakes') - layers.append('nexus') + layers.extend(['lakes', 'nexus']) # If any DA is activated, read network table as well for gage information data_assimilation_parameters = compute_parameters.get('data_assimilation_parameters',{}) @@ -44,40 +43,27 @@ def read_geopkg(file_path, compute_parameters, waterbody_parameters, cpu_pool): hybrid_routing = hybrid_parameters.get('run_hybrid_routing', False) if hybrid_routing & ('nexus' not in layers): layers.append('nexus') - + + # Define a function to read a layer from the geopackage + def read_layer(layer): + try: + return gpd.read_file(file_path, layer=layer) + except Exception as e: + return pd.DataFrame() + # Retrieve geopackage information: if cpu_pool > 1: - with 
Parallel(n_jobs=len(layers)) as parallel: - jobs = [] - for layer in layers: - jobs.append( - delayed(gpd.read_file) - #(f, qlat_file_value_col, gw_bucket_col, terrain_ro_col) - #delayed(nhd_io.get_ql_from_csv) - (filename=file_path, layer=layer) - ) - gpkg_list = parallel(jobs) + with Parallel(n_jobs=min(cpu_pool, len(layers))) as parallel: + gpkg_list = parallel(delayed(read_layer)(layer) for layer in layers) + table_dict = {layers[i]: gpkg_list[i] for i in range(len(layers))} - flowpaths = pd.merge(table_dict.get('flowpaths'), table_dict.get('flowpath_attributes'), on='id') - lakes = table_dict.get('lakes', pd.DataFrame()) - network = table_dict.get('network', pd.DataFrame()) - nexus = table_dict.get('nexus', pd.DataFrame()) else: - flowpaths = gpd.read_file(file_path, layer='flowpaths') - flowpath_attributes = gpd.read_file(file_path, layer='flowpath_attributes') - flowpaths = pd.merge(flowpaths, flowpath_attributes, on='id') - # If waterbodies are being simulated, read lakes table - lakes = pd.DataFrame() - if 'lakes' in layers: - lakes = gpd.read_file(file_path, layer='lakes') - # If any DA is activated, read network table as well for gage information - network = pd.DataFrame() - if 'network' in layers: - network = gpd.read_file(file_path, layer='network') - # If diffusive is activated, read nexus table for lat/lon information - nexus = pd.DataFrame() - if 'nexus' in layers: - nexus = gpd.read_file(file_path, layer='nexus') + table_dict = {layer: read_layer(layer) for layer in layers} + + flowpaths = pd.merge(table_dict.get('flowpaths', pd.DataFrame()), table_dict.get('flowpath_attributes', pd.DataFrame()), on='id', how='inner') + lakes = table_dict.get('lakes', pd.DataFrame()) + network = table_dict.get('network', pd.DataFrame()) + nexus = table_dict.get('nexus', pd.DataFrame()) return flowpaths, lakes, network, nexus From 999a456bb09514fca5e29fe99cb837d1288b272e Mon Sep 17 00:00:00 2001 From: AminTorabi-NOAA Date: Wed, 14 Aug 2024 12:48:43 -0400 Subject: [PATCH 22/29] reading new geopackage and renaming with regex --- .../troute/HYFeaturesNetwork.py | 113 ++++++++++++------ 1 file changed, 76 insertions(+), 37 deletions(-) diff --git a/src/troute-network/troute/HYFeaturesNetwork.py b/src/troute-network/troute/HYFeaturesNetwork.py index a4b0927ea..4466afcd8 100644 --- a/src/troute-network/troute/HYFeaturesNetwork.py +++ b/src/troute-network/troute/HYFeaturesNetwork.py @@ -13,54 +13,93 @@ from datetime import datetime from pprint import pformat import os - +import fiona import troute.nhd_io as nhd_io #FIXME from troute.nhd_network import reverse_dict, extract_connections, reverse_network, reachable from .rfc_lake_gage_crosswalk import get_rfc_lake_gage_crosswalk, get_great_lakes_climatology - +import re __verbose__ = False __showtiming__ = False +def find_layer_name(layers, pattern): + """ + Find a layer name in the list of layers that matches the regex pattern. + """ + for layer in layers: + if re.search(pattern, layer, re.IGNORECASE): + return layer + return None + def read_geopkg(file_path, compute_parameters, waterbody_parameters, cpu_pool): - # Establish which layers we will read. 
We'll always need the flowpath tables - layers = ['flowpaths','flowpath_attributes'] - - # If waterbodies are being simulated, read lakes table - if waterbody_parameters.get('break_network_at_waterbodies',False): - layers.extend(['lakes', 'nexus']) - - # If any DA is activated, read network table as well for gage information - data_assimilation_parameters = compute_parameters.get('data_assimilation_parameters',{}) - streamflow_nudging = data_assimilation_parameters.get('streamflow_da',{}).get('streamflow_nudging',False) - usgs_da = data_assimilation_parameters.get('reservoir_da',{}).get('reservoir_persistence_usgs',False) - usace_da = data_assimilation_parameters.get('reservoir_da',{}).get('reservoir_persistence_usace',False) - rfc_da = data_assimilation_parameters.get('reservoir_da',{}).get('reservoir_rfc_da',{}).get('reservoir_rfc_forecasts',False) - if any([streamflow_nudging, usgs_da, usace_da, rfc_da]): - layers.append('network') + # Retrieve available layers from the GeoPackage + available_layers = fiona.listlayers(file_path) + + # patterns for the layers we want to find + layer_patterns = { + 'flowpaths': r'flow[-_]?paths?', + 'flowpath_attributes': r'flow[-_]?path[-_]?attributes?', + 'lakes': r'lakes?', + 'nexus': r'nexus?', + 'network': r'network' + } + + # Match available layers to the patterns + matched_layers = {key: find_layer_name(available_layers, pattern) + for key, pattern in layer_patterns.items()} - # If diffusive is activated, read nexus table for lat/lon information - hybrid_parameters = compute_parameters.get('hybrid_parameters',{}) - hybrid_routing = hybrid_parameters.get('run_hybrid_routing', False) - if hybrid_routing & ('nexus' not in layers): - layers.append('nexus') - - # Define a function to read a layer from the geopackage - def read_layer(layer): - try: - return gpd.read_file(file_path, layer=layer) - except Exception as e: - return pd.DataFrame() - - # Retrieve geopackage information: + layers_to_read = ['flowpaths', 'flowpath_attributes'] + + if waterbody_parameters.get('break_network_at_waterbodies', False): + layers_to_read.extend(['lakes', 'nexus']) + + data_assimilation_parameters = compute_parameters.get('data_assimilation_parameters', {}) + if any([ + data_assimilation_parameters.get('streamflow_da', {}).get('streamflow_nudging', False), + data_assimilation_parameters.get('reservoir_da', {}).get('reservoir_persistence_usgs', False), + data_assimilation_parameters.get('reservoir_da', {}).get('reservoir_persistence_usace', False), + data_assimilation_parameters.get('reservoir_da', {}).get('reservoir_rfc_da', {}).get('reservoir_rfc_forecasts', False) + ]): + layers_to_read.append('network') + + hybrid_parameters = compute_parameters.get('hybrid_parameters', {}) + if hybrid_parameters.get('run_hybrid_routing', False) and 'nexus' not in layers_to_read: + layers_to_read.append('nexus') + + # Function that read a layer from the geopackage + def read_layer(layer_name): + if layer_name: + try: + return gpd.read_file(file_path, layer=layer_name) + except Exception as e: + print(f"Error reading {layer_name}: {e}") + return pd.DataFrame() + return pd.DataFrame() + + # Retrieve geopackage information using matched layer names if cpu_pool > 1: - with Parallel(n_jobs=min(cpu_pool, len(layers))) as parallel: - gpkg_list = parallel(delayed(read_layer)(layer) for layer in layers) - - table_dict = {layers[i]: gpkg_list[i] for i in range(len(layers))} + with Parallel(n_jobs=min(cpu_pool, len(layers_to_read))) as parallel: + gpkg_list = 
parallel(delayed(read_layer)(matched_layers[layer]) for layer in layers_to_read) + + table_dict = {layers_to_read[i]: gpkg_list[i] for i in range(len(layers_to_read))} else: - table_dict = {layer: read_layer(layer) for layer in layers} + table_dict = {layer: read_layer(matched_layers[layer]) for layer in layers_to_read} + + # Handle different key column names between flowpaths and flowpath_attributes + flowpaths_df = table_dict.get('flowpaths', pd.DataFrame()) + flowpath_attributes_df = table_dict.get('flowpath_attributes', pd.DataFrame()) + + # Check if 'link' column exists and rename it to 'id' + if 'link' in flowpath_attributes_df.columns: + flowpath_attributes_df.rename(columns={'link': 'id'}, inplace=True) + + # Merge flowpaths and flowpath_attributes + flowpaths = pd.merge( + flowpaths_df, + flowpath_attributes_df, + on='id', + how='inner' + ) - flowpaths = pd.merge(table_dict.get('flowpaths', pd.DataFrame()), table_dict.get('flowpath_attributes', pd.DataFrame()), on='id', how='inner') lakes = table_dict.get('lakes', pd.DataFrame()) network = table_dict.get('network', pd.DataFrame()) nexus = table_dict.get('nexus', pd.DataFrame()) From 007d0732726a2f762106d4004e3ffedb5da85187 Mon Sep 17 00:00:00 2001 From: AminTorabi-NOAA Date: Wed, 14 Aug 2024 13:51:27 -0400 Subject: [PATCH 23/29] update for reading flowlines instead of flowpaths --- src/troute-network/troute/HYFeaturesNetwork.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/troute-network/troute/HYFeaturesNetwork.py b/src/troute-network/troute/HYFeaturesNetwork.py index 4466afcd8..dbaf20253 100644 --- a/src/troute-network/troute/HYFeaturesNetwork.py +++ b/src/troute-network/troute/HYFeaturesNetwork.py @@ -36,7 +36,7 @@ def read_geopkg(file_path, compute_parameters, waterbody_parameters, cpu_pool): # patterns for the layers we want to find layer_patterns = { - 'flowpaths': r'flow[-_]?paths?', + 'flowpaths': r'flow[-_]?paths?|flow[-_]?lines?', 'flowpath_attributes': r'flow[-_]?path[-_]?attributes?', 'lakes': r'lakes?', 'nexus': r'nexus?', From 7a6e25d835762b78d634925482ac6653b6cd948e Mon Sep 17 00:00:00 2001 From: AminTorabi-NOAA Date: Wed, 14 Aug 2024 15:22:00 -0400 Subject: [PATCH 24/29] update for reading flowline_attribute instead of flowpath_attribute --- src/troute-network/troute/HYFeaturesNetwork.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/troute-network/troute/HYFeaturesNetwork.py b/src/troute-network/troute/HYFeaturesNetwork.py index dbaf20253..fb8a19225 100644 --- a/src/troute-network/troute/HYFeaturesNetwork.py +++ b/src/troute-network/troute/HYFeaturesNetwork.py @@ -37,7 +37,7 @@ def read_geopkg(file_path, compute_parameters, waterbody_parameters, cpu_pool): # patterns for the layers we want to find layer_patterns = { 'flowpaths': r'flow[-_]?paths?|flow[-_]?lines?', - 'flowpath_attributes': r'flow[-_]?path[-_]?attributes?', + 'flowpath_attributes': r'flow[-_]?path[-_]?attributes?|flow[-_]?line[-_]?attributes?', 'lakes': r'lakes?', 'nexus': r'nexus?', 'network': r'network' From bb7d6ec3c6ba0d7ba0333106c5b5e82150ab7f3d Mon Sep 17 00:00:00 2001 From: "Justin Singh-M. 
- NOAA" Date: Wed, 14 Aug 2024 13:52:31 -0700 Subject: [PATCH 25/29] fix(troute.network): handle `fiona` dependency for `geopandas>=1.0.0` --- src/troute-network/pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/src/troute-network/pyproject.toml b/src/troute-network/pyproject.toml index bb80a35ef..77ad761c0 100644 --- a/src/troute-network/pyproject.toml +++ b/src/troute-network/pyproject.toml @@ -8,6 +8,7 @@ version = "0.0.0" dependencies = [ "deprecated", "geopandas", + "fiona", "joblib", "netcdf4", "numpy", From 452a35b6e4096279a27730a3605964658fd560e7 Mon Sep 17 00:00:00 2001 From: Amin Torabi <140189926+AminTorabi-NOAA@users.noreply.github.com> Date: Thu, 15 Aug 2024 12:36:47 -0400 Subject: [PATCH 26/29] update for csv_output (#828) --- src/troute-network/troute/nhd_io.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/troute-network/troute/nhd_io.py b/src/troute-network/troute/nhd_io.py index b948b379f..a30f3d1cf 100644 --- a/src/troute-network/troute/nhd_io.py +++ b/src/troute-network/troute/nhd_io.py @@ -2070,12 +2070,11 @@ def write_flowveldepth_csv_pkl(stream_output_directory, file_name, 'velocity': velocity.iloc[:, i], 'depth': depth.iloc[:, i], 'nudge': nudge_df.iloc[:, i] - }) + }, index=flow.index) df_list.append(df_temp) # Concatenate all temporary DataFrames vertically df = pd.concat(df_list) - df.index.name = 'feature_id' df['current_time'] = pd.to_datetime(df['t0']) + pd.to_timedelta(df['time']) df = df[['current_time', 'flow', 'velocity', 'depth', 'nudge']] @@ -2269,6 +2268,7 @@ def mask_find_seg(mask_list, nexus_dict, poi_crosswalk): return nex_id, seg_id def updated_flowveldepth(flowveldepth, nex_id, seg_id, mask_list): + flowveldepth = flowveldepth.copy(deep=True) flowveldepth.index.name = 'featureID' flowveldepth['Type'] = 'wb' flowveldepth.set_index('Type', append=True, inplace=True) From dfeb91571a37bb8f9f9718b30ea6cff8eb23d9fa Mon Sep 17 00:00:00 2001 From: Amin Torabi <140189926+AminTorabi-NOAA@users.noreply.github.com> Date: Thu, 15 Aug 2024 13:33:30 -0400 Subject: [PATCH 27/29] issue 826 (#832) --- src/troute-nwm/src/nwm_routing/output.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/troute-nwm/src/nwm_routing/output.py b/src/troute-nwm/src/nwm_routing/output.py index 893825015..824e79129 100644 --- a/src/troute-nwm/src/nwm_routing/output.py +++ b/src/troute-nwm/src/nwm_routing/output.py @@ -93,7 +93,7 @@ def _parquet_output_format_converter(df, start_datetime, dt, configuration, pref for i in range(num_time_variables): start_idx = i * num_locations end_idx = start_idx + num_locations - value_time[start_idx:end_idx] = start_datetime + pd.to_timedelta(timesteps[i] * dt, unit='s') + value_time[start_idx:end_idx] = start_datetime + pd.to_timedelta((timesteps[i] + 1) * dt, unit='s') variable_name = variable_to_name_map[variables[i]] unit = variable_to_units_map[variable_name] variable_names[start_idx:end_idx] = variable_name From 72e6e9b271bd803eba063bd7722df026bf592f56 Mon Sep 17 00:00:00 2001 From: AminTorabi-NOAA Date: Thu, 15 Aug 2024 12:45:09 -0400 Subject: [PATCH 28/29] fix to issue 829_ --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 3cfb7d2e9..6ae078abe 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ pip>=22.1.0 setuptools>=64.0,<69.0 wheel -numpy +numpy>=1.21.0,<2.0.0 Cython>3,!=3.0.4 pandas >=1.1.5, <=2.2.0 xarray From 9d9d711435c553633bedc6056c9827dfb9e7843e Mon Sep 17 00:00:00 2001 From: 
AminTorabi-NOAA
Date: Thu, 15 Aug 2024 14:15:06 -0400
Subject: [PATCH 29/29] numpy fix

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 6ae078abe..6963bc407 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 pip>=22.1.0
 setuptools>=64.0,<69.0
 wheel
-numpy>=1.21.0,<2.0.0
+numpy~=1.0
 Cython>3,!=3.0.4
 pandas >=1.1.5, <=2.2.0
 xarray
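
A note on the final pin: numpy~=1.0 is a compatible-release specifier, equivalent to numpy>=1.0,<2.0, so it keeps NumPy on the 1.x series while excluding the 2.x releases that broke the build. A minimal sketch for checking a candidate version against that pin, assuming the third-party packaging library is available (it is not itself a t-route dependency), is:

    from packaging.specifiers import SpecifierSet

    spec = SpecifierSet("~=1.0")   # compatible release: >=1.0, <2.0
    print("1.26.4" in spec)        # True  - any NumPy 1.x satisfies the pin
    print("2.0.0" in spec)         # False - NumPy 2.x is excluded

Dependency resolvers apply the same rule, so installing from requirements.txt will select the newest available NumPy 1.x release.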