From 32191ec6c46e0008edb58ba5ae263d3c48b68020 Mon Sep 17 00:00:00 2001
From: hagne <hagen@hagnet.net>
Date: Thu, 3 Oct 2024 10:04:18 -0600
Subject: [PATCH] bugfix in opt_imports

---
 .../physics/column_optical_properties.py        |  6 +++---
 atmPy/opt_imports.py                            |  6 +++---
 .../observations/langley_calibration.py         | 17 ++++++++++-------
 .../observations/spectral_irradiance.py         | 16 +++++++++++-----
 4 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/atmPy/aerosols/physics/column_optical_properties.py b/atmPy/aerosols/physics/column_optical_properties.py
index 2654f91..647f00c 100644
--- a/atmPy/aerosols/physics/column_optical_properties.py
+++ b/atmPy/aerosols/physics/column_optical_properties.py
@@ -12,7 +12,7 @@
 #import pathlib as _pl
 import xarray as xr
 # import statsmodels.nonparametric.smoothers_lowess as smlowess
-from atmPy.opt_imports import statsmodels_nonparametric_smoothers_lowess as smlowess
+from atmPy.opt_imports import statsmodels
 
 _colors = _plt.rcParams['axes.prop_cycle'].by_key()['color']
 
@@ -1118,7 +1118,7 @@ def cloud_screening_michalsky(dataset, testdev = 0.05,rollwindow = 15, channel =
 
     # generate the smooth aod only function
     daaod5_t1 = daaod5.where(test1.values).dropna('datetime')
-    out = smlowess.lowess(daaod5_t1.values, daaod5_t1.datetime.values,
+    out = statsmodels.nonparametric.smoothers_lowess.lowess(daaod5_t1.values, daaod5_t1.datetime.values,
                  frac = 2/3)
     lowess = _xr.DataArray(out[:,1], coords={'datetime':daaod5_t1.datetime})
     
@@ -1223,7 +1223,7 @@ def cloud_screening_michalsky_smoker(dataset, testdev = 0.05,rollwindow = 15, ch
 
     # generate the smooth aod only function
     daaod5_t1 = daaod5.where(test1.values).dropna('datetime')
-    out = smlowess.lowess(daaod5_t1.values, daaod5_t1.datetime.values,
+    out = statsmodels.nonparametric.smoothers_lowess.lowess.lowess(daaod5_t1.values, daaod5_t1.datetime.values,
                  frac = 2/3)
     lowess = _xr.DataArray(out[:,1], coords={'datetime':daaod5_t1.datetime})
     
diff --git a/atmPy/opt_imports.py b/atmPy/opt_imports.py
index 4d8f50e..fcd76a7 100755
--- a/atmPy/opt_imports.py
+++ b/atmPy/opt_imports.py
@@ -34,11 +34,11 @@ def __getattr__(self, item):
         return getattr(self.module, item)
 
 # Creating the pandas facade
-statsmodels = OptionalImport('statsmodels', submodules = ['api',])
+statsmodels = OptionalImport('statsmodels', submodules = ['api','nonparametric.smoothers_lowess'])
 
 #Todo: remove those and replace with the submodule kwarg
-statsmodels_api = OptionalImport('statsmodels.api')
-statsmodels_nonparametric_smoothers_lowess = OptionalImport('statsmodels.nonparametric.smoothers_lowess')
+# statsmodels_api = OptionalImport('statsmodels.api')
+# statsmodels_nonparametric_smoothers_lowess = OptionalImport('statsmodels.nonparametric.smoothers_lowess')
 # statsmodels_robust =  OptionalImport('statsmodels.robust')
 
 timezonefinder = OptionalImport('timezonefinder')
diff --git a/atmPy/radiation/observations/langley_calibration.py b/atmPy/radiation/observations/langley_calibration.py
index 4e540ff..6062343 100644
--- a/atmPy/radiation/observations/langley_calibration.py
+++ b/atmPy/radiation/observations/langley_calibration.py
@@ -26,7 +26,7 @@
 import matplotlib.dates as pltdates
 
 from atmPy.opt_imports import pptx
-from atmPy.opt_imports import statsmodels_api as sm
+from atmPy.opt_imports import statsmodels
 
 
 colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
@@ -182,7 +182,7 @@ def read_langley_params(p2f = '/home/grad/surfrad/aod/tbl_mfrhead', verbose = Fa
 
 def open_langley_dailys(start = None, #'20160101',  
                         end = None, #'20220101',
-                        p2fld = '/mnt/telg/data/grad/surfrad/mfrsr/langleys_concat/tbl/',
+                        p2fld = '/home/grad/htelg/data/grad/surfrad/mfrsr/langleys_concat/tbl/',
                         drop_variables = ['langley_residual_correlation_prop', 'valid_points', 'residual_stats', 'cleaning_iterations', 'status'], # they are not needed under normal conditions
                        ):
     p2fld = pl.Path(p2fld)
@@ -190,11 +190,14 @@ def open_langley_dailys(start = None, #'20160101',
     df.index = df.apply(lambda row: pd.to_datetime(row.p2f.name.split('_')[4] + '01'), axis =1)
     df.sort_index(inplace=True)
     df = df.truncate(start, end)
-    
+    assert(df.shape[0] != 0), f'no files found in {p2fld}.'
     df['serial_no'] = df.apply(lambda row: row.p2f.name.split('_')[3], axis = 1)
+    assert(df.serial_no.unique().shape[0] != 0), f'No serial_no found in {df.p2f}.'
     assert(df.serial_no.unique().shape[0] == 1), f'Files indicate more then one serial number (found {df.serial_no.unique()}), which should not be the case unless you updated the langley processing ... did you? ... programmin grequired.'   
     # return df
-    ds_langs = xr.open_mfdataset(df.p2f, drop_variables = drop_variables)
+    # ds_langs = xr.open_mfdataset(df.p2f, drop_variables = drop_variables)
+    with xr.open_mfdataset(df.p2f, drop_variables = drop_variables) as ds_langs:
+        ds_langs.load()
     return Langley_Timeseries(ds_langs)
 
 class CalibrationPrediction(object):
@@ -491,17 +494,17 @@ def fit_langley(langley,
     # langley = langley.truncate(*airmasslimits)
     y = langley
     x = langley.index
-    x = sm.add_constant(x)
+    x = statsmodels.api.add_constant(x)
     
     if not weighted:
-        mod = sm.OLS(y,x)
+        mod = statsmodels.api.OLS(y,x)
     else:
         idx = langley.index
         wt = idx[:-1] - idx[1:]
         wx = np.arange(wt.shape[0])
         f = sp.interpolate.interp1d(wx, wt, bounds_error=False, fill_value='extrapolate')
         w = np.append(wt, f(wx[-1] + 1))
-        mod = sm.WLS(y, x, weights = w)
+        mod = statsmodels.api.WLS(y, x, weights = w)
 
     try:
         res = mod.fit()
diff --git a/atmPy/radiation/observations/spectral_irradiance.py b/atmPy/radiation/observations/spectral_irradiance.py
index 089f343..b8a5ed8 100644
--- a/atmPy/radiation/observations/spectral_irradiance.py
+++ b/atmPy/radiation/observations/spectral_irradiance.py
@@ -511,7 +511,7 @@ def _get_surfrad_met_data(self):
         site = self.site.abb
         fns = []
         for dt in [start_dt, end_dt]:
-            fns.append(f'/export/htelg/data/grad/surfrad/radiation/{site}/srf_rad_full_{site}_{dt.year:04d}{dt.month:02d}{dt.day:02d}.nc')
+            fns.append(f'/nfs/grad/surfrad/products_level1/radiation_netcdf/{site}/srf_rad_full_{site}_{dt.year:04d}{dt.month:02d}{dt.day:02d}.nc')
         
         ds = xr.open_mfdataset(np.unique(fns)) # unique in case start and end is the same
         
@@ -568,11 +568,12 @@ def _apply_calibration_atm_gam(self,p2fld='/export/htelg/data/grad/surfrad/mfrsr
                                     ns_season=6,
                                     lands = None):
         if isinstance(lands, type(None)):
-            print('load lands')
+            print('langleys.load.', end = '', flush = True)
             lands = atmlangcalib.open_langley_dailys(#end = '20211001',
                                                   p2fld=p2fld,)
+
+            print('gampredict.', end = '')
             lands.predict_until = pd.Timestamp.now()
-            
             lands._get_v0_gam(th=th,
                       order_stderr=order_stderr,
                       lam_overtime=lam_overtime,
@@ -588,6 +589,7 @@ def _apply_calibration_atm_gam(self,p2fld='/export/htelg/data/grad/surfrad/mfrsr
             istderr = lands.dataset.sel(fit_results = 'intercept_stderr', wavelength = wl).langley_fitres
             date_predict = pd.to_datetime(lands.dataset.sel(fit_results = 'intercept', wavelength = wl).langley_fitres.where(istderr < th_predict).dropna('datetime').datetime[-req_no_good_langleys].values)
             lands.date_predict = date_predict
+            print('done')
         else:
             pass
         
@@ -802,7 +804,9 @@ def od_co2_ch4_h2o(self):
         if isinstance(self._od_co2ch4h2o, type(None)):
             # get the 1625 filter info based on the MFRSR instrument serial no
             if 1625 in self.raw_data.channel:
-                fn = '/export/htelg/projects/AOD_redesign/MFRSR_History.xlsx'
+                #### TODO: this file should be stored somewhere more meaning full
+                fn = '/home/grad/htelg/projects/AOD_redesign/MFRSR_History.xlsx'
+                
                 mfrsr_info = pd.read_excel(fn, sheet_name='Overview')
                 inst_info = mfrsr_info[mfrsr_info.Instrument == self.raw_data.serial_no]
                 
@@ -811,7 +815,9 @@ def od_co2_ch4_h2o(self):
                 filter_batch = ''.join([i for i in fab if not i.isnumeric()]).lower()
                 
                 # open the lookup dabel for the Optical depth correction
-                correction_info = xr.open_dataset(self.path2absorption_correction_ceoff_1625)
+                # correction_info = xr.open_dataset(self.path2absorption_correction_ceoff_1625)
+                with xr.open_dataset(self.path2absorption_correction_ceoff_1625) as correction_info:
+                    correction_info.load()
                 
                 ds = xr.Dataset()
                 params_dict = {}