From 1284753d0148f214b1566a8c84db7b9e3582bb7b Mon Sep 17 00:00:00 2001
From: jrobrien91 <obrienj@anl.gov>
Date: Thu, 12 Sep 2024 18:28:09 -0500
Subject: [PATCH 1/4] ADD: MPLNET Discovery Module

---
 act/discovery/__init__.py             |   3 +-
 act/discovery/nasa.py                 | 240 ++++++++++++++++++++++++++
 examples/discovery/download_mplnet.py |  28 +++
 tests/discovery/test_nasa.py          |  62 +++++++
 4 files changed, 332 insertions(+), 1 deletion(-)
 create mode 100644 act/discovery/nasa.py
 create mode 100644 examples/discovery/download_mplnet.py
 create mode 100644 tests/discovery/test_nasa.py

diff --git a/act/discovery/__init__.py b/act/discovery/__init__.py
index 8159f8f4d3..ff45240825 100644
--- a/act/discovery/__init__.py
+++ b/act/discovery/__init__.py
@@ -8,7 +8,7 @@
 
 __getattr__, __dir__, __all__ = lazy.attach(
     __name__,
-    submodules=['arm', 'asos', 'airnow', 'cropscape', 'improve', 'noaapsl', 'neon', 'surfrad'],
+    submodules=['arm', 'asos', 'airnow', 'cropscape', 'improve', 'noaapsl', 'neon', 'surfrad', 'nasa'],
     submod_attrs={
         'arm': ['download_arm_data', 'get_arm_doi'],
         'asos': ['get_asos_data'],
@@ -18,5 +18,6 @@
         'noaapsl': ['download_noaa_psl_data'],
         'neon': ['get_neon_site_products', 'get_neon_product_avail', 'download_neon_data'],
         'surfrad': ['download_surfrad_data'],
+        'nasa' : ["download_mplnet_data", "get_mplnet_meta"]
     },
 )
diff --git a/act/discovery/nasa.py b/act/discovery/nasa.py
new file mode 100644
index 0000000000..b1d00aa484
--- /dev/null
+++ b/act/discovery/nasa.py
@@ -0,0 +1,240 @@
+"""
+Function for downloading data from the NASA Atmospheric Science Data Center
+(ASDC), which hosts data including the Atmospheric Composition
+Ground Observation Network.
+
+"""
+import os
+import requests
+import re
+import shutil
+
+try:
+    from urllib.request import urlopen
+except ImportError:
+    from urllib import urlopen
+
+
+def download_mplnet_data(version=None,
+                         level=None,
+                         product=None,
+                         site=None,
+                         year=None,
+                         month=None,
+                         day=None,
+                         outdir=None,
+                         ):
+    """
+    Function to download data from the NASA MPL Network Data
+    https://mplnet.gsfc.nasa.gov/mplnet_web_services.cgi?download
+
+    Downloaded Products are contained within NETCDF-4, CF compliant files.
+
+    Parameters
+    ----------
+    version : int
+        MPLNet Dataset Version Number (2 or 3).
+        All data from 2000 have been processed to Version 3.
+        Information on the MPLNet Dataset Version can be found here:
+        https://mplnet.gsfc.nasa.gov/versions.htm
+    level : int
+        MPLNet Product Levels (1, 15, 2), where 15 denotes Level 1.5.
+        MPLNet Levels used to differentiate quality assurance screens.
+        Information on the MPLNet Product levels can be found here:
+        https://mplnet.gsfc.nasa.gov/product-info/
+        Level 1 data should never be used for publication.
+    product : str
+        MPLNet Product (NRB, CLD, PBL, AER).
+            NRB - Lidar signals; volume depolarization ratios, diagnostics
+            CLD - Cloud Heights, thin cloud extinction and optical depths, cloud
+                    phase
+            AER - Aerosol heights; extinction, backscatter, and aerosol
+                    depolarization ratio profiles; lidar ratio
+            PBL - Surface-Attached Mixed Layer Top and estimated mixed layer AOD
+        Information on the MPLNet Products can be found here:
+        https://mplnet.gsfc.nasa.gov/product-info/
+    year : str
+        Four digit Year for desired product download (YYYY).
+        Note Level 1 and 1.5 products are available for
+        download the day after automated collection.
+        Information on the MPLNet naming convention can be found here:
+        https://mplnet.gsfc.nasa.gov/product-info/mplnet_file_name.htm
+    month : str
+        Two digit month for desired product download (MM).
+    day : str
+        Two digit desired day for product download (DD).
+        If day not supplied, will download all data for month supplied
+        in a zip file.
+    site : str
+        MPLNet four letter site identifier.
+    outdir : str
+        The output directory for the data. Set to None to make a folder
+        named "MPLNET" in the current working directory to place
+        the files in.
+
+    Returns
+    -------
+    files : list
+        Returns list of files retrieved.
+    """
+
+    # Generate the data policy agreement information
+    print("\nPlease Review the MPLNET Data Policy Prior to Use of MPLNET Data")
+    print("The MPLNET Data Policy can be found at:\n\thttps://mplnet.gsfc.nasa.gov/data-policy\n")
+
+    # Generate the data acknowledgement statement, might require site information.
+    print("Please Include the Following Acknowledgements in Any Publication \nor" +
+          " presentation of MPLNET data, regardless of co-authorship status:")
+    print("\n\tThe MPLNET project is funded by the NASA Radiation Sciences Program" +
+          " \n\tand Earth Observing System.")
+    print("\n\tWe thank the MPLNET (PI) for (its/theirs) effort in establishing" +
+          " \n\tand maintaining sites.\n")
+
+    # Define the base URL
+    base_url = "https://mplnet.gsfc.nasa.gov/download?"
+
+    # Add specific information to the base URL
+    if version is None:
+        raise ValueError("Please provide a MPLNet Product Version")
+    else:
+        base_url += "version=V" + str(version)
+
+    if level is None:
+        raise ValueError("Please provide a MPLNet Product Level")
+    else:
+        base_url += "&level=L" + str(level)
+
+    if product is None:
+        raise ValueError("Please provide a specific MPLNet Product identifer")
+    else:
+        base_url += "&product=" + str(product)
+
+    if site is None:
+        raise ValueError("Please provide a specific MPLNet site")
+    else:
+        base_url += "&site=" + str(site)
+
+    if year is None:
+        raise ValueError(
+            "Year of desired data download is required to download MPLNET data")
+    else:
+        base_url += "&year=" + str(year)
+
+    if month is None:
+        raise ValueError(
+            "Month of desired data download is required to download MPLNet data")
+    else:
+        base_url += "&month=" + str(month)
+
+    if day:
+        # Note: Day is not required for the MPLNet download
+        base_url += "&day=" + str(day)
+
+    # Construct output directory
+    if outdir:
+        # Output files to directory specified
+        output_dir = os.path.join(outdir)
+    else:
+        # If no folder given, add MPLNET folder
+        # to current working dir to prevent file mix-up
+        output_dir = os.path.join(os.getcwd(), "MPLNET")
+
+    if not os.path.isdir(output_dir):
+        os.makedirs(output_dir)
+
+    # Make a Request
+    files = []
+    with requests.get(base_url, stream=True) as r:
+        fname = re.findall("filename=(.+)",
+                           r.headers['Content-Disposition']
+                           )
+        # Check for successful file check
+        if fname[0][1:-1] == "MPLNET_download_fail.txt":
+            raise ValueError("Failed MPLNET Download\n" +
+                             " File could not be found for the desired input parameters" +
+                             " for MPLNET Download API"
+                             )
+        else:
+            output_filename = os.path.join(output_dir, fname[0][1:-1])
+            print("[DOWNLOADING] ", fname[0][1:-1])
+            with open(output_filename, 'wb') as f:
+                shutil.copyfileobj(r.raw, f)
+                files.append(output_filename)
+
+    return files
+
+
+def get_mplnet_meta(sites=None,
+                    method=None,
+                    year=None,
+                    month=None,
+                    day=None,
+                    print_to_screen=False):
+    """
+    Returns a list of meta data from the NASA MPL Network Data
+    https://mplnet.gsfc.nasa.gov/mplnet_web_services.cgi?metadata
+
+
+    Parameters
+    ----------
+    sites : str
+        How to return MPLNET Site Information
+            all        - produces output on all sites (active and inactive)
+            active     - produces output file containing only active sites
+                       (if year, month, or day are not set then uses today's date)
+            inactive   - produces output file containing only inactive sites
+                       (if year, month, or day are not set then uses today's date)
+            planned    - produces output file containing only planned sites
+            site_name  - produces output file containing only requested site
+            collection - produces output file containing sites in pre-defined
+                         collections (e.g. field campaigns or regions)
+    year : str
+        Four digit Year for desired product download (YYYY).
+        Note Level 1 and 1.5 products are available for
+        download the day after automated collection.
+        Information on the MPLNet naming convention can be found here:
+        https://mplnet.gsfc.nasa.gov/product-info/mplnet_file_name.htm
+    month : str
+        Two digit month for desired product download (MM).
+    day : str
+        Two digit desired day for product download (DD).
+        If day not supplied, will download all data for month supplied
+        in a zip file.
+    method : str
+        Method for returning JSON list of MPLNET GALION format parameters.
+            station - returns GALION JSON with only station and PI contact info
+            data - return GALION JSON with data elements, station, date and PI
+                contact information
+    print_to_screen : bool
+        If True, print MPLNET site identifiers to screen.
+    """
+    # Define the base URL
+    base_url = "https://mplnet.gsfc.nasa.gov/operations/sites?api&format=galion"
+
+    if sites is None:
+        raise ValueError(
+            "Site Parameter is required to download MPLNET Meta Data"
+        )
+    else:
+        base_url += "&sites=" + str(sites)
+
+    if method:
+        base_url += "&method=" + str(method)
+
+    if year:
+        base_url += "&year=" + str(year)
+
+    if month:
+        base_url += "&month=" + str(month)
+
+    if day:
+        base_url += "&day=" + str(day)
+
+    with requests.get(base_url, stream=True) as r:
+        # Convert to JSON
+        site_request = r.json()
+        if print_to_screen:
+            for i in range(len(site_request)):
+                print(site_request[i]['id'])
+
+    return site_request
diff --git a/examples/discovery/download_mplnet.py b/examples/discovery/download_mplnet.py
new file mode 100644
index 0000000000..f188e22d5f
--- /dev/null
+++ b/examples/discovery/download_mplnet.py
@@ -0,0 +1,28 @@
+"""
+NASA MPLNET
+-----------
+
+This example shows how to download data from
+NASA's Micro-Pulse Lidar Network (MPLNET)
+
+"""
+
+import os
+import glob
+
+import act
+
+# Determine MPLNET site of interest
+meta = act.discovery.get_mplnet_meta(sites="active",
+                                     method="data",
+                                     print_to_screen=True
+                                     )
+
+# Download MPLNET data for site of interest
+output = act.discovery.download_mplnet_data(version=3,
+                                            level=2,
+                                            product="NRB",
+                                            site="GSFC",
+                                            year="2022",
+                                            month="09",
+                                            day="01")
diff --git a/tests/discovery/test_nasa.py b/tests/discovery/test_nasa.py
new file mode 100644
index 0000000000..790cd35a23
--- /dev/null
+++ b/tests/discovery/test_nasa.py
@@ -0,0 +1,62 @@
+import numpy as np
+import act
+
+
+def test_get_mplnet_meta():
+    output = act.discovery.get_mplnet_meta(sites="GSFC",
+                                           method="data",
+                                           year="2024",
+                                           month="09",
+                                           day="12")
+
+    assert 'id' in output[0]
+    assert 'station' in output[0]
+    assert output[0]['station']['latitude_unit'] == "deg"
+
+    with np.testing.assert_raises(ValueError):
+        output = act.discovery.get_mplnet_meta()
+    with np.testing.assert_raises(ValueError):
+        output = act.discovery.get_mplnet_meta(sites=10)
+
+
+def test_download_mplnet_data():
+    output = act.discovery.download_mplnet_data(version=3,
+                                                level=1,
+                                                product="NRB",
+                                                site="GSFC",
+                                                year="2020",
+                                                month="09",
+                                                day="01")
+
+    assert len(output) == 1
+    assert output[0][-3:] == "nc4"
+
+    with np.testing.assert_raises(ValueError):
+        output = act.discovery.download_mplnet_data()
+    with np.testing.assert_raises(ValueError):
+        output = act.discovery.download_mplnet_data(version=3)
+    with np.testing.assert_raises(ValueError):
+        output = act.discovery.download_mplnet_data(version=3,
+                                                    level=1)
+    with np.testing.assert_raises(ValueError):
+        output = act.discovery.download_mplnet_data(version=3,
+                                                    level=1,
+                                                    product='NRB')
+    with np.testing.assert_raises(ValueError):
+        output = act.discovery.download_mplnet_data(version=3,
+                                                    level=1,
+                                                    product='NRB',
+                                                    site="GSFC")
+    with np.testing.assert_raises(ValueError):
+        output = act.discovery.download_mplnet_data(version=3,
+                                                    level=1,
+                                                    product='NRB',
+                                                    site="GSFC",
+                                                    year="2020")
+    with np.testing.assert_raises(ValueError):
+        output = act.discovery.download_mplnet_data(version=3,
+                                                    level=1,
+                                                    product='NRB',
+                                                    site="GSFC",
+                                                    year="2020",
+                                                    month="09")

From 337a840ab19722dd7a183bb7f58cd7c53a4854b0 Mon Sep 17 00:00:00 2001
From: jrobrien91 <obrienj@anl.gov>
Date: Thu, 12 Sep 2024 20:37:22 -0500
Subject: [PATCH 2/4] ENH: Removal of conflicting test

---
 tests/discovery/test_nasa.py | 51 +++++++++++-------------------------
 1 file changed, 15 insertions(+), 36 deletions(-)

diff --git a/tests/discovery/test_nasa.py b/tests/discovery/test_nasa.py
index 790cd35a23..5a11a7d723 100644
--- a/tests/discovery/test_nasa.py
+++ b/tests/discovery/test_nasa.py
@@ -3,11 +3,9 @@
 
 
 def test_get_mplnet_meta():
-    output = act.discovery.get_mplnet_meta(sites="GSFC",
-                                           method="data",
-                                           year="2024",
-                                           month="09",
-                                           day="12")
+    output = act.discovery.get_mplnet_meta(
+        sites="GSFC", method="data", year="2024", month="09", day="12"
+    )
 
     assert 'id' in output[0]
     assert 'station' in output[0]
@@ -20,13 +18,9 @@ def test_get_mplnet_meta():
 
 
 def test_download_mplnet_data():
-    output = act.discovery.download_mplnet_data(version=3,
-                                                level=1,
-                                                product="NRB",
-                                                site="GSFC",
-                                                year="2020",
-                                                month="09",
-                                                day="01")
+    output = act.discovery.download_mplnet_data(
+        version=3, level=1, product="NRB", site="GSFC", year="2020", month="09", day="01"
+    )
 
     assert len(output) == 1
     assert output[0][-3:] == "nc4"
@@ -36,27 +30,12 @@ def test_download_mplnet_data():
     with np.testing.assert_raises(ValueError):
         output = act.discovery.download_mplnet_data(version=3)
     with np.testing.assert_raises(ValueError):
-        output = act.discovery.download_mplnet_data(version=3,
-                                                    level=1)
-    with np.testing.assert_raises(ValueError):
-        output = act.discovery.download_mplnet_data(version=3,
-                                                    level=1,
-                                                    product='NRB')
-    with np.testing.assert_raises(ValueError):
-        output = act.discovery.download_mplnet_data(version=3,
-                                                    level=1,
-                                                    product='NRB',
-                                                    site="GSFC")
-    with np.testing.assert_raises(ValueError):
-        output = act.discovery.download_mplnet_data(version=3,
-                                                    level=1,
-                                                    product='NRB',
-                                                    site="GSFC",
-                                                    year="2020")
-    with np.testing.assert_raises(ValueError):
-        output = act.discovery.download_mplnet_data(version=3,
-                                                    level=1,
-                                                    product='NRB',
-                                                    site="GSFC",
-                                                    year="2020",
-                                                    month="09")
+        output = act.discovery.download_mplnet_data(version=3, level=1)
+    with np.testing.assert_raises(ValueError):
+        output = act.discovery.download_mplnet_data(version=3, level=1, product='NRB')
+    with np.testing.assert_raises(ValueError):
+        output = act.discovery.download_mplnet_data(version=3, level=1, product='NRB', site="GSFC")
+    with np.testing.assert_raises(ValueError):
+        output = act.discovery.download_mplnet_data(
+            version=3, level=1, product='NRB', site="GSFC", year="2020"
+        )

From 8ad3643076ddb29ca81ccc4a72e8b8a72f542702 Mon Sep 17 00:00:00 2001
From: jrobrien91 <obrienj@anl.gov>
Date: Fri, 13 Sep 2024 09:12:29 -0500
Subject: [PATCH 3/4] ENH: Fixed linting issues

---
 act/discovery/__init__.py             | 14 ++++-
 act/discovery/nasa.py                 | 76 ++++++++++++---------------
 examples/discovery/download_mplnet.py | 18 ++-----
 3 files changed, 51 insertions(+), 57 deletions(-)

diff --git a/act/discovery/__init__.py b/act/discovery/__init__.py
index ff45240825..7845872891 100644
--- a/act/discovery/__init__.py
+++ b/act/discovery/__init__.py
@@ -8,7 +8,17 @@
 
 __getattr__, __dir__, __all__ = lazy.attach(
     __name__,
-    submodules=['arm', 'asos', 'airnow', 'cropscape', 'improve', 'noaapsl', 'neon', 'surfrad', 'nasa'],
+    submodules=[
+        'arm',
+        'asos',
+        'airnow',
+        'cropscape',
+        'improve',
+        'noaapsl',
+        'neon',
+        'surfrad',
+        'nasa',
+    ],
     submod_attrs={
         'arm': ['download_arm_data', 'get_arm_doi'],
         'asos': ['get_asos_data'],
@@ -18,6 +28,6 @@
         'noaapsl': ['download_noaa_psl_data'],
         'neon': ['get_neon_site_products', 'get_neon_product_avail', 'download_neon_data'],
         'surfrad': ['download_surfrad_data'],
-        'nasa' : ["download_mplnet_data", "get_mplnet_meta"]
+        'nasa': ["download_mplnet_data", "get_mplnet_meta"],
     },
 )
diff --git a/act/discovery/nasa.py b/act/discovery/nasa.py
index b1d00aa484..70552dbbff 100644
--- a/act/discovery/nasa.py
+++ b/act/discovery/nasa.py
@@ -9,21 +9,17 @@
 import re
 import shutil
 
-try:
-    from urllib.request import urlopen
-except ImportError:
-    from urllib import urlopen
-
-
-def download_mplnet_data(version=None,
-                         level=None,
-                         product=None,
-                         site=None,
-                         year=None,
-                         month=None,
-                         day=None,
-                         outdir=None,
-                         ):
+
+def download_mplnet_data(
+    version=None,
+    level=None,
+    product=None,
+    site=None,
+    year=None,
+    month=None,
+    day=None,
+    outdir=None,
+):
     """
     Function to download data from the NASA MPL Network Data
     https://mplnet.gsfc.nasa.gov/mplnet_web_services.cgi?download
@@ -83,12 +79,18 @@ def download_mplnet_data(version=None,
     print("The MPLNET Data Policy can be found at:\n\thttps://mplnet.gsfc.nasa.gov/data-policy\n")
 
     # Generate the data acknowledgement statement, might require site information.
-    print("Please Include the Following Acknowledgements in Any Publication \nor" +
-          " presentation of MPLNET data, regardless of co-authorship status:")
-    print("\n\tThe MPLNET project is funded by the NASA Radiation Sciences Program" +
-          " \n\tand Earth Observing System.")
-    print("\n\tWe thank the MPLNET (PI) for (its/theirs) effort in establishing" +
-          " \n\tand maintaining sites.\n")
+    print(
+        "Please Include the Following Acknowledgements in Any Publication \nor"
+        + " presentation of MPLNET data, regardless of co-authorship status:"
+    )
+    print(
+        "\n\tThe MPLNET project is funded by the NASA Radiation Sciences Program"
+        + " \n\tand Earth Observing System."
+    )
+    print(
+        "\n\tWe thank the MPLNET (PI) for (its/theirs) effort in establishing"
+        + " \n\tand maintaining sites.\n"
+    )
 
     # Define the base URL
     base_url = "https://mplnet.gsfc.nasa.gov/download?"
@@ -115,14 +117,12 @@ def download_mplnet_data(version=None,
         base_url += "&site=" + str(site)
 
     if year is None:
-        raise ValueError(
-            "Year of desired data download is required to download MPLNET data")
+        raise ValueError("Year of desired data download is required to download MPLNET data")
     else:
         base_url += "&year=" + str(year)
 
     if month is None:
-        raise ValueError(
-            "Month of desired data download is required to download MPLNet data")
+        raise ValueError("Month of desired data download is required to download MPLNet data")
     else:
         base_url += "&month=" + str(month)
 
@@ -145,15 +145,14 @@ def download_mplnet_data(version=None,
     # Make a Request
     files = []
     with requests.get(base_url, stream=True) as r:
-        fname = re.findall("filename=(.+)",
-                           r.headers['Content-Disposition']
-                           )
+        fname = re.findall("filename=(.+)", r.headers['Content-Disposition'])
         # Check for successful file check
         if fname[0][1:-1] == "MPLNET_download_fail.txt":
-            raise ValueError("Failed MPLNET Download\n" +
-                             " File could not be found for the desired input parameters" +
-                             " for MPLNET Download API"
-                             )
+            raise ValueError(
+                "Failed MPLNET Download\n"
+                + " File could not be found for the desired input parameters"
+                + " for MPLNET Download API"
+            )
         else:
             output_filename = os.path.join(output_dir, fname[0][1:-1])
             print("[DOWNLOADING] ", fname[0][1:-1])
@@ -164,12 +163,9 @@ def download_mplnet_data(version=None,
     return files
 
 
-def get_mplnet_meta(sites=None,
-                    method=None,
-                    year=None,
-                    month=None,
-                    day=None,
-                    print_to_screen=False):
+def get_mplnet_meta(
+    sites=None, method=None, year=None, month=None, day=None, print_to_screen=False
+):
     """
     Returns a list of meta data from the NASA MPL Network Data
     https://mplnet.gsfc.nasa.gov/mplnet_web_services.cgi?metadata
@@ -212,9 +208,7 @@ def get_mplnet_meta(sites=None,
     base_url = "https://mplnet.gsfc.nasa.gov/operations/sites?api&format=galion"
 
     if sites is None:
-        raise ValueError(
-            "Site Parameter is required to download MPLNET Meta Data"
-        )
+        raise ValueError("Site Parameter is required to download MPLNET Meta Data")
     else:
         base_url += "&sites=" + str(sites)
 
diff --git a/examples/discovery/download_mplnet.py b/examples/discovery/download_mplnet.py
index f188e22d5f..7d6c807fa4 100644
--- a/examples/discovery/download_mplnet.py
+++ b/examples/discovery/download_mplnet.py
@@ -7,22 +7,12 @@
 
 """
 
-import os
-import glob
-
 import act
 
 # Determine MPLNET site of interest
-meta = act.discovery.get_mplnet_meta(sites="active",
-                                     method="data",
-                                     print_to_screen=True
-                                     )
+meta = act.discovery.get_mplnet_meta(sites="active", method="data", print_to_screen=True)
 
 # Download MPLNET data for site of interest
-output = act.discovery.download_mplnet_data(version=3,
-                                            level=2,
-                                            product="NRB",
-                                            site="GSFC",
-                                            year="2022",
-                                            month="09",
-                                            day="01")
+output = act.discovery.download_mplnet_data(
+    version=3, level=2, product="NRB", site="GSFC", year="2022", month="09", day="01"
+)

From d4b8b0b7899b2c9f121a3e5f7e7f0d33e59d30b3 Mon Sep 17 00:00:00 2001
From: jrobrien91 <obrienj@anl.gov>
Date: Mon, 16 Sep 2024 10:30:00 -0500
Subject: [PATCH 4/4] ENH: Changed MPLNET example data request processing level

---
 examples/discovery/download_mplnet.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/discovery/download_mplnet.py b/examples/discovery/download_mplnet.py
index 7d6c807fa4..4d44f0ff14 100644
--- a/examples/discovery/download_mplnet.py
+++ b/examples/discovery/download_mplnet.py
@@ -14,5 +14,5 @@
 
 # Download MPLNET data for site of interest
 output = act.discovery.download_mplnet_data(
-    version=3, level=2, product="NRB", site="GSFC", year="2022", month="09", day="01"
+    version=3, level=1, product="NRB", site="GSFC", year="2022", month="09", day="01"
 )