From 1284753d0148f214b1566a8c84db7b9e3582bb7b Mon Sep 17 00:00:00 2001 From: jrobrien91 <obrienj@anl.gov> Date: Thu, 12 Sep 2024 18:28:09 -0500 Subject: [PATCH 1/4] ADD: MPLNET Discovery Module --- act/discovery/__init__.py | 3 +- act/discovery/nasa.py | 240 ++++++++++++++++++++++++++ examples/discovery/download_mplnet.py | 28 +++ tests/discovery/test_nasa.py | 62 +++++++ 4 files changed, 332 insertions(+), 1 deletion(-) create mode 100644 act/discovery/nasa.py create mode 100644 examples/discovery/download_mplnet.py create mode 100644 tests/discovery/test_nasa.py diff --git a/act/discovery/__init__.py b/act/discovery/__init__.py index 8159f8f4d3..ff45240825 100644 --- a/act/discovery/__init__.py +++ b/act/discovery/__init__.py @@ -8,7 +8,7 @@ __getattr__, __dir__, __all__ = lazy.attach( __name__, - submodules=['arm', 'asos', 'airnow', 'cropscape', 'improve', 'noaapsl', 'neon', 'surfrad'], + submodules=['arm', 'asos', 'airnow', 'cropscape', 'improve', 'noaapsl', 'neon', 'surfrad', 'nasa'], submod_attrs={ 'arm': ['download_arm_data', 'get_arm_doi'], 'asos': ['get_asos_data'], @@ -18,5 +18,6 @@ 'noaapsl': ['download_noaa_psl_data'], 'neon': ['get_neon_site_products', 'get_neon_product_avail', 'download_neon_data'], 'surfrad': ['download_surfrad_data'], + 'nasa' : ["download_mplnet_data", "get_mplnet_meta"] }, ) diff --git a/act/discovery/nasa.py b/act/discovery/nasa.py new file mode 100644 index 0000000000..b1d00aa484 --- /dev/null +++ b/act/discovery/nasa.py @@ -0,0 +1,240 @@ +""" +Function for downloading data from the NASA Atmospheric Science Data Center +(ASDC), which hosts data including the Atmospheric Composition +Ground Observation Network. 
+ +""" +import os +import requests +import re +import shutil + +try: + from urllib.request import urlopen +except ImportError: + from urllib import urlopen + + +def download_mplnet_data(version=None, + level=None, + product=None, + site=None, + year=None, + month=None, + day=None, + outdir=None, + ): + """ + Function to download data from the NASA MPL Network Data + https://mplnet.gsfc.nasa.gov/mplnet_web_services.cgi?download + + Downloaded Products are contained within NETCDF-4, CF compliant files. + + Parameters + ---------- + version : int + MPLNet Dataset Version Number (2 or 3). + All data from 2000 have been processed to Version 3. + Information on the MPLNet Dataset Version can be found here: + https://mplnet.gsfc.nasa.gov/versions.htm + level : int + MPLNet Product Levels (1, 15, 2). + MPLNet Levels used to differentiate quality assurance screens. + Information on the MPLNet Product levels can be found here: + https://mplnet.gsfc.nasa.gov/product-info/ + Level 1 data should never be used for publication. + product : str + MPLNet Product (NRB, CLD, PBL, AER). + NRB - Lidar signals; volume depolarization ratios, diagnostics + CLD - Cloud Heights, thin cloud extinction and optical depths, cloud + phase + AER - Aerosol heights; extinction, backscatter, and aerosol + depolarization ratio profiles; lidar ratio + PBL - Surface-Attached Mixed Layer Top and estimated mixed layer AOD + Information on the MPLNet Products can be found here: + https://mplnet.gsfc.nasa.gov/product-info/ + year : str + Four digit Year for desired product download (YYYY). + Note Level 1 and 1.5 products are available for + download the day after automated collection. + Information on the MPLNet naming convention can be found here: + https://mplnet.gsfc.nasa.gov/product-info/mplnet_file_name.htm + month : str + Two digit month for desired product download (MM). + day : str + Two digit desired day for product download (DD). 
+ If day not supplied, will download all data for month supplied + in a zip file. + site : str + MPLNet four letter site identifier. + outdir : str + The output directory for the data. Set to None to make a folder in the + current working directory named "MPLNET" to place + the files in. + + Returns + ------- + files : list + Returns list of files retrieved. + """ + + # Generate the data policy agreement information + print("\nPlease Review the MPLNET Data Policy Prior to Use of MPLNET Data") + print("The MPLNET Data Policy can be found at:\n\thttps://mplnet.gsfc.nasa.gov/data-policy\n") + + # Generate the data acknowledgement statement, might require site information. + print("Please Include the Following Acknowledgements in Any Publication \nor" + + " presentation of MPLNET data, regardless of co-authorship status:") + print("\n\tThe MPLNET project is funded by the NASA Radiation Sciences Program" + + " \n\tand Earth Observing System.") + print("\n\tWe thank the MPLNET (PI) for (its/theirs) effort in establishing" + + " \n\tand maintaining sites.\n") + + # Define the base URL + base_url = "https://mplnet.gsfc.nasa.gov/download?" 
+ + # Add specific information to the base URL + if version is None: + raise ValueError("Please provide a MPLNet Product Version") + else: + base_url += "version=V" + str(version) + + if level is None: + raise ValueError("Please provide a MPLNet Product Level") + else: + base_url += "&level=L" + str(level) + + if product is None: + raise ValueError("Please provide a specific MPLNet Product identifer") + else: + base_url += "&product=" + str(product) + + if site is None: + raise ValueError("Please provide a specific MPLNet site") + else: + base_url += "&site=" + str(site) + + if year is None: + raise ValueError( + "Year of desired data download is required to download MPLNET data") + else: + base_url += "&year=" + str(year) + + if month is None: + raise ValueError( + "Month of desired data download is required to download MPLNet data") + else: + base_url += "&month=" + str(month) + + if day: + # Note: Day is not required for the MPLNet download + base_url += "&day=" + str(day) + + # Construct output directory + if outdir: + # Output files to directory specified + output_dir = os.path.join(outdir) + else: + # If no folder given, add MPLNET folder + # to current working dir to prevent file mix-up + output_dir = os.path.join(os.getcwd(), "MPLNET") + + if not os.path.isdir(output_dir): + os.makedirs(output_dir) + + # Make a Request + files = [] + with requests.get(base_url, stream=True) as r: + fname = re.findall("filename=(.+)", + r.headers['Content-Disposition'] + ) + # Check for successful file check + if fname[0][1:-1] == "MPLNET_download_fail.txt": + raise ValueError("Failed MPLNET Download\n" + + " File could not be found for the desired input parameters" + + " for MPLNET Download API" + ) + else: + output_filename = os.path.join(output_dir, fname[0][1:-1]) + print("[DOWNLOADING] ", fname[0][1:-1]) + with open(output_filename, 'wb') as f: + shutil.copyfileobj(r.raw, f) + files.append(output_filename) + + return files + + +def get_mplnet_meta(sites=None, + 
method=None, + year=None, + month=None, + day=None, + print_to_screen=False): + """ + Returns a list of meta data from the NASA MPL Network Data + https://mplnet.gsfc.nasa.gov/mplnet_web_services.cgi?metadata + + + Parameters + ---------- + sites : str + How to return MPLNET Site Information + all - produces output on all sites (active and inactive) + active - produces output file containing only active sites + (if year, month, or day are not set then uses today's date) + inactive - produces output file containing only inactive sites + (if year, month, or day are not set then uses today's date) + planned - produces output file containing only planned sites + site_name - produces output file containing only requested site + collection - produces output file containing sites in pre-defined + collections (e.g. field campaigns or regions) + year : str + Four digit Year for desired product download (YYYY). + Note Level 1 and 1.5 products are available for + download the day after automated collection. + Information on the MPLNet naming convention can be found here: + https://mplnet.gsfc.nasa.gov/product-info/mplnet_file_name.htm + month : str + Two digit month for desired product download (MM). + day : str + Two digit desired day for product download (DD). + If day not supplied, will download all data for month supplied + in a zip file. + method : str + Method for returning JSON list of MPLNET GALION format parameters. 
+ station - returns GALION JSON with only station and PI contact info + data - return GALION JSON with data elements, station, date and PI + contact information + print_to_screen : Boolean + If true, print MPLNET site identifiers to screen + """ + # Define the base URL + base_url = "https://mplnet.gsfc.nasa.gov/operations/sites?api&format=galion" + + if sites is None: + raise ValueError( + "Site Parameter is required to download MPLNET Meta Data" + ) + else: + base_url += "&sites=" + str(sites) + + if method: + base_url += "&method=" + str(method) + + if year: + base_url += "&year=" + str(year) + + if month: + base_url += "&month=" + str(month) + + if day: + base_url += "&day=" + str(day) + + with requests.get(base_url, stream=True) as r: + # Convert to JSON + site_request = r.json() + if print_to_screen: + for i in range(len(site_request)): + print(site_request[i]['id']) + + return site_request diff --git a/examples/discovery/download_mplnet.py b/examples/discovery/download_mplnet.py new file mode 100644 index 0000000000..f188e22d5f --- /dev/null +++ b/examples/discovery/download_mplnet.py @@ -0,0 +1,28 @@ +""" +NASA MPLNET +----------- + +This example shows how to download data from +NASA's MicroPulsed Lidar Network + +""" + +import os +import glob + +import act + +# Determine MPLNET site of interest +meta = act.discovery.get_mplnet_meta(sites="active", + method="data", + print_to_screen=True + ) + +# Download MPLNET data for site of interest +output = act.discovery.download_mplnet_data(version=3, + level=2, + product="NRB", + site="GSFC", + year="2022", + month="09", + day="01") diff --git a/tests/discovery/test_nasa.py b/tests/discovery/test_nasa.py new file mode 100644 index 0000000000..790cd35a23 --- /dev/null +++ b/tests/discovery/test_nasa.py @@ -0,0 +1,62 @@ +import numpy as np +import act + + +def test_get_mplnet_meta(): + output = act.discovery.get_mplnet_meta(sites="GSFC", + method="data", + year="2024", + month="09", + day="12") + + assert 'id' in 
output[0] + assert 'station' in output[0] + assert output[0]['station']['latitude_unit'] == "deg" + + with np.testing.assert_raises(ValueError): + output = act.discovery.get_mplnet_meta() + with np.testing.assert_raises(ValueError): + output = act.discovery.get_mplnet_meta(sites=10) + + +def test_download_mplnet_data(): + output = act.discovery.download_mplnet_data(version=3, + level=1, + product="NRB", + site="GSFC", + year="2020", + month="09", + day="01") + + assert len(output) == 1 + assert output[0][-3:] == "nc4" + + with np.testing.assert_raises(ValueError): + output = act.discovery.download_mplnet_data() + with np.testing.assert_raises(ValueError): + output = act.discovery.download_mplnet_data(version=3) + with np.testing.assert_raises(ValueError): + output = act.discovery.download_mplnet_data(version=3, + level=1) + with np.testing.assert_raises(ValueError): + output = act.discovery.download_mplnet_data(version=3, + level=1, + product='NRB') + with np.testing.assert_raises(ValueError): + output = act.discovery.download_mplnet_data(version=3, + level=1, + product='NRB', + site="GSFC") + with np.testing.assert_raises(ValueError): + output = act.discovery.download_mplnet_data(version=3, + level=1, + product='NRB', + site="GSFC", + year="2020") + with np.testing.assert_raises(ValueError): + output = act.discovery.download_mplnet_data(version=3, + level=1, + product='NRB', + site="GSFC", + year="2020", + month="09") From 337a840ab19722dd7a183bb7f58cd7c53a4854b0 Mon Sep 17 00:00:00 2001 From: jrobrien91 <obrienj@anl.gov> Date: Thu, 12 Sep 2024 20:37:22 -0500 Subject: [PATCH 2/4] ENH: Removal of conflicting test --- tests/discovery/test_nasa.py | 51 +++++++++++------------------------- 1 file changed, 15 insertions(+), 36 deletions(-) diff --git a/tests/discovery/test_nasa.py b/tests/discovery/test_nasa.py index 790cd35a23..5a11a7d723 100644 --- a/tests/discovery/test_nasa.py +++ b/tests/discovery/test_nasa.py @@ -3,11 +3,9 @@ def test_get_mplnet_meta(): - output 
= act.discovery.get_mplnet_meta(sites="GSFC", - method="data", - year="2024", - month="09", - day="12") + output = act.discovery.get_mplnet_meta( + sites="GSFC", method="data", year="2024", month="09", day="12" + ) assert 'id' in output[0] assert 'station' in output[0] @@ -20,13 +18,9 @@ def test_get_mplnet_meta(): def test_download_mplnet_data(): - output = act.discovery.download_mplnet_data(version=3, - level=1, - product="NRB", - site="GSFC", - year="2020", - month="09", - day="01") + output = act.discovery.download_mplnet_data( + version=3, level=1, product="NRB", site="GSFC", year="2020", month="09", day="01" + ) assert len(output) == 1 assert output[0][-3:] == "nc4" @@ -36,27 +30,12 @@ def test_download_mplnet_data(): with np.testing.assert_raises(ValueError): output = act.discovery.download_mplnet_data(version=3) with np.testing.assert_raises(ValueError): - output = act.discovery.download_mplnet_data(version=3, - level=1) - with np.testing.assert_raises(ValueError): - output = act.discovery.download_mplnet_data(version=3, - level=1, - product='NRB') - with np.testing.assert_raises(ValueError): - output = act.discovery.download_mplnet_data(version=3, - level=1, - product='NRB', - site="GSFC") - with np.testing.assert_raises(ValueError): - output = act.discovery.download_mplnet_data(version=3, - level=1, - product='NRB', - site="GSFC", - year="2020") - with np.testing.assert_raises(ValueError): - output = act.discovery.download_mplnet_data(version=3, - level=1, - product='NRB', - site="GSFC", - year="2020", - month="09") + output = act.discovery.download_mplnet_data(version=3, level=1) + with np.testing.assert_raises(ValueError): + output = act.discovery.download_mplnet_data(version=3, level=1, product='NRB') + with np.testing.assert_raises(ValueError): + output = act.discovery.download_mplnet_data(version=3, level=1, product='NRB', site="GSFC") + with np.testing.assert_raises(ValueError): + output = act.discovery.download_mplnet_data( + version=3, level=1, 
product='NRB', site="GSFC", year="2020" + ) From 8ad3643076ddb29ca81ccc4a72e8b8a72f542702 Mon Sep 17 00:00:00 2001 From: jrobrien91 <obrienj@anl.gov> Date: Fri, 13 Sep 2024 09:12:29 -0500 Subject: [PATCH 3/4] ENH: Fixed linting issues --- act/discovery/__init__.py | 14 ++++- act/discovery/nasa.py | 76 ++++++++++++--------------- examples/discovery/download_mplnet.py | 18 ++----- 3 files changed, 51 insertions(+), 57 deletions(-) diff --git a/act/discovery/__init__.py b/act/discovery/__init__.py index ff45240825..7845872891 100644 --- a/act/discovery/__init__.py +++ b/act/discovery/__init__.py @@ -8,7 +8,17 @@ __getattr__, __dir__, __all__ = lazy.attach( __name__, - submodules=['arm', 'asos', 'airnow', 'cropscape', 'improve', 'noaapsl', 'neon', 'surfrad', 'nasa'], + submodules=[ + 'arm', + 'asos', + 'airnow', + 'cropscape', + 'improve', + 'noaapsl', + 'neon', + 'surfrad', + 'nasa', + ], submod_attrs={ 'arm': ['download_arm_data', 'get_arm_doi'], 'asos': ['get_asos_data'], @@ -18,6 +28,6 @@ 'noaapsl': ['download_noaa_psl_data'], 'neon': ['get_neon_site_products', 'get_neon_product_avail', 'download_neon_data'], 'surfrad': ['download_surfrad_data'], - 'nasa' : ["download_mplnet_data", "get_mplnet_meta"] + 'nasa': ["download_mplnet_data", "get_mplnet_meta"], }, ) diff --git a/act/discovery/nasa.py b/act/discovery/nasa.py index b1d00aa484..70552dbbff 100644 --- a/act/discovery/nasa.py +++ b/act/discovery/nasa.py @@ -9,21 +9,17 @@ import re import shutil -try: - from urllib.request import urlopen -except ImportError: - from urllib import urlopen - - -def download_mplnet_data(version=None, - level=None, - product=None, - site=None, - year=None, - month=None, - day=None, - outdir=None, - ): + +def download_mplnet_data( + version=None, + level=None, + product=None, + site=None, + year=None, + month=None, + day=None, + outdir=None, +): """ Function to download data from the NASA MPL Network Data https://mplnet.gsfc.nasa.gov/mplnet_web_services.cgi?download @@ -83,12 +79,18 
@@ def download_mplnet_data(version=None, print("The MPLNET Data Policy can be found at:\n\thttps://mplnet.gsfc.nasa.gov/data-policy\n") # Generate the data acknowledgement statement, might require site information. - print("Please Include the Following Acknowledgements in Any Publication \nor" + - " presentation of MPLNET data, regardless of co-authorship status:") - print("\n\tThe MPLNET project is funded by the NASA Radiation Sciences Program" + - " \n\tand Earth Observing System.") - print("\n\tWe thank the MPLNET (PI) for (its/theirs) effort in establishing" + - " \n\tand maintaining sites.\n") + print( + "Please Include the Following Acknowledgements in Any Publication \nor" + + " presentation of MPLNET data, regardless of co-authorship status:" + ) + print( + "\n\tThe MPLNET project is funded by the NASA Radiation Sciences Program" + + " \n\tand Earth Observing System." + ) + print( + "\n\tWe thank the MPLNET (PI) for (its/theirs) effort in establishing" + + " \n\tand maintaining sites.\n" + ) # Define the base URL base_url = "https://mplnet.gsfc.nasa.gov/download?" 
@@ -115,14 +117,12 @@ def download_mplnet_data(version=None, base_url += "&site=" + str(site) if year is None: - raise ValueError( - "Year of desired data download is required to download MPLNET data") + raise ValueError("Year of desired data download is required to download MPLNET data") else: base_url += "&year=" + str(year) if month is None: - raise ValueError( - "Month of desired data download is required to download MPLNet data") + raise ValueError("Month of desired data download is required to download MPLNet data") else: base_url += "&month=" + str(month) @@ -145,15 +145,14 @@ def download_mplnet_data(version=None, # Make a Request files = [] with requests.get(base_url, stream=True) as r: - fname = re.findall("filename=(.+)", - r.headers['Content-Disposition'] - ) + fname = re.findall("filename=(.+)", r.headers['Content-Disposition']) # Check for successful file check if fname[0][1:-1] == "MPLNET_download_fail.txt": - raise ValueError("Failed MPLNET Download\n" + - " File could not be found for the desired input parameters" + - " for MPLNET Download API" - ) + raise ValueError( + "Failed MPLNET Download\n" + + " File could not be found for the desired input parameters" + + " for MPLNET Download API" + ) else: output_filename = os.path.join(output_dir, fname[0][1:-1]) print("[DOWNLOADING] ", fname[0][1:-1]) @@ -164,12 +163,9 @@ def download_mplnet_data(version=None, return files -def get_mplnet_meta(sites=None, - method=None, - year=None, - month=None, - day=None, - print_to_screen=False): +def get_mplnet_meta( + sites=None, method=None, year=None, month=None, day=None, print_to_screen=False +): """ Returns a list of meta data from the NASA MPL Network Data https://mplnet.gsfc.nasa.gov/mplnet_web_services.cgi?metadata @@ -212,9 +208,7 @@ def get_mplnet_meta(sites=None, base_url = "https://mplnet.gsfc.nasa.gov/operations/sites?api&format=galion" if sites is None: - raise ValueError( - "Site Parameter is required to download MPLNET Meta Data" - ) + raise 
ValueError("Site Parameter is required to download MPLNET Meta Data") else: base_url += "&sites=" + str(sites) diff --git a/examples/discovery/download_mplnet.py b/examples/discovery/download_mplnet.py index f188e22d5f..7d6c807fa4 100644 --- a/examples/discovery/download_mplnet.py +++ b/examples/discovery/download_mplnet.py @@ -7,22 +7,12 @@ """ -import os -import glob - import act # Determine MPLNET site of interest -meta = act.discovery.get_mplnet_meta(sites="active", - method="data", - print_to_screen=True - ) +meta = act.discovery.get_mplnet_meta(sites="active", method="data", print_to_screen=True) # Download MPLNET data for site of interest -output = act.discovery.download_mplnet_data(version=3, - level=2, - product="NRB", - site="GSFC", - year="2022", - month="09", - day="01") +output = act.discovery.download_mplnet_data( + version=3, level=2, product="NRB", site="GSFC", year="2022", month="09", day="01" +) From d4b8b0b7899b2c9f121a3e5f7e7f0d33e59d30b3 Mon Sep 17 00:00:00 2001 From: jrobrien91 <obrienj@anl.gov> Date: Mon, 16 Sep 2024 10:30:00 -0500 Subject: [PATCH 4/4] ENH: Changed MPLNET example data request processing level --- examples/discovery/download_mplnet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/discovery/download_mplnet.py b/examples/discovery/download_mplnet.py index 7d6c807fa4..4d44f0ff14 100644 --- a/examples/discovery/download_mplnet.py +++ b/examples/discovery/download_mplnet.py @@ -14,5 +14,5 @@ # Download MPLNET data for site of interest output = act.discovery.download_mplnet_data( - version=3, level=2, product="NRB", site="GSFC", year="2022", month="09", day="01" + version=3, level=1, product="NRB", site="GSFC", year="2022", month="09", day="01" )