ARM-DOE · zssherman · Mar 28, 2024 · Mar 25, 2024 · Mar 25, 2024 · Mar 25, 2024
@@ -60,6 +60,7 @@
             'EXAMPLE_ENA_MET',
             'EXAMPLE_CCN',
             'EXAMPLE_OLD_QC',
+            'EXAMPLE_AOSACSM',
         ]
     },
 )
@@ -66,6 +66,7 @@
 EXAMPLE_SONDE_WILDCARD = DATASETS.fetch('sgpsondewnpnC1.b1.20190101.053200.cdf')
 EXAMPLE_CEIL_WILDCARD = DATASETS.fetch('sgpceilC1.b1.20190101.000000.nc')
 EXAMPLE_HYSPLIT = DATASETS.fetch('houstonaug300.0summer2010080100')
+EXAMPLE_AOSACSM = DATASETS.fetch('sgpaosacsmE13.b2.20230420.000109.nc')
 
 # Multiple files in a list
 dlppi_multi_list = ['sgpdlppiC1.b1.20191015.120023.cdf', 'sgpdlppiC1.b1.20191015.121506.cdf']

@@ -32,6 +32,7 @@
             'convert_to_potential_temp',
             'arm_site_location_search',
             'DatastreamParserARM',
+            'calculate_percentages',
         ],
         'datetime_utils': [
             'dates_between',

@@ -1331,3 +1331,76 @@ def arm_site_location_search(site_code='sgp', facility_code=None):
         coord_dict.setdefault(site + ' ' + facility, coords)
 
     return coord_dict
+
+
+def calculate_percentages(ds, fields, time=None, time_slice=None, threshold=None, fill_value=0.0):
+    """
+    Calculate the percentage each field's mean contributes to the sum of all field means.
+
+    Parameters
+    ----------
+    ds : ACT Dataset
+        The ACT dataset to calculate the percentages on.
+    fields : list
+        A list of all the fields to use in the percentage calculations.
+    time : datetime
+        A single datetime to calculate percentages on if desired. Default
+        is None and all data will be included.
+    time_slice : tuple
+        A tuple of two datetimes to grab all data between those two datetimes.
+        Default is None and all data will be included. Ignored if `time` is set.
+    threshold : float
+        Values not greater than this threshold (including NaN) are treated as
+        invalid and replaced with `fill_value`. Default is None.
+    fill_value : float
+        Fill value for invalid data. Only used if a threshold is provided.
+
+    Returns
+    -------
+    percentages : dict
+        A dictionary containing the fields provided and their corresponding
+        percentage that was calculated.
+
+    """
+    # Copy Dataset so we are not overriding the data.
+    ds_percent = ds.copy()
+
+    if threshold is not None:
+        # Replace values that are not above the threshold with the fill value.
+        for field in fields:
+            ds_percent[field] = ds_percent[field].where(ds_percent[field] > threshold, fill_value)
+    else:
+        # Without a threshold, negative values would skew the percentages, so
+        # warn about them. Test for `< 0` instead of `not all(>= 0)` so that
+        # NaN (missing) samples alone do not trigger a misleading warning.
+        for field in fields:
+            if np.any(ds_percent[field].values < 0.0):
+                warnings.warn(
+                    f"{field} contains negatives values, consider using a threshold.",
+                    UserWarning,
+                )
+
+    # Select the data based on a single time or on all times within a slice;
+    # otherwise keep every time step and tell the caller about it.
+    if time is not None:
+        ds_percent = ds_percent.sel(time=time)
+    elif time_slice is not None:
+        ds_percent = ds_percent.sel(time=slice(time_slice[0], time_slice[1]))
+    else:
+        warnings.warn(
+            "No time parameter used, calculating a mean for each field for the whole dataset.",
+            UserWarning,
+        )
+
+    # Mean of each field over the selection, expressed as a percentage of
+    # the sum of all the field means.
+    # NOTE: if the means sum to zero, the numpy division below produces
+    # NaN/inf percentages instead of raising an error.
+    values = [ds_percent[field].mean(skipna=True).values for field in fields]
+    total = sum(values)
+    percent_values = [(value / total) * 100 for value in values]
+
+    # Create a dictionary of the fields and their percentages.
+    percentages = dict(zip(fields, percent_values))
+    ds_percent.close()
+    return percentages
@@ -12,7 +12,7 @@
     from moviepy.video.io.VideoFileClip import VideoFileClip
 
     MOVIEPY_AVAILABLE = True
-except ImportError:
+except (ImportError, RuntimeError):
     MOVIEPY_AVAILABLE = False
 
 

@@ -26,7 +26,6 @@ dependencies:
   - shapely
   - lazy_loader
   - cmweather
-  - arm-test-data
   - moviepy
   - ruff
   - pip
@@ -37,3 +36,4 @@ dependencies:
     - arm_pyart
     - icartt
     - aiohttp>=3.9.0b1
+    - git+https://github.com/ARM-DOE/arm-test-data.git
@@ -18,7 +18,6 @@ dependencies:
   - scikit-posthocs
   - pip
   - shapely<1.8.3
-  - arm-test-data
   - moviepy
   - pip:
     - mpl2nc
@@ -40,3 +39,4 @@ dependencies:
     - sphinxcontrib-htmlhelp==2.0.4
     - sphinxcontrib-qthelp==1.0.6
     - sphinxcontrib-serializinghtml==1.1.9
+    - git+https://github.com/ARM-DOE/arm-test-data.git
@@ -0,0 +1,33 @@
+"""
+Calculate and plot aerosol percentages.
+----------------------------------------
+
+Example on how to plot a Pie Chart of the composition of aerosols in a volume of air by
+obtaining percentages of each aerosol from the dataset.
+
+Author: Zach Sherman
+"""
+
+from arm_test_data import DATASETS
+import matplotlib.pyplot as plt
+
+import act
+
+# Read in the data.
+ds = act.io.read_arm_netcdf(DATASETS.fetch("sgpaosacsmE13.b2.20230420.000109.nc"))
+
+# Calculate percentages using selected fields.
+fields = ['sulfate', 'ammonium', 'nitrate', 'chloride']
+time_slice = ('2023-04-20T17:38:20.000000000', '2023-04-20T20:29:47.000000000')
+# A threshold of 0.0 replaces values that are not above zero with the fill value.
+percentages = act.utils.calculate_percentages(ds, fields, time_slice=time_slice, threshold=0.0)
+
+# Get values for the pie chart.
+labels = list(percentages.keys())
+sizes = list(percentages.values())
+
+# Plot the figure
+fig, ax = plt.subplots()
+ax.pie(sizes, labels=labels, autopct='%1.1f%%')
+plt.show()
+ds.close()
@@ -520,3 +520,40 @@ def test_arm_site_location_search():
     assert list(test_dict_nsa)[0] == 'nsa C1'
     assert test_dict_nsa[list(test_dict_nsa)[0]]['latitude'] == 71.323
     assert test_dict_nsa[list(test_dict_nsa)[0]]['longitude'] == -156.615
+
+
+def test_calculate_percentages():
+    ds = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_AOSACSM)
+    fields = ['sulfate', 'ammonium', 'nitrate', 'chloride']
+    time = '2023-04-20T03:49:45.000000000'
+    time_slice = ('2023-04-20T17:38:20.000000000', '2023-04-20T20:29:47.000000000')
+    threshold = 0.0
+
+    # Without threshold, chloride has invalid negative values so
+    # percentages will be incorrect. pytest.warns fails the test on its
+    # own if no matching warning is raised.
+    with pytest.warns(UserWarning, match='negatives values'):
+        act.utils.calculate_percentages(ds, fields, time=time, threshold=None)
+
+    # Test with threshold and singular time
+    percentages = act.utils.calculate_percentages(ds, fields, time=time, threshold=threshold)
+    assert 'sulfate' in percentages
+    assert 'chloride' in percentages
+    assert np.round(percentages["sulfate"], 3) == 66.125
+    assert np.round(percentages["chloride"], 3) == 0.539
+
+    # Test with sliced time
+    percentages = act.utils.calculate_percentages(
+        ds, fields, time_slice=time_slice, threshold=threshold
+    )
+    assert np.round(percentages["sulfate"], 3) == 68.342
+    assert np.round(percentages["chloride"], 3) == 1.042
+
+    # Run on all times and check that the "no time parameter" warning is raised.
+    with pytest.warns(UserWarning, match='No time parameter used'):
+        percentages = act.utils.calculate_percentages(ds, fields, threshold=threshold)
+    assert np.round(percentages["sulfate"], 3) == 66.373
+    assert np.round(percentages["chloride"], 3) == 0.915
+
+    # Release the sample file.
+    ds.close()
@@ -15,7 +15,7 @@
     import moviepy.video.io.ImageSequenceClip  # noqa
 
     MOVIEPY_AVAILABLE = True
-except ImportError:
+except (ImportError, RuntimeError):
     MOVIEPY_AVAILABLE = False