Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ADD: calculate_percentage function. #811

Merged
merged 9 commits into from
Mar 28, 2024
1 change: 1 addition & 0 deletions act/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
'EXAMPLE_ENA_MET',
'EXAMPLE_CCN',
'EXAMPLE_OLD_QC',
'EXAMPLE_AOSACSM',
]
},
)
1 change: 1 addition & 0 deletions act/tests/sample_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
EXAMPLE_SONDE_WILDCARD = DATASETS.fetch('sgpsondewnpnC1.b1.20190101.053200.cdf')
EXAMPLE_CEIL_WILDCARD = DATASETS.fetch('sgpceilC1.b1.20190101.000000.nc')
EXAMPLE_HYSPLIT = DATASETS.fetch('houstonaug300.0summer2010080100')
EXAMPLE_AOSACSM = DATASETS.fetch('sgpaosacsmE13.b2.20230420.000109.nc')

# Multiple files in a list
dlppi_multi_list = ['sgpdlppiC1.b1.20191015.120023.cdf', 'sgpdlppiC1.b1.20191015.121506.cdf']
Expand Down
1 change: 1 addition & 0 deletions act/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
'convert_to_potential_temp',
'arm_site_location_search',
'DatastreamParserARM',
'calculate_percentages',
],
'datetime_utils': [
'dates_between',
Expand Down
73 changes: 73 additions & 0 deletions act/utils/data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1331,3 +1331,76 @@ def arm_site_location_search(site_code='sgp', facility_code=None):
coord_dict.setdefault(site + ' ' + facility, coords)

return coord_dict


def calculate_percentages(ds, fields, time=None, time_slice=None, threshold=None, fill_value=0.0):
    """
    This function calculates percentages of different fields of a dataset.

    Each field is averaged over the selected times, and its percentage is
    that mean divided by the sum of the means of all requested fields,
    multiplied by 100.

    Parameters
    ----------
    ds : ACT Dataset
        The ACT dataset to calculate the percentages on. The dataset passed
        in is copied and is not modified.
    fields : list
        A list of all the fields to use in the percentage calculations.
    time : datetime or str
        A single time to calculate percentages on if desired. Default
        is None and all data will be included. If both `time` and
        `time_slice` are provided, `time` takes precedence.
    time_slice : tuple
        A tuple of two datetimes to grab all data between those two datetimes.
        Default is None and all data will be included.
    threshold : float
        Values that are not strictly greater than the threshold are
        considered invalid and replaced with `fill_value`.
        Default is None, in which case no values are replaced.
    fill_value : float
        Fill value for invalid data. Only used if a threshold is provided.

    Returns
    -------
    percentages : dict
        A dictionary containing the fields provided and their corresponding
        percentage that was calculated.

    Warns
    -----
    UserWarning
        If no threshold is provided and a field contains negative values, or
        if neither `time` nor `time_slice` is provided.

    """
    # Copy Dataset so we are not overriding the data.
    ds_percent = ds.copy()

    if threshold is not None:
        # Replace every value that fails the threshold test with the fill value.
        for field in fields:
            ds_percent[field] = ds_percent[field].where(ds_percent[field] > threshold, fill_value)
    else:
        # Raise warning if negative values present in a field. Testing for
        # `< 0` rather than `not all(>= 0)` keeps missing (NaN) samples from
        # being reported as negative values.
        for field in fields:
            if np.any(ds_percent[field].values < 0.0):
                warnings.warn(
                    f"{field} contains negatives values, consider using a threshold.",
                    UserWarning,
                )

    # Select the data based on a single time, a slice of times, or fall back
    # to using every time in the dataset.
    if time is not None:
        ds_percent = ds_percent.sel(time=time)
    elif time_slice is not None:
        start, stop = time_slice[0], time_slice[1]
        ds_percent = ds_percent.sel(time=slice(start, stop))
    else:
        warnings.warn(
            "No time parameter used, calculating a mean for each field for the whole dataset.",
            UserWarning,
        )

    # Calculate concentration percentage of each field in the air.
    means = [ds_percent[field].mean(skipna=True).values for field in fields]
    total = sum(means)
    percent_values = [(mean / total) * 100 for mean in means]

    # Create a dictionary of the fields and their percentages.
    percentages = dict(zip(fields, percent_values))
    ds_percent.close()
    return percentages
2 changes: 1 addition & 1 deletion act/utils/io_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from moviepy.video.io.VideoFileClip import VideoFileClip

MOVIEPY_AVAILABLE = True
except ImportError:
except (ImportError, RuntimeError):
MOVIEPY_AVAILABLE = False


Expand Down
2 changes: 1 addition & 1 deletion continuous_integration/environment_actions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ dependencies:
- shapely
- lazy_loader
- cmweather
- arm-test-data
- moviepy
- ruff
- pip
Expand All @@ -37,3 +36,4 @@ dependencies:
- arm_pyart
- icartt
- aiohttp>=3.9.0b1
- git+https://github.com/ARM-DOE/arm-test-data.git
2 changes: 1 addition & 1 deletion docs/environment_docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ dependencies:
- scikit-posthocs
- pip
- shapely<1.8.3
- arm-test-data
- moviepy
- pip:
- mpl2nc
Expand All @@ -40,3 +39,4 @@ dependencies:
- sphinxcontrib-htmlhelp==2.0.4
- sphinxcontrib-qthelp==1.0.6
- sphinxcontrib-serializinghtml==1.1.9
- git+https://github.com/ARM-DOE/arm-test-data.git
37 changes: 37 additions & 0 deletions tests/utils/test_data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,3 +520,40 @@ def test_arm_site_location_search():
assert list(test_dict_nsa)[0] == 'nsa C1'
assert test_dict_nsa[list(test_dict_nsa)[0]]['latitude'] == 71.323
assert test_dict_nsa[list(test_dict_nsa)[0]]['longitude'] == -156.615


def test_calculate_percentages():
    """Check the warnings and percentages from calculate_percentages on the AOS ACSM sample file."""
    ds = act.io.arm.read_arm_netcdf(act.tests.sample_files.EXAMPLE_AOSACSM)
    fields = ['sulfate', 'ammonium', 'nitrate', 'chloride']
    time = '2023-04-20T03:49:45.000000000'
    time_slice = ('2023-04-20T17:38:20.000000000', '2023-04-20T20:29:47.000000000')
    threshold = 0.0

    # Without threshold, chloride has invalid negative values so
    # percentages will be incorrect. Check if warning is created.
    # pytest.warns fails the test by itself when no matching warning is
    # raised, so no extra check on the recorded warnings is needed.
    with pytest.warns(UserWarning, match='consider using a threshold'):
        act.utils.calculate_percentages(ds, fields, time=time, threshold=None)

    # Test with threshold and singular time
    percentages = act.utils.calculate_percentages(ds, fields, time=time, threshold=threshold)
    assert 'sulfate' in percentages.keys()
    assert 'chloride' in percentages.keys()
    assert np.round(percentages["sulfate"], 3) == 66.125
    assert np.round(percentages["chloride"], 3) == 0.539

    # Test with sliced time
    percentages = act.utils.calculate_percentages(
        ds, fields, time_slice=time_slice, threshold=threshold
    )
    assert np.round(percentages["sulfate"], 3) == 68.342
    assert np.round(percentages["chloride"], 3) == 1.042

    # Run on all times and check if warning exists.
    with pytest.warns(UserWarning, match='No time parameter used'):
        percentages = act.utils.calculate_percentages(ds, fields, threshold=threshold)
    assert np.round(percentages["sulfate"], 3) == 66.373
    assert np.round(percentages["chloride"], 3) == 0.915

    ds.close()
2 changes: 1 addition & 1 deletion tests/utils/test_io_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import moviepy.video.io.ImageSequenceClip # noqa

MOVIEPY_AVAILABLE = True
except ImportError:
except (ImportError, RuntimeError):
MOVIEPY_AVAILABLE = False


Expand Down
Loading