From 66a0e766d6dd65e9e2c014fcd20b6d2fa0b6093e Mon Sep 17 00:00:00 2001 From: Hydrogen Service Date: Sun, 5 Jan 2025 09:32:04 -0500 Subject: [PATCH] add test_subset_forcing_users.py --- .github/workflows/run-performance-tests.yaml | 3 + performance/conftest.py | 1 + performance/test_1pt_1wy.py | 3 +- performance/test_subset_forcing_users.py | 69 ++++++++++++++++++++ scripts/append_artifacts.py | 2 +- 5 files changed, 76 insertions(+), 2 deletions(-) create mode 100644 performance/test_subset_forcing_users.py diff --git a/.github/workflows/run-performance-tests.yaml b/.github/workflows/run-performance-tests.yaml index 7388571..b8b7275 100644 --- a/.github/workflows/run-performance-tests.yaml +++ b/.github/workflows/run-performance-tests.yaml @@ -51,6 +51,9 @@ jobs: pytest -s performance/test_full_3d_pfb.py --wy=2003 --wy_month=${{ github.event.inputs.wy_month}} --cache=hot pytest -s performance/test_subset_forcing.py --wy=2007 --cache=cold pytest -s performance/test_subset_forcing.py --wy=2007 --cache=hot + pytest -s performance/test_subset_forcing_users.py --wy 2009 --cache=cold --users=2 + pytest -s performance/test_subset_forcing_users.py --wy 2009 --cache=hot --users=2 + echo "Show ./artifacts/log_artifact.csv file" cat ./artifacts/log_artifact.csv diff --git a/performance/conftest.py b/performance/conftest.py index 6d9f726..f1ecb49 100644 --- a/performance/conftest.py +++ b/performance/conftest.py @@ -9,3 +9,4 @@ def pytest_addoption(parser): parser.addoption("--wy_month", action="store", default="02") parser.addoption("--comment", action="store", default="") parser.addoption("--cpus", action="store", default="8") + parser.addoption("--users", action="store", default="1") \ No newline at end of file diff --git a/performance/test_1pt_1wy.py b/performance/test_1pt_1wy.py index 0153f24..c1aaf14 100644 --- a/performance/test_1pt_1wy.py +++ b/performance/test_1pt_1wy.py @@ -76,6 +76,7 @@ def write_log(scenario_name, request, local_remote, duration): cache_state = 
request.config.getoption("--cache") wy = request.config.getoption("--wy") cpus = request.config.getoption("--cpus") + users = request.config.getoption("--users") hf_hydrodata_version = importlib.metadata.version("hf_hydrodata") subsettools_version = importlib.metadata.version("subsettools") comment = request.config.getoption("--comment") @@ -90,7 +91,7 @@ def write_log(scenario_name, request, local_remote, duration): est = pytz.timezone('US/Eastern') current_time_est = datetime.datetime.now(est) cur_date = current_time_est.strftime("%Y-%m-%d:%H:%M:%S") - line = f"{cur_date},{scenario_name},{hf_hydrodata_version},{hydrodata_url},{subsettools_version},{local_remote},{hostname},{cpus},{cache_state},{wy},{comment},{duration}\n" + line = f"{cur_date},{scenario_name},{hf_hydrodata_version},{hydrodata_url},{subsettools_version},{local_remote},{hostname},{cpus},{users},{cache_state},{wy},{comment},{duration}\n" log_file = f"{log_directory}/log_artifact.csv" with open(log_file, "a+") as stream: stream.write(line) diff --git a/performance/test_subset_forcing_users.py b/performance/test_subset_forcing_users.py new file mode 100644 index 0000000..141014f --- /dev/null +++ b/performance/test_subset_forcing_users.py @@ -0,0 +1,69 @@ +""" +Performance test for calling subsettools function subset_forcing +in parallel threads to simulate multiple users downloading data at the same time. +""" + +# pylint: disable=C0301,W1514 + +import os +import time +import shutil +import subsettools as st +import test_1pt_1wy +import concurrent.futures + +def test_scenario(request): + """ + Test the scenario to call subsettools function subset_forcing and log timing. + This downloads 248 forcing files (1 month, 8 vars) which are removed after the test executes. + The scenario is executed in multiple threads to simulate several people downloading at the same time. 
+ """ + + local_remote = test_1pt_1wy.register_email_pin("private") + wy = request.config.getoption("--wy") + wy = int(wy) + nthreads = int(request.config.getoption("--users")) + t0 = time.time() + with concurrent.futures.ThreadPoolExecutor(max_workers=max(nthreads, 1)) as executor: + futures = [] + # One worker per simulated user so every download really runs concurrently. + for index in range(0, nthreads): + data_dir = f"./forcing_files_{index}" + future = executor.submit( + _execute_scenario, + data_dir, + wy, + index + ) + futures.append(future) + _ = [future.result() for future in concurrent.futures.as_completed(futures)] + t1 = time.time() + duration = round(t1 - t0, 2) + scenario_name = "subset_forcing_users" + test_1pt_1wy.write_log(scenario_name, request, local_remote, duration) + for index in range(0, nthreads): + data_dir = f"./forcing_files_{index}" + assert os.path.exists(f"{data_dir}/CW3E.Press.000001_to_000024.pfb") + shutil.rmtree(data_dir) + assert not os.path.exists(f"{data_dir}/CW3E.Press.000001_to_000024.pfb") + + + +def _execute_scenario(data_dir, wy, index): + """Subset one month of CW3E forcing into data_dir; user `index` gets the month `index` months after October of wy.""" + + os.makedirs(data_dir, exist_ok=True) + start_month = (9 + index) % 12 + 1 + start_year = wy + (9 + index) // 12 + end_month = (10 + index) % 12 + 1 + end_year = wy + (10 + index) // 12 + start_str = f"{start_year}-{start_month:02}-01" + end_str = f"{end_year}-{end_month:02}-01" + forcing_paths = st.subset_forcing( + ij_bounds = (2865, 1143, 2923, 1184), + grid="conus2", + start = start_str, + end=end_str, + dataset = "CW3E", + write_dir=data_dir) + assert len(forcing_paths.keys()) == 8 \ No newline at end of file diff --git a/scripts/append_artifacts.py b/scripts/append_artifacts.py index d503bf8..b77b3fb 100644 --- a/scripts/append_artifacts.py +++ b/scripts/append_artifacts.py @@ -46,7 +46,7 @@ def append_csv_file(csv_contents, archive_csv_file): # Archive file does not exists, create it with a CSV file header with open(archive_csv_file, "a+") as fp: fp.write( - 
"date,scenario,hf_hydrodata_version,hydrodata_url,subsettools_version,remotelocal,server,cpus,hotcold,wy,comment,duration\n" + "date,scenario,hf_hydrodata_version,hydrodata_url,subsettools_version,remotelocal,server,cpus,users,hotcold,wy,comment,duration\n" ) added_rows = 0