From 66a0e766d6dd65e9e2c014fcd20b6d2fa0b6093e Mon Sep 17 00:00:00 2001
From: Hydrogen Service <hmei-hydro@verde.princeton.edu>
Date: Sun, 5 Jan 2025 09:32:04 -0500
Subject: [PATCH] add test_subset_forcing_users.py

---
 .github/workflows/run-performance-tests.yaml |  3 +
 performance/conftest.py                      |  1 +
 performance/test_1pt_1wy.py                  |  3 +-
 performance/test_subset_forcing_users.py     | 69 ++++++++++++++++++++
 scripts/append_artifacts.py                  |  2 +-
 5 files changed, 76 insertions(+), 2 deletions(-)
 create mode 100644 performance/test_subset_forcing_users.py

diff --git a/.github/workflows/run-performance-tests.yaml b/.github/workflows/run-performance-tests.yaml
index 7388571..b8b7275 100644
--- a/.github/workflows/run-performance-tests.yaml
+++ b/.github/workflows/run-performance-tests.yaml
@@ -51,6 +51,9 @@ jobs:
           pytest -s performance/test_full_3d_pfb.py --wy=2003 --wy_month=${{ github.event.inputs.wy_month}} --cache=hot
           pytest -s performance/test_subset_forcing.py --wy=2007 --cache=cold
           pytest -s performance/test_subset_forcing.py --wy=2007 --cache=hot
+          pytest -s performance/test_subset_forcing_users.py --wy 2009 --cache=cold --users=2
+          pytest -s performance/test_subset_forcing_users.py --wy 2009 --cache=hot --users=2
+          
 
           echo "Show ./artifacts/log_artifact.csv file"
           cat ./artifacts/log_artifact.csv
diff --git a/performance/conftest.py b/performance/conftest.py
index 6d9f726..f1ecb49 100644
--- a/performance/conftest.py
+++ b/performance/conftest.py
@@ -9,3 +9,4 @@ def pytest_addoption(parser):
     parser.addoption("--wy_month", action="store", default="02")
     parser.addoption("--comment", action="store", default="")
     parser.addoption("--cpus", action="store", default="8")
+    parser.addoption("--users", action="store", default="1")
\ No newline at end of file
diff --git a/performance/test_1pt_1wy.py b/performance/test_1pt_1wy.py
index 0153f24..c1aaf14 100644
--- a/performance/test_1pt_1wy.py
+++ b/performance/test_1pt_1wy.py
@@ -76,6 +76,7 @@ def write_log(scenario_name, request, local_remote, duration):
     cache_state = request.config.getoption("--cache")
     wy = request.config.getoption("--wy")
     cpus = request.config.getoption("--cpus")
+    users = request.config.getoption("--users")
     hf_hydrodata_version = importlib.metadata.version("hf_hydrodata")
     subsettools_version = importlib.metadata.version("subsettools")
     comment = request.config.getoption("--comment")
@@ -90,7 +91,7 @@ def write_log(scenario_name, request, local_remote, duration):
     est = pytz.timezone('US/Eastern')
     current_time_est = datetime.datetime.now(est)
     cur_date = current_time_est.strftime("%Y-%m-%d:%H:%M:%S")
-    line = f"{cur_date},{scenario_name},{hf_hydrodata_version},{hydrodata_url},{subsettools_version},{local_remote},{hostname},{cpus},{cache_state},{wy},{comment},{duration}\n"
+    line = f"{cur_date},{scenario_name},{hf_hydrodata_version},{hydrodata_url},{subsettools_version},{local_remote},{hostname},{cpus},{users},{cache_state},{wy},{comment},{duration}\n"
     log_file = f"{log_directory}/log_artifact.csv"
     with open(log_file, "a+") as stream:
         stream.write(line)
diff --git a/performance/test_subset_forcing_users.py b/performance/test_subset_forcing_users.py
new file mode 100644
index 0000000..141014f
--- /dev/null
+++ b/performance/test_subset_forcing_users.py
@@ -0,0 +1,69 @@
+"""
+Performance test for calling subsettools function subset_forcing
+in parallel threads to simulate multiple users downloading data at the same time.
+"""
+
+# pylint: disable=C0301,W1514
+
+import os
+import time
+import shutil
+import subsettools as st
+import test_1pt_1wy
+import concurrent.futures
+
+def test_scenario(request):
+    """
+    Time concurrent calls to the subsettools function subset_forcing and log the result.
+    One thread is started per simulated user (--users); each calls _execute_scenario
+    with its own ./forcing_files_<index> directory, all of which are removed afterwards.
+    The logged duration is the wall-clock time until every thread has finished.
+    """
+
+    local_remote = test_1pt_1wy.register_email_pin("private")
+    wy = int(request.config.getoption("--wy"))
+    nthreads = int(request.config.getoption("--users"))
+    data_dirs = [f"./forcing_files_{index}" for index in range(nthreads)]
+    try:
+        t0 = time.time()
+        # Size the pool to the user count: the default worker cap could otherwise
+        # queue some users instead of running them all at the same time.
+        with concurrent.futures.ThreadPoolExecutor(max_workers=max(1, nthreads)) as executor:
+            futures = [
+                executor.submit(_execute_scenario, data_dir, wy, index)
+                for index, data_dir in enumerate(data_dirs)
+            ]
+            # result() re-raises any exception from a worker so failures fail the test.
+            for future in concurrent.futures.as_completed(futures):
+                future.result()
+        duration = round(time.time() - t0, 2)
+        test_1pt_1wy.write_log("subset_forcing_users", request, local_remote, duration)
+        for data_dir in data_dirs:
+            assert os.path.exists(f"{data_dir}/CW3E.Press.000001_to_000024.pfb")
+    finally:
+        # Always clean up, even when a download or an assertion fails.
+        for data_dir in data_dirs:
+            shutil.rmtree(data_dir, ignore_errors=True)
+    for data_dir in data_dirs:
+        assert not os.path.exists(f"{data_dir}/CW3E.Press.000001_to_000024.pfb")
+
+
+
+def _execute_scenario(data_dir, wy, index):
+    """
+    Call subset_forcing for one simulated user, writing one month of CW3E data to data_dir.
+    The requested month starts `index` months after October 1 of year `wy`, so each
+    user index asks for a different month. data_dir is created if it does not exist.
+    """
+    os.makedirs(data_dir, exist_ok=True)
+    # Count months 0-based from January of wy; divmod carries past December so
+    # every index yields a valid (year offset, month) pair instead of month 13+.
+    start_carry, start_month0 = divmod(9 + index, 12)
+    end_carry, end_month0 = divmod(10 + index, 12)
+    forcing_paths = st.subset_forcing(
+        ij_bounds=(2865, 1143, 2923, 1184),
+        grid="conus2",
+        start=f"{wy + start_carry}-{start_month0 + 1:02}-01",
+        end=f"{wy + end_carry}-{end_month0 + 1:02}-01",
+        dataset="CW3E", write_dir=data_dir)
+    assert len(forcing_paths.keys()) == 8
\ No newline at end of file
diff --git a/scripts/append_artifacts.py b/scripts/append_artifacts.py
index d503bf8..b77b3fb 100644
--- a/scripts/append_artifacts.py
+++ b/scripts/append_artifacts.py
@@ -46,7 +46,7 @@ def append_csv_file(csv_contents, archive_csv_file):
         # Archive file does not exists, create it with a CSV file header
         with open(archive_csv_file, "a+") as fp:
             fp.write(
-                "date,scenario,hf_hydrodata_version,hydrodata_url,subsettools_version,remotelocal,server,cpus,hotcold,wy,comment,duration\n"
+                "date,scenario,hf_hydrodata_version,hydrodata_url,subsettools_version,remotelocal,server,cpus,users,hotcold,wy,comment,duration\n"
             )
 
     added_rows = 0