From 35290e521475a39b0e02275dfb90a3251c3e0922 Mon Sep 17 00:00:00 2001 From: Lizhen You Date: Mon, 23 Dec 2024 23:40:18 -0800 Subject: [PATCH 1/2] Add User-Agent header to download_dataset function Fix the HTTP Error 403 raised by urlretrieve() by sending the request through urlopen() with an explicit User-Agent header Signed-off-by: Lizhen You --- python/cuvs_bench/cuvs_bench/get_dataset/__main__.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/python/cuvs_bench/cuvs_bench/get_dataset/__main__.py b/python/cuvs_bench/cuvs_bench/get_dataset/__main__.py index a6b154ef2..f26a24a40 100644 --- a/python/cuvs_bench/cuvs_bench/get_dataset/__main__.py +++ b/python/cuvs_bench/cuvs_bench/get_dataset/__main__.py @@ -17,7 +17,7 @@ import os import subprocess import sys -from urllib.request import urlretrieve +import urllib.request def get_dataset_path(name, ann_bench_data_path): @@ -29,7 +29,10 @@ def get_dataset_path(name, ann_bench_data_path): def download_dataset(url, path): if not os.path.exists(path): print(f"downloading {url} -> {path}...") - urlretrieve(url, path) + req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"}) + with urllib.request.urlopen(req) as response, open(path, 'wb') as out_file: + data = response.read() + out_file.write(data) def convert_hdf5_to_fbin(path, normalize): From 6f8ba11716e4ab170291ab47cded4f9db9e0b79a Mon Sep 17 00:00:00 2001 From: Lizhen You Date: Sat, 28 Dec 2024 03:29:34 -0800 Subject: [PATCH 2/2] Only load algos configuration files with .yaml suffix Fix the following warning emitted during a benchmark run: ./cuvs_benchmarks/lib/python3.12/site-packages/cuvs_bench/run/run.py:186: UserWarning: Could not load YAML config ./lib/python3.12/site-packages/cuvs_bench/run/../config/algos/__pycache__ due to [Errno 21] Is a directory: ' warnings.warn(f"Could not load YAML config {algo_f} due to {e}") Signed-off-by: Lizhen You --- python/cuvs_bench/cuvs_bench/run/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuvs_bench/cuvs_bench/run/run.py 
b/python/cuvs_bench/cuvs_bench/run/run.py index 0159d2c19..b7813f1c6 100644 --- a/python/cuvs_bench/cuvs_bench/run/run.py +++ b/python/cuvs_bench/cuvs_bench/run/run.py @@ -140,7 +140,7 @@ def gather_algorithm_configs( algos_conf_fs = [ os.path.join(scripts_path, "../config", "algos", f) for f in algos_conf_fs - if ".json" not in f and "constraint" not in f and ".py" not in f + if f.endswith(".yaml") ] if configuration: