From 242b347adf7a22ddb1106c81b437849d65fdeb56 Mon Sep 17 00:00:00 2001 From: Nicolas <17946453+troubadour-hell@users.noreply.github.com> Date: Wed, 1 May 2024 18:45:29 +0800 Subject: [PATCH 1/3] add progress bar for downloading --- subscriber/podaac_access.py | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/subscriber/podaac_access.py b/subscriber/podaac_access.py index a898569..7349c64 100644 --- a/subscriber/podaac_access.py +++ b/subscriber/podaac_access.py @@ -2,6 +2,7 @@ import logging import netrc import re +import tqdm from http.cookiejar import CookieJar import os from os import makedirs @@ -345,26 +346,39 @@ def get_temporal_range(start, end, now): raise ValueError("One of start-date or end-date must be specified.") -def download_file(remote_file, output_path, retries=3): +def download_file(remote_file, output_path, retries=3, progress_bar=False): failed = False for r in range(retries): try: - urlretrieve(remote_file, output_path) + if progress_bar: + with open(output_path, 'wb') as f: + with requests.get(remote_file, stream=True) as r: + r.raise_for_status() + total = int(r.headers.get('content-length', 0)) + tqdm_params = { + 'desc': os.path.split(remote_file)[-1], + 'total': total, + 'miniters': 1, + 'unit': 'B', + 'unit_scale': True, + 'unit_divisor': 1024, + } + with tqdm.tqdm(**tqdm_params) as pb: + for chunk in r.iter_content(chunk_size=8192): + pb.update(len(chunk)) + f.write(chunk) + else: + urlretrieve(remote_file, output_path) except HTTPError as e: if e.code == 503: logging.warning(f'Error downloading {remote_file}. Retrying download.') - # back off on sleep time each error... time.sleep(r) - # range is exclusive, so range(3): 0,1,2 so retries will - # never be >= 3; need to subtract 1 (doh) if r >= retries-1: failed = True else: - #downlaoded fie without 503 break - - if failed: - raise Exception("Could not download file.") + if failed: + raise Exception("Could not download file.") # Retry using random exponential backoff if a 500 error is raised. Maximum 10 attempts. From a3b24bd90971f5b8983e8997434f97ea36aa4ddc Mon Sep 17 00:00:00 2001 From: Nicolas <17946453+troubadour-hell@users.noreply.github.com> Date: Wed, 1 May 2024 18:50:02 +0800 Subject: [PATCH 2/3] add progress bar for downloading --- subscriber/podaac_data_downloader.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/subscriber/podaac_data_downloader.py b/subscriber/podaac_data_downloader.py index aff5956..972f801 100755 --- a/subscriber/podaac_data_downloader.py +++ b/subscriber/podaac_data_downloader.py @@ -107,12 +107,15 @@ def create_parser(): help="Processing command to run on each downloaded file (e.g., compression). Can be specified multiple times.", action='append') + parser.add_argument("-progress", dest="show_progress", action="store_true", + help="Flag to show progress bar when downloading.") + parser.add_argument("--version", action="version", version='%(prog)s ' + __version__, help="Display script version information and exit.") # noqa E501 parser.add_argument("--verbose", dest="verbose", action="store_true", help="Verbose mode.") # noqa E501 parser.add_argument("-p", "--provider", dest="provider", default='POCLOUD', help="Specify a provider for collection search. Default is POCLOUD.") # noqa E501 - + parser.add_argument("--limit", dest="limit", default=None, type=int, help="Integer limit for number of granules to download. Useful in testing. Defaults to no limit.") # noqa E501 parser.add_argument("--dry-run", dest="dry_run", action="store_true", help="Search and identify files to download, but do not actually download them.") # noqa E501 @@ -351,7 +354,7 @@ def cmr_downloader(args, token, data_path): skip_cnt += 1 continue - pa.download_file(f,output_path) + pa.download_file(f,output_path, progress_bar=args.show_progress) #urlretrieve(f, output_path) pa.process_file(process_cmd, output_path, args) From 805460a266a5e611c70b248d26925e459e63e082 Mon Sep 17 00:00:00 2001 From: Nicolas <17946453+troubadour-hell@users.noreply.github.com> Date: Wed, 1 May 2024 18:52:48 +0800 Subject: [PATCH 3/3] add progress bar for downloading --- subscriber/podaac_data_subscriber.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/subscriber/podaac_data_subscriber.py b/subscriber/podaac_data_subscriber.py index f7bb7f5..3b93b1a 100755 --- a/subscriber/podaac_data_subscriber.py +++ b/subscriber/podaac_data_subscriber.py @@ -99,6 +99,9 @@ def create_parser(): help="Processing command to run on each downloaded file (e.g., compression). Can be specified multiple times.", action='append') + parser.add_argument("-progress", dest="show_progress", action="store_true", + help="Flag to show progress bar when downloading.") + parser.add_argument("--version", action="version", version='%(prog)s ' + __version__, help="Display script version information and exit.") # noqa E501 parser.add_argument("--verbose", dest="verbose", action="store_true", help="Verbose mode.") # noqa E501 @@ -372,7 +375,7 @@ def cmr_downloader(granules, extensions, args, data_path, file_start_times, ts_s skip_cnt += 1 continue - pa.download_file(f, output_path) + pa.download_file(f, output_path, progress_bar=args.show_progress) pa.process_file(process_cmd, output_path, args) logging.info(str(datetime.now()) + " SUCCESS: " + f) success_cnt = success_cnt + 1