From 46b478a8bfadd85a3d240b33054b9d7a535fa8fb Mon Sep 17 00:00:00 2001 From: jiakf Date: Wed, 24 Jan 2024 12:49:23 -0600 Subject: [PATCH] DEV-1185: fix AWG download of related files Previously, requests for file metadata in the AWG environment were unauthenticated. However, the metadata endpoint in AWG requires properly authenticated requests. This change allows for requests for file metadata in the AWG environment to be properly authenticated. --- .pre-commit-config.yaml | 3 ++- gdc_client/download/parser.py | 21 ++++++++++----------- gdc_client/query/index.py | 8 +++++--- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 330d0325..a9e5a47c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,3 +1,4 @@ +repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v2.5.0 hooks: @@ -5,7 +6,7 @@ - id: no-commit-to-branch args: [--branch, develop, --branch, master, --pattern, release/.*] - repo: https://github.com/psf/black - rev: 19.10b0 + rev: 23.12.1 hooks: - id: black - repo: git@github.com:Yelp/detect-secrets diff --git a/gdc_client/download/parser.py b/gdc_client/download/parser.py index 50c4a42d..15987b61 100644 --- a/gdc_client/download/parser.py +++ b/gdc_client/download/parser.py @@ -14,8 +14,7 @@ def validate_args(parser, args): - """ Validate argparse namespace. - """ + """Validate argparse namespace.""" if not args.file_ids and not args.manifest: msg = "must specify either --manifest or file_id" parser.error(msg) @@ -42,12 +41,12 @@ def get_client(args, index_client): def download(parser, args): - """ Downloads data from the GDC. + """Downloads data from the GDC. - Combine the smaller files (~KB range) into a grouped download. - The API now supports combining UUID's into one uncompressed tarfile - using the ?tarfile url parameter. 
Combining many smaller files into one - download decreases the number of open connections we have to make + Combine the smaller files (~KB range) into a grouped download. + The API now supports combining UUID's into one uncompressed tarfile + using the ?tarfile url parameter. Combining many smaller files into one + download decreases the number of open connections we have to make """ successful_count = 0 unsuccessful_count = 0 @@ -81,7 +80,9 @@ def download(parser, args): ids = ids_map.values() if args.latest else ids_map.keys() - index_client = GDCIndexClient(args.server, not args.no_verify) + index_client = GDCIndexClient( + uri=args.server, token=args.token_file, verify=not args.no_verify + ) client = get_client(args, index_client) # separate the smaller files from the larger files @@ -164,7 +165,6 @@ def download(parser, args): def retry_download(client, url, retry_amount, no_auto_retry, wait_time): - log.debug("Retrying download {0}".format(url)) error = True @@ -194,8 +194,7 @@ def retry_download(client, url, retry_amount, no_auto_retry, wait_time): def config(parser, download_defaults): - """ Configure a parser for download. 
- """ + """Configure a parser for download.""" func = partial(download, parser) download_defaults["func"] = func diff --git a/gdc_client/query/index.py b/gdc_client/query/index.py index a090017f..dc6983a0 100644 --- a/gdc_client/query/index.py +++ b/gdc_client/query/index.py @@ -8,12 +8,13 @@ class GDCIndexClient(object): - def __init__(self, uri, verify=True): + def __init__(self, uri, token=None, verify=True): self.uri = uri self.active_meta_endpoint = "/v0/files" self.legacy_meta_endpoint = "/v0/legacy/files" self.metadata = dict() self.verify = verify + self.token = token def get_related_files(self, uuid): # type: (str) -> list[str] @@ -54,7 +55,8 @@ def _get_hits(self, url, metadata_query): """ json_response = {} # using a POST request lets us avoid the MAX URL character length limit - r = requests.post(url, json=metadata_query, verify=self.verify) + headers = {"X-Auth-Token": self.token} + r = requests.post(url, json=metadata_query, headers=headers, verify=self.verify) if r is None: return [] @@ -151,7 +153,7 @@ def _get_metadata(self, uuids): return self.metadata def separate_small_files(self, ids, chunk_size): - """ Separate big and small files + """Separate big and small files Separate the small files from the larger files in order to combine them into single grouped downloads. This will reduce