Skip to content

Commit

Permalink
DEV-1185: fix AWG download of related files
Browse files Browse the repository at this point in the history
Previously, requests for file metadata in the AWG environment were
unauthenticated. However, the metadata endpoint in AWG requires proper
authenticated requests. This change ensures that requests for file
metadata in the AWG environment are properly authenticated.
  • Loading branch information
jiakf committed Jan 24, 2024
1 parent b62fd75 commit 46b478a
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 15 deletions.
3 changes: 2 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.5.0
hooks:
- id: end-of-file-fixer
- id: no-commit-to-branch
args: [--branch, develop, --branch, master, --pattern, release/.*]
- repo: https://github.com/psf/black
rev: 19.10b0
rev: 23.12.1
hooks:
- id: black
- repo: [email protected]:Yelp/detect-secrets
Expand Down
21 changes: 10 additions & 11 deletions gdc_client/download/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@


def validate_args(parser, args):
""" Validate argparse namespace.
"""
"""Validate argparse namespace."""
if not args.file_ids and not args.manifest:
msg = "must specify either --manifest or file_id"
parser.error(msg)
Expand All @@ -42,12 +41,12 @@ def get_client(args, index_client):


def download(parser, args):
""" Downloads data from the GDC.
"""Downloads data from the GDC.
Combine the smaller files (~KB range) into a grouped download.
The API now supports combining UUID's into one uncompressed tarfile
using the ?tarfile url parameter. Combining many smaller files into one
download decreases the number of open connections we have to make
Combine the smaller files (~KB range) into a grouped download.
The API now supports combining UUID's into one uncompressed tarfile
using the ?tarfile url parameter. Combining many smaller files into one
download decreases the number of open connections we have to make
"""
successful_count = 0
unsuccessful_count = 0
Expand Down Expand Up @@ -81,7 +80,9 @@ def download(parser, args):

ids = ids_map.values() if args.latest else ids_map.keys()

index_client = GDCIndexClient(args.server, not args.no_verify)
index_client = GDCIndexClient(
uri=args.server, token=args.token_file, verify=not args.no_verify
)
client = get_client(args, index_client)

# separate the smaller files from the larger files
Expand Down Expand Up @@ -164,7 +165,6 @@ def download(parser, args):


def retry_download(client, url, retry_amount, no_auto_retry, wait_time):

log.debug("Retrying download {0}".format(url))

error = True
Expand Down Expand Up @@ -194,8 +194,7 @@ def retry_download(client, url, retry_amount, no_auto_retry, wait_time):


def config(parser, download_defaults):
""" Configure a parser for download.
"""
"""Configure a parser for download."""
func = partial(download, parser)
download_defaults["func"] = func

Expand Down
8 changes: 5 additions & 3 deletions gdc_client/query/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@


class GDCIndexClient(object):
def __init__(self, uri, verify=True):
def __init__(self, uri, token=None, verify=True):
self.uri = uri
self.active_meta_endpoint = "/v0/files"
self.legacy_meta_endpoint = "/v0/legacy/files"
self.metadata = dict()
self.verify = verify
self.token = token

def get_related_files(self, uuid):
# type: (str) -> list[str]
Expand Down Expand Up @@ -54,7 +55,8 @@ def _get_hits(self, url, metadata_query):
"""
json_response = {}
# using a POST request lets us avoid the MAX URL character length limit
r = requests.post(url, json=metadata_query, verify=self.verify)
headers = {"X-Auth-Token": self.token}
r = requests.post(url, json=metadata_query, headers=headers, verify=self.verify)

if r is None:
return []
Expand Down Expand Up @@ -151,7 +153,7 @@ def _get_metadata(self, uuids):
return self.metadata

def separate_small_files(self, ids, chunk_size):
""" Separate big and small files
"""Separate big and small files
Separate the small files from the larger files in
order to combine them into single grouped downloads. This will reduce
Expand Down

0 comments on commit 46b478a

Please sign in to comment.