From 46b478a8bfadd85a3d240b33054b9d7a535fa8fb Mon Sep 17 00:00:00 2001
From: jiakf <jkfeng@uchicago.edu>
Date: Wed, 24 Jan 2024 12:49:23 -0600
Subject: [PATCH] DEV-1185: fix AWG download of related files

Previously, requests for file metadata in the AWG environment were
unauthenticated. However, the metadata endpoint in AWG requires
authenticated requests. This change passes the token to GDCIndexClient
so that file metadata requests in the AWG environment are sent with an
X-Auth-Token header.
---
 .pre-commit-config.yaml       |  3 ++-
 gdc_client/download/parser.py | 21 ++++++++++-----------
 gdc_client/query/index.py     |  8 +++++---
 3 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 330d0325..a9e5a47c 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,3 +1,4 @@
+repos:
 - repo: https://github.com/pre-commit/pre-commit-hooks
   rev: v2.5.0
   hooks:
@@ -5,7 +6,7 @@
     - id: no-commit-to-branch
       args: [--branch, develop, --branch, master, --pattern, release/.*]
 - repo: https://github.com/psf/black
-  rev: 19.10b0
+  rev: 23.12.1
   hooks:
     - id: black
 -   repo: git@github.com:Yelp/detect-secrets
diff --git a/gdc_client/download/parser.py b/gdc_client/download/parser.py
index 50c4a42d..15987b61 100644
--- a/gdc_client/download/parser.py
+++ b/gdc_client/download/parser.py
@@ -14,8 +14,7 @@
 
 
 def validate_args(parser, args):
-    """ Validate argparse namespace.
-    """
+    """Validate argparse namespace."""
     if not args.file_ids and not args.manifest:
         msg = "must specify either --manifest or file_id"
         parser.error(msg)
@@ -42,12 +41,12 @@ def get_client(args, index_client):
 
 
 def download(parser, args):
-    """ Downloads data from the GDC.
+    """Downloads data from the GDC.
 
-        Combine the smaller files (~KB range) into a grouped download.
-        The API now supports combining UUID's into one uncompressed tarfile
-        using the ?tarfile url parameter. Combining many smaller files into one
-        download decreases the number of open connections we have to make
+    Combine the smaller files (~KB range) into a grouped download.
+    The API now supports combining UUID's into one uncompressed tarfile
+    using the ?tarfile url parameter. Combining many smaller files into one
+    download decreases the number of open connections we have to make
     """
     successful_count = 0
     unsuccessful_count = 0
@@ -81,7 +80,9 @@ def download(parser, args):
 
     ids = ids_map.values() if args.latest else ids_map.keys()
 
-    index_client = GDCIndexClient(args.server, not args.no_verify)
+    index_client = GDCIndexClient(
+        uri=args.server, token=args.token_file, verify=not args.no_verify
+    )
     client = get_client(args, index_client)
 
     # separate the smaller files from the larger files
@@ -164,7 +165,6 @@ def download(parser, args):
 
 
 def retry_download(client, url, retry_amount, no_auto_retry, wait_time):
-
     log.debug("Retrying download {0}".format(url))
 
     error = True
@@ -194,8 +194,7 @@ def retry_download(client, url, retry_amount, no_auto_retry, wait_time):
 
 
 def config(parser, download_defaults):
-    """ Configure a parser for download.
-    """
+    """Configure a parser for download."""
     func = partial(download, parser)
     download_defaults["func"] = func
 
diff --git a/gdc_client/query/index.py b/gdc_client/query/index.py
index a090017f..dc6983a0 100644
--- a/gdc_client/query/index.py
+++ b/gdc_client/query/index.py
@@ -8,12 +8,13 @@
 
 
 class GDCIndexClient(object):
-    def __init__(self, uri, verify=True):
+    def __init__(self, uri, token=None, verify=True):
         self.uri = uri
         self.active_meta_endpoint = "/v0/files"
         self.legacy_meta_endpoint = "/v0/legacy/files"
         self.metadata = dict()
         self.verify = verify
+        self.token = token
 
     def get_related_files(self, uuid):
         # type: (str) -> list[str]
@@ -54,7 +55,8 @@ def _get_hits(self, url, metadata_query):
         """
         json_response = {}
         # using a POST request lets us avoid the MAX URL character length limit
-        r = requests.post(url, json=metadata_query, verify=self.verify)
+        headers = {"X-Auth-Token": self.token}
+        r = requests.post(url, json=metadata_query, headers=headers, verify=self.verify)
 
         if r is None:
             return []
@@ -151,7 +153,7 @@ def _get_metadata(self, uuids):
         return self.metadata
 
     def separate_small_files(self, ids, chunk_size):
-        """ Separate big and small files
+        """Separate big and small files
 
         Separate the small files from the larger files in
         order to combine them into single grouped downloads. This will reduce