diff --git a/README.md b/README.md index 4505510..e235033 100644 --- a/README.md +++ b/README.md @@ -90,6 +90,7 @@ OPTIONAL: log messages to use a text string format. By default log messages will be formatted as JSON. * `MAX_DOWNLOAD_RETRIES`: Number of times to retry HTTP download calls that fail due to transient errors. +* `POST_URL_LENGTH`: URL length threshold, in characters. Download URLs longer than this are submitted via POST request (with the query string sent as the request body) instead of GET. Default: 2000. OPTIONAL -- Use with CAUTION: diff --git a/harmony_service_lib/http.py b/harmony_service_lib/http.py index 16eea29..16bbd54 100644 --- a/harmony_service_lib/http.py +++ b/harmony_service_lib/http.py @@ -217,12 +217,18 @@ def _download( tries = 0 retry = True response = None + download_url = None while retry is True: retry = False tries += 1 try: session = _earthdata_session() session.auth = auth + if data is None and len(url) > config.post_url_length: + parsed_url = urlparse(url) + data = parsed_url.query + download_url = f"{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path}" + if data is None: response = session.get(url, headers=headers, timeout=TIMEOUT, **kwargs_download_agent) if response.ok: @@ -234,7 +240,12 @@ def _download( # Including this header since the stdlib does by default, # but we've switched to `requests` which does not. headers['Content-Type'] = 'application/x-www-form-urlencoded' - response = session.post(url, headers=headers, data=data, timeout=TIMEOUT, **kwargs_download_agent) + response = session.post( + download_url if download_url is not None else url, + headers=headers, + data=data, + timeout=TIMEOUT, + **kwargs_download_agent) if response.ok: return response else: diff --git a/harmony_service_lib/util.py b/harmony_service_lib/util.py index f1d145e..9646bf1 100644 --- a/harmony_service_lib/util.py +++ b/harmony_service_lib/util.py @@ -46,6 +46,7 @@ ENV: The application environment. One of: dev, test. Used for local development. TEXT_LOGGER: Whether to log in plaintext or JSON. Default: True (plaintext). 
MAX_DOWNLOAD_RETRIES: Number of times to retry HTTP download calls that fail due to transient errors. + POST_URL_LENGTH: URL length threshold, in characters. Download URLs longer than this are submitted via POST request instead of GET. Default: 2000. """ from base64 import b64decode @@ -96,7 +97,8 @@ 'text_logger', 'shared_secret_key', 'user_agent', - 'max_download_retries' + 'max_download_retries', + 'post_url_length' ]) @@ -192,7 +194,8 @@ def int_envvar(name: str, default: int) -> int: text_logger=bool_envvar('TEXT_LOGGER', False), shared_secret_key=str_envvar('SHARED_SECRET_KEY', DEFAULT_SHARED_SECRET_KEY), user_agent=str_envvar('USER_AGENT', 'harmony (unknown version)'), - max_download_retries=int_envvar('MAX_DOWNLOAD_RETRIES', 0) + max_download_retries=int_envvar('MAX_DOWNLOAD_RETRIES', 0), + post_url_length=int_envvar('POST_URL_LENGTH', 2000) ) if validate: diff --git a/tests/test_util.py b/tests/test_util.py index dc3261b..3cd13ef 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -124,6 +124,24 @@ def test_https_download_with_post_sets_api_request_uuid(self, post, get_version) post.assert_called_with('https://example/file.txt?A-api-request-uuid=abc123', headers={'user-agent': f'harmony (unknown version) harmony-service-lib/{fake_lib_version} (gdal-subsetter)', 'Content-Type': 'application/x-www-form-urlencoded'}, data = { 'foo': 'bar' }, timeout=60, stream=True) + @patch('harmony_service_lib.util.get_version') + @patch.object(Session, 'post') + def test_http_download_with_long_url_get_becomes_post(self, post, get_version): + request_context['request_id'] = 'abc123' + app_name = 'gdal-subsetter' + fake_lib_version = '0.1.0' + get_version.return_value = fake_lib_version + # Set post_url_length to 300 and download with a URL longer than 300 characters; the download should be done with POST. + cfg = config_fixture(app_name=app_name,post_url_length=300) + with patch('builtins.open', mock_open()): + 
util.download('https://opendap.uat.earthdata.nasa.gov/collections/C1245618475-EEDTEST/granules/GPM_3IMERGHH.06:3B-HHR.MS.MRG.3IMERG.20200118-S233000-E235959.1410.V06B.HDF5.dap.nc4?dap4.ce=%2FGrid%2Ftime%3B%2FGrid%2Flon%3B%2FGrid%2Flat_bnds%3B%2FGrid%2Ftime_bnds%3B%2FGrid%2Flon_bnds%3B%2FGrid%2Flat', + 'tmp', + access_token='', + cfg=cfg) + post.assert_called_with('https://opendap.uat.earthdata.nasa.gov/collections/C1245618475-EEDTEST/granules/GPM_3IMERGHH.06:3B-HHR.MS.MRG.3IMERG.20200118-S233000-E235959.1410.V06B.HDF5.dap.nc4', + headers={'user-agent': f'harmony (unknown version) harmony-service-lib/{fake_lib_version} (gdal-subsetter)', 'Content-Type': 'application/x-www-form-urlencoded'}, data = 'dap4.ce=%2FGrid%2Ftime%3B%2FGrid%2Flon%3B%2FGrid%2Flat_bnds%3B%2FGrid%2Ftime_bnds%3B%2FGrid%2Flon_bnds%3B%2FGrid%2Flat&A-api-request-uuid=abc123', timeout=60, stream=True) + + class TestStage(unittest.TestCase): def setUp(self): self.config = util.config(validate=False) diff --git a/tests/util.py b/tests/util.py index d978d99..da54958 100644 --- a/tests/util.py +++ b/tests/util.py @@ -45,7 +45,8 @@ def config_fixture(fallback_authn_enabled=False, user_agent=None, app_name=None, text_logger=False, - max_download_retries=5): + max_download_retries=5, + post_url_length=2000): c = util.config(validate=False) return util.Config( # Override @@ -59,6 +60,7 @@ def config_fixture(fallback_authn_enabled=False, app_name=app_name, text_logger=text_logger, max_download_retries=max_download_retries, + post_url_length=post_url_length, # Default env=c.env, oauth_host=c.oauth_host,