Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HARMONY-1938: Remove fallback authentication functionality and related code #54

Merged
merged 2 commits into from
Dec 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 0 additions & 22 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,21 +55,6 @@ REQUIRED:
`STAGING_BUCKET` under which data will be staged
* `ENV`: The name of the environment. If 'dev' or 'test', callbacks to Harmony are
not made and data is not staged unless also using localstack
* `OAUTH_UID`, `OAUTH_PASSWORD`: Used to acquire a shared EDL token
needed for downloading granules from EDL token-aware data
sources. Services using data in S3 do not need to set this.

NOTE: If `FALLBACK_AUTHN_ENABLED` is set to True (CAUTION!)
these credentials will be used to download data *as* the EDL
application user. This may cause problems with metrics and can
result in users getting data for which they've not approved a
EULA.
* `OAUTH_CLIENT_ID`: The Earthdata application client ID.
* `OAUTH_HOST`: Set to the correct Earthdata Login URL, depending on
where the service is being deployed. This should be the same
environment where the `OAUTH_*` credentials are valid. Defaults
to UAT.
* `OAUTH_REDIRECT_URI`: A valid redirect URI for the EDL application.
* `SHARED_SECRET_KEY`: The 32-byte encryption key shared between Harmony and backend services.
This is used to encrypt & decrypt the `accessToken` in the Harmony operation message.
In a production environment, this should be injected into the container running the service
Expand All @@ -92,13 +77,6 @@ OPTIONAL:
* `MAX_DOWNLOAD_RETRIES`: Number of times to retry HTTP download calls that fail due to transient errors.
* `POST_URL_LENGTH`: Minimum URL length that will be submitted via a POST request.

OPTIONAL -- Use with CAUTION:

* `FALLBACK_AUTHN_ENABLED`: Default: False. Enable the fallback authentication that
uses the EDL application credentials. See CAUTION note above.
* `EDL_USERNAME`: The Earthdata Login username used for fallback authn.
* `EDL_PASSWORD`: The Earthdata Login password used for fallback authn.

## Development Setup

Prerequisites:
Expand Down
49 changes: 6 additions & 43 deletions harmony_service_lib/earthdata.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,6 @@
from base64 import b64encode
import re
from urllib.parse import urlparse

from requests.auth import AuthBase
from requests import Session

EDL_URL_PATTERN = r""".*urs\.earthdata\.nasa\.gov$"""


def _edl_url(url: str) -> bool:
"""Determine if the given URL is for Earthdata Login."""
hostname = urlparse(url).hostname
return re.fullmatch(EDL_URL_PATTERN, hostname) is not None


class EarthdataSession(Session):
"""Session which ensures the Authorization header is sent to correct
Expand All @@ -24,32 +12,15 @@ class EarthdataSession(Session):
session.auth = EarthdataAuth(...)

This lifecycle method on requests.Session is called when handling
redirect requests. There are two cases important for handling
Earthdata Login:

(A) When handling a redirect from a resource server to Earthdata
Login, the session will use the auth (if provided) to add the
required Authorization to the request.

(B) When handling a redirect from Earthdata Login back to a
resource server, the session will remove the Authorization header
from the request (which the `requests` package copies from the
request which caused this redirect).

redirect requests.
"""
def rebuild_auth(self, prepared_request, response):
# If not configured with an EarthdataAuth instance, defer to
# default behavior
if not self.auth:
return super().rebuild_auth(prepared_request, response)

if _edl_url(prepared_request.url):
# (A) Defer to auth to add the Authorization header
self.auth(prepared_request)
else:
# (B) Remove the Authorization header when redirecting away
# from EDL.
prepared_request.headers.pop('Authorization', None)
self.auth(prepared_request)


class EarthdataAuth(AuthBase):
Expand All @@ -63,28 +34,20 @@ class EarthdataAuth(AuthBase):
header, and the user's access token as a Bearer auth header.

"""
def __init__(self, app_uid: str, app_pwd: str, user_access_token: str):
def __init__(self, user_access_token: str):
"""Instantiate the Earthdata Auth provider.

Parameters
----------
app_uid:
The Earthdata Login Application `uid`.

app_pwd:
The Earthdata Login Application `password`.

user_access_token:
The EDL-issued token for the user making the request.
"""
creds = b64encode(f"{app_uid}:{app_pwd}".encode('utf-8')).decode('utf-8')
self.authorization_header = f'Basic {creds}, Bearer {user_access_token}'
self.authorization_header = f'Bearer {user_access_token}'

def __call__(self, r):
"""The EarthdataAuth is a callable which adds Authorization headers
when handling a request for Earthdata Login.
when handling a request for sites backed by Earthdata Login.

"""
if _edl_url(r.url):
r.headers['Authorization'] = self.authorization_header
r.headers['Authorization'] = self.authorization_header
return r
67 changes: 3 additions & 64 deletions harmony_service_lib/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
locally.

When downloading from an EDL-token aware data source, this module uses EDL shared /
federated token authentication. It includes an optional fallback authentication that
uses an EDL user to download data when the feature is enabled.
federated token authentication.

This module relies on the harmony_service_lib.util.config and its environment variables to be
set for correct operation. See that module and the project README for details.
Expand All @@ -18,9 +17,6 @@
import sys
import os
import re

import requests

from harmony_service_lib.earthdata import EarthdataAuth, EarthdataSession
from harmony_service_lib.exceptions import ServerException, ForbiddenException
from harmony_service_lib.logging import build_logger
Expand Down Expand Up @@ -178,8 +174,7 @@ def _download(
"""Implements the download functionality.

Using the EarthdataSession and EarthdataAuth extensions to the
`requests` module, this function will download the given url and
perform any necessary Earthdata Login OAuth handshakes.
`requests` module, this function will download the given url.

Parameters
----------
Expand Down Expand Up @@ -213,7 +208,7 @@ def _download(
headers = {}
if user_agent is not None:
headers['user-agent'] = user_agent
auth = EarthdataAuth(config.oauth_uid, config.oauth_password, access_token)
auth = EarthdataAuth(access_token)
tries = 0
retry = True
response = None
Expand Down Expand Up @@ -273,52 +268,6 @@ def _download(
return response


def _download_with_fallback_authn(config, url: str, data, user_agent=None, **kwargs_download_agent):
"""Downloads the given url using Basic authentication as a fallback
mechanism should the normal EDL OAuth handshake fail.

This function requires the `edl_username` and `edl_password`
attributes in the config object to be populated with valid
credentials.

Parameters
----------
config : harmony_service_lib.util.Config
The configuration for the current runtime environment.
url : str
The url for the resource to download
data : dict or Tuple[str, str]
Optional parameter for additional data to send to the server
when making an HTTP POST request. These data will be URL
encoded to a query string containing a series of `key=value`
pairs, separated by ampersands. If None (the default), the
request will be sent with an HTTP GET request.
total_retries: int
Upper limit on the number of times to retry the request
user_agent : str
The user agent that is requesting the download.
E.g. harmony/0.0.0 (harmony-sit) harmony-service-lib/4.0 (gdal-subsetter)
kwargs_download_agent: dict
kwargs to be passed to the download agent
E.g. stream=True

Returns
-------
requests.Response with the download result

"""
headers = {}
if user_agent is not None:
headers['user-agent'] = user_agent
auth = requests.auth.HTTPBasicAuth(config.edl_username, config.edl_password)
session = requests.Session()
session.auth = auth
if data is None:
return session.get(url, headers=headers, timeout=TIMEOUT, **kwargs_download_agent)
else:
return session.post(url, headers=headers, data=data, timeout=TIMEOUT)


def _log_download_performance(logger, url, duration_ms, file_size):
"""Logs a message tracking performance information related to a file download.

Expand Down Expand Up @@ -364,8 +313,6 @@ def download(config, url: str, access_token: str, data, destination_file,
b. Application credentials
4. Error response when downloading
5. Data requires EULA acceptance by user
6. If fallback authentication enabled, the application credentials are
invalid, or do not have permission to download the data.

Parameters
----------
Expand Down Expand Up @@ -422,14 +369,6 @@ def download(config, url: str, access_token: str, data, destination_file,
config, url, access_token, data, config.max_download_retries, logger, user_agent, stream=stream
)

if response is None or not response.ok:
if config.fallback_authn_enabled:
msg = ('No valid user access token in request or EDL OAuth authentication failed.'
'Fallback authentication enabled: retrying with Basic auth.')
logger.warning(msg)
response = _download_with_fallback_authn(
config, url, data, user_agent, stream=stream)

if response.ok:
if not stream:
destination_file.write(response.content)
Expand Down
44 changes: 0 additions & 44 deletions harmony_service_lib/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,24 +17,9 @@
STAGING_BUCKET: The bucket where staged files should be placed
STAGING_PATH: The base path under which staged files should be placed

Required when using HTTPS, allowing Earthdata Login auth:
OAUTH_HOST: The Earthdata Login (EDL) environment to connect to
OAUTH_CLIENT_ID: The EDL application client id used to acquire an EDL shared access token
OAUTH_UID: The EDL application UID used to acquire an EDL shared access token
OAUTH_PASSWORD: The EDL application password used to acquire an EDL shared access token
OAUTH_REDIRECT_URI: A valid redirect URI for the EDL application (NOTE: the redirect URI is
not followed or used; it does need to be in the app's redirect URI list)

Always provided by newer versions of the Harmony frontend:
USER_AGENT: The Harmony user agent string. E.g. harmony/0.0.0 (harmony-sit)

Optional, if support is needed for downloading data from an endpoint that is not
EDL-share-token aware:

FALLBACK_AUTHN_ENABLED: Whether to try downloading with the EDL_* credentials.
EDL_USERNAME: A valid EDL user entity username.
EDL_PASSWORD: The password belonging to EDL_USERNAME.

Optional when reading from or staging to S3:
USE_LOCALSTACK: 'true' if the S3 client should connect to a LocalStack instance instead of
Amazon S3 (for testing)
Expand Down Expand Up @@ -79,14 +64,6 @@
Config = namedtuple(
'Config', [
'app_name',
'oauth_host',
'oauth_client_id',
'oauth_uid',
'oauth_password',
'oauth_redirect_uri',
'fallback_authn_enabled',
'edl_username',
'edl_password',
'use_localstack',
'backend_host',
'localstack_host',
Expand All @@ -108,23 +85,13 @@ def _validated_config(config):
"""
required = [
'shared_secret_key',
'oauth_client_id',
'oauth_uid',
'oauth_password',
'oauth_redirect_uri',
'staging_path',
'staging_bucket',
'max_download_retries'
]

unset = [var.upper() for var in required if getattr(config, var) is None]

# Conditionally required
if config.fallback_authn_enabled and getattr(config, 'edl_username') is None:
unset.append("EDL_USERNAME")
if config.fallback_authn_enabled and getattr(config, 'edl_password') is None:
unset.append("EDL_PASSWORD")

if len(unset) > 0:
msg = f"Required environment variables are not set: {', '.join(unset)}"
raise Exception(msg)
Expand Down Expand Up @@ -168,22 +135,11 @@ def int_envvar(name: str, default: int) -> int:
value = environ.get(name)
return int(value) if value is not None else default

oauth_redirect_uri = str_envvar('OAUTH_REDIRECT_URI', None)
if oauth_redirect_uri is not None:
oauth_redirect_uri = parse.quote(oauth_redirect_uri)
backend_host = str_envvar('BACKEND_HOST', 'localhost')
localstack_host = str_envvar('LOCALSTACK_HOST', backend_host)

config = Config(
app_name=str_envvar('APP_NAME', sys.argv[0]),
oauth_host=str_envvar('OAUTH_HOST', 'https://uat.urs.earthdata.nasa.gov'),
oauth_client_id=str_envvar('OAUTH_CLIENT_ID', None),
oauth_uid=str_envvar('OAUTH_UID', None),
oauth_password=str_envvar('OAUTH_PASSWORD', None),
oauth_redirect_uri=oauth_redirect_uri,
fallback_authn_enabled=bool_envvar('FALLBACK_AUTHN_ENABLED', False),
edl_username=str_envvar('EDL_USERNAME', None),
edl_password=str_envvar('EDL_PASSWORD', None),
use_localstack=bool_envvar('USE_LOCALSTACK', False),
backend_host=backend_host,
localstack_host=localstack_host,
Expand Down
Loading
Loading