From 371ca7fc273a97004ffd6dda16de6c9792af17e9 Mon Sep 17 00:00:00 2001 From: Michael Huneke Date: Tue, 28 Jan 2025 11:57:29 -0500 Subject: [PATCH 1/9] Add AttachmentConfig, branch logic for attachment URLs if CDN_URL is set --- .../opportunities_v1/get_opportunity.py | 18 ++++++++-- api/src/util/file_util.py | 14 ++++++++ .../test_opportunity_route_get.py | 31 ++++++++++++++++ api/tests/src/util/test_file_util.py | 36 +++++++++++++++++++ 4 files changed, 97 insertions(+), 2 deletions(-) diff --git a/api/src/services/opportunities_v1/get_opportunity.py b/api/src/services/opportunities_v1/get_opportunity.py index d11e18dce..2d55de6d4 100644 --- a/api/src/services/opportunities_v1/get_opportunity.py +++ b/api/src/services/opportunities_v1/get_opportunity.py @@ -1,4 +1,5 @@ from datetime import date +import os from sqlalchemy import select from sqlalchemy.orm import noload, selectinload @@ -8,7 +9,13 @@ from src.api.route_utils import raise_flask_error from src.db.models.agency_models import Agency from src.db.models.opportunity_models import Opportunity, OpportunityAttachment, OpportunitySummary -from src.util.file_util import pre_sign_file_location +from src.util.file_util import pre_sign_file_location, convert_s3_to_cdn_url +from src.util.env_config import PydanticBaseEnvConfig + + +class AttachmentConfig(PydanticBaseEnvConfig): + # If the CDN URL is set, we'll use it instead of pre-signing the file locations + cdn_url: str | None = None def _fetch_opportunity( @@ -50,7 +57,14 @@ def get_opportunity(db_session: db.Session, opportunity_id: int) -> Opportunity: db_session, opportunity_id, load_all_opportunity_summaries=False ) - pre_sign_opportunity_file_location(opportunity.opportunity_attachments) + # attachment_config = AttachmentConfig() + if os.environ.get("CDN_URL"): + for opp_att in opportunity.opportunity_attachments: + opp_att.download_path = convert_s3_to_cdn_url( + opp_att.file_location, os.environ.get("CDN_URL") + ) + else: + pre_sign_opportunity_file_location(opportunity.opportunity_attachments) return opportunity diff --git a/api/src/util/file_util.py b/api/src/util/file_util.py index 3322c3c5b..5635dae67 100644 --- a/api/src/util/file_util.py +++ b/api/src/util/file_util.py @@ -166,3 +166,17 @@ def read_file(path: str | Path, mode: str = "r", encoding: str | None = None) -> """Simple function for just getting all of the contents of a file""" with open_stream(path, mode, encoding) as input_file: return input_file.read() + + +def convert_s3_to_cdn_url(file_path: str, cdn_url: str) -> str: + """ + Convert an S3 URL to a CDN URL + + Example: + s3://bucket-name/path/to/file.txt -> https://cdn.example.com/path/to/file.txt + """ + if not is_s3_path(file_path): + raise ValueError(f"Expected s3:// path, got: {file_path}") + + _, key = split_s3_url(file_path) + return f"{cdn_url.rstrip('/')}/{key}" diff --git a/api/tests/src/api/opportunities_v1/test_opportunity_route_get.py b/api/tests/src/api/opportunities_v1/test_opportunity_route_get.py index 722e6df35..608a99799 100644 --- a/api/tests/src/api/opportunities_v1/test_opportunity_route_get.py +++ b/api/tests/src/api/opportunities_v1/test_opportunity_route_get.py @@ -153,3 +153,34 @@ def test_get_opportunity_404_not_found_is_draft(client, api_auth_token, enable_f resp.get_json()["message"] == f"Could not find Opportunity with ID {opportunity.opportunity_id}" ) + + +def test_get_opportunity_returns_cdn_urls( + client, api_auth_token, monkeypatch_session, enable_factory_create, db_session, mock_s3_bucket +): + monkeypatch_session.setenv("CDN_URL", "https://cdn.example.com") + """Test that S3 file locations are converted to CDN URLs in the response""" + # Create an opportunity with a specific attachment + opportunity = OpportunityFactory.create(opportunity_attachments=[]) + + object_name = "test_file_1.txt" + file_loc = f"s3://{mock_s3_bucket}/{object_name}" + OpportunityAttachmentFactory.create( + file_location=file_loc, opportunity=opportunity, file_contents="Hello, world" + ) + + # Make the GET request + resp = client.get( + f"/v1/opportunities/{opportunity.opportunity_id}", headers={"X-Auth": api_auth_token} + ) + + # Check the response + assert resp.status_code == 200 + response_data = resp.get_json()["data"] + + # Verify attachment URL is a CDN URL + assert len(response_data["attachments"]) == 1 + attachment = response_data["attachments"][0] + + assert attachment["download_path"].startswith("https://cdn.") + assert "s3://" not in attachment["download_path"] diff --git a/api/tests/src/util/test_file_util.py b/api/tests/src/util/test_file_util.py index 06cddbf0c..aafa609ae 100644 --- a/api/tests/src/util/test_file_util.py +++ b/api/tests/src/util/test_file_util.py @@ -211,3 +211,39 @@ def test_move_file_local_disk(tmp_path): assert file_util.file_exists(other_file_path) is True assert file_util.read_file(other_file_path) == contents + + +@pytest.mark.parametrize( + "s3_path,cdn_url,expected", + [ + ( + "s3://my-bucket/path/to/file.pdf", + "cdn.example.com", + "https://cdn.example.com/path/to/file.pdf", + ), + ( + "s3://local-mock-public-bucket/opportunities/9/attachments/79853231/manager.webm", + "cdn.example.com", + "https://cdn.example.com/opportunities/9/attachments/79853231/manager.webm", + ), + # Test with trailing slash in CDN URL + ( + "s3://my-bucket/file.txt", + "cdn.example.com/", + "https://cdn.example.com/file.txt", + ), + # Test with subdirectory in CDN URL + ( + "s3://my-bucket/file.txt", + "cdn.example.com/assets", + "https://cdn.example.com/assets/file.txt", + ), + ], +) +def test_convert_s3_to_cdn_url(s3_path, cdn_url, expected): + assert file_util.convert_s3_to_cdn_url(s3_path, cdn_url) == expected + + +def test_convert_s3_to_cdn_url_invalid_path(): + with pytest.raises(ValueError, match="Expected s3:// path"): + file_util.convert_s3_to_cdn_url("http://not-s3/file.txt", "cdn.example.com") From 3bc548e84edf18fb2407a30ee374bdfcffc1f743 Mon Sep 17 00:00:00 2001 From: Michael Huneke Date: Tue, 28 Jan 2025 12:06:05 -0500 Subject: [PATCH 2/9] Format / lint --- api/src/services/opportunities_v1/get_opportunity.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/api/src/services/opportunities_v1/get_opportunity.py b/api/src/services/opportunities_v1/get_opportunity.py index 2d55de6d4..68e7bf2e0 100644 --- a/api/src/services/opportunities_v1/get_opportunity.py +++ b/api/src/services/opportunities_v1/get_opportunity.py @@ -1,5 +1,5 @@ -from datetime import date import os +from datetime import date from sqlalchemy import select from sqlalchemy.orm import noload, selectinload @@ -9,8 +9,8 @@ from src.api.route_utils import raise_flask_error from src.db.models.agency_models import Agency from src.db.models.opportunity_models import Opportunity, OpportunityAttachment, OpportunitySummary -from src.util.file_util import pre_sign_file_location, convert_s3_to_cdn_url from src.util.env_config import PydanticBaseEnvConfig +from src.util.file_util import convert_s3_to_cdn_url, pre_sign_file_location class AttachmentConfig(PydanticBaseEnvConfig): @@ -58,11 +58,10 @@ def get_opportunity(db_session: db.Session, opportunity_id: int) -> Opportunity: ) # attachment_config = AttachmentConfig() - if os.environ.get("CDN_URL"): + cdn_url = os.environ.get("CDN_URL") + if cdn_url is not None: for opp_att in opportunity.opportunity_attachments: - opp_att.download_path = convert_s3_to_cdn_url( - opp_att.file_location, os.environ.get("CDN_URL") - ) + opp_att.download_path = convert_s3_to_cdn_url(opp_att.file_location, cdn_url) # type: ignore else: pre_sign_opportunity_file_location(opportunity.opportunity_attachments) From de4bd08dac7abfef5cfd19bdfa592ded272a8af9 Mon Sep 17 00:00:00 2001 From: Michael Huneke Date: Tue, 28 Jan 2025 12:17:15 -0500 Subject: [PATCH 3/9] Load via config --- api/src/services/opportunities_v1/get_opportunity.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/api/src/services/opportunities_v1/get_opportunity.py b/api/src/services/opportunities_v1/get_opportunity.py index 68e7bf2e0..5094e9577 100644 --- a/api/src/services/opportunities_v1/get_opportunity.py +++ b/api/src/services/opportunities_v1/get_opportunity.py @@ -1,4 +1,3 @@ -import os from datetime import date from sqlalchemy import select @@ -57,11 +56,12 @@ def get_opportunity(db_session: db.Session, opportunity_id: int) -> Opportunity: db_session, opportunity_id, load_all_opportunity_summaries=False ) - # attachment_config = AttachmentConfig() - cdn_url = os.environ.get("CDN_URL") - if cdn_url is not None: + attachment_config = AttachmentConfig() + if attachment_config.cdn_url is not None: for opp_att in opportunity.opportunity_attachments: - opp_att.download_path = convert_s3_to_cdn_url(opp_att.file_location, cdn_url) # type: ignore + opp_att.download_path = convert_s3_to_cdn_url( # type: ignore + opp_att.file_location, attachment_config.cdn_url + ) else: pre_sign_opportunity_file_location(opportunity.opportunity_attachments) From 57ee4f846a7256db37987fcab1fc199b3f2581e9 Mon Sep 17 00:00:00 2001 From: Michael Huneke Date: Tue, 28 Jan 2025 12:29:31 -0500 Subject: [PATCH 4/9] Add autouse fixture, use `join` --- analytics/tests/conftest.py | 5 +++++ api/local.env | 2 ++ api/src/util/file_util.py | 2 +- .../src/api/opportunities_v1/test_opportunity_route_get.py | 3 ++- 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/analytics/tests/conftest.py b/analytics/tests/conftest.py index 3c696a2f6..0b967f4eb 100644 --- a/analytics/tests/conftest.py +++ b/analytics/tests/conftest.py @@ -287,6 +287,11 @@ def reset_aws_env_vars(monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setenv("AWS_DEFAULT_REGION", "us-east-1") +@pytest.fixture(autouse=True) +def use_cdn(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("CDN_URL", "http://localhost:4566") + + @pytest.fixture def mock_s3() -> boto3.resource: """Instantiate an S3 bucket resource.""" diff --git a/api/local.env b/api/local.env index 0ac1aa68f..b6fabccbf 100644 --- a/api/local.env +++ b/api/local.env @@ -150,3 +150,5 @@ DEPLOY_GITHUB_REF=main DEPLOY_GITHUB_SHA=ffaca647223e0b6e54344122eefa73401f5ec131 DEPLOY_TIMESTAMP=2024-12-02T21:25:18Z DEPLOY_WHOAMI=local-developer + +CDN_URL=http://localhost:4566 diff --git a/api/src/util/file_util.py b/api/src/util/file_util.py index 5635dae67..0c44e278f 100644 --- a/api/src/util/file_util.py +++ b/api/src/util/file_util.py @@ -179,4 +179,4 @@ def convert_s3_to_cdn_url(file_path: str, cdn_url: str) -> str: raise ValueError(f"Expected s3:// path, got: {file_path}") _, key = split_s3_url(file_path) - return f"{cdn_url.rstrip('/')}/{key}" + return join(cdn_url, key) diff --git a/api/tests/src/api/opportunities_v1/test_opportunity_route_get.py b/api/tests/src/api/opportunities_v1/test_opportunity_route_get.py index 608a99799..3d53d5d4c 100644 --- a/api/tests/src/api/opportunities_v1/test_opportunity_route_get.py +++ b/api/tests/src/api/opportunities_v1/test_opportunity_route_get.py @@ -109,8 +109,9 @@ def test_get_opportunity_with_agency_200(client, api_auth_token, enable_factory_ def test_get_opportunity_s3_endpoint_url_200( - client, api_auth_token, enable_factory_create, db_session, mock_s3_bucket + client, api_auth_token, enable_factory_create, db_session, mock_s3_bucket, monkeypatch_session ): + monkeypatch_session.delenv("CDN_URL") # Create an opportunity with a specific attachment opportunity = OpportunityFactory.create(opportunity_attachments=[]) object_name = "test_file_1.txt" From 90ab8a1252b978ab58569d47eb6c37af43f0b0c0 Mon Sep 17 00:00:00 2001 From: Michael Huneke Date: Tue, 28 Jan 2025 12:35:14 -0500 Subject: [PATCH 5/9] Use an explicit str replace to make sure we only replace public paths --- api/src/util/file_util.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/api/src/util/file_util.py b/api/src/util/file_util.py index 0c44e278f..202f2a00e 100644 --- a/api/src/util/file_util.py +++ b/api/src/util/file_util.py @@ -178,5 +178,4 @@ def convert_s3_to_cdn_url(file_path: str, cdn_url: str) -> str: if not is_s3_path(file_path): raise ValueError(f"Expected s3:// path, got: {file_path}") - _, key = split_s3_url(file_path) - return join(cdn_url, key) + return file_path.replace(os.environ["PUBLIC_FILES_BUCKET"], cdn_url) From d5caae0762563b2e712d8b2e13dbf6e1ce0b37e9 Mon Sep 17 00:00:00 2001 From: Michael Huneke Date: Tue, 28 Jan 2025 12:39:09 -0500 Subject: [PATCH 6/9] Update file tests --- api/tests/src/util/test_file_util.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/api/tests/src/util/test_file_util.py b/api/tests/src/util/test_file_util.py index aafa609ae..6bcbc78f0 100644 --- a/api/tests/src/util/test_file_util.py +++ b/api/tests/src/util/test_file_util.py @@ -217,25 +217,19 @@ def test_move_file_local_disk(tmp_path): "s3_path,cdn_url,expected", [ ( - "s3://my-bucket/path/to/file.pdf", - "cdn.example.com", + "s3://local-mock-public-bucket/path/to/file.pdf", + "https://cdn.example.com", "https://cdn.example.com/path/to/file.pdf", ), ( "s3://local-mock-public-bucket/opportunities/9/attachments/79853231/manager.webm", - "cdn.example.com", + "https://cdn.example.com", "https://cdn.example.com/opportunities/9/attachments/79853231/manager.webm", ), - # Test with trailing slash in CDN URL - ( - "s3://my-bucket/file.txt", - "cdn.example.com/", - "https://cdn.example.com/file.txt", - ), # Test with subdirectory in CDN URL ( - "s3://my-bucket/file.txt", - "cdn.example.com/assets", + "s3://local-mock-public-bucket/file.txt", + "https://cdn.example.com/assets", "https://cdn.example.com/assets/file.txt", ), ], From b90c413fd2a6ddabb1330fdd91dfbd024a6b2ba5 Mon Sep 17 00:00:00 2001 From: Michael Huneke Date: Tue, 28 Jan 2025 12:46:19 -0500 Subject: [PATCH 7/9] Rename method / lint --- analytics/tests/conftest.py | 1 + api/src/services/opportunities_v1/get_opportunity.py | 4 ++-- api/src/util/file_util.py | 2 +- api/tests/src/util/test_file_util.py | 4 ++-- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/analytics/tests/conftest.py b/analytics/tests/conftest.py index 0b967f4eb..9242f1a58 100644 --- a/analytics/tests/conftest.py +++ b/analytics/tests/conftest.py @@ -289,6 +289,7 @@ def reset_aws_env_vars(monkeypatch: pytest.MonkeyPatch) -> None: @pytest.fixture(autouse=True) def use_cdn(monkeypatch: pytest.MonkeyPatch) -> None: + """Set up CDN URL environment variable for tests."""s monkeypatch.setenv("CDN_URL", "http://localhost:4566") diff --git a/api/src/services/opportunities_v1/get_opportunity.py b/api/src/services/opportunities_v1/get_opportunity.py index 5094e9577..55db1f9b8 100644 --- a/api/src/services/opportunities_v1/get_opportunity.py +++ b/api/src/services/opportunities_v1/get_opportunity.py @@ -9,7 +9,7 @@ from src.db.models.agency_models import Agency from src.db.models.opportunity_models import Opportunity, OpportunityAttachment, OpportunitySummary from src.util.env_config import PydanticBaseEnvConfig -from src.util.file_util import convert_s3_to_cdn_url, pre_sign_file_location +from src.util.file_util import convert_public_s3_to_cdn_url, pre_sign_file_location class AttachmentConfig(PydanticBaseEnvConfig): @@ -59,7 +59,7 @@ def get_opportunity(db_session: db.Session, opportunity_id: int) -> Opportunity: attachment_config = AttachmentConfig() if attachment_config.cdn_url is not None: for opp_att in opportunity.opportunity_attachments: - opp_att.download_path = convert_s3_to_cdn_url( # type: ignore + opp_att.download_path = convert_public_s3_to_cdn_url( # type: ignore opp_att.file_location, attachment_config.cdn_url ) else: diff --git a/api/src/util/file_util.py b/api/src/util/file_util.py index 202f2a00e..11d50bcde 100644 --- a/api/src/util/file_util.py +++ b/api/src/util/file_util.py @@ -168,7 +168,7 @@ def read_file(path: str | Path, mode: str = "r", encoding: str | None = None) -> return input_file.read() -def convert_s3_to_cdn_url(file_path: str, cdn_url: str) -> str: +def convert_public_s3_to_cdn_url(file_path: str, cdn_url: str) -> str: """ Convert an S3 URL to a CDN URL diff --git a/api/tests/src/util/test_file_util.py b/api/tests/src/util/test_file_util.py index 6bcbc78f0..9e230a52c 100644 --- a/api/tests/src/util/test_file_util.py +++ b/api/tests/src/util/test_file_util.py @@ -235,9 +235,9 @@ def test_move_file_local_disk(tmp_path): ], ) def test_convert_s3_to_cdn_url(s3_path, cdn_url, expected): - assert file_util.convert_s3_to_cdn_url(s3_path, cdn_url) == expected + assert file_util.convert_public_s3_to_cdn_url(s3_path, cdn_url) == expected def test_convert_s3_to_cdn_url_invalid_path(): with pytest.raises(ValueError, match="Expected s3:// path"): - file_util.convert_s3_to_cdn_url("http://not-s3/file.txt", "cdn.example.com") + file_util.convert_public_s3_to_cdn_url("http://not-s3/file.txt", "cdn.example.com") From 40fe954dba93384a695acdc413626f4e1484073b Mon Sep 17 00:00:00 2001 From: Michael Huneke Date: Tue, 28 Jan 2025 12:49:24 -0500 Subject: [PATCH 8/9] Format --- analytics/tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/analytics/tests/conftest.py b/analytics/tests/conftest.py index 9242f1a58..8f58fe107 100644 --- a/analytics/tests/conftest.py +++ b/analytics/tests/conftest.py @@ -289,7 +289,7 @@ def reset_aws_env_vars(monkeypatch: pytest.MonkeyPatch) -> None: @pytest.fixture(autouse=True) def use_cdn(monkeypatch: pytest.MonkeyPatch) -> None: - """Set up CDN URL environment variable for tests."""s + """Set up CDN URL environment variable for tests.""" monkeypatch.setenv("CDN_URL", "http://localhost:4566") From dcfe46f661758dcb5a75e32f83bfec47fa669d0e Mon Sep 17 00:00:00 2001 From: Michael Huneke Date: Wed, 29 Jan 2025 11:46:57 -0500 Subject: [PATCH 9/9] Use S3Config vs. os path --- api/local.env | 2 +- api/src/services/opportunities_v1/get_opportunity.py | 4 +++- api/src/util/file_util.py | 4 ++-- api/tests/src/util/test_file_util.py | 10 ++++++---- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/api/local.env b/api/local.env index b6fabccbf..7867cc68b 100644 --- a/api/local.env +++ b/api/local.env @@ -151,4 +151,4 @@ DEPLOY_GITHUB_SHA=ffaca647223e0b6e54344122eefa73401f5ec131 DEPLOY_TIMESTAMP=2024-12-02T21:25:18Z DEPLOY_WHOAMI=local-developer -CDN_URL=http://localhost:4566 +CDN_URL=http://localhost:4566/local-mock-public-bucket diff --git a/api/src/services/opportunities_v1/get_opportunity.py b/api/src/services/opportunities_v1/get_opportunity.py index 55db1f9b8..4eae805f6 100644 --- a/api/src/services/opportunities_v1/get_opportunity.py +++ b/api/src/services/opportunities_v1/get_opportunity.py @@ -5,6 +5,7 @@ import src.adapters.db as db import src.util.datetime_util as datetime_util +from src.adapters.aws import S3Config from src.api.route_utils import raise_flask_error from src.db.models.agency_models import Agency from src.db.models.opportunity_models import Opportunity, OpportunityAttachment, OpportunitySummary @@ -58,9 +59,10 @@ def get_opportunity(db_session: db.Session, opportunity_id: int) -> Opportunity: attachment_config = AttachmentConfig() if attachment_config.cdn_url is not None: + s3_config = S3Config() for opp_att in opportunity.opportunity_attachments: opp_att.download_path = convert_public_s3_to_cdn_url( # type: ignore - opp_att.file_location, attachment_config.cdn_url + opp_att.file_location, attachment_config.cdn_url, s3_config ) else: pre_sign_opportunity_file_location(opportunity.opportunity_attachments) diff --git a/api/src/util/file_util.py b/api/src/util/file_util.py index 11d50bcde..bd499e445 100644 --- a/api/src/util/file_util.py +++ b/api/src/util/file_util.py @@ -168,7 +168,7 @@ def read_file(path: str | Path, mode: str = "r", encoding: str | None = None) -> return input_file.read() -def convert_public_s3_to_cdn_url(file_path: str, cdn_url: str) -> str: +def convert_public_s3_to_cdn_url(file_path: str, cdn_url: str, s3_config: S3Config) -> str: """ Convert an S3 URL to a CDN URL @@ -178,4 +178,4 @@ def convert_public_s3_to_cdn_url(file_path: str, cdn_url: str) -> str: if not is_s3_path(file_path): raise ValueError(f"Expected s3:// path, got: {file_path}") - return file_path.replace(os.environ["PUBLIC_FILES_BUCKET"], cdn_url) + return file_path.replace(s3_config.public_files_bucket_path, cdn_url) diff --git a/api/tests/src/util/test_file_util.py b/api/tests/src/util/test_file_util.py index 9e230a52c..30eac915c 100644 --- a/api/tests/src/util/test_file_util.py +++ b/api/tests/src/util/test_file_util.py @@ -234,10 +234,12 @@ def test_move_file_local_disk(tmp_path): ), ], ) -def test_convert_s3_to_cdn_url(s3_path, cdn_url, expected): - assert file_util.convert_public_s3_to_cdn_url(s3_path, cdn_url) == expected +def test_convert_s3_to_cdn_url(s3_path, cdn_url, expected, s3_config): + assert file_util.convert_public_s3_to_cdn_url(s3_path, cdn_url, s3_config) == expected -def test_convert_s3_to_cdn_url_invalid_path(): +def test_convert_s3_to_cdn_url_invalid_path(s3_config): with pytest.raises(ValueError, match="Expected s3:// path"): - file_util.convert_public_s3_to_cdn_url("http://not-s3/file.txt", "cdn.example.com") + file_util.convert_public_s3_to_cdn_url( + "http://not-s3/file.txt", "cdn.example.com", s3_config + )