Skip to content

Commit

Permalink
Fix urls_metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
BinamB committed Feb 5, 2025
1 parent 2fd293b commit c453921
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 1 deletion.
17 changes: 16 additions & 1 deletion indexd/index/drivers/single_table_alchemy.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ def to_document_dict(self):
if self.content_updated_date is not None
else None
)
urls_metadata = generate_url_metadata(self.url_metadata, self.urls)

return {
"did": self.guid,
Expand All @@ -94,7 +95,7 @@ def to_document_dict(self):
"version": self.version,
"uploader": self.uploader,
"urls": self.urls,
"urls_metadata": self.url_metadata,
"urls_metadata": urls_metadata,
"acl": acl,
"authz": authz,
"hashes": self.hashes,
Expand Down Expand Up @@ -1529,6 +1530,20 @@ def check_url_metadata(url_metadata, record):
raise UserError("url {} in url_metadata does not exist".format(url))


def generate_url_metadata(record_url_metadata, urls):
    """
    Generate the urls_metadata mapping for an indexd record.

    Every url in ``urls`` is guaranteed to have a key in the result. Urls
    without an entry in ``record_url_metadata`` are mapped to an empty dict;
    existing entries are kept unchanged and in their original order.

    Args:
        record_url_metadata (dict | None): urls metadata for an indexd record
        urls (list | None): list of urls of an indexd record

    Returns:
        dict: a new dict containing every entry of ``record_url_metadata``
        plus an empty-dict entry for each url that had no metadata. The
        input mapping is never modified.
    """
    # Work on a copy: this helper is called while serializing a record, and
    # filling in the gaps must not write back into the mapping the caller
    # handed in.
    urls_metadata = dict(record_url_metadata or {})
    # `urls or ()` lets a record with no urls (None) pass through untouched.
    for url in urls or ():
        urls_metadata.setdefault(url, {})
    return urls_metadata


def get_record_if_exists(did, session):
"""
Searches for a record with this did and returns it.
Expand Down
42 changes: 42 additions & 0 deletions tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2846,3 +2846,45 @@ def test_timestamps_no_updated_without_created(
data["content_updated_date"] = "2022-03-14T17:02:54"
create_obj_resp = client.post("/index/", json=data, headers=user)
assert create_obj_resp.status_code == 400


def test_check_urls_metadata(client, user, combined_default_and_single_table_settings):
    """
    Verify that a fetched record's urls_metadata is keyed by that record's urls.

    Creates a record from the default document, reads it back, and checks that
    urls_metadata has as many entries as urls and that each of its keys is one
    of the record's urls.
    """
    create_resp = client.post("/index/", json=get_doc(), headers=user)
    assert create_resp.status_code == 200
    guid = create_resp.json["did"]

    fetch_resp = client.get("/index/" + guid, headers=user)
    assert fetch_resp.status_code == 200
    record = fetch_resp.json
    record_urls = record["urls"]
    urls_metadata = record["urls_metadata"]

    # Same size + every metadata key is a known url.
    assert len(urls_metadata) == len(record_urls)
    assert all(url in record_urls for url in urls_metadata)


def test_check_urls_metadata_partially_missing_metadata(
    client, user, combined_default_and_single_table_settings
):
    """
    Checks that urls_metadata covers every url even when the submitted
    document carries metadata for only some of its urls.

    An extra url is appended to the document's urls without a matching
    urls_metadata entry before the record is created.
    """
    data = get_doc(has_urls_metadata=True)
    # This url deliberately has no entry in the submitted urls_metadata.
    data["urls"].append("s3://new-data/location.txt")
    # Use the same trailing-slash endpoint as the other create calls in this
    # file so the request cannot be answered with a redirect instead of 200.
    res = client.post("/index/", json=data, headers=user)
    assert res.status_code == 200
    rec = res.json
    did = rec["did"]

    res = client.get("/index/" + did, headers=user)
    assert res.status_code == 200
    rec = res.json
    urls = rec["urls"]

    assert len(rec["urls_metadata"]) == len(rec["urls"])

    for key in rec["urls_metadata"]:
        assert key in urls

0 comments on commit c453921

Please sign in to comment.