Skip to content

Commit

Permalink
#1022 - Temporarily skip validator during harvest process.
Browse files: browse the repository at this point in the history
  • Loading branch information
amywieliczka committed Jul 15, 2024
1 parent efbf06e commit a6c2206
Showing 1 changed file with 25 additions and 14 deletions.
39 changes: 25 additions & 14 deletions dags/shared_tasks/mapping_tasks.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -151,25 +151,36 @@ def validate_collection_task(
"[3433/vernacular_metadata_v1/mapped_metadata_v1/3.jsonl]"
]
"""
mapped_page_batches = [json.loads(batch) for batch in mapped_page_batches]
mapped_pages = list(chain.from_iterable(mapped_page_batches))
mapped_pages = [path for path in mapped_pages if 'children' not in path]
send_event_to_sns(context, {
"validation": "skipped",
"collection_id": collection_id,
"mapped_page_batches": mapped_page_batches
})
return (
f"Skipping validation of {mapped_page_batches} for {collection_id} "
"until we can re-implement validator to compare against OpenSearch "
"data."
)

num_rows, version_page = create_collection_validation_csv(
collection_id, mapped_pages)
# mapped_page_batches = [json.loads(batch) for batch in mapped_page_batches]
# mapped_pages = list(chain.from_iterable(mapped_page_batches))
# mapped_pages = [path for path in mapped_pages if 'children' not in path]

status = ValidationReportStatus(
filepath=version_page,
num_validation_errors=num_rows,
mapped_version=get_version(collection_id, mapped_pages[0])
)
# num_rows, version_page = create_collection_validation_csv(
# collection_id, mapped_pages)

# status = ValidationReportStatus(
# filepath=version_page,
# num_validation_errors=num_rows,
# mapped_version=get_version(collection_id, mapped_pages[0])
# )

print(f"Output {num_rows} rows to {version_page}")
print_s3_link(version_page, get_version(collection_id, mapped_pages[0]))
# print(f"Output {num_rows} rows to {version_page}")
# print_s3_link(version_page, get_version(collection_id, mapped_pages[0]))

send_event_to_sns(context, asdict(status))
# send_event_to_sns(context, asdict(status))

return version_page
# return version_page


@task_group(group_id='mapping')
Expand Down

0 comments on commit a6c2206

Please sign in to comment.