Skip to content

Commit

Permalink
Tests using the date cases
Browse files Browse the repository at this point in the history
  • Loading branch information
amywieliczka committed Aug 28, 2024
1 parent a1f137c commit a212112
Show file tree
Hide file tree
Showing 8 changed files with 1,480 additions and 1,374 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
collection_id | function | input | output
collection_id | function | date_values | converted_dates
7007 | convert_dates | date_values=[] | converted_dates=None
12680 | convert_dates | date_values=None | converted_dates=None
12680 | convert_dates | date_values=[{'end': '1916', 'begin': '1856', 'displayDate': '1856 - 1916'}] | converted_dates=[{'end': '1916', 'begin': '1856', 'displayDate': '1856 - 1916'}]
Expand Down
690 changes: 690 additions & 0 deletions metadata_mapper/test/date_data/enrich_earliest_date.csv

Large diffs are not rendered by default.

682 changes: 682 additions & 0 deletions metadata_mapper/test/date_data/get_facet_decades.csv

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
collection_id | function | date_source argument | start_date return value | end_date return value
collection_id | function | date_source | start_date | end_date
12680 | make_sort_dates | date_source=[{'end': '1916', 'begin': '1856', 'displayDate': '1856 - 1916'}] | start_date='1856-01-01T00:00:00+00:00' | end_date='1916-01-01T00:00:00+00:00'
12680 | make_sort_dates | date_source=[{'end': '1912', 'begin': '1858', 'displayDate': '1858 - 1912'}] | start_date='1858-01-01T00:00:00+00:00' | end_date='1912-01-01T00:00:00+00:00'
12680 | make_sort_dates | date_source=[{'end': '1912', 'begin': '1874', 'displayDate': '1874 - 1912'}] | start_date='1874-01-01T00:00:00+00:00' | end_date='1912-01-01T00:00:00+00:00'
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
collection_id | function | input | output
collection_id | function | date_obj | dates
7007 | unpack_display_date | date_obj=[None] | dates=[None]
7007 | unpack_display_date | date_obj=[None] | dates=[None]
12680 | unpack_display_date | date_obj=[{'end': '1916', 'begin': '1856', 'displayDate': '1856 - 1916'}] | dates=['1856 - 1916']
Expand Down
681 changes: 0 additions & 681 deletions metadata_mapper/test/date_enrichment_cases/add_facet_decade.csv

This file was deleted.

This file was deleted.

105 changes: 105 additions & 0 deletions metadata_mapper/test/test_date_processing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import csv
import sys
import os
import ast

# Absolute path of the directory three levels above this file's directory
# (per the original note, the parent of the rikolti folder). It is appended to
# sys.path so the absolute `rikolti.` imports below can resolve.
rikolti_path = os.path.abspath(
    os.path.join(os.path.dirname(__file__), '../../../'))
if rikolti_path not in sys.path:
    sys.path.append(rikolti_path)

# This import has to come after the sys.path adjustment, so the
# "module level import not at top of file" lint rule is silenced explicitly.
from rikolti.metadata_mapper.mappers.solr_updater_helpers import (  # noqa: E402
    get_facet_decades, unpack_display_date, make_sort_dates)

Check failure on line 12 in metadata_mapper/test/test_date_processing.py

View workflow job for this annotation

GitHub Actions / lint

Ruff (E402)

metadata_mapper/test/test_date_processing.py:11:1: E402 Module level import not at top of file
# Deliberately placed after the sys.path setup earlier in this module; E402
# is suppressed because the import cannot move to the top of the file.
from rikolti.metadata_mapper.mappers.date_enrichments import convert_dates  # noqa: E402

Check failure on line 13 in metadata_mapper/test/test_date_processing.py

View workflow job for this annotation

GitHub Actions / lint

Ruff (E402)

metadata_mapper/test/test_date_processing.py:13:1: E402 Module level import not at top of file


def enumerate_cases_from_csv(csv_filename):
    """Yield ``(row_number, row)`` for each data row of a test-case file.

    The file is looked up in ``rikolti/metadata_mapper/test/date_data`` under
    ``rikolti_path`` and parsed as ``|``-delimited text. The first row is the
    header. Every cell is stripped of surrounding whitespace; a cell of the
    form ``<column header>=<python literal>`` is replaced by the value of that
    literal (parsed with ``ast.literal_eval``), and any other cell is kept as
    the stripped string.

    Args:
        csv_filename: Name of the case file inside the ``date_data`` folder.

    Yields:
        Tuples of the zero-based data row index (the header is not counted)
        and the list of converted cells for that row.
    """
    csv_path = os.path.join(
        rikolti_path, "rikolti", "metadata_mapper", "test", "date_data",
        csv_filename)
    # newline="" is what the csv module expects of file objects it reads; the
    # explicit encoding keeps parsing independent of the platform locale.
    with open(csv_path, newline="", encoding="utf-8") as csvfile:
        reader = csv.reader(csvfile, delimiter="|")
        header = [cell.strip() for cell in next(reader)]
        for row_number, row in enumerate(reader):
            for column, cell in enumerate(row):
                cell = cell.strip()
                prefix = f"{header[column]}="
                if cell.startswith(prefix):
                    # Only "name=value" cells hold Python literals; plain
                    # cells such as the function name are not evaluable.
                    cell = ast.literal_eval(cell[len(prefix):])
                row[column] = cell
            yield row_number, row


def test_get_facet_decades():
    """Check get_facet_decades against each case in get_facet_decades.csv.

    Actual and expected values are both converted to sets before comparison,
    so ordering and duplicates are ignored. Each row is unpacked into five
    cells; the second and fifth are not used.
    """
    case_file = "get_facet_decades.csv"
    for case_number, case in enumerate_cases_from_csv(case_file):
        collection_id, _function, date_value, facet_decades, _unused = case
        failure_message = (
            f"Test case {case_number} failed - Collection {collection_id}\n"
            f"{'Input:':<20}{date_value}\n"
            f"{'Expected Output:':<20}{facet_decades}"
        )
        actual_decades = set(get_facet_decades(date_value))
        expected_decades = set(facet_decades)
        assert actual_decades == expected_decades, failure_message


def test_enrich_date():
    """Check convert_dates against each case in enrich_date.csv.

    Each row is unpacked into four cells: collection id, an unused cell, the
    input passed to convert_dates, and the exact value it must return.
    """
    case_file = "enrich_date.csv"
    for case_number, case in enumerate_cases_from_csv(case_file):
        collection_id, _function, date_values, converted_dates = case
        failure_message = (
            f"Test case {case_number} failed - Collection {collection_id}\n"
            f"{'Input:':<20}{date_values}\n"
            f"{'Expected Output:':<20}{converted_dates}"
        )
        actual_dates = convert_dates(date_values)
        assert actual_dates == converted_dates, failure_message


def test_enrich_earliest_date():
    """Check convert_dates against each case in enrich_earliest_date.csv.

    When both the actual and the expected value are lists, they are compared
    after sorting by each entry's 'displayDate', so the order in which
    convert_dates returns its entries does not matter. Any other combination
    is compared as-is.
    """
    filename = "enrich_earliest_date.csv"
    for index, row in enumerate_cases_from_csv(filename):
        collection_id, _, date_values, converted_dates = row
        assertion_error = (
            f"Test case {index} failed - Collection {collection_id}\n"
            f"{'Input:':<20}{date_values}\n"
            f"{'Expected Output:':<20}{converted_dates}"
        )
        actual_output = convert_dates(date_values)

        # convert_dates does not seem to return values in a reliable order,
        # so compare sorted copies. sorted() is essential here: list.sort()
        # sorts in place and returns None, which would reduce the check to
        # None == None and let every list case pass without being compared.
        if isinstance(actual_output, list) and isinstance(converted_dates, list):
            converted_dates = sorted(
                converted_dates, key=lambda d: d.get('displayDate'))
            actual_output = sorted(
                actual_output, key=lambda d: d.get('displayDate'))
        assert actual_output == converted_dates, assertion_error

def test_sort_dates():
    """Check make_sort_dates against each case in sort_dates.csv.

    Each row is unpacked into five cells: collection id, an unused cell, the
    input, and the expected start and end dates. make_sort_dates must return
    the (start_date, end_date) tuple exactly.
    """
    case_file = "sort_dates.csv"
    for case_number, case in enumerate_cases_from_csv(case_file):
        collection_id, _function, date_source, start_date, end_date = case
        expected_output = (start_date, end_date)
        failure_message = (
            f"Test case {case_number} failed - Collection {collection_id}\n"
            f"{'Input:':<20}{date_source}\n"
            f"{'Expected Output:':<20}{expected_output}"
        )
        actual_output = make_sort_dates(date_source)
        assert actual_output == expected_output, failure_message


def test_unpack_display_date():
    """Check unpack_display_date against each case in unpack_display_date.csv.

    Each row is unpacked into four cells: collection id, an unused cell, the
    input passed to unpack_display_date, and the exact value it must return.
    """
    case_file = "unpack_display_date.csv"
    for case_number, case in enumerate_cases_from_csv(case_file):
        collection_id, _function, date_obj, dates = case
        failure_message = (
            f"Test case {case_number} failed - Collection {collection_id}\n"
            f"{'Input:':<20}{date_obj}\n"
            f"{'Expected Output:':<20}{dates}"
        )
        actual_dates = unpack_display_date(date_obj)
        assert actual_dates == dates, failure_message

0 comments on commit a212112

Please sign in to comment.