-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into calisphere-etl
- Loading branch information
Showing
5 changed files
with
202 additions
and
43 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,9 @@ | ||
import re | ||
from datetime import datetime | ||
from typing import Any, Optional | ||
|
||
from ..mapper import Validator | ||
from ...validator import ValidationLogLevel | ||
from ...validator import ValidationLogLevel, ValidationMode | ||
from .oai_mapper import OaiRecord, OaiVernacular | ||
|
||
|
||
|
@@ -39,6 +41,29 @@ def setup(self): | |
"field": "contributor", | ||
"validations": [SamveraValidator.contributor_match], | ||
"level": ValidationLogLevel.WARNING | ||
}, | ||
{ | ||
"field": "rights", | ||
"validations": [ | ||
SamveraValidator.rights_match, | ||
Validator.verify_type(Validator.list_of(str)) | ||
] | ||
}, | ||
{ | ||
"field": "date", | ||
"validations": [SamveraValidator.date_match], | ||
"level": ValidationLogLevel.WARNING, | ||
}, | ||
{ | ||
"field": "source", | ||
"validations": [SamveraValidator.source_match], | ||
"level": ValidationLogLevel.WARNING | ||
}, | ||
{ | ||
"field": "description", | ||
"validations": [Validator.content_match], | ||
"level": ValidationLogLevel.WARNING, | ||
"validation_mode": ValidationMode.ORDER_INSENSITIVE_IF_LIST | ||
} | ||
]) | ||
|
||
|
@@ -50,7 +75,10 @@ def replace_ursus_with_digital(validation_def: dict, | |
return | ||
|
||
if comparison_value.startswith("https://ursus.library.ucla.edu"): | ||
comparison_value.replace("https://ursus.library.ucla.edu", "https://digital.library.ucla.edu") | ||
comparison_value = comparison_value.replace( | ||
"https://ursus.library.ucla.edu", | ||
"https://digital.library.ucla.edu" | ||
) | ||
|
||
if rikolti_value == comparison_value: | ||
return | ||
|
@@ -65,12 +93,112 @@ def contributor_match(validation_def: dict, | |
if rikolti_value == comparison_value: | ||
return | ||
|
||
comparison_value[0] = comparison_value[0] + '.' | ||
if comparison_value: | ||
comparison_value = [c + '.' for c in comparison_value] | ||
if rikolti_value == comparison_value: | ||
return | ||
|
||
return "Content mismatch" | ||
|
||
@staticmethod
def rights_match(validation_def: dict,
                 rikolti_value: Any,
                 comparison_value: Any) -> Optional[str]:
    """
    Compare rights values, tolerating a drifted phone number in legacy data.

    Values that differ only in the phone number are treated as a match
    when the new phone number is '(310) 825-4988'. In legacy collection
    153 this number seemed to be auto-incrementing with each record
    (e.g. '(310) 825-4987', '(310) 825-4986', ...). Example:

        legacy: [
            'US',
            'UCLA Library Special Collections, ... Phone: (310) 825-4987'
        ]
        rikolti: [
            'US',
            'UCLA Library Special Collections, ... Phone: (310) 825-4988'
        ]

    Parameters:
        validation_def: the validation definition (unused here)
        rikolti_value: the value produced by the new mapper
        comparison_value: the legacy value to compare against; it is
            never modified by this function

    Returns:
        None when the values match, otherwise "Content mismatch".
    """
    if rikolti_value == comparison_value:
        return None

    new_phone_number = '(310) 825-4988'
    if (
        isinstance(comparison_value, list)
        and len(comparison_value) == 2
        and isinstance(comparison_value[1], str)
    ):
        # Build a normalized copy instead of assigning into the caller's
        # list: later validations on the same field must still see the
        # original legacy value.
        normalized = [
            comparison_value[0],
            re.sub(
                r'\(310\) 825-\d{4}',  # old phone number regex
                new_phone_number,
                comparison_value[1]
            ),
        ]
        if rikolti_value == normalized:
            return None

    return "Content mismatch"
|
||
@staticmethod
def date_match(validation_def: dict,
               rikolti_value: Any,
               comparison_value: Any) -> Optional[str]:
    """
    Compare date values, tolerating an extra ISO-formatted duplicate.

    The values match when they are equal, or when they are lists holding
    the same items in any order. They also match when the comparison
    value is a list of one string date and the rikolti value is a list
    of two string dates: one identical to the comparison value in
    'Month DD, YYYY' form, and the other in 'YYYY-MM-DD' form denoting
    the same logical date as the comparison value.

        comparison value example: ['August 20, 1951']
        rikolti value example: ['August 20, 1951', '1951-08-20']

    Parameters:
        validation_def: the validation definition (unused here)
        rikolti_value: the value produced by the new mapper
        comparison_value: the legacy value to compare against

    Returns:
        None when the values match, otherwise "Content mismatch".
    """
    mismatch = "Content mismatch"

    if comparison_value == rikolti_value:
        return None

    if not comparison_value or not rikolti_value:
        return mismatch

    # Only real sequences get the order-insensitive treatment; sorting two
    # plain strings would compare their characters and accept anagrams.
    if not (isinstance(comparison_value, (list, tuple))
            and isinstance(rikolti_value, (list, tuple))):
        return mismatch

    try:
        if sorted(rikolti_value) == sorted(comparison_value):
            return None
    except TypeError:
        # Unorderable items (e.g. None mixed with str) cannot be an
        # order-insensitive match; fall through to the remaining checks.
        pass

    if len(comparison_value) != 1 or len(rikolti_value) != 2:
        return mismatch

    legacy_date = comparison_value[0]
    if legacy_date not in rikolti_value:
        return mismatch

    # Whichever entry is left after removing the legacy date is the
    # candidate ISO date, regardless of the order the mapper emitted.
    remaining = list(rikolti_value)
    remaining.remove(legacy_date)
    iso_candidate = remaining[0]

    try:
        comparison_datetime = datetime.strptime(legacy_date, '%B %d, %Y')
        rikolti_datetime = datetime.strptime(iso_candidate, '%Y-%m-%d')
    except (TypeError, ValueError):
        # TypeError: a non-string entry; ValueError: unexpected format.
        return mismatch

    if comparison_datetime == rikolti_datetime:
        return None

    return mismatch
|
||
@staticmethod
def source_match(validation_def: dict,
                 rikolti_value: Any,
                 comparison_value: Any) -> Optional[str]:
    """
    Compare source values, tolerating an added 'OpenUCLA Collections'.

    Besides plain equality, this matches a comparison value of
        ['Los Angeles Times Photographic Collection']
    against a rikolti value of
        ['OpenUCLA Collections', 'Los Angeles Times Photographic Collection']
    (the two rikolti entries may appear in either order).

    Parameters:
        validation_def: the validation definition (unused here)
        rikolti_value: the value produced by the new mapper
        comparison_value: the legacy value to compare against

    Returns:
        None when the values match, otherwise "Content mismatch".
    """
    if rikolti_value == comparison_value:
        return None

    la_times = "Los Angeles Times Photographic Collection"
    open_ucla = 'OpenUCLA Collections'

    if (
        comparison_value and rikolti_value and
        len(comparison_value) == 1 and len(rikolti_value) == 2 and
        comparison_value[0] == la_times and
        open_ucla in rikolti_value and
        la_times in rikolti_value
    ):
        return None

    # Without this explicit result the function would return None for
    # every input, i.e. the validation could never report a mismatch.
    return "Content mismatch"
|
||
class SamveraVernacular(OaiVernacular):
    """
    OaiVernacular subclass wired up for Samvera records: it binds
    SamveraRecord as the record class and SamveraValidator as the
    validator.
    """
    # class used to represent individual records
    record_cls = SamveraRecord
    # validator class holding the Samvera-specific field validations
    validator = SamveraValidator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters