-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #925 from dchiller/i900-refactor-expandr
Refactor `helpers/expandr.py`
- Loading branch information
Showing 7 changed files with 511 additions and 483 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,152 +1,148 @@ | ||
from cantusdata.settings import BASE_DIR | ||
from cantusdata.helpers.scrapers.genre import genres | ||
|
||
import csv | ||
import urllib.request, urllib.error, urllib.parse | ||
import re | ||
import json | ||
import os | ||
import requests | ||
|
||
from django.conf import settings | ||
|
||
|
||
def expand_mode(mode_code): | ||
input_list = mode_code.strip() | ||
def expand_mode(mode_code: str) -> str: | ||
""" | ||
Translate non-numeric components of a CantusDB mode code into human-readable form. | ||
:param mode_code str: A CantusDB mode code | ||
:return: A human-readable translation of the mode code | ||
""" | ||
mode_code_stripped = mode_code.strip() | ||
mode_output = [] | ||
if "1" in input_list: | ||
mode_output.append("1") | ||
if "2" in input_list: | ||
mode_output.append("2") | ||
if "3" in input_list: | ||
mode_output.append("3") | ||
if "4" in input_list: | ||
mode_output.append("4") | ||
if "5" in input_list: | ||
mode_output.append("5") | ||
if "6" in input_list: | ||
mode_output.append("6") | ||
if "7" in input_list: | ||
mode_output.append("7") | ||
if "8" in input_list: | ||
mode_output.append("8") | ||
if "*" in input_list: | ||
mode_output.append("No music") | ||
if "r" in input_list: | ||
mode_output.append("Formulaic") | ||
if "?" in input_list: | ||
mode_output.append("Uncertain") | ||
if "S" in input_list: | ||
mode_output.append("Responsory (special)") | ||
if "T" in input_list: | ||
mode_output.append("Chant in Transposition") | ||
mode_nums = {"1", "2", "3", "4", "5", "6", "7", "8"} | ||
for char in mode_code_stripped: | ||
if char in mode_nums: | ||
mode_output.append(char) | ||
continue | ||
match char: | ||
case "*": | ||
mode_output.append("No music") | ||
case "r": | ||
mode_output.append("Formulaic") | ||
case "?": | ||
mode_output.append("Uncertain") | ||
case "S": | ||
mode_output.append("Responsory (special)") | ||
case "T": | ||
mode_output.append("Chant in Transposition") | ||
outstring = " ".join(mode_output) | ||
return outstring | ||
|
||
|
||
def expand_genre(genre_code): | ||
if genre_code in genres: | ||
description = genres[genre_code] | ||
class GenreExpander: | ||
""" | ||
Loads the genre mapping from the CantusDB API and provides a method to retrieve | ||
the full text genre description based on the given genre code. | ||
""" | ||
|
||
cantus_db_api_endpoint = "https://cantusdatabase.org/genres" | ||
request_headers = {"Accept": "application/json"} | ||
|
||
def __init__(self) -> None: | ||
self.genre_data = self.load_genre_data() | ||
|
||
def load_genre_data(self) -> dict[str, str]: | ||
""" | ||
Loads the genre list from the CantusDB API and returns a dictionary mapping | ||
genre codes to genre descriptions. | ||
""" | ||
response = requests.get( | ||
self.cantus_db_api_endpoint, headers=self.request_headers, timeout=5 | ||
) | ||
response.raise_for_status() | ||
genre_map: dict[str, str] = { | ||
x["name"]: x["description"] for x in response.json()["genres"] | ||
} | ||
return genre_map | ||
|
||
def expand_genre(self, genre_code: str) -> str: | ||
""" | ||
Gets the genre description based on the genre code. | ||
""" | ||
if not genre_code in self.genre_data: | ||
return genre_code | ||
|
||
description = self.genre_data[genre_code] | ||
# some extra stuff in parentheses is showing up | ||
paren = description.find("(") | ||
return description[: paren - 1] if paren != -1 else description | ||
|
||
# If nothing was found, return the original | ||
return genre_code | ||
|
||
|
||
def expand_differentia(differentia_code): | ||
def expand_differentia(differentia_code: str) -> str: | ||
""" | ||
In most cases, the differentia remains unmodified | ||
:param differentia_code: | ||
:return: | ||
:param differentia_code: The differentia. | ||
:return str: "No differentia" if no differentia is present, otherwise the differentia. | ||
""" | ||
return "No differentia" if "*" in differentia_code else differentia_code | ||
|
||
|
||
def expand_office(office_code): | ||
return { | ||
"V": "First Vespers", | ||
"C": "Compline", | ||
"M": "Matins", | ||
"L": "Lauds", | ||
"P": "Prime", | ||
"T": "Terce", | ||
"S": "Sext", | ||
"N": "None", | ||
"V2": "Second Vespers", | ||
"MI": "Mass", | ||
"MI1": "First Mass", | ||
"MI2": "Second Mass", | ||
"MI3": "Third Mass", | ||
"D": "Day Hours", | ||
"R": "Memorial", | ||
"E": "Antiphons for the Magnificat or Benedictus", | ||
"H": "Antiphons based on texts from the Historia", | ||
"CA": "Chapter", | ||
"X": "Supplementary", | ||
}.get(office_code, "Error") | ||
|
||
|
||
class PositionExpander(object): | ||
position_data_base = None | ||
|
||
def __init__(self): | ||
self.csv_file = csv.DictReader( | ||
open(os.path.join(BASE_DIR, "data_dumps", "position_names.csv")) | ||
) | ||
self.position_data_base = dict() | ||
for row in self.csv_file: | ||
office_code = self.remove_double_dash(row["Office"]).strip() | ||
genre_code = self.remove_double_dash(row["Genre"]).strip() | ||
position_code = ( | ||
self.remove_double_dash(row["Position"]) | ||
.strip() | ||
.lstrip("0") | ||
.rstrip("._ ") | ||
) | ||
text = self.remove_double_dash(row["Text Phrase"]).strip() | ||
|
||
# We are creating a 3-dimensional dictionary for fast lookup of names | ||
self.add_text(office_code, genre_code, position_code, text) | ||
|
||
def get_text(self, office_code, genre_code, position_code): | ||
OFFICE_CODES = { | ||
"V": "First Vespers", | ||
"C": "Compline", | ||
"M": "Matins", | ||
"L": "Lauds", | ||
"P": "Prime", | ||
"T": "Terce", | ||
"S": "Sext", | ||
"N": "None", | ||
"V2": "Second Vespers", | ||
"MI": "Mass", | ||
"MI1": "First Mass", | ||
"MI2": "Second Mass", | ||
"MI3": "Third Mass", | ||
"D": "Day Hours", | ||
"R": "Memorial", | ||
"E": "Antiphons for the Magnificat or Benedictus", | ||
"H": "Antiphons based on texts from the Historia", | ||
"CA": "Chapter", | ||
"X": "Supplementary", | ||
} | ||
|
||
|
||
def expand_office(office_code: str) -> str: | ||
""" | ||
Returns the full name of the office based on the given office code. | ||
:param office_code: The office code. | ||
:return: The full name of the office. | ||
""" | ||
return OFFICE_CODES.get(office_code, "Error") | ||
|
||
|
||
class PositionExpander: | ||
""" | ||
Loads the position mapping data from a JSON file and provides a method to retrieve | ||
the full text position description based on the given office, genre, and position code. | ||
""" | ||
|
||
def __init__(self) -> None: | ||
with open( | ||
os.path.join( | ||
settings.BASE_DIR, "cantusdata", "helpers", "position_mapping.json" | ||
), | ||
"r", | ||
encoding="utf-8", | ||
) as f: | ||
self.position_data_base: dict[str, dict[str, dict[str, str]]] = json.load(f) | ||
|
||
def expand_position( | ||
self, office_code: str, genre_code: str, position_code: str | ||
) -> str: | ||
""" | ||
Retrieves the full text position description based on the given office, genre, | ||
and position code. | ||
""" | ||
try: | ||
return self.position_data_base[office_code.strip()][genre_code.strip()][ | ||
position_code.strip().lstrip("0").rstrip("._ ") | ||
] | ||
except KeyError: | ||
# If it's not in the dictionary then we just use an empty string | ||
return "" | ||
|
||
def add_text(self, office, genre, position, text): | ||
""" | ||
Add a record to self.position_data_base, which is a 3d dictionary. | ||
Raises KeyError if a dictionary position is already taken. | ||
""" | ||
if office in self.position_data_base: | ||
if genre in self.position_data_base[office]: | ||
if position in self.position_data_base[office][genre]: | ||
raise KeyError( | ||
"Position record {0} {1} {2} already set to {3}!".format( | ||
office, | ||
genre, | ||
position, | ||
self.position_data_base[office][genre][position], | ||
) | ||
) | ||
else: | ||
# Position doesn't exist, so we create it | ||
self.position_data_base[office][genre].update({position: text}) | ||
else: | ||
# Genre doesn't exist, so we create it and position | ||
self.position_data_base[office].update({genre: {position: text}}) | ||
else: | ||
# Office doesn't exist, so we create office, genre, and position | ||
self.position_data_base.update({office: {genre: {position: text}}}) | ||
|
||
def remove_double_dash(self, text): | ||
""" | ||
Turns double dashes into empty strings | ||
""" | ||
if text.strip() == "--": | ||
return "" | ||
else: | ||
return text |
Oops, something went wrong.