Skip to content

Commit

Permalink
Merge pull request #925 from dchiller/i900-refactor-expandr
Browse files Browse the repository at this point in the history
Refactor `helpers/expandr.py`
  • Loading branch information
dchiller authored Jan 9, 2025
2 parents 427669e + 40cbbef commit cd2464a
Show file tree
Hide file tree
Showing 7 changed files with 511 additions and 483 deletions.
2 changes: 1 addition & 1 deletion app/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# syntax=docker/dockerfile:1
# Download and install python dependencies in a container
FROM python:3.12.3 as dependency-install-container
FROM python:3.12.3 AS dependency-install-container
ARG DEVELOPMENT
COPY ./poetry.lock ./pyproject.toml ./app/install-packages.sh /code/
WORKDIR /code
Expand Down
246 changes: 121 additions & 125 deletions app/public/cantusdata/helpers/expandr.py
Original file line number Diff line number Diff line change
@@ -1,152 +1,148 @@
from cantusdata.settings import BASE_DIR
from cantusdata.helpers.scrapers.genre import genres

import csv
import urllib.request, urllib.error, urllib.parse
import re
import json
import os
import requests

from django.conf import settings


# Translation for every character that is meaningful in a CantusDB mode code.
# The numeric modes map to themselves; the remaining symbols map to their
# human-readable description. Characters not listed here are dropped.
_MODE_CODE_TRANSLATIONS: dict[str, str] = {
    **{digit: digit for digit in "12345678"},
    "*": "No music",
    "r": "Formulaic",
    "?": "Uncertain",
    "S": "Responsory (special)",
    "T": "Chant in Transposition",
}


def expand_mode(mode_code: str) -> str:
    """
    Translate non-numeric components of a CantusDB mode code into human-readable form.

    Surrounding whitespace is stripped, then each character is translated in
    the order it appears in the code. The digits 1-8 are kept as they are,
    the symbols "*", "r", "?", "S" and "T" are replaced by their description,
    and any other character is ignored.

    :param mode_code: A CantusDB mode code
    :return: A human-readable translation of the mode code, with the
        translated components separated by single spaces (an empty string
        if the code contains no recognised component)
    """
    return " ".join(
        _MODE_CODE_TRANSLATIONS[char]
        for char in mode_code.strip()
        if char in _MODE_CODE_TRANSLATIONS
    )


class GenreExpander:
    """
    Loads the genre mapping from the CantusDB API and provides a method to retrieve
    the full text genre description based on the given genre code.

    The mapping is fetched once, when the expander is instantiated, and kept
    in ``genre_data``.
    """

    cantus_db_api_endpoint = "https://cantusdatabase.org/genres"
    request_headers = {"Accept": "application/json"}

    def __init__(self) -> None:
        self.genre_data = self.load_genre_data()

    def load_genre_data(self) -> dict[str, str]:
        """
        Loads the genre list from the CantusDB API and returns a dictionary mapping
        genre codes to genre descriptions.

        The response body is read as JSON with a top-level "genres" list whose
        items carry "name" (used as the code) and "description" keys.

        :return: A dictionary mapping genre codes to genre descriptions.
        :raises requests.HTTPError: If the API answers with an error status.
        """
        response = requests.get(
            self.cantus_db_api_endpoint, headers=self.request_headers, timeout=5
        )
        response.raise_for_status()
        genre_map: dict[str, str] = {
            genre["name"]: genre["description"] for genre in response.json()["genres"]
        }
        return genre_map

    def expand_genre(self, genre_code: str) -> str:
        """
        Gets the genre description based on the genre code.

        :param genre_code: The genre code to look up.
        :return: The genre description without any trailing parenthetical
            remark, or ``genre_code`` itself if no description is known for it.
        """
        description = self.genre_data.get(genre_code)
        if description is None:
            # If nothing was found, return the original
            return genre_code

        # Some extra stuff in parentheses is showing up: keep only the text
        # in front of the first "(". Splitting on the parenthesis itself
        # (instead of slicing one character before it) is correct whether or
        # not a space precedes the parenthesis.
        before_paren, paren, _ = description.partition("(")
        if not paren:
            return description
        # A description that consists only of a parenthetical is returned
        # unchanged rather than being reduced to an empty string.
        return before_paren.rstrip() or description


def expand_differentia(differentia_code: str) -> str:
    """
    Translate the "no differentia" marker into human-readable form.

    In most cases, the differentia remains unmodified.

    :param differentia_code: The differentia.
    :return: "No differentia" if the code contains "*", otherwise the
        differentia exactly as it was given.
    """
    return "No differentia" if "*" in differentia_code else differentia_code


# Office codes and their full names. Kept at module level so the table is
# built once at import time instead of on every call to expand_office.
OFFICE_CODES: dict[str, str] = {
    "V": "First Vespers",
    "C": "Compline",
    "M": "Matins",
    "L": "Lauds",
    "P": "Prime",
    "T": "Terce",
    "S": "Sext",
    "N": "None",
    "V2": "Second Vespers",
    "MI": "Mass",
    "MI1": "First Mass",
    "MI2": "Second Mass",
    "MI3": "Third Mass",
    "D": "Day Hours",
    "R": "Memorial",
    "E": "Antiphons for the Magnificat or Benedictus",
    "H": "Antiphons based on texts from the Historia",
    "CA": "Chapter",
    "X": "Supplementary",
}


def expand_office(office_code: str) -> str:
    """
    Returns the full name of the office based on the given office code.

    The lookup is exact: the code is neither stripped nor case-folded.

    :param office_code: The office code.
    :return: The full name of the office, or the string "Error" if the code
        is not a known office code.
    """
    return OFFICE_CODES.get(office_code, "Error")


class PositionExpander:
    """
    Loads the position mapping data from a JSON file and provides a method to retrieve
    the full text position description based on the given office, genre, and position code.

    The mapping is a three-level dictionary keyed by office code, then genre
    code, then position code.
    """

    def __init__(self) -> None:
        mapping_path = os.path.join(
            settings.BASE_DIR, "cantusdata", "helpers", "position_mapping.json"
        )
        with open(mapping_path, "r", encoding="utf-8") as f:
            self.position_data_base: dict[str, dict[str, dict[str, str]]] = json.load(f)

    def expand_position(
        self, office_code: str, genre_code: str, position_code: str
    ) -> str:
        """
        Retrieves the full text position description based on the given office, genre,
        and position code.

        Before the lookup, surrounding whitespace is stripped from all three
        codes; the position code additionally loses leading zeros and any
        trailing ".", "_" or space characters.

        :param office_code: The office code.
        :param genre_code: The genre code.
        :param position_code: The position code.
        :return: The position description, or an empty string if the
            combination of codes is not in the mapping.
        """
        office_key = office_code.strip()
        genre_key = genre_code.strip()
        position_key = position_code.strip().lstrip("0").rstrip("._ ")
        try:
            return self.position_data_base[office_key][genre_key][position_key]
        except KeyError:
            # If it's not in the dictionary then we just use an empty string
            return ""
Loading

0 comments on commit cd2464a

Please sign in to comment.