Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor helpers/expandr.py #925

Merged
merged 4 commits into from
Jan 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion app/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# syntax=docker/dockerfile:1
# Download and install python dependencies in a container
FROM python:3.12.3 as dependency-install-container
FROM python:3.12.3 AS dependency-install-container
ARG DEVELOPMENT
COPY ./poetry.lock ./pyproject.toml ./app/install-packages.sh /code/
WORKDIR /code
Expand Down
246 changes: 121 additions & 125 deletions app/public/cantusdata/helpers/expandr.py
Original file line number Diff line number Diff line change
@@ -1,152 +1,148 @@
from cantusdata.settings import BASE_DIR
from cantusdata.helpers.scrapers.genre import genres

import csv
import urllib.request, urllib.error, urllib.parse
import re
import json
import os
import requests

from django.conf import settings


def expand_mode(mode_code: str) -> str:
    """
    Translate non-numeric components of a CantusDB mode code into human-readable form.

    The code is stripped of surrounding whitespace and then read one character
    at a time. The digits "1" through "8" are kept unchanged, each recognised
    flag character is replaced by its description, and any other character is
    ignored. The resulting pieces are joined with single spaces in the order
    in which they occur in the code.

    :param mode_code: A CantusDB mode code
    :return: A human-readable translation of the mode code
    """
    mode_nums = {"1", "2", "3", "4", "5", "6", "7", "8"}
    # Lookup table instead of a branch per flag character.
    flag_descriptions = {
        "*": "No music",
        "r": "Formulaic",
        "?": "Uncertain",
        "S": "Responsory (special)",
        "T": "Chant in Transposition",
    }
    mode_output = []
    for char in mode_code.strip():
        if char in mode_nums:
            mode_output.append(char)
        elif char in flag_descriptions:
            mode_output.append(flag_descriptions[char])
    return " ".join(mode_output)


class GenreExpander:
    """
    Loads the genre mapping from the CantusDB API and provides a method to retrieve
    the full text genre description based on the given genre code.
    """

    cantus_db_api_endpoint = "https://cantusdatabase.org/genres"
    request_headers = {"Accept": "application/json"}

    def __init__(self) -> None:
        # The mapping is fetched once, when the expander is created.
        self.genre_data = self.load_genre_data()

    def load_genre_data(self) -> dict[str, str]:
        """
        Loads the genre list from the CantusDB API and returns a dictionary mapping
        genre codes to genre descriptions.

        The response body is read as JSON with a top-level "genres" list whose
        items carry "name" and "description" keys.

        :return: A mapping of genre "name" to genre "description".
        :raises requests.HTTPError: If the API responds with an error status.
        """
        response = requests.get(
            self.cantus_db_api_endpoint, headers=self.request_headers, timeout=5
        )
        response.raise_for_status()
        genre_map: dict[str, str] = {
            x["name"]: x["description"] for x in response.json()["genres"]
        }
        return genre_map

    def expand_genre(self, genre_code: str) -> str:
        """
        Gets the genre description based on the genre code.

        Anything from the first "(" onwards is dropped from the description,
        together with the whitespace in front of it.

        :param genre_code: The genre code to look up.
        :return: The trimmed description, or the genre code itself when the
            code is not in the loaded mapping.
        """
        if genre_code not in self.genre_data:
            # If nothing was found, return the original
            return genre_code

        description = self.genre_data[genre_code]
        # some extra stuff in parentheses is showing up
        trimmed = description.partition("(")[0].rstrip()
        # If the description starts with "(", trimming would leave nothing
        # useful, so fall back to the full description.
        return trimmed or description


def expand_differentia(differentia_code: str) -> str:
    """
    In most cases, the differentia remains unmodified.

    :param differentia_code: The differentia.
    :return: "No differentia" if the code contains "*", otherwise the
        differentia unchanged.
    """
    return "No differentia" if "*" in differentia_code else differentia_code


# Mapping of office codes to their full names, used by expand_office.
OFFICE_CODES: dict[str, str] = {
    "V": "First Vespers",
    "C": "Compline",
    "M": "Matins",
    "L": "Lauds",
    "P": "Prime",
    "T": "Terce",
    "S": "Sext",
    "N": "None",
    "V2": "Second Vespers",
    "MI": "Mass",
    "MI1": "First Mass",
    "MI2": "Second Mass",
    "MI3": "Third Mass",
    "D": "Day Hours",
    "R": "Memorial",
    "E": "Antiphons for the Magnificat or Benedictus",
    "H": "Antiphons based on texts from the Historia",
    "CA": "Chapter",
    "X": "Supplementary",
}


def expand_office(office_code: str) -> str:
    """
    Returns the full name of the office based on the given office code.

    The lookup is exact: the code is neither stripped nor case-folded.

    :param office_code: The office code.
    :return: The full name of the office, or the string "Error" when the
        code is not a known office code.
    """
    return OFFICE_CODES.get(office_code, "Error")


class PositionExpander:
    """
    Loads the position mapping data from a JSON file and provides a method to retrieve
    the full text position description based on the given office, genre, and position code.
    """

    def __init__(self) -> None:
        # The mapping is read from position_mapping.json under
        # <BASE_DIR>/cantusdata/helpers and is nested as
        # office code -> genre code -> position code -> text.
        with open(
            os.path.join(
                settings.BASE_DIR, "cantusdata", "helpers", "position_mapping.json"
            ),
            "r",
            encoding="utf-8",
        ) as f:
            self.position_data_base: dict[str, dict[str, dict[str, str]]] = json.load(f)

    def expand_position(
        self, office_code: str, genre_code: str, position_code: str
    ) -> str:
        """
        Retrieves the full text position description based on the given office, genre,
        and position code.

        All three codes are stripped of surrounding whitespace before the
        lookup. The position code additionally loses leading zeros and any
        trailing ".", "_" or space characters.

        :param office_code: The office code.
        :param genre_code: The genre code.
        :param position_code: The position code.
        :return: The position description, or an empty string when the
            combination is not present in the mapping.
        """
        # Normalise the keys first so the try block guards only the lookup.
        office_key = office_code.strip()
        genre_key = genre_code.strip()
        position_key = position_code.strip().lstrip("0").rstrip("._ ")
        try:
            return self.position_data_base[office_key][genre_key][position_key]
        except KeyError:
            # If it's not in the dictionary then we just use an empty string
            return ""
Loading
Loading