Merge pull request #925 from dchiller/i900-refactor-expandr

Refactor `helpers/expandr.py`
DDMAL · Jan 9, 2025 · cd2464a · cd2464a
2 parents 427669e + 40cbbef
commit cd2464a
Show file tree

Hide file tree

Showing 7 changed files with 511 additions and 483 deletions.
diff --git a/app/Dockerfile b/app/Dockerfile
@@ -1,6 +1,6 @@
 # syntax=docker/dockerfile:1
 # Download and install python dependencies in a container
-FROM python:3.12.3 as dependency-install-container
+FROM python:3.12.3 AS dependency-install-container
 ARG DEVELOPMENT
 COPY ./poetry.lock ./pyproject.toml ./app/install-packages.sh /code/
 WORKDIR /code

diff --git a/app/public/cantusdata/helpers/expandr.py b/app/public/cantusdata/helpers/expandr.py
@@ -1,152 +1,148 @@
-from cantusdata.settings import BASE_DIR
-from cantusdata.helpers.scrapers.genre import genres
-
-import csv
-import urllib.request, urllib.error, urllib.parse
-import re
+import json
 import os
+import requests
+
+from django.conf import settings
 
 
-def expand_mode(mode_code):
-    input_list = mode_code.strip()
+def expand_mode(mode_code: str) -> str:
+    """
+    Translate non-numeric components of a CantusDB mode code into human-readable form.
+
+    :param mode_code: A CantusDB mode code
+    :return: A human-readable translation of the mode code
+    """
+    mode_code_stripped = mode_code.strip()
     mode_output = []
-    if "1" in input_list:
-        mode_output.append("1")
-    if "2" in input_list:
-        mode_output.append("2")
-    if "3" in input_list:
-        mode_output.append("3")
-    if "4" in input_list:
-        mode_output.append("4")
-    if "5" in input_list:
-        mode_output.append("5")
-    if "6" in input_list:
-        mode_output.append("6")
-    if "7" in input_list:
-        mode_output.append("7")
-    if "8" in input_list:
-        mode_output.append("8")
-    if "*" in input_list:
-        mode_output.append("No music")
-    if "r" in input_list:
-        mode_output.append("Formulaic")
-    if "?" in input_list:
-        mode_output.append("Uncertain")
-    if "S" in input_list:
-        mode_output.append("Responsory (special)")
-    if "T" in input_list:
-        mode_output.append("Chant in Transposition")
+    mode_nums = {"1", "2", "3", "4", "5", "6", "7", "8"}
+    for char in mode_code_stripped:
+        if char in mode_nums:
+            mode_output.append(char)
+            continue
+        match char:
+            case "*":
+                mode_output.append("No music")
+            case "r":
+                mode_output.append("Formulaic")
+            case "?":
+                mode_output.append("Uncertain")
+            case "S":
+                mode_output.append("Responsory (special)")
+            case "T":
+                mode_output.append("Chant in Transposition")
     outstring = " ".join(mode_output)
     return outstring
 
 
-def expand_genre(genre_code):
-    if genre_code in genres:
-        description = genres[genre_code]
+class GenreExpander:
+    """
+    Loads the genre mapping from the CantusDB API and provides a method to retrieve
+    the full text genre description based on the given genre code.
+    """
+
+    cantus_db_api_endpoint = "https://cantusdatabase.org/genres"
+    request_headers = {"Accept": "application/json"}
+
+    def __init__(self) -> None:
+        self.genre_data = self.load_genre_data()
+
+    def load_genre_data(self) -> dict[str, str]:
+        """
+        Loads the genre list from the CantusDB API and returns a dictionary mapping
+        genre codes to genre descriptions.
+        """
+        response = requests.get(
+            self.cantus_db_api_endpoint, headers=self.request_headers, timeout=5
+        )
+        response.raise_for_status()
+        genre_map: dict[str, str] = {
+            x["name"]: x["description"] for x in response.json()["genres"]
+        }
+        return genre_map
+
+    def expand_genre(self, genre_code: str) -> str:
+        """
+        Gets the genre description based on the genre code.
+        """
+        if not genre_code in self.genre_data:
+            return genre_code
+
+        description = self.genre_data[genre_code]
         # some extra stuff in parentheses is showing up
         paren = description.find("(")
         return description[: paren - 1] if paren != -1 else description
 
-    # If nothing was found, return the original
-    return genre_code
-
 
-def expand_differentia(differentia_code):
+def expand_differentia(differentia_code: str) -> str:
     """
     In most cases, the differentia remains unmodified
 
-    :param differentia_code:
-    :return:
+    :param differentia_code: The differentia.
+    :return: "No differentia" if the code contains "*" (no differentia present), otherwise the differentia unchanged.
     """
     return "No differentia" if "*" in differentia_code else differentia_code
 
 
-def expand_office(office_code):
-    return {
-        "V": "First Vespers",
-        "C": "Compline",
-        "M": "Matins",
-        "L": "Lauds",
-        "P": "Prime",
-        "T": "Terce",
-        "S": "Sext",
-        "N": "None",
-        "V2": "Second Vespers",
-        "MI": "Mass",
-        "MI1": "First Mass",
-        "MI2": "Second Mass",
-        "MI3": "Third Mass",
-        "D": "Day Hours",
-        "R": "Memorial",
-        "E": "Antiphons for the Magnificat or Benedictus",
-        "H": "Antiphons based on texts from the Historia",
-        "CA": "Chapter",
-        "X": "Supplementary",
-    }.get(office_code, "Error")
-
-
-class PositionExpander(object):
-    position_data_base = None
-
-    def __init__(self):
-        self.csv_file = csv.DictReader(
-            open(os.path.join(BASE_DIR, "data_dumps", "position_names.csv"))
-        )
-        self.position_data_base = dict()
-        for row in self.csv_file:
-            office_code = self.remove_double_dash(row["Office"]).strip()
-            genre_code = self.remove_double_dash(row["Genre"]).strip()
-            position_code = (
-                self.remove_double_dash(row["Position"])
-                .strip()
-                .lstrip("0")
-                .rstrip("._ ")
-            )
-            text = self.remove_double_dash(row["Text Phrase"]).strip()
-
-            # We are creating a 3-dimensional dictionary for fast lookup of names
-            self.add_text(office_code, genre_code, position_code, text)
-
-    def get_text(self, office_code, genre_code, position_code):
+OFFICE_CODES = {
+    "V": "First Vespers",
+    "C": "Compline",
+    "M": "Matins",
+    "L": "Lauds",
+    "P": "Prime",
+    "T": "Terce",
+    "S": "Sext",
+    "N": "None",
+    "V2": "Second Vespers",
+    "MI": "Mass",
+    "MI1": "First Mass",
+    "MI2": "Second Mass",
+    "MI3": "Third Mass",
+    "D": "Day Hours",
+    "R": "Memorial",
+    "E": "Antiphons for the Magnificat or Benedictus",
+    "H": "Antiphons based on texts from the Historia",
+    "CA": "Chapter",
+    "X": "Supplementary",
+}
+
+
+def expand_office(office_code: str) -> str:
+    """
+    Returns the full name of the office based on the given office code.
+
+    :param office_code: The office code.
+    :return: The full name of the office.
+    """
+    return OFFICE_CODES.get(office_code, "Error")
+
+
+class PositionExpander:
+    """
+    Loads the position mapping data from a JSON file and provides a method to retrieve
+    the full text position description based on the given office, genre, and position code.
+    """
+
+    def __init__(self) -> None:
+        with open(
+            os.path.join(
+                settings.BASE_DIR, "cantusdata", "helpers", "position_mapping.json"
+            ),
+            "r",
+            encoding="utf-8",
+        ) as f:
+            self.position_data_base: dict[str, dict[str, dict[str, str]]] = json.load(f)
+
+    def expand_position(
+        self, office_code: str, genre_code: str, position_code: str
+    ) -> str:
+        """
+        Retrieves the full text position description based on the given office, genre,
+        and position code.
+        """
         try:
             return self.position_data_base[office_code.strip()][genre_code.strip()][
                 position_code.strip().lstrip("0").rstrip("._ ")
             ]
         except KeyError:
             # If it's not in the dictionary then we just use an empty string
             return ""
-
-    def add_text(self, office, genre, position, text):
-        """
-        Add a record to self.position_data_base, which is a 3d dictionary.
-        Raises KeyError if a dictionary position is already taken.
-        """
-        if office in self.position_data_base:
-            if genre in self.position_data_base[office]:
-                if position in self.position_data_base[office][genre]:
-                    raise KeyError(
-                        "Position record {0} {1} {2} already set to {3}!".format(
-                            office,
-                            genre,
-                            position,
-                            self.position_data_base[office][genre][position],
-                        )
-                    )
-                else:
-                    # Position doesn't exist, so we create it
-                    self.position_data_base[office][genre].update({position: text})
-            else:
-                # Genre doesn't exist, so we create it and position
-                self.position_data_base[office].update({genre: {position: text}})
-        else:
-            # Office doesn't exist, so we create office, genre, and position
-            self.position_data_base.update({office: {genre: {position: text}}})
-
-    def remove_double_dash(self, text):
-        """
-        Turns double dashes into empty strings
-        """
-        if text.strip() == "--":
-            return ""
-        else:
-            return text