diff --git a/app/public/cantusdata/helpers/mei_processing/mei_parser.py b/app/public/cantusdata/helpers/mei_processing/mei_parser.py
index 43ffebc3f..debfad7d5 100644
--- a/app/public/cantusdata/helpers/mei_processing/mei_parser.py
+++ b/app/public/cantusdata/helpers/mei_processing/mei_parser.py
@@ -8,20 +8,20 @@
between two neume components.
- get_contour_from_interval: Computes the contour of an interval.
- analyze_neume: Analyzes a neume (a list of neume components) to determine its
- neume type, its intervals, and its contour.
+ neume name, its intervals, and its contour.
Defines associated types for the data structures used by the parser.
"""
from typing import Tuple, Dict, List, Iterator, Optional
-from lxml import etree
+from lxml import etree # pylint: disable=no-name-in-module
from .mei_parsing_types import (
Zone,
SyllableText,
NeumeComponentElementData,
NeumeComponent,
ContourType,
- NeumeType,
+ NeumeName,
Neume,
Syllable,
)
@@ -31,24 +31,24 @@
PITCH_CLASS = {"c": 0, "d": 2, "e": 4, "f": 5, "g": 7, "a": 9, "b": 11}
# Mapping from neume contours to neume names
-NEUME_GROUPS: Dict[str, NeumeType] = {
- "": "Punctum",
- "u": "Pes",
- "d": "Clivis",
- "uu": "Scandicus",
- "ud": "Torculus",
- "du": "Porrectus",
- "s": "Distropha",
- "ss": "Tristopha",
- "sd": "Pressus",
- "dd": "Climacus",
- "ddu": "Climacus resupinus",
- "udu": "Torculus resupinus",
- "dud": "Porrectus flexus",
- "udd": "Pes subpunctis",
- "uud": "Scandicus flexus",
- "uudd": "Scandicus subpunctis",
- "dudd": "Porrectus subpunctis",
+NEUME_GROUPS: Dict[str, NeumeName] = {
+ "": "punctum",
+ "u": "pes",
+ "d": "clivis",
+ "uu": "scandicus",
+ "ud": "torculus",
+ "du": "porrectus",
+ "r": "distropha",
+ "rr": "tristopha",
+ "rd": "pressus",
+ "dd": "climacus",
+ "ddu": "climacus_resupinus",
+ "udu": "torculus_resupinus",
+ "dud": "porrectus_flexus",
+ "udd": "pes_subpunctis",
+ "uud": "scandicus_flexus",
+ "uudd": "scandicus_subpunctis",
+ "dudd": "porrectus_subpunctis",
}
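+# Example (illustrative): a neume whose successive intervals ascend and then
+# descend has the contour string "ud" and is therefore named a torculus:
+#
+#     NEUME_GROUPS["ud"]                    # "torculus"
+#     NEUME_GROUPS.get("uddu", "compound")  # unlisted contours map to "compound"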
@@ -75,6 +75,7 @@ class MEIParser:
def __init__(self, mei_file: str):
self.mei_file = mei_file
self.mei = etree.parse(self.mei_file)
+ self._remove_empty_neumes_and_syllables()
self.zones = self.parse_zones()
self.syllables = self.parse_mei()
@@ -182,7 +183,7 @@ def _parse_neume(
)
if parsed_neume_component:
parsed_nc_elements.append(parsed_neume_component)
- neume_type, intervals, contours = analyze_neume(parsed_nc_elements)
+ neume_name, intervals, contours = analyze_neume(parsed_nc_elements)
# If the first neume component of the next syllable can be parsed,
# add the interval and contour between the final neume component of
# the current syllable and the first neume component of the next syllable.
@@ -193,7 +194,7 @@ def _parse_neume(
if parsed_next_neume_comp:
last_neume_comp = parsed_nc_elements[-1]
intervals.append(
- get_interval_between_neume_components(
+ get_semitones_between_neume_components(
last_neume_comp, parsed_next_neume_comp
)
)
@@ -211,12 +212,13 @@ def _parse_neume(
"pname": nc["pname"],
"octave": nc["octave"],
"bounding_box": nc["bounding_box"],
- "interval": intervals[i] if i < len(intervals) else None,
+ "semitone_interval": intervals[i] if i < len(intervals) else None,
"contour": contours[i] if i < len(contours) else None,
+ "system": neume_system,
}
)
parsed_neume: Neume = {
- "neume_type": neume_type,
+ "neume_name": neume_name,
"neume_components": parsed_neume_components,
"bounding_box": combined_bounding_box,
"system": neume_system,
@@ -323,6 +325,26 @@ def _syllable_iterator(
system += 1
current_elem = next(elem_iterator, None)
+ def _remove_empty_neumes_and_syllables(self) -> None:
+ """
+ Apparently, for a while Rodan was creating invalid MEI files that
+ contained empty neumes (i.e., neumes with no neume components) and
+ empty syllables (i.e., syllables with no neumes or only empty neumes).
+ This method removes those empty neumes and syllables from the MEI being
+ parsed. It was added as a preprocessing step so that it can simply be
+ removed once the underlying MEI files are corrected.
+ """
+ for neume in self.mei.iter(f"{self.MEINS}neume"):
+ if len(neume.findall(f"{self.MEINS}nc")) == 0:
+ # Ignoring type because we know that getparent() will
+ # return an element in this case.
+ neume.getparent().remove(neume) # type: ignore
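+ # Note: empty neumes are removed first, so a syllable whose only
+ # children were empty neumes is also caught by the check below.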
+ for syllable in self.mei.iter(f"{self.MEINS}syllable"):
+ if len(syllable.findall(f"{self.MEINS}neume")) == 0:
+ # Ignoring type because we know that getparent() will
+ # return an element in this case.
+ syllable.getparent().remove(syllable) # type: ignore
+
def parse_mei(self) -> List[Syllable]:
"""
Parses the MEI file into a list of syllables.
@@ -351,7 +373,7 @@ def parse_mei(self) -> List[Syllable]:
return syllables
-def get_interval_between_neume_components(
+def get_semitones_between_neume_components(
neume_component_1: NeumeComponentElementData,
neume_component_2: NeumeComponentElementData,
) -> int:
@@ -369,8 +391,8 @@ def get_interval_between_neume_components(
try:
pc1 = PITCH_CLASS[neume_component_1["pname"]]
pc2 = PITCH_CLASS[neume_component_2["pname"]]
- except KeyError:
- raise ValueError("Invalid pitch name in neume component.")
+ except KeyError as err:
+ raise ValueError("Invalid pitch name in neume component.") from err
# In MIDI note numbers, C0 = 12.
pitch_1 = pc1 + (12 * (neume_component_1["octave"] + 1))
pitch_2 = pc2 + (12 * (neume_component_2["octave"] + 1))
@@ -382,34 +404,36 @@ def get_contour_from_interval(interval: int) -> ContourType:
Compute the contour of an interval.
:param interval: The size of the interval in semitones
- :return: The contour of the interval ("u"[p], "d"[own], or "s"[tay])
+ :return: The contour of the interval ("u"[p], "d"[own], or "r"[epeat])
"""
if interval < 0:
return "d"
if interval > 0:
return "u"
- return "s"
+ return "r"
def analyze_neume(
neume: List[NeumeComponentElementData],
-) -> Tuple[NeumeType, List[int], List[ContourType]]:
+) -> Tuple[NeumeName, List[int], List[ContourType]]:
"""
Analyze a neume (a list of neume components) to determine:
- - Neume type
- - Neume intervals
- - Neume contour
+ - The neume type (e.g., punctum, pes, clivis, etc.)
+ - The intervals in the neume in semitones
+ - The contour of the neume
:param neume: A list of neume components (a list of NeumeComponentsType dictionaries)
:return: A tuple of information about the neume:
- Neume type (str)
- - Neume intervals (list of ints)
- - Neume contour (list of "u"[p], "d"[own], or "s"[tay])
+ - Neume intervals in semitones (list of ints)
+ - Neume contour (list of "u"[p], "d"[own], or "r"[epeat])
"""
- intervals: List[int] = [
- get_interval_between_neume_components(nc1, nc2)
+ semitone_intervals: List[int] = [
+ get_semitones_between_neume_components(nc1, nc2)
for nc1, nc2 in zip(neume[:-1], neume[1:])
]
- contours: List[ContourType] = [get_contour_from_interval(i) for i in intervals]
- neume_type: NeumeType = NEUME_GROUPS.get("".join(contours), "Compound")
- return neume_type, intervals, contours
+ contours: List[ContourType] = [
+ get_contour_from_interval(i) for i in semitone_intervals
+ ]
+ neume_type: NeumeName = NEUME_GROUPS.get("".join(contours), "compound")
+ return neume_type, semitone_intervals, contours
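+
+
+# Example (illustrative, with hypothetical components g3 and c4 a fourth apart):
+#     analyze_neume([g3, c4, g3])  ->  ("torculus", [5, -5], ["u", "d"])
+#     analyze_neume([g3])          ->  ("punctum", [], [])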
diff --git a/app/public/cantusdata/helpers/mei_processing/mei_parsing_types.py b/app/public/cantusdata/helpers/mei_processing/mei_parsing_types.py
index 2f47aec9d..ff7507b03 100644
--- a/app/public/cantusdata/helpers/mei_processing/mei_parsing_types.py
+++ b/app/public/cantusdata/helpers/mei_processing/mei_parsing_types.py
@@ -2,7 +2,7 @@
Contains type definitions used in the MEI parsing process.
"""
-from typing import Tuple, TypedDict, Literal, List, Optional
+from typing import Tuple, TypedDict, Literal, List, Optional, NotRequired
from typing_extensions import TypeAlias
# A type for coordinates of bounding boxes
@@ -30,26 +30,26 @@ class Zone(TypedDict):
rotate: float
-ContourType = Literal["u", "d", "s"]
-NeumeType = Literal[
- "Punctum",
- "Pes",
- "Clivis",
- "Scandicus",
- "Torculus",
- "Porrectus",
- "Distropha",
- "Tristopha",
- "Pressus",
- "Climacus",
- "Climacus resupinus",
- "Torculus resupinus",
- "Porrectus flexus",
- "Pes subpunctis",
- "Scandicus flexus",
- "Scandicus subpunctis",
- "Porrectus subpunctis",
- "Compound",
+ContourType = Literal["u", "d", "r"]
+NeumeName = Literal[
+ "punctum",
+ "pes",
+ "clivis",
+ "scandicus",
+ "torculus",
+ "porrectus",
+ "distropha",
+ "tristopha",
+ "pressus",
+ "climacus",
+ "climacus_resupinus",
+ "torculus_resupinus",
+ "porrectus_flexus",
+ "pes_subpunctis",
+ "scandicus_flexus",
+ "scandicus_subpunctis",
+ "porrectus_subpunctis",
+ "compound",
]
@@ -74,27 +74,29 @@ class NeumeComponent(NeumeComponentElementData):
"""A type extending NeumeComponentElementData with interval and contour information.
- interval: The interval (in semitones) between the neume component and the
+ semitone_interval: The interval in semitones between the neume component and the
following neume component. If there is no following neume component,
this is None.
- contour: The contour ("u"[p], "d"[own], or "s"[tay]) of 'interval'. If there is no
+ contour: The contour ("u"[p], "d"[own], or "r"[epeat]) of 'semitone_interval'. If there is no
following neume component, this is None.
+ system: The system number that the neume component is on
"""
- interval: Optional[int]
+ semitone_interval: Optional[int]
contour: Optional[ContourType]
+ system: int
class Neume(TypedDict):
"""A type for neumes
- neume_type: The name of the neume (ie. "Punctum", "Pes", "Clivis", etc.)
+ neume_name: The name of the neume (e.g., "punctum", "pes", "clivis")
neume_components: A list of neume components (containing pitch information)
bounding_box: The bounding box of the neume
system: The system number that the neume is on
"""
- neume_type: NeumeType
+ neume_name: NeumeName
neume_components: List[NeumeComponent]
bounding_box: Zone
system: int
@@ -112,3 +114,41 @@ class Syllable(TypedDict):
text: SyllableText
neumes: List[Neume]
+
+
+class NgramDocument(TypedDict):
+ """
+ A generic type for documents containing n-grams
+ of information extracted from MEI files.
+
+ location: The location of the n-gram in the MEI file (MEI Zones
+ converted to JSON strings according to bounding_box_utils.stringify_bounding_boxes)
+ pitch_names: A string containing the pitch names of the neume components in the n-gram,
+ separated by underscores.
+ contour: A string containing the contours of the intervals between the neume
+ components in the n-gram, separated by underscores.
+ semitone_intervals: A string containing the semitone intervals between the neume components
+ in the n-gram, separated by underscores.
+ neume_names: A string containing the names of the neumes in the n-gram,
+ separated by underscores. This field is not required, and is only present when
+ the n-gram contains complete neumes.
+
+ The following may be part of an NgramDocument, but are optional because
+ they will be added when the document is indexed:
+ manuscript_id: The ID of the manuscript the n-gram belongs to.
+ folio: The number of the folio on which the n-gram exists.
+ id: The unique ID of the document (corresponds to solr schema's id field)
+ type: The type of the document (corresponds to solr schema's type field)
+ image_uri: The URI of the image of the folio on which the n-gram exists.
+ """
+
+ location: str
+ pitch_names: str
+ contour: str
+ semitone_intervals: str
+ neume_names: NotRequired[str]
+ manuscript_id: NotRequired[str]
+ folio: NotRequired[str]
+ id: NotRequired[str]
+ type: NotRequired[Literal["omr_ngram"]]
+ image_uri: NotRequired[str]
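+
+
+# A minimal sketch (with illustrative values) of constructing an NgramDocument;
+# the NotRequired fields may be omitted here and added at indexing time:
+#
+#     doc: NgramDocument = {
+#         "location": '[{"ulx": 0, "uly": 0, "width": 10, "height": 10}]',
+#         "pitch_names": "d_c_f",
+#         "contour": "d_u",
+#         "semitone_intervals": "-2_5",
+#     }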
diff --git a/app/public/cantusdata/helpers/mei_processing/mei_tokenizer.py b/app/public/cantusdata/helpers/mei_processing/mei_tokenizer.py
index 7ae5b8c8d..12a71dc5e 100644
--- a/app/public/cantusdata/helpers/mei_processing/mei_tokenizer.py
+++ b/app/public/cantusdata/helpers/mei_processing/mei_tokenizer.py
@@ -4,72 +4,19 @@
can then be indexed by a search engine (i.e. for this project, Solr).
"""
-from typing import List, Iterator, Any, TypedDict, Literal
+import uuid
+from typing import List, Tuple, Optional, Never, Union
from .mei_parser import MEIParser
-from .mei_parsing_types import Neume, NeumeComponent
+from .mei_parsing_types import (
+ Neume,
+ NeumeComponent,
+ ContourType,
+ NeumeName,
+ NgramDocument,
+ Zone,
+)
from .bounding_box_utils import combine_bounding_boxes, stringify_bounding_boxes
-NgramUnitType = Literal["neume", "neume_component"]
-
-
-class NgramDocument(TypedDict):
- """
- A generic type for documents containing n-grams
- of information extracted from MEI files.
-
- ngram_unit: The unit of the n-gram
- location: The location of the n-gram in the MEI file (MEI Zones
- converted to JSON strings according to bounding_box_utils.stringify_bounding_boxes)
- """
-
- ngram_unit: NgramUnitType
- location: str
-
-
-class NeumeNgramDocument(NgramDocument):
- """
- A type for documents containing n-grams of neume-level information.
-
- neume_names: A string containing the names of the neumes in the n-gram,
- separated by underscores.
- """
-
- neume_names: str
-
-
-class NeumeComponentNgramDocument(NgramDocument):
- """
- A type for documents containing n-grams of neume component-level information.
-
- pitch_names: A string containing the pitch names of the neume components in the n-gram,
- separated by underscores.
- intervals: A string containing the intervals between the neume components in the n-gram,
- separated by underscores.
- contours: A string containing the contours of the neume components in the n-gram, separated
- by underscores.
- """
-
- pitch_names: str
- intervals: str
- contours: str
-
-
-def generate_ngrams(sequence: List[Any], min_n: int, max_n: int) -> Iterator[List[Any]]:
- """
- Generate n-grams from a sequence (list) of items.
-
- :param sequence: A list of items to generate n-grams from.
- :param min_gram: The minimum length of n-grams to generate.
- :param max_gram: The maximum length of n-grams to generate.
- :yield: A list containing the subset of consecutive items
- that make up an n-gram.
- """
- # Iterate through all desired n-gram lengths
- for i in range(min_n, max_n + 1):
- # Iterate through all n-grams of "sequence" of length "i"
- for j in range(0, len(sequence) - i + 1):
- yield sequence[j : j + i]
-
class MEITokenizer(MEIParser):
"""
@@ -85,67 +32,178 @@ def __init__(self, mei_file: str, min_ngram: int, max_ngram: int) -> None:
self.min_ngram = min_ngram
self.max_ngram = max_ngram
- def get_neume_ngram_docs(self) -> List[NeumeNgramDocument]:
+ @property
+ def flattened_neumes(self) -> List[Neume]:
"""
- Generate neume-level documents for search, containing
- n-grams of neume names.
+ Flatten the neumes contained in the syllables of the MEI file.
- :return: A list of dictionaries containing the n-grams
- of neume names.
+ :return: A list of neumes.
"""
- neumes_sequence: List[Neume] = []
+ neumes: List[Neume] = []
for syllable in self.syllables:
- neumes_sequence.extend(syllable["neumes"])
- neume_documents: List[NeumeNgramDocument] = []
- for ngram in generate_ngrams(neumes_sequence, self.min_ngram, self.max_ngram):
- bounding_boxes = [
- (neume["bounding_box"], neume["system"]) for neume in ngram
- ]
- document_location = combine_bounding_boxes(bounding_boxes)
- neume_names = "_".join([neume["neume_type"] for neume in ngram])
- neume_documents.append(
- {
- "ngram_unit": "neume",
- "location": stringify_bounding_boxes(document_location),
- "neume_names": neume_names,
- }
- )
- return neume_documents
+ neumes.extend(syllable["neumes"])
+ return neumes
- def get_neume_component_ngram_docs(self) -> List[NeumeComponentNgramDocument]:
+ def _stringify_neume_component_data(
+ self,
+ neume_components: List[NeumeComponent],
+ ) -> Tuple[str, str, str]:
"""
- Generate neume component-level documents for search, containing
- n-grams of pitch names, intervals, and contours.
+ Convert pitch, contour, and interval information from a list of
+ neume components into strings.
- :return: A list of dictionaries containing the n-grams
- of pitch names, intervals, and contours.
+ :param neume_components: A list of neume components to convert into strings.
+ :return: A tuple containing the pitch names, contours, and semitone intervals
+ of the neume components, each as an underscore-separated string.
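+ For example, components d-c-f (semitone intervals -2 and 5) stringify
+ to ("d_c_f", "d_u", "-2_5").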
"""
- neume_components: List[NeumeComponent] = []
- for syllable in self.syllables:
- for neume in syllable["neumes"]:
- neume_components.extend(neume["neume_components"])
- neume_component_documents: List[NeumeComponentNgramDocument] = []
- for ngram in generate_ngrams(
- neume_components,
- self.min_ngram,
- self.max_ngram,
- ):
- pitch_names = "_".join([comp["pname"] for comp in ngram])
- # Keep "internal" intervals and contours (in other words,
- # the intevals and countours between the pitches in these
- # neume components, and not the interval and contour following
- # the last pitch in the ngram).
- intervals = [str(comp["interval"]) for comp in ngram[:-1]]
- contours = [comp["contour"] for comp in ngram[:-1]]
- bounding_boxes = [(comp["bounding_box"], neume["system"]) for comp in ngram]
- document_location = combine_bounding_boxes(bounding_boxes)
- neume_component_documents.append(
- {
- "ngram_unit": "neume_component",
- "location": stringify_bounding_boxes(document_location),
- "pitch_names": pitch_names,
- "intervals": "_".join(intervals),
- "contours": "_".join(contours),
- }
+ pnames: List[str] = []
+ contours: List[ContourType] = []
+ semitone_intervals: List[str] = []
+ for idx, nc in enumerate(neume_components):
+ pnames.append(nc["pname"])
+ # The interval is None if and only if the contour is None,
+ # so we can safely do this single check.
+ if nc["contour"] is not None and idx != len(neume_components) - 1:
+ contours.append(nc["contour"])
+ semitone_intervals.append(str(nc["semitone_interval"]))
+ return "_".join(pnames), "_".join(contours), "_".join(semitone_intervals)
+
+ def _create_document_from_neume_components(
+ self,
+ neume_components: List[NeumeComponent],
+ ) -> NgramDocument:
+ """
+ Create an NgramDocument from a list of neume components.
+
+ :param neume_components: A list of neume components (each includes its
+ bounding box and system number).
+ :return: An NgramDocument containing the information from the neume components.
+ """
+ pitch_names, contour, intervals = self._stringify_neume_component_data(
+ neume_components
+ )
+ zones_with_sys: List[Tuple[Zone, int]] = [
+ (nc["bounding_box"], nc["system"]) for nc in neume_components
+ ]
+ location: str = stringify_bounding_boxes(combine_bounding_boxes(zones_with_sys))
+ return {
+ "location": location,
+ "pitch_names": pitch_names,
+ "contour": contour,
+ "semitone_intervals": intervals,
+ "id": str(uuid.uuid4()),
+ "type": "omr_ngram",
+ }
+
+ def _create_pitch_sequences(
+ self,
+ ) -> Tuple[List[NeumeComponent], List[Optional[NeumeName]]]:
+ """
+ Create two lists of equal length: one containing
+ the pitches (neume components) contained in the parsed file,
+ and the other containing the names of the neumes that begin
+ at each pitch (or None if no neume begins at that pitch).
+
+ :return: A tuple containing the list of pitches and the list of neume names.
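+
+ For example, a punctum followed by a two-pitch clivis yields a pitch
+ list of length three and neume names ["punctum", "clivis", None].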
+ """
+ neume_sequence = self.flattened_neumes
+ neume_names: List[Optional[NeumeName]] = []
+ ncs: List[NeumeComponent] = []
+ for neume in neume_sequence:
+ ncs.extend(neume["neume_components"])
+ flattened_neume_names = [neume["neume_name"]] + [None] * (
+ len(neume["neume_components"]) - 1
)
- return neume_component_documents
+ neume_names.extend(flattened_neume_names)
+ return ncs, neume_names
+
+ def create_ngram_documents(self) -> List[NgramDocument]:
+ """
+ Create a list of ngram documents from the MEI file,
+ ensuring that we have ngrams that contain n pitches
+ and n neumes for all n in the range min_ngram to max_ngram.
+
+ In broad strokes, the function:
+ - Iterates through the pitches in the document, and creates ngrams
+ of pitches with n = min_ngram, min_ngram + 1, ..., max_ngram.
+ When an ngram corresponds to a set of complete neumes, neume
+ names are included in the ngram document. When it doesn't,
+ no neume names are added.
+ - Checks whether this has created ngrams of length up to max_ngram
+ of complete neumes starting at the current pitch.
+ (Note: this will only be the case if the
+ current pitch begins a sequence of max_ngram consecutive single-
+ pitch neumes).
+ - If this check fails, the function creates the remaining ngrams of
+ complete neumes, up to ngrams of max_ngram neumes.
+
+ :return: A list of NgramDocuments.
+ """
+ pitches, neume_names = self._create_pitch_sequences()
+ ngram_docs: List[NgramDocument] = []
+ num_pitches = len(pitches)
+ # At each pitch in the file, we'll generate all the necessary
+ # ngrams that start with that pitch.
+ for start_idx in range(num_pitches):
+ largest_num_neumes = 0
+ for ngram_length in range(self.min_ngram, self.max_ngram + 1):
+ # Collect the pitches for an ngram of ngram_length
+ # pitches starting at start_idx, if we haven't reached the
+ # end of the pitches.
+ end_idx = start_idx + ngram_length
+ if end_idx > num_pitches:
+ break
+ nc_ngram = pitches[start_idx:end_idx]
+ doc = self._create_document_from_neume_components(nc_ngram)
+ # If the pitch at start_idx is the beginning of a neume
+ # and the pitch following this ngram is also the beginning
+ # of a neume (or we've reached the end of the file),
+ # then our current ngram of pitches overlaps
+ # with some number of complete neumes.
+ neume_start = neume_names[start_idx] is not None
+ if neume_start:
+ if end_idx == num_pitches or neume_names[end_idx] is not None:
+ neume_name_list = [
+ nn
+ for nn in neume_names[start_idx:end_idx]
+ if nn is not None
+ ]
+ doc["neume_names"] = "_".join(neume_name_list)
+ largest_num_neumes = len(neume_name_list)
+ ngram_docs.append(doc)
+ # If the current neume component starts a neume and we
+ # haven't reached the maximum ngram length of neumes
+ # in our existing documents, generate documents containing
+ # larger ngrams of neumes until we reach the maximum ngram length.
+ if neume_start and largest_num_neumes < self.max_ngram:
+ min_wanted_ngram_length = max(largest_num_neumes + 1, self.min_ngram)
+ for wanted_ngram_length in range(
+ min_wanted_ngram_length, self.max_ngram + 1
+ ):
+ ngram_neume_names: List[NeumeName] = []
+ ngram_num_pitches = 0
+ # We'll add pitches to our ngram until we have the
+ # number of neumes we want in our ngram or we reach
+ # the end of the file.
+ while (len(ngram_neume_names) <= wanted_ngram_length) and (
+ start_idx + ngram_num_pitches < len(pitches)
+ ):
+ if (
+ name_at_pitch := neume_names[start_idx + ngram_num_pitches]
+ ) is not None and len(ngram_neume_names) < wanted_ngram_length:
+ ngram_neume_names.append(name_at_pitch)
+ ngram_num_pitches += 1
+ if len(ngram_neume_names) == wanted_ngram_length:
+ break
+ # We'll only add this ngram if we've actually gotten to
+ # the desired number of neumes (if we didn't, it means
+ # we reached the end of the file)
+ if len(ngram_neume_names) == wanted_ngram_length:
+ ngram_pitches = pitches[
+ start_idx : start_idx + ngram_num_pitches
+ ]
+ doc = self._create_document_from_neume_components(ngram_pitches)
+ doc["neume_names"] = "_".join(ngram_neume_names)
+ ngram_docs.append(doc)
+ return ngram_docs
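+
+
+# A minimal usage sketch (the file path is hypothetical):
+#
+#     tokenizer = MEITokenizer("chant.mei", min_ngram=2, max_ngram=3)
+#     docs = tokenizer.create_ngram_documents()
+#     # each doc carries a fresh uuid4 "id" and the type "omr_ngram"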
diff --git a/app/public/cantusdata/test/core/helpers/mei_processing/test_mei_parser.py b/app/public/cantusdata/test/core/helpers/mei_processing/test_mei_parser.py
index 63a499da4..ff6a41994 100644
--- a/app/public/cantusdata/test/core/helpers/mei_processing/test_mei_parser.py
+++ b/app/public/cantusdata/test/core/helpers/mei_processing/test_mei_parser.py
@@ -4,12 +4,13 @@
from cantusdata.helpers.mei_processing.mei_parser import (
MEIParser,
get_contour_from_interval,
- get_interval_between_neume_components,
+ get_semitones_between_neume_components,
analyze_neume,
)
from cantusdata.helpers.mei_processing.mei_parsing_types import (
NeumeComponentElementData,
Zone,
+ Syllable,
)
@@ -89,7 +90,7 @@ def test_mei_parser(self) -> None:
# Relevant zones (for first syllable and the single neume component in that syllable):
##
##
- expected_first_syllable = {
+ expected_first_syllable: Syllable = {
"text": {
"text": "Ec",
"bounding_box": {
@@ -99,7 +100,7 @@ def test_mei_parser(self) -> None:
},
"neumes": [
{
- "neume_type": "Punctum",
+ "neume_name": "punctum",
"neume_components": [
{
"pname": "d",
@@ -108,8 +109,9 @@ def test_mei_parser(self) -> None:
"coordinates": (2608, 2399, 2678, 2448),
"rotate": 0.0,
},
- "interval": 0,
- "contour": "s",
+ "semitone_interval": 0,
+ "contour": "r",
+ "system": 1,
}
],
"bounding_box": {
@@ -134,7 +136,7 @@ def test_mei_parser(self) -> None:
##
##
##
- expected_last_syllable = {
+ expected_last_syllable: Syllable = {
"text": {
"text": "gil",
"bounding_box": {
@@ -144,7 +146,7 @@ def test_mei_parser(self) -> None:
},
"neumes": [
{
- "neume_type": "Clivis",
+ "neume_name": "clivis",
"neume_components": [
{
"pname": "e",
@@ -153,8 +155,9 @@ def test_mei_parser(self) -> None:
"coordinates": (5037, 7724, 5108, 7774),
"rotate": 0.0,
},
- "interval": -2,
+ "semitone_interval": -2,
"contour": "d",
+ "system": 10,
},
{
"pname": "d",
@@ -163,8 +166,9 @@ def test_mei_parser(self) -> None:
"coordinates": (5104, 7774, 5175, 7824),
"rotate": 0.0,
},
- "interval": None,
+ "semitone_interval": None,
"contour": None,
+ "system": 10,
},
],
"bounding_box": {
@@ -178,29 +182,37 @@ def test_mei_parser(self) -> None:
self.assertEqual(syllables[-1], expected_last_syllable)
def test_get_contour_from_interval(self) -> None:
- self.assertEqual(get_contour_from_interval(0), "s")
+ self.assertEqual(get_contour_from_interval(0), "r")
self.assertEqual(get_contour_from_interval(1), "u")
self.assertEqual(get_contour_from_interval(-3), "d")
- def test_get_interval_between_neume_components(self) -> None:
+ def test_get_semitones_between_neume_components(self) -> None:
with self.subTest("Interval test: ascending P5"):
self.assertEqual(
- get_interval_between_neume_components(self.nc_elem_g3, self.nc_elem_d4),
+ get_semitones_between_neume_components(
+ self.nc_elem_g3, self.nc_elem_d4
+ ),
7,
)
with self.subTest("Interval test: descending P5"):
self.assertEqual(
- get_interval_between_neume_components(self.nc_elem_d4, self.nc_elem_g3),
+ get_semitones_between_neume_components(
+ self.nc_elem_d4, self.nc_elem_g3
+ ),
-7,
)
with self.subTest("Interval test: descending P4"):
self.assertEqual(
- get_interval_between_neume_components(self.nc_elem_g3, self.nc_elem_d3),
+ get_semitones_between_neume_components(
+ self.nc_elem_g3, self.nc_elem_d3
+ ),
-5,
)
with self.subTest("Interval test: descending m6"):
self.assertEqual(
- get_interval_between_neume_components(self.nc_elem_g3, self.nc_elem_b2),
+ get_semitones_between_neume_components(
+ self.nc_elem_g3, self.nc_elem_b2
+ ),
-8,
)
@@ -219,16 +231,16 @@ def test_analyze_neume(self) -> None:
]
neume_components_5 = [self.nc_elem_d4]
with self.subTest("Analyze Pes"):
- self.assertEqual(analyze_neume(neume_components_1), ("Pes", [5], ["u"]))
+ self.assertEqual(analyze_neume(neume_components_1), ("pes", [5], ["u"]))
with self.subTest("Analyze Torculus"):
self.assertEqual(
- analyze_neume(neume_components_2), ("Torculus", [5, -5], ["u", "d"])
+ analyze_neume(neume_components_2), ("torculus", [5, -5], ["u", "d"])
)
with self.subTest("Analyze Clivis"):
- self.assertEqual(analyze_neume(neume_components_3), ("Clivis", [-7], ["d"]))
+ self.assertEqual(analyze_neume(neume_components_3), ("clivis", [-7], ["d"]))
with self.subTest("Analyze Tristropha"):
self.assertEqual(
- analyze_neume(neume_components_4), ("Tristopha", [0, 0], ["s", "s"])
+ analyze_neume(neume_components_4), ("tristopha", [0, 0], ["r", "r"])
)
with self.subTest("Analyze Punctum"):
- self.assertEqual(analyze_neume(neume_components_5), ("Punctum", [], []))
+ self.assertEqual(analyze_neume(neume_components_5), ("punctum", [], []))
diff --git a/app/public/cantusdata/test/core/helpers/mei_processing/test_mei_tokenizer.py b/app/public/cantusdata/test/core/helpers/mei_processing/test_mei_tokenizer.py
index 3a51ca6be..98ca090c0 100644
--- a/app/public/cantusdata/test/core/helpers/mei_processing/test_mei_tokenizer.py
+++ b/app/public/cantusdata/test/core/helpers/mei_processing/test_mei_tokenizer.py
@@ -1,11 +1,9 @@
from unittest import TestCase
from os import path
import json
+from typing import List
from cantusdata.settings import BASE_DIR
-from cantusdata.helpers.mei_processing.mei_tokenizer import (
- MEITokenizer,
- generate_ngrams,
-)
+from cantusdata.helpers.mei_processing.mei_tokenizer import MEITokenizer, NgramDocument
TEST_MEI_FILE = path.join(
@@ -20,34 +18,65 @@
)
-class MEITokenizerTestCase(TestCase):
+def calculate_expected_total_ngrams(
+ mei_file: str, min_ngram: int, max_ngram: int
+) -> int:
+ """
+ Calculate the expected number of ngrams created from an MEI file.
+ The function uses the "flattened_neumes" property of the MEITokenizer
+ class, but does not use any functions of that class that create ngrams.
+
+ The expected number of ngrams is calculated as follows:
+ - The number of neume components in the MEI file is calculated
+ and used to determine how many ngrams are created with min_ngram,
+ min_ngram + 1, ..., max_ngram pitches.
+ - There will be an additional ngram created for every sequence of
+ min_ngram, min_ngram + 1, ..., or max_ngram neumes whose cumulative
+ number of pitches is greater than max_ngram. We add one to the count of
+ expected ngrams for every such sequence.
+ """
+ tokenizer = MEITokenizer(mei_file, min_ngram, max_ngram)
+ parsed_neumes = tokenizer.flattened_neumes
+ num_neume_components = sum(
+ len(neume["neume_components"]) for neume in parsed_neumes
+ )
+ # The number of ngrams of pitches for a given n is:
+ # number of neume components - n + 1
+ exp_num_ngrams = sum(
+ max(0, num_neume_components - i + 1) for i in range(min_ngram, max_ngram + 1)
+ )
+ for i in range(min_ngram, max_ngram + 1):
+ for j in range(len(parsed_neumes) - i + 1):
+ if (
+ sum(
+ len(neume["neume_components"]) for neume in parsed_neumes[j : j + i]
+ )
+ > max_ngram
+ ):
+ exp_num_ngrams += 1
+ return exp_num_ngrams
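+
+
+# For example (illustrative): with 5 neume components and min_ngram=1,
+# max_ngram=2, the pitch-ngram count is (5 - 1 + 1) + (5 - 2 + 1) = 9, and
+# each run of 1 or 2 whole neumes spanning more than 2 pitches adds one more.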
- def test_generate_ngrams(self) -> None:
- with self.subTest("Ngrams from 2 to 3"):
- sequence = [1, 2, 3, 4, 5]
- min_ngram = 2
- max_ngram = 3
- ngrams = list(generate_ngrams(sequence, min_ngram, max_ngram))
- self.assertEqual(
- ngrams,
- [[1, 2], [2, 3], [3, 4], [4, 5], [1, 2, 3], [2, 3, 4], [3, 4, 5]],
- )
- with self.subTest("Ngrams from 3 to 5"):
- sequence = [1, 2, 3, 4, 5]
- min_ngram = 3
- max_ngram = 5
- ngrams = list(generate_ngrams(sequence, min_ngram, max_ngram))
- self.assertEqual(
- ngrams,
- [
- [1, 2, 3],
- [2, 3, 4],
- [3, 4, 5],
- [1, 2, 3, 4],
- [2, 3, 4, 5],
- [1, 2, 3, 4, 5],
- ],
- )
+
+def prepare_tokenizer_results(
+ tokenizer: MEITokenizer,
+) -> List[NgramDocument]:
+ """
+ This function prepares the results of a tokenizer for comparison
+ with expected results by:
+ - removing the unique ID from generated ngram documents
+ - removing the "type" field from generated ngram documents
+ """
+ ngram_docs = tokenizer.create_ngram_documents()
+ for doc in ngram_docs:
+ doc.pop("id")
+ doc.pop("type")
+ return ngram_docs
+
+
+class MEITokenizerTestCase(TestCase):
def test_mei_tokenizer(self) -> None:
tokenizer_1_2 = MEITokenizer(
@@ -55,37 +84,34 @@ def test_mei_tokenizer(self) -> None:
min_ngram=1,
max_ngram=2,
)
- neume_docs_1_2_grams = tokenizer_1_2.get_neume_ngram_docs()
- neume_component_docs_1_2_grams = tokenizer_1_2.get_neume_component_ngram_docs()
+ ngram_docs_1_2 = prepare_tokenizer_results(tokenizer_1_2)
tokenizer_2_3 = MEITokenizer(
TEST_MEI_FILE,
min_ngram=2,
max_ngram=3,
)
- neume_docs_2_3_grams = tokenizer_2_3.get_neume_ngram_docs()
- neume_component_docs_2_3_grams = tokenizer_2_3.get_neume_component_ngram_docs()
+ ngram_docs_2_3 = prepare_tokenizer_results(tokenizer_2_3)
tokenizer_3_5 = MEITokenizer(
TEST_MEI_FILE,
min_ngram=3,
max_ngram=5,
)
- neume_docs_3_5_grams = tokenizer_3_5.get_neume_ngram_docs()
- neume_component_docs_3_5_grams = tokenizer_3_5.get_neume_component_ngram_docs()
- with self.subTest("Number of ngrams"):
- # Number of neumes in file: 117
- # => Number of 1- and 2-grams: 117 + 116 = 233
- # => Number of 2- and 3-grams: 116 + 115 = 231
- # => Number of 3-, 4-, and 5-grams: 115 + 114 + 113 = 342
- self.assertEqual(len(neume_docs_1_2_grams), 233)
- self.assertEqual(len(neume_docs_2_3_grams), 231)
- self.assertEqual(len(neume_docs_3_5_grams), 342)
- # Number of neume components in file: 179
- # => Number of 1- and 2-grams: 179 + 178 = 357
- # => Number of 2- and 3-grams: 178 + 177 = 355
- # => Number of 3-, 4-, and 5-grams: 177 + 176 + 175 = 528
- self.assertEqual(len(neume_component_docs_1_2_grams), 357)
- self.assertEqual(len(neume_component_docs_2_3_grams), 355)
- self.assertEqual(len(neume_component_docs_3_5_grams), 528)
+ ngram_docs_3_5 = prepare_tokenizer_results(tokenizer_3_5)
+ with self.subTest("Total number of ngrams: 1- and 2-grams"):
+ expected_num_ngrams_1_2 = calculate_expected_total_ngrams(
+ TEST_MEI_FILE, 1, 2
+ )
+ self.assertEqual(len(ngram_docs_1_2), expected_num_ngrams_1_2)
+ with self.subTest("Total number of ngrams: 2- and 3-grams"):
+ expected_num_ngrams_2_3 = calculate_expected_total_ngrams(
+ TEST_MEI_FILE, 2, 3
+ )
+ self.assertEqual(len(ngram_docs_2_3), expected_num_ngrams_2_3)
+ with self.subTest("Total number of ngrams: 3- to 5-grams"):
+ expected_num_ngrams_3_5 = calculate_expected_total_ngrams(
+ TEST_MEI_FILE, 3, 5
+ )
+ self.assertEqual(len(ngram_docs_3_5), expected_num_ngrams_3_5)
# First three neumes in test file:
#
#
@@ -102,63 +128,95 @@ def test_mei_tokenizer(self) -> None:
#
#
#
- with self.subTest("First neume 1-gram"):
- expected_neume_1gram = {
- "ngram_unit": "neume",
- "location": json.dumps(
- [{"ulx": 2608, "uly": 2399, "width": 70, "height": 49}]
- ),
- "neume_names": "Punctum",
- }
- self.assertEqual(neume_docs_1_2_grams[0], expected_neume_1gram)
- with self.subTest("First neume component 1-gram"):
- expected_first_neume_component_1gram = {
- "ngram_unit": "neume_component",
+ # Last two neumes in test file:
+ #
+ #
+ #
+ #
+ #
+ #
+ #
+ #
+ # Relevant zones for the last two neumes:
+ #
+ #
+ #
+ #
+ with self.subTest("First 1-gram"):
+ expected_1gram: NgramDocument = {
"location": json.dumps(
[{"ulx": 2608, "uly": 2399, "width": 70, "height": 49}]
),
"pitch_names": "d",
- "intervals": "",
- "contours": "",
+ "contour": "",
+ "semitone_intervals": "",
+ "neume_names": "punctum",
}
- self.assertEqual(
- neume_component_docs_1_2_grams[0],
- expected_first_neume_component_1gram,
- )
- with self.subTest("First neume 3-gram"):
- expected_neume_3gram = {
- "ngram_unit": "neume",
+ self.assertEqual(expected_1gram, ngram_docs_1_2[0])
+ with self.subTest("Ngram of first 3 neumes"):
+ expected_3gram: NgramDocument = {
"location": json.dumps(
[{"ulx": 2608, "uly": 2292, "width": 477, "height": 201}]
),
- "neume_names": "Punctum_Clivis_Punctum",
+ "neume_names": "punctum_clivis_punctum",
+ "pitch_names": "d_d_c_f",
+ "contour": "r_d_u",
+ "semitone_intervals": "0_-2_5",
}
- self.assertEqual(neume_docs_3_5_grams[0], expected_neume_3gram)
- with self.subTest("First neume component 3-gram"):
- expected_first_neume_component_3gram = {
- "ngram_unit": "neume_component",
+ self.assertEqual(expected_3gram, ngram_docs_3_5[1])
+ self.assertEqual(expected_3gram, ngram_docs_2_3[2])
+ with self.subTest("Pitch 3-gram: second three pitches"):
+ # This 3-gram is constructed from the second through
+ # fourth pitches of the sample above.
+ pitch_3gram: NgramDocument = {
"location": json.dumps(
- [{"ulx": 2608, "uly": 2396, "width": 257, "height": 97}]
+ [{"ulx": 2725, "uly": 2292, "width": 360, "height": 201}]
),
- "pitch_names": "d_d_c",
- "intervals": "0_-2",
- "contours": "s_d",
+ "pitch_names": "d_c_f",
+ "semitone_intervals": "-2_5",
+ "contour": "d_u",
+ "neume_names": "clivis_punctum",
}
self.assertEqual(
- neume_component_docs_3_5_grams[0],
- expected_first_neume_component_3gram,
+ pitch_3gram,
+ ngram_docs_2_3[4],
+ )
+ self.assertEqual(
+ pitch_3gram,
+ ngram_docs_3_5[4],
)
- with self.subTest("Second neume component 3-gram"):
- expected_second_neume_component_3gram = {
- "ngram_unit": "neume_component",
+ with self.subTest("Pitch 3-gram: last three pitches"):
+ # This 3-gram is constructed from the last three
+ # pitches of the test document.
+ pitch_3gram_1: NgramDocument = {
"location": json.dumps(
- [{"ulx": 2725, "uly": 2292, "width": 360, "height": 201}]
+ [{"ulx": 4811, "uly": 7724, "width": 364, "height": 150}]
),
- "pitch_names": "d_c_f",
- "intervals": "-2_5",
- "contours": "d_u",
+ "pitch_names": "c_e_d",
+ "semitone_intervals": "4_-2",
+ "contour": "u_d",
}
- self.assertEqual(
- neume_component_docs_3_5_grams[1],
- expected_second_neume_component_3gram,
+ self.assertIn(
+ pitch_3gram_1,
+ ngram_docs_2_3,
+ )
+ self.assertIn(
+ pitch_3gram_1,
+ ngram_docs_3_5,
+ )
+ with self.subTest("Pitch 4-gram: last 4 pitches"):
+ # This 4-gram is constructed from the last four
+ # pitches of the test document.
+ pitch_4gram: NgramDocument = {
+ "location": json.dumps(
+ [{"ulx": 4750, "uly": 7724, "width": 425, "height": 150}]
+ ),
+ "pitch_names": "d_c_e_d",
+ "semitone_intervals": "-2_4_-2",
+ "contour": "d_u_d",
+ "neume_names": "clivis_clivis",
+ }
+ self.assertIn(
+ pitch_4gram,
+ ngram_docs_3_5,
)
diff --git a/poetry.lock b/poetry.lock
index ed4ad2a95..02d626e8c 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -30,13 +30,13 @@ tests = ["mypy (>=0.800)", "pytest", "pytest-asyncio"]
[[package]]
name = "astroid"
-version = "3.1.0"
+version = "3.2.0"
description = "An abstract syntax tree for Python with inference support."
optional = false
python-versions = ">=3.8.0"
files = [
- {file = "astroid-3.1.0-py3-none-any.whl", hash = "sha256:951798f922990137ac090c53af473db7ab4e70c770e6d7fae0cec59f74411819"},
- {file = "astroid-3.1.0.tar.gz", hash = "sha256:ac248253bfa4bd924a0de213707e7ebeeb3138abeb48d798784ead1e56d419d4"},
+ {file = "astroid-3.2.0-py3-none-any.whl", hash = "sha256:16ee8ca5c75ac828783028cc1f967777f0e507c6886a295ad143e0f405b975a2"},
+ {file = "astroid-3.2.0.tar.gz", hash = "sha256:f7f829f8506ade59f1b3c6c93d8fac5b1ebc721685fa9af23e9794daf1d450a3"},
]
[[package]]
@@ -398,6 +398,42 @@ files = [
[package.dependencies]
Django = ">=3.2"
+[[package]]
+name = "django-stubs"
+version = "4.2.7"
+description = "Mypy stubs for Django"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "django-stubs-4.2.7.tar.gz", hash = "sha256:8ccd2ff4ee5adf22b9e3b7b1a516d2e1c2191e9d94e672c35cc2bc3dd61e0f6b"},
+ {file = "django_stubs-4.2.7-py3-none-any.whl", hash = "sha256:4cf4de258fa71adc6f2799e983091b9d46cfc67c6eebc68fe111218c9a62b3b8"},
+]
+
+[package.dependencies]
+django = "*"
+django-stubs-ext = ">=4.2.7"
+types-pytz = "*"
+types-PyYAML = "*"
+typing-extensions = "*"
+
+[package.extras]
+compatible-mypy = ["mypy (>=1.7.0,<1.8.0)"]
+
+[[package]]
+name = "django-stubs-ext"
+version = "5.0.0"
+description = "Monkey-patching and extensions for django-stubs"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "django_stubs_ext-5.0.0-py3-none-any.whl", hash = "sha256:8e1334fdf0c8bff87e25d593b33d4247487338aaed943037826244ff788b56a8"},
+ {file = "django_stubs_ext-5.0.0.tar.gz", hash = "sha256:5bacfbb498a206d5938454222b843d81da79ea8b6fcd1a59003f529e775bc115"},
+]
+
+[package.dependencies]
+django = "*"
+typing-extensions = "*"
+
[[package]]
name = "djangorestframework"
version = "3.15.1"
@@ -412,6 +448,29 @@ files = [
[package.dependencies]
django = ">=3.0"
+[[package]]
+name = "djangorestframework-stubs"
+version = "3.14.5"
+description = "PEP-484 stubs for django-rest-framework"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "djangorestframework-stubs-3.14.5.tar.gz", hash = "sha256:5dd6f638aa5291fb7863e6166128a6ed20bf4986e2fc5cf334e6afc841797a09"},
+ {file = "djangorestframework_stubs-3.14.5-py3-none-any.whl", hash = "sha256:43d788fd50cda49b922cd411e59c5b8cdc3f3de49c02febae12ce42139f0269b"},
+]
+
+[package.dependencies]
+django-stubs = ">=4.2.7"
+requests = ">=2.0.0"
+types-PyYAML = ">=5.4.3"
+types-requests = ">=0.1.12"
+typing-extensions = ">=3.10.0"
+
+[package.extras]
+compatible-mypy = ["django-stubs[compatible-mypy]", "mypy (>=1.7.0,<1.8.0)"]
+coreapi = ["coreapi (>=2.0.0)"]
+markdown = ["types-Markdown (>=0.1.5)"]
+
[[package]]
name = "future"
version = "1.0.0"
@@ -600,6 +659,7 @@ files = [
{file = "lxml-5.2.1-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:9e2addd2d1866fe112bc6f80117bcc6bc25191c5ed1bfbcf9f1386a884252ae8"},
{file = "lxml-5.2.1-cp37-cp37m-win32.whl", hash = "sha256:f51969bac61441fd31f028d7b3b45962f3ecebf691a510495e5d2cd8c8092dbd"},
{file = "lxml-5.2.1-cp37-cp37m-win_amd64.whl", hash = "sha256:b0b58fbfa1bf7367dde8a557994e3b1637294be6cf2169810375caf8571a085c"},
+ {file = "lxml-5.2.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:3e183c6e3298a2ed5af9d7a356ea823bccaab4ec2349dc9ed83999fd289d14d5"},
{file = "lxml-5.2.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:804f74efe22b6a227306dd890eecc4f8c59ff25ca35f1f14e7482bbce96ef10b"},
{file = "lxml-5.2.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:08802f0c56ed150cc6885ae0788a321b73505d2263ee56dad84d200cab11c07a"},
{file = "lxml-5.2.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f8c09ed18ecb4ebf23e02b8e7a22a05d6411911e6fabef3a36e4f371f4f2585"},
@@ -861,18 +921,19 @@ files = [
[[package]]
name = "platformdirs"
-version = "4.2.0"
-description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
+version = "4.2.1"
+description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`."
optional = false
python-versions = ">=3.8"
files = [
- {file = "platformdirs-4.2.0-py3-none-any.whl", hash = "sha256:0614df2a2f37e1a662acbd8e2b25b92ccf8632929bc6d43467e17fe89c75e068"},
- {file = "platformdirs-4.2.0.tar.gz", hash = "sha256:ef0cc731df711022c174543cb70a9b5bd22e5a9337c8624ef2c2ceb8ddad8768"},
+ {file = "platformdirs-4.2.1-py3-none-any.whl", hash = "sha256:17d5a1161b3fd67b390023cb2d3b026bbd40abde6fdb052dfbd3a29c3ba22ee1"},
+ {file = "platformdirs-4.2.1.tar.gz", hash = "sha256:031cd18d4ec63ec53e82dceaac0417d218a6863f7745dfcc9efe7793b7039bdf"},
]
[package.extras]
docs = ["furo (>=2023.9.10)", "proselint (>=0.13)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"]
test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)"]
+type = ["mypy (>=1.8)"]
[[package]]
name = "prompt-toolkit"
@@ -913,17 +974,17 @@ test = ["anyio (>=3.6.2,<4.0)", "mypy (>=1.4.1)", "pproxy (>=2.7)", "pytest (>=6
[[package]]
name = "pylint"
-version = "3.1.0"
+version = "3.2.0"
description = "python code static checker"
optional = false
python-versions = ">=3.8.0"
files = [
- {file = "pylint-3.1.0-py3-none-any.whl", hash = "sha256:507a5b60953874766d8a366e8e8c7af63e058b26345cfcb5f91f89d987fd6b74"},
- {file = "pylint-3.1.0.tar.gz", hash = "sha256:6a69beb4a6f63debebaab0a3477ecd0f559aa726af4954fc948c51f7a2549e23"},
+ {file = "pylint-3.2.0-py3-none-any.whl", hash = "sha256:9f20c05398520474dac03d7abb21ab93181f91d4c110e1e0b32bc0d016c34fa4"},
+ {file = "pylint-3.2.0.tar.gz", hash = "sha256:ad8baf17c8ea5502f23ae38d7c1b7ec78bd865ce34af9a0b986282e2611a8ff2"},
]
[package.dependencies]
-astroid = ">=3.1.0,<=3.2.0-dev0"
+astroid = ">=3.2.0,<=3.3.0-dev0"
colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""}
dill = {version = ">=0.3.7", markers = "python_version >= \"3.12\""}
isort = ">=4.2.5,<5.13.0 || >5.13.0,<6"
@@ -935,6 +996,38 @@ tomlkit = ">=0.10.1"
spelling = ["pyenchant (>=3.2,<4.0)"]
testutils = ["gitpython (>3)"]
+[[package]]
+name = "pylint-django"
+version = "2.5.5"
+description = "A Pylint plugin to help Pylint understand the Django web framework"
+optional = false
+python-versions = ">=3.7,<4.0"
+files = [
+ {file = "pylint_django-2.5.5-py3-none-any.whl", hash = "sha256:5abd5c2228e0e5e2a4cb6d0b4fc1d1cef1e773d0be911412f4dd4fc1a1a440b7"},
+ {file = "pylint_django-2.5.5.tar.gz", hash = "sha256:2f339e4bf55776958283395c5139c37700c91bd5ef1d8251ef6ac88b5abbba9b"},
+]
+
+[package.dependencies]
+pylint = ">=2.0,<4"
+pylint-plugin-utils = ">=0.8"
+
+[package.extras]
+with-django = ["Django (>=2.2)"]
+
+[[package]]
+name = "pylint-plugin-utils"
+version = "0.8.2"
+description = "Utilities and helpers for writing Pylint plugins"
+optional = false
+python-versions = ">=3.7,<4.0"
+files = [
+ {file = "pylint_plugin_utils-0.8.2-py3-none-any.whl", hash = "sha256:ae11664737aa2effbf26f973a9e0b6779ab7106ec0adc5fe104b0907ca04e507"},
+ {file = "pylint_plugin_utils-0.8.2.tar.gz", hash = "sha256:d3cebf68a38ba3fba23a873809155562571386d4c1b03e5b4c4cc26c3eee93e4"},
+]
+
+[package.dependencies]
+pylint = ">=1.7"
+
[[package]]
name = "python-dateutil"
version = "2.9.0.post0"
@@ -974,7 +1067,6 @@ files = [
{file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
- {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
{file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
{file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
@@ -1073,15 +1165,51 @@ doc = ["sphinx"]
[[package]]
name = "tomlkit"
-version = "0.12.4"
+version = "0.12.5"
description = "Style preserving TOML library"
optional = false
python-versions = ">=3.7"
files = [
- {file = "tomlkit-0.12.4-py3-none-any.whl", hash = "sha256:5cd82d48a3dd89dee1f9d64420aa20ae65cfbd00668d6f094d7578a78efbb77b"},
- {file = "tomlkit-0.12.4.tar.gz", hash = "sha256:7ca1cfc12232806517a8515047ba66a19369e71edf2439d0f5824f91032b6cc3"},
+ {file = "tomlkit-0.12.5-py3-none-any.whl", hash = "sha256:af914f5a9c59ed9d0762c7b64d3b5d5df007448eb9cd2edc8a46b1eafead172f"},
+ {file = "tomlkit-0.12.5.tar.gz", hash = "sha256:eef34fba39834d4d6b73c9ba7f3e4d1c417a4e56f89a7e96e090dd0d24b8fb3c"},
+]
+
+[[package]]
+name = "types-pytz"
+version = "2024.1.0.20240417"
+description = "Typing stubs for pytz"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "types-pytz-2024.1.0.20240417.tar.gz", hash = "sha256:6810c8a1f68f21fdf0f4f374a432487c77645a0ac0b31de4bf4690cf21ad3981"},
+ {file = "types_pytz-2024.1.0.20240417-py3-none-any.whl", hash = "sha256:8335d443310e2db7b74e007414e74c4f53b67452c0cb0d228ca359ccfba59659"},
+]
+
+[[package]]
+name = "types-pyyaml"
+version = "6.0.12.20240311"
+description = "Typing stubs for PyYAML"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "types-PyYAML-6.0.12.20240311.tar.gz", hash = "sha256:a9e0f0f88dc835739b0c1ca51ee90d04ca2a897a71af79de9aec5f38cb0a5342"},
+ {file = "types_PyYAML-6.0.12.20240311-py3-none-any.whl", hash = "sha256:b845b06a1c7e54b8e5b4c683043de0d9caf205e7434b3edc678ff2411979b8f6"},
]
+[[package]]
+name = "types-requests"
+version = "2.31.0.20240406"
+description = "Typing stubs for requests"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "types-requests-2.31.0.20240406.tar.gz", hash = "sha256:4428df33c5503945c74b3f42e82b181e86ec7b724620419a2966e2de604ce1a1"},
+ {file = "types_requests-2.31.0.20240406-py3-none-any.whl", hash = "sha256:6216cdac377c6b9a040ac1c0404f7284bd13199c0e1bb235f4324627e8898cf5"},
+]
+
+[package.dependencies]
+urllib3 = ">=2"
+
[[package]]
name = "typing-extensions"
version = "4.11.0"
@@ -1163,4 +1291,4 @@ watchdog = ["watchdog (>=2.3)"]
[metadata]
lock-version = "2.0"
python-versions = "^3.12"
-content-hash = "a62687b2b399b6549d26d179a7900297de68f3ea3a840da3f2b6b5814c3d20e6"
+content-hash = "8959a6bca173000d1c97b85d5b45c4142bc927e78f93e6b5672c6d63fb8648bb"
diff --git a/pyproject.toml b/pyproject.toml
index ec8dc2387..fa2c7542e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,6 +30,9 @@ black = "^24.4.2"
mypy = "^1.10.0"
pylint = "^3.1.0"
lxml-stubs = "^0.5.1"
+django-stubs = "^4.2.7"
+pylint-django = "^2.5.5"
+djangorestframework-stubs = "^3.14.5"
[build-system]
requires = ["poetry-core"]