diff --git a/app/public/cantusdata/helpers/mei_processing/mei_parser.py b/app/public/cantusdata/helpers/mei_processing/mei_parser.py index 43ffebc3f..debfad7d5 100644 --- a/app/public/cantusdata/helpers/mei_processing/mei_parser.py +++ b/app/public/cantusdata/helpers/mei_processing/mei_parser.py @@ -8,20 +8,20 @@ between two neume components. - get_contour_from_interval: Computes the contour of an interval. - analyze_neume: Analyzes a neume (a list of neume components) to determine its - neume type, its intervals, and its contour. + neume name, its intervals, and its contour. Defines associated types for the data structures used by the parser. """ from typing import Tuple, Dict, List, Iterator, Optional -from lxml import etree +from lxml import etree # pylint: disable=no-name-in-module from .mei_parsing_types import ( Zone, SyllableText, NeumeComponentElementData, NeumeComponent, ContourType, - NeumeType, + NeumeName, Neume, Syllable, ) @@ -31,24 +31,24 @@ PITCH_CLASS = {"c": 0, "d": 2, "e": 4, "f": 5, "g": 7, "a": 9, "b": 11} # Mapping from neume contours to neume names -NEUME_GROUPS: Dict[str, NeumeType] = { - "": "Punctum", - "u": "Pes", - "d": "Clivis", - "uu": "Scandicus", - "ud": "Torculus", - "du": "Porrectus", - "s": "Distropha", - "ss": "Tristopha", - "sd": "Pressus", - "dd": "Climacus", - "ddu": "Climacus resupinus", - "udu": "Torculus resupinus", - "dud": "Porrectus flexus", - "udd": "Pes subpunctis", - "uud": "Scandicus flexus", - "uudd": "Scandicus subpunctis", - "dudd": "Porrectus subpunctis", +NEUME_GROUPS: Dict[str, NeumeName] = { + "": "punctum", + "u": "pes", + "d": "clivis", + "uu": "scandicus", + "ud": "torculus", + "du": "porrectus", + "r": "distropha", + "rr": "tristopha", + "rd": "pressus", + "dd": "climacus", + "ddu": "climacus_resupinus", + "udu": "torculus_resupinus", + "dud": "porrectus_flexus", + "udd": "pes_subpunctis", + "uud": "scandicus_flexus", + "uudd": "scandicus_subpunctis", + "dudd": "porrectus_subpunctis", } @@ -75,6 +75,7 @@ class MEIParser: def __init__(self, mei_file: str): self.mei_file = mei_file self.mei = etree.parse(self.mei_file) + self._remove_empty_neumes_and_syllables() self.zones = self.parse_zones() self.syllables = self.parse_mei() @@ -182,7 +183,7 @@ def _parse_neume( ) if parsed_neume_component: parsed_nc_elements.append(parsed_neume_component) - neume_type, intervals, contours = analyze_neume(parsed_nc_elements) + neume_name, intervals, contours = analyze_neume(parsed_nc_elements) # If the first neume component of the next syllable can be parsed, # add the interval and contour between the final neume component of # the current syllable and the first neume component of the next syllable. 
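[Editor's note — not part of the patch: the renamed `NEUME_GROUPS` mapping above is keyed by contour strings built from the new "u"/"d"/"r" vocabulary, with unknown shapes falling back to "compound". A minimal sketch of that lookup; the trimmed mapping and the interval values are illustrative only.]

```python
# Semitone intervals are classified as "u"(p), "d"(own), or "r"(epeat),
# concatenated into a contour string, and looked up in NEUME_GROUPS.
NEUME_GROUPS = {"": "punctum", "u": "pes", "d": "clivis", "ud": "torculus"}

def contour(interval: int) -> str:
    return "d" if interval < 0 else "u" if interval > 0 else "r"

intervals = [3, -1]  # e.g. d4 -> f4 -> e4, in semitones
contour_string = "".join(contour(i) for i in intervals)  # "ud"
assert NEUME_GROUPS.get(contour_string, "compound") == "torculus"
```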
@@ -193,7 +194,7 @@ def _parse_neume(
        if parsed_next_neume_comp:
            last_neume_comp = parsed_nc_elements[-1]
            intervals.append(
-                get_interval_between_neume_components(
+                get_semitones_between_neume_components(
                    last_neume_comp, parsed_next_neume_comp
                )
            )
@@ -211,12 +212,13 @@ def _parse_neume(
                "pname": nc["pname"],
                "octave": nc["octave"],
                "bounding_box": nc["bounding_box"],
-                "interval": intervals[i] if i < len(intervals) else None,
+                "semitone_interval": intervals[i] if i < len(intervals) else None,
                "contour": contours[i] if i < len(contours) else None,
+                "system": neume_system,
            }
        )
    parsed_neume: Neume = {
-        "neume_type": neume_type,
+        "neume_name": neume_name,
        "neume_components": parsed_neume_components,
        "bounding_box": combined_bounding_box,
        "system": neume_system,
@@ -323,6 +325,26 @@ def _syllable_iterator(
                system += 1
            current_elem = next(elem_iterator, None)

+    def _remove_empty_neumes_and_syllables(self) -> None:
+        """
+        Apparently, for a while Rodan was creating invalid MEI files that
+        contained empty neumes (i.e., neumes with no neume components) and
+        empty syllables (i.e., syllables with no neumes or only empty neumes).
+        This method removes those empty neumes and syllables from the MEI
+        being parsed. It was added as a preprocessing step so that it can be
+        removed once the base MEI files are corrected.
+        """
+        for neume in self.mei.iter(f"{self.MEINS}neume"):
+            if len(neume.findall(f"{self.MEINS}nc")) == 0:
+                # Ignoring type because we know that getparent() will
+                # return an element in this case.
+                neume.getparent().remove(neume)  # type: ignore
+        for syllable in self.mei.iter(f"{self.MEINS}syllable"):
+            if len(syllable.findall(f"{self.MEINS}neume")) == 0:
+                # Ignoring type because we know that getparent() will
+                # return an element in this case.
+                syllable.getparent().remove(syllable)  # type: ignore
+
    def parse_mei(self) -> List[Syllable]:
        """
        Parses the MEI file into a list of syllables.
@@ -351,7 +373,7 @@ def parse_mei(self) -> List[Syllable]:
    return syllables


-def get_interval_between_neume_components(
+def get_semitones_between_neume_components(
    neume_component_1: NeumeComponentElementData,
    neume_component_2: NeumeComponentElementData,
 ) -> int:
@@ -369,8 +391,8 @@
    try:
        pc1 = PITCH_CLASS[neume_component_1["pname"]]
        pc2 = PITCH_CLASS[neume_component_2["pname"]]
-    except KeyError:
-        raise ValueError("Invalid pitch name in neume component.")
+    except KeyError as err:
+        raise ValueError("Invalid pitch name in neume component.") from err
    # In MIDI note numbers, C0 = 12.
    pitch_1 = pc1 + (12 * (neume_component_1["octave"] + 1))
    pitch_2 = pc2 + (12 * (neume_component_2["octave"] + 1))
@@ -382,34 +404,36 @@
 def get_contour_from_interval(interval: int) -> ContourType:
    """
    Compute the contour of an interval.

    :param interval: The size of the interval in semitones
-    :return: The contour of the interval ("u"[p], "d"[own], or "s"[tay])
+    :return: The contour of the interval ("u"[p], "d"[own], or "r"[epeat])
    """
    if interval < 0:
        return "d"
    if interval > 0:
        return "u"
-    return "s"
+    return "r"


 def analyze_neume(
    neume: List[NeumeComponentElementData],
-) -> Tuple[NeumeType, List[int], List[ContourType]]:
+) -> Tuple[NeumeName, List[int], List[ContourType]]:
    """
    Analyze a neume (a list of neume components) to determine:
-    - Neume type
-    - Neume intervals
-    - Neume contour
+    - The neume type (e.g., punctum, pes, clivis, etc.)
+    - The intervals in the neume in semitones
+    - The contour of the neume

    :param neume: A list of neume components (a list of
        NeumeComponentsType dictionaries)
    :return: A tuple of information about the neume:
        - Neume type (str)
-        - Neume intervals (list of ints)
-        - Neume contour (list of "u"[p], "d"[own], or "s"[tay])
+        - Neume intervals in semitones (list of ints)
+        - Neume contour (list of "u"[p], "d"[own], or "r"[epeat])
    """
-    intervals: List[int] = [
-        get_interval_between_neume_components(nc1, nc2)
+    semitone_intervals: List[int] = [
+        get_semitones_between_neume_components(nc1, nc2)
        for nc1, nc2 in zip(neume[:-1], neume[1:])
    ]
-    contours: List[ContourType] = [get_contour_from_interval(i) for i in intervals]
-    neume_type: NeumeType = NEUME_GROUPS.get("".join(contours), "Compound")
-    return neume_type, intervals, contours
+    contours: List[ContourType] = [
+        get_contour_from_interval(i) for i in semitone_intervals
+    ]
+    neume_type: NeumeName = NEUME_GROUPS.get("".join(contours), "compound")
+    return neume_type, semitone_intervals, contours
diff --git a/app/public/cantusdata/helpers/mei_processing/mei_parsing_types.py b/app/public/cantusdata/helpers/mei_processing/mei_parsing_types.py
index 2f47aec9d..ff7507b03 100644
--- a/app/public/cantusdata/helpers/mei_processing/mei_parsing_types.py
+++ b/app/public/cantusdata/helpers/mei_processing/mei_parsing_types.py
@@ -2,7 +2,7 @@
 Contains type definitions used in the MEI parsing process.
 """

-from typing import Tuple, TypedDict, Literal, List, Optional
+from typing import Tuple, TypedDict, Literal, List, Optional, NotRequired
 from typing_extensions import TypeAlias

 # A type for coordinates of bounding boxes
@@ -30,26 +30,26 @@ class Zone(TypedDict):
    rotate: float


-ContourType = Literal["u", "d", "s"]
-NeumeType = Literal[
-    "Punctum",
-    "Pes",
-    "Clivis",
-    "Scandicus",
-    "Torculus",
-    "Porrectus",
-    "Distropha",
-    "Tristopha",
-    "Pressus",
-    "Climacus",
-    "Climacus resupinus",
-    "Torculus resupinus",
-    "Porrectus flexus",
-    "Pes subpunctis",
-    "Scandicus flexus",
-    "Scandicus subpunctis",
-    "Porrectus subpunctis",
-    "Compound",
+ContourType = Literal["u", "d", "r"]
+NeumeName = Literal[
+    "punctum",
+    "pes",
+    "clivis",
+    "scandicus",
+    "torculus",
+    "porrectus",
+    "distropha",
+    "tristopha",
+    "pressus",
+    "climacus",
+    "climacus_resupinus",
+    "torculus_resupinus",
+    "porrectus_flexus",
+    "pes_subpunctis",
+    "scandicus_flexus",
+    "scandicus_subpunctis",
+    "porrectus_subpunctis",
+    "compound",
 ]

@@ -74,27 +74,29 @@ class NeumeComponent(NeumeComponentElementData):
    """A type extending NeumeComponentElementData with interval and contour information.

-    interval: The interval (in semitones) between the neume component and the
+    semitone_interval: The interval in semitones between the neume component and the
        following neume component. If there is no following neume component,
        this is None.

-    contour: The contour ("u"[p], "d"[own], or "s"[tay]) of 'interval'. If there is no
+    contour: The contour ("u"[p], "d"[own], or "r"[epeat]) of 'interval'. If there is no
        following neume component, this is None.
+
+    system: The system number that the neume component is on
    """

-    interval: Optional[int]
+    semitone_interval: Optional[int]
    contour: Optional[ContourType]
+    system: int


 class Neume(TypedDict):
    """A type for neumes

-    neume_type: The name of the neume (ie. "Punctum", "Pes", "Clivis", etc.)
+    neume_name: The name of the neume (i.e., "punctum", "pes", "clivis", etc.)
    neume_components: A list of neume components (containing pitch information)
    bounding_box: The bounding box of the neume
    system: The system number that the neume is on
    """

-    neume_type: NeumeType
+    neume_name: NeumeName
    neume_components: List[NeumeComponent]
    bounding_box: Zone
    system: int
@@ -112,3 +114,41 @@ class Syllable(TypedDict):

    text: SyllableText
    neumes: List[Neume]
+
+
+class NgramDocument(TypedDict):
+    """
+    A generic type for documents containing n-grams
+    of information extracted from MEI files.
+
+    location: The location of the n-gram in the MEI file (MEI Zones
+        converted to JSON strings according to bounding_box_utils.stringify_bounding_boxes)
+    pitch_names: A string containing the pitch names of the neume components in the n-gram,
+        separated by underscores.
+    contour: A string containing the contours of the neume components in the n-gram, separated
+        by underscores.
+    semitone_intervals: A string containing the semitone intervals between the neume components
+        in the n-gram, separated by underscores.
+    neume_names: A string containing the names of the neumes in the n-gram,
+        separated by underscores. This field is not required, and is only present when
+        the n-gram contains complete neumes.
+
+    The following may be part of an NgramDocument, but are optional because
+    they will be added when the document is indexed:
+    manuscript_id: The ID of the manuscript the n-gram belongs to.
+    folio: The number of the folio on which the n-gram exists.
+    id: The unique ID of the document (corresponds to solr schema's id field)
+    type: The type of the document (corresponds to solr schema's type field)
+    image_uri: The URI of the image of the folio on which the n-gram exists.
+    """
+
+    location: str
+    pitch_names: str
+    contour: str
+    semitone_intervals: str
+    neume_names: NotRequired[str]
+    manuscript_id: NotRequired[str]
+    folio: NotRequired[str]
+    id: NotRequired[str]
+    type: NotRequired[Literal["omr_ngram"]]
+    image_uri: NotRequired[str]
diff --git a/app/public/cantusdata/helpers/mei_processing/mei_tokenizer.py b/app/public/cantusdata/helpers/mei_processing/mei_tokenizer.py
index 7ae5b8c8d..12a71dc5e 100644
--- a/app/public/cantusdata/helpers/mei_processing/mei_tokenizer.py
+++ b/app/public/cantusdata/helpers/mei_processing/mei_tokenizer.py
@@ -4,72 +4,19 @@
 can then be indexed by a search engine (i.e. for this project, Solr).
 """

-from typing import List, Iterator, Any, TypedDict, Literal
+import uuid
+from typing import List, Tuple, Optional, Never, Union
 from .mei_parser import MEIParser
-from .mei_parsing_types import Neume, NeumeComponent
+from .mei_parsing_types import (
+    Neume,
+    NeumeComponent,
+    ContourType,
+    NeumeName,
+    NgramDocument,
+    Zone,
+)
 from .bounding_box_utils import combine_bounding_boxes, stringify_bounding_boxes

-NgramUnitType = Literal["neume", "neume_component"]
-
-
-class NgramDocument(TypedDict):
-    """
-    A generic type for documents containing n-grams
-    of information extracted from MEI files.
-
-    ngram_unit: The unit of the n-gram
-    location: The location of the n-gram in the MEI file (MEI Zones
-        converted to JSON strings according to bounding_box_utils.stringify_bounding_boxes)
-    """
-
-    ngram_unit: NgramUnitType
-    location: str
-
-
-class NeumeNgramDocument(NgramDocument):
-    """
-    A type for documents containing n-grams of neume-level information.
-
-    neume_names: A string containing the names of the neumes in the n-gram,
-        separated by underscores.
- """ - - neume_names: str - - -class NeumeComponentNgramDocument(NgramDocument): - """ - A type for documents containing n-grams of neume component-level information. - - pitch_names: A string containing the pitch names of the neume components in the n-gram, - separated by underscores. - intervals: A string containing the intervals between the neume components in the n-gram, - separated by underscores. - contours: A string containing the contours of the neume components in the n-gram, separated - by underscores. - """ - - pitch_names: str - intervals: str - contours: str - - -def generate_ngrams(sequence: List[Any], min_n: int, max_n: int) -> Iterator[List[Any]]: - """ - Generate n-grams from a sequence (list) of items. - - :param sequence: A list of items to generate n-grams from. - :param min_gram: The minimum length of n-grams to generate. - :param max_gram: The maximum length of n-grams to generate. - :yield: A list containing the subset of consecutive items - that make up an n-gram. - """ - # Iterate through all desired n-gram lengths - for i in range(min_n, max_n + 1): - # Iterate through all n-grams of "sequence" of length "i" - for j in range(0, len(sequence) - i + 1): - yield sequence[j : j + i] - class MEITokenizer(MEIParser): """ @@ -85,67 +32,178 @@ def __init__(self, mei_file: str, min_ngram: int, max_ngram: int) -> None: self.min_ngram = min_ngram self.max_ngram = max_ngram - def get_neume_ngram_docs(self) -> List[NeumeNgramDocument]: + @property + def flattened_neumes(self) -> List[Neume]: """ - Generate neume-level documents for search, containing - n-grams of neume names. + Flatten the neumes contained in the syllables of the MEI file. - :return: A list of dictionaries containing the n-grams - of neume names. + :return: A list of neumes. """ - neumes_sequence: List[Neume] = [] + neumes: List[Neume] = [] for syllable in self.syllables: - neumes_sequence.extend(syllable["neumes"]) - neume_documents: List[NeumeNgramDocument] = [] - for ngram in generate_ngrams(neumes_sequence, self.min_ngram, self.max_ngram): - bounding_boxes = [ - (neume["bounding_box"], neume["system"]) for neume in ngram - ] - document_location = combine_bounding_boxes(bounding_boxes) - neume_names = "_".join([neume["neume_type"] for neume in ngram]) - neume_documents.append( - { - "ngram_unit": "neume", - "location": stringify_bounding_boxes(document_location), - "neume_names": neume_names, - } - ) - return neume_documents + neumes.extend(syllable["neumes"]) + return neumes - def get_neume_component_ngram_docs(self) -> List[NeumeComponentNgramDocument]: + def _stringify_neume_component_data( + self, + neume_components: List[NeumeComponent], + ) -> Tuple[str, str, str]: """ - Generate neume component-level documents for search, containing - n-grams of pitch names, intervals, and contours. + Convert pitch, contour, and interval information from a list of + neume components into strings. - :return: A list of dictionaries containing the n-grams - of pitch names, intervals, and contours. + :param neume_components: A list of neumes or neume components to convert into strings. + :return: A tuple containing the pitch names, contours, and intervals + of the neumes or neume components as strings, separated by underscores. 
""" - neume_components: List[NeumeComponent] = [] - for syllable in self.syllables: - for neume in syllable["neumes"]: - neume_components.extend(neume["neume_components"]) - neume_component_documents: List[NeumeComponentNgramDocument] = [] - for ngram in generate_ngrams( - neume_components, - self.min_ngram, - self.max_ngram, - ): - pitch_names = "_".join([comp["pname"] for comp in ngram]) - # Keep "internal" intervals and contours (in other words, - # the intevals and countours between the pitches in these - # neume components, and not the interval and contour following - # the last pitch in the ngram). - intervals = [str(comp["interval"]) for comp in ngram[:-1]] - contours = [comp["contour"] for comp in ngram[:-1]] - bounding_boxes = [(comp["bounding_box"], neume["system"]) for comp in ngram] - document_location = combine_bounding_boxes(bounding_boxes) - neume_component_documents.append( - { - "ngram_unit": "neume_component", - "location": stringify_bounding_boxes(document_location), - "pitch_names": pitch_names, - "intervals": "_".join(intervals), - "contours": "_".join(contours), - } + pnames: List[str] = [] + contours: List[ContourType] = [] + semitone_intervals: List[str] = [] + for idx, nc in enumerate(neume_components): + pnames.append(nc["pname"]) + # The interval is None if and only if the countour is None, + # so we can safely do this single check. + if nc["contour"] is not None and idx != len(neume_components) - 1: + contours.append(nc["contour"]) + semitone_intervals.append(str(nc["semitone_interval"])) + return "_".join(pnames), "_".join(contours), "_".join(semitone_intervals) + + def _create_document_from_neume_components( + self, + neume_components: List[NeumeComponent], + ) -> NgramDocument: + """ + Create an NgramDocument from a list of neume components and + their corresponding system numbers. + + :param ncs_with_sys: A list of tuples, each containing a neume component + and the system number of that neume component. + :return: An NgramDocument containing the information from the neume components. + """ + pitch_names, contour, intervals = self._stringify_neume_component_data( + neume_components + ) + zones_with_sys: List[Tuple[Zone, int]] = [ + (nc["bounding_box"], nc["system"]) for nc in neume_components + ] + location: str = stringify_bounding_boxes(combine_bounding_boxes(zones_with_sys)) + return { + "location": location, + "pitch_names": pitch_names, + "contour": contour, + "semitone_intervals": intervals, + "id": str(uuid.uuid4()), + "type": "omr_ngram", + } + + def _create_pitch_sequences( + self, + ) -> Tuple[List[NeumeComponent], List[Optional[NeumeName]]]: + """ + Create two lists of equal length: one containing + the pitches (neume components) contained in the parsed file, + and the other containing the names of the neumes that begin + at each pitch (or None if no neume begins at that pitch). + + :return: A tuple containing the list of pitches and the list of neume names. 
+ """ + neume_sequence = self.flattened_neumes + neume_names: List[Optional[NeumeName]] = [] + ncs: List[NeumeComponent] = [] + for neume in neume_sequence: + ncs.extend(neume["neume_components"]) + flattened_neume_names = [neume["neume_name"]] + [None] * ( + len(neume["neume_components"]) - 1 ) - return neume_component_documents + neume_names.extend(flattened_neume_names) + return ncs, neume_names + + def create_ngram_documents(self) -> List[NgramDocument]: + """ + Create a list of ngram documents from the MEI file, + ensuring that we have ngrams that contain n pitches + and n neumes for all n in the range min_ngram to max_ngram. + + In broad strokes, the function: + - Iterates through the pitches in the document, and creates ngrams + of pitches with n = min_ngram, min_ngram + 1, ..., max_ngram. + When an ngram corresponds to a set of complete neumes, neume + names are included in the ngram document. When it doesn't, + no neume names are added. + - Checks whether this has created ngrams of length up to max_ngram + of complete neumes starting at the current pitch. + (Note: this will only be the case if the + current pitch begins a sequence of max_ngram consecutive single- + pitch neumes). + - If this check fails, the function creates remaining ngrams of complete + neumes up to max_ngram of complete neumes. + + :return: A list of NgramDocuments. + """ + pitches, neume_names = self._create_pitch_sequences() + ngram_docs: List[NgramDocument] = [] + num_pitches = len(pitches) + # At each pitch in the file, we'll generate all the necessary + # ngrams that start with that pitch. + for start_idx in range(num_pitches): + largest_num_neumes = 0 + for ngram_length in range(self.min_ngram, self.max_ngram + 1): + # Collect the pitches for an ngram of ngram_length + # pitches starting at start_idx, if we haven't reached the + # end of the pitches. + end_idx = start_idx + ngram_length + if end_idx > num_pitches: + break + nc_ngram = pitches[start_idx:end_idx] + doc = self._create_document_from_neume_components(nc_ngram) + # If the pitch at start_idx is the beginning of a neume + # and the pitch following this ngram is also the beginning + # of a neume (or we've reached the end of the file), + # then our current ngram of pitches overlaps + # with some number of complete neumes. + neume_start = neume_names[start_idx] is not None + if neume_start: + if end_idx == num_pitches or neume_names[end_idx] is not None: + neume_name_list = [ + nn + for nn in neume_names[start_idx:end_idx] + if nn is not None + ] + doc["neume_names"] = "_".join(neume_name_list) + largest_num_neumes = len(neume_name_list) + ngram_docs.append(doc) + # If the current neume component starts a neume and we + # haven't reached the maximum ngram length of neumes + # in our existing documents, generate documents containing + # larger ngrams of neumes until we reach the maximum ngram length. + if neume_start and largest_num_neumes < self.max_ngram: + min_wanted_ngram_length = max(largest_num_neumes + 1, self.min_ngram) + for wanted_ngram_length in range( + min_wanted_ngram_length, self.max_ngram + 1 + ): + ngram_neume_names: List[NeumeName] = [] + ngram_num_pitches = 0 + # We'll add pitches to our ngram until we have the + # number of neumes we want in our ngram or we reach + # the end of the file. 
+ while (len(ngram_neume_names) <= wanted_ngram_length) and ( + start_idx + ngram_num_pitches < len(pitches) + ): + if ( + name_at_pitch := neume_names[start_idx + ngram_num_pitches] + ) is not None and len(ngram_neume_names) < wanted_ngram_length: + ngram_neume_names.append(name_at_pitch) + ngram_num_pitches += 1 + if len(ngram_neume_names) == wanted_ngram_length: + break + # We'll only add this ngram if we've actually gotten to + # the desired number of neumes (if we didn't, it means + # we reached the end of the file) + if len(ngram_neume_names) == wanted_ngram_length: + ngram_pitches = pitches[ + start_idx : start_idx + ngram_num_pitches + ] + doc = self._create_document_from_neume_components(ngram_pitches) + doc["neume_names"] = "_".join(ngram_neume_names) + ngram_docs.append(doc) + return ngram_docs diff --git a/app/public/cantusdata/test/core/helpers/mei_processing/test_mei_parser.py b/app/public/cantusdata/test/core/helpers/mei_processing/test_mei_parser.py index 63a499da4..ff6a41994 100644 --- a/app/public/cantusdata/test/core/helpers/mei_processing/test_mei_parser.py +++ b/app/public/cantusdata/test/core/helpers/mei_processing/test_mei_parser.py @@ -4,12 +4,13 @@ from cantusdata.helpers.mei_processing.mei_parser import ( MEIParser, get_contour_from_interval, - get_interval_between_neume_components, + get_semitones_between_neume_components, analyze_neume, ) from cantusdata.helpers.mei_processing.mei_parsing_types import ( NeumeComponentElementData, Zone, + Syllable, ) @@ -89,7 +90,7 @@ def test_mei_parser(self) -> None: # Relevant zones (for first syllable and the single neume component in that syllable): ## ## - expected_first_syllable = { + expected_first_syllable: Syllable = { "text": { "text": "Ec", "bounding_box": { @@ -99,7 +100,7 @@ def test_mei_parser(self) -> None: }, "neumes": [ { - "neume_type": "Punctum", + "neume_name": "punctum", "neume_components": [ { "pname": "d", @@ -108,8 +109,9 @@ def test_mei_parser(self) -> None: "coordinates": (2608, 2399, 2678, 2448), "rotate": 0.0, }, - "interval": 0, - "contour": "s", + "semitone_interval": 0, + "contour": "r", + "system": 1, } ], "bounding_box": { @@ -134,7 +136,7 @@ def test_mei_parser(self) -> None: ## ## ## - expected_last_syllable = { + expected_last_syllable: Syllable = { "text": { "text": "gil", "bounding_box": { @@ -144,7 +146,7 @@ def test_mei_parser(self) -> None: }, "neumes": [ { - "neume_type": "Clivis", + "neume_name": "clivis", "neume_components": [ { "pname": "e", @@ -153,8 +155,9 @@ def test_mei_parser(self) -> None: "coordinates": (5037, 7724, 5108, 7774), "rotate": 0.0, }, - "interval": -2, + "semitone_interval": -2, "contour": "d", + "system": 10, }, { "pname": "d", @@ -163,8 +166,9 @@ def test_mei_parser(self) -> None: "coordinates": (5104, 7774, 5175, 7824), "rotate": 0.0, }, - "interval": None, + "semitone_interval": None, "contour": None, + "system": 10, }, ], "bounding_box": { @@ -178,29 +182,37 @@ def test_mei_parser(self) -> None: self.assertEqual(syllables[-1], expected_last_syllable) def test_get_contour_from_interval(self) -> None: - self.assertEqual(get_contour_from_interval(0), "s") + self.assertEqual(get_contour_from_interval(0), "r") self.assertEqual(get_contour_from_interval(1), "u") self.assertEqual(get_contour_from_interval(-3), "d") - def test_get_interval_between_neume_components(self) -> None: + def test_get_semitones_between_neume_components(self) -> None: with self.subTest("Interval test: ascending P5"): self.assertEqual( - get_interval_between_neume_components(self.nc_elem_g3, 
self.nc_elem_d4),
+                get_semitones_between_neume_components(
+                    self.nc_elem_g3, self.nc_elem_d4
+                ),
                7,
            )
        with self.subTest("Interval test: descending P5"):
            self.assertEqual(
-                get_interval_between_neume_components(self.nc_elem_d4, self.nc_elem_g3),
+                get_semitones_between_neume_components(
+                    self.nc_elem_d4, self.nc_elem_g3
+                ),
                -7,
            )
        with self.subTest("Interval test: descending P4"):
            self.assertEqual(
-                get_interval_between_neume_components(self.nc_elem_g3, self.nc_elem_d3),
+                get_semitones_between_neume_components(
+                    self.nc_elem_g3, self.nc_elem_d3
+                ),
                -5,
            )
        with self.subTest("Interval test: descending m6"):
            self.assertEqual(
-                get_interval_between_neume_components(self.nc_elem_g3, self.nc_elem_b2),
+                get_semitones_between_neume_components(
+                    self.nc_elem_g3, self.nc_elem_b2
+                ),
                -8,
            )
@@ -219,16 +231,16 @@ def test_analyze_neume(self) -> None:
        ]
        neume_components_5 = [self.nc_elem_d4]
        with self.subTest("Analyze Pes"):
-            self.assertEqual(analyze_neume(neume_components_1), ("Pes", [5], ["u"]))
+            self.assertEqual(analyze_neume(neume_components_1), ("pes", [5], ["u"]))
        with self.subTest("Analyze Torculus"):
            self.assertEqual(
-                analyze_neume(neume_components_2), ("Torculus", [5, -5], ["u", "d"])
+                analyze_neume(neume_components_2), ("torculus", [5, -5], ["u", "d"])
            )
        with self.subTest("Analyze Clivis"):
-            self.assertEqual(analyze_neume(neume_components_3), ("Clivis", [-7], ["d"]))
+            self.assertEqual(analyze_neume(neume_components_3), ("clivis", [-7], ["d"]))
        with self.subTest("Analyze Tristropha"):
            self.assertEqual(
-                analyze_neume(neume_components_4), ("Tristopha", [0, 0], ["s", "s"])
+                analyze_neume(neume_components_4), ("tristopha", [0, 0], ["r", "r"])
            )
        with self.subTest("Analyze Punctum"):
-            self.assertEqual(analyze_neume(neume_components_5), ("Punctum", [], []))
+            self.assertEqual(analyze_neume(neume_components_5), ("punctum", [], []))
diff --git a/app/public/cantusdata/test/core/helpers/mei_processing/test_mei_tokenizer.py b/app/public/cantusdata/test/core/helpers/mei_processing/test_mei_tokenizer.py
index 3a51ca6be..98ca090c0 100644
--- a/app/public/cantusdata/test/core/helpers/mei_processing/test_mei_tokenizer.py
+++ b/app/public/cantusdata/test/core/helpers/mei_processing/test_mei_tokenizer.py
@@ -1,11 +1,9 @@
 from unittest import TestCase
 from os import path
 import json
+from typing import List
 from cantusdata.settings import BASE_DIR
-from cantusdata.helpers.mei_processing.mei_tokenizer import (
-    MEITokenizer,
-    generate_ngrams,
-)
+from cantusdata.helpers.mei_processing.mei_tokenizer import MEITokenizer, NgramDocument


 TEST_MEI_FILE = path.join(
@@ -20,34 +18,65 @@
 )


-class MEITokenizerTestCase(TestCase):
+def calculate_expected_total_ngrams(
+    mei_file: str, min_ngram: int, max_ngram: int
+) -> int:
+    """
+    Calculate the expected number of ngrams created
+    from an MEI file. The function uses the "flattened_neumes" property
+    of the MEITokenizer class, but does not use any functions
+    of that class that create ngrams.
+
+    The expected number of ngrams is calculated as follows:
+    - The number of neume components in the MEI file is calculated
+      and used to determine how many ngrams are created with min_ngram,
+      min_ngram + 1, ..., max_ngram pitches.
+    - There will be an additional ngram created for every sequence of
+      min_ngram, min_ngram + 1, ..., or max_ngram neumes whose cumulative
+      number of pitches is greater than max_ngram. We add one to the count of
+      expected ngrams for every such sequence.
+ + + """ + tokenizer = MEITokenizer(mei_file, min_ngram, max_ngram) + parsed_neumes = tokenizer.flattened_neumes + num_neume_components = sum( + len(neume["neume_components"]) for neume in parsed_neumes + ) + # The number of ngrams of pitches for a given n is: + # number of neume components - n + 1 + exp_num_ngrams = sum( + max(0, num_neume_components - i + 1) for i in range(min_ngram, max_ngram + 1) + ) + for i in range(min_ngram, max_ngram + 1): + for j in range(len(parsed_neumes) - i + 1): + if ( + sum( + len(neume["neume_components"]) for neume in parsed_neumes[j : j + i] + ) + > max_ngram + ): + exp_num_ngrams += 1 + return exp_num_ngrams - def test_generate_ngrams(self) -> None: - with self.subTest("Ngrams from 2 to 3"): - sequence = [1, 2, 3, 4, 5] - min_ngram = 2 - max_ngram = 3 - ngrams = list(generate_ngrams(sequence, min_ngram, max_ngram)) - self.assertEqual( - ngrams, - [[1, 2], [2, 3], [3, 4], [4, 5], [1, 2, 3], [2, 3, 4], [3, 4, 5]], - ) - with self.subTest("Ngrams from 3 to 5"): - sequence = [1, 2, 3, 4, 5] - min_ngram = 3 - max_ngram = 5 - ngrams = list(generate_ngrams(sequence, min_ngram, max_ngram)) - self.assertEqual( - ngrams, - [ - [1, 2, 3], - [2, 3, 4], - [3, 4, 5], - [1, 2, 3, 4], - [2, 3, 4, 5], - [1, 2, 3, 4, 5], - ], - ) + +def prepare_tokenizer_results( + tokenizer: MEITokenizer, +) -> List[NgramDocument]: + """ + This function prepares the results of a tokenizer for comparison + with expected results by: + - removing the unique ID from generated ngram documents + - removing the "type" field from generated ngram documents + """ + ngram_docs = tokenizer.create_ngram_documents() + for doc in ngram_docs: + doc.pop("id") + doc.pop("type") + return ngram_docs + + +class MEITokenizerTestCase(TestCase): def test_mei_tokenizer(self) -> None: tokenizer_1_2 = MEITokenizer( @@ -55,37 +84,34 @@ def test_mei_tokenizer(self) -> None: min_ngram=1, max_ngram=2, ) - neume_docs_1_2_grams = tokenizer_1_2.get_neume_ngram_docs() - neume_component_docs_1_2_grams = tokenizer_1_2.get_neume_component_ngram_docs() + ngram_docs_1_2 = prepare_tokenizer_results(tokenizer_1_2) tokenizer_2_3 = MEITokenizer( TEST_MEI_FILE, min_ngram=2, max_ngram=3, ) - neume_docs_2_3_grams = tokenizer_2_3.get_neume_ngram_docs() - neume_component_docs_2_3_grams = tokenizer_2_3.get_neume_component_ngram_docs() + ngram_docs_2_3 = prepare_tokenizer_results(tokenizer_2_3) tokenizer_3_5 = MEITokenizer( TEST_MEI_FILE, min_ngram=3, max_ngram=5, ) - neume_docs_3_5_grams = tokenizer_3_5.get_neume_ngram_docs() - neume_component_docs_3_5_grams = tokenizer_3_5.get_neume_component_ngram_docs() - with self.subTest("Number of ngrams"): - # Number of neumes in file: 117 - # => Number of 1- and 2-grams: 117 + 116 = 233 - # => Number of 2- and 3-grams: 116 + 115 = 231 - # => Number of 3-, 4-, and 5-grams: 115 + 114 + 113 = 342 - self.assertEqual(len(neume_docs_1_2_grams), 233) - self.assertEqual(len(neume_docs_2_3_grams), 231) - self.assertEqual(len(neume_docs_3_5_grams), 342) - # Number of neume components in file: 179 - # => Number of 1- and 2-grams: 179 + 178 = 357 - # => Number of 2- and 3-grams: 178 + 177 = 355 - # => Number of 3-, 4-, and 5-grams: 177 + 176 + 175 = 528 - self.assertEqual(len(neume_component_docs_1_2_grams), 357) - self.assertEqual(len(neume_component_docs_2_3_grams), 355) - self.assertEqual(len(neume_component_docs_3_5_grams), 528) + ngram_docs_3_5 = prepare_tokenizer_results(tokenizer_3_5) + with self.subTest("Total number of ngrams: 1- and 2-grams"): + expected_num_ngrams_1_2 = 
calculate_expected_total_ngrams(
+                TEST_MEI_FILE, 1, 2
+            )
+            self.assertEqual(len(ngram_docs_1_2), expected_num_ngrams_1_2)
+        with self.subTest("Total number of ngrams: 2- and 3-grams"):
+            expected_num_ngrams_2_3 = calculate_expected_total_ngrams(
+                TEST_MEI_FILE, 2, 3
+            )
+            self.assertEqual(len(ngram_docs_2_3), expected_num_ngrams_2_3)
+        with self.subTest("Total number of ngrams: 3- to 5-grams"):
+            expected_num_ngrams_3_5 = calculate_expected_total_ngrams(
+                TEST_MEI_FILE, 3, 5
+            )
+            self.assertEqual(len(ngram_docs_3_5), expected_num_ngrams_3_5)
        # First three neumes in test file:
        #
        #
@@ -102,63 +128,95 @@ def test_mei_tokenizer(self) -> None:
        #
        #
        #
-        with self.subTest("First neume 1-gram"):
-            expected_neume_1gram = {
-                "ngram_unit": "neume",
-                "location": json.dumps(
-                    [{"ulx": 2608, "uly": 2399, "width": 70, "height": 49}]
-                ),
-                "neume_names": "Punctum",
-            }
-            self.assertEqual(neume_docs_1_2_grams[0], expected_neume_1gram)
-        with self.subTest("First neume component 1-gram"):
-            expected_first_neume_component_1gram = {
-                "ngram_unit": "neume_component",
+        # Last two neumes in test file:
+        #
+        #
+        #
+        #
+        #
+        #
+        #
+        #
+        # Relevant zones for the last two neumes:
+        #
+        #
+        #
+        #
+        with self.subTest("First 1-gram"):
+            expected_1gram: NgramDocument = {
                "location": json.dumps(
                    [{"ulx": 2608, "uly": 2399, "width": 70, "height": 49}]
                ),
                "pitch_names": "d",
-                "intervals": "",
-                "contours": "",
+                "contour": "",
+                "semitone_intervals": "",
+                "neume_names": "punctum",
            }
-            self.assertEqual(
-                neume_component_docs_1_2_grams[0],
-                expected_first_neume_component_1gram,
-            )
-        with self.subTest("First neume 3-gram"):
-            expected_neume_3gram = {
-                "ngram_unit": "neume",
+            self.assertEqual(expected_1gram, ngram_docs_1_2[0])
+        with self.subTest("Ngram of first 3 neumes"):
+            expected_3gram: NgramDocument = {
                "location": json.dumps(
                    [{"ulx": 2608, "uly": 2292, "width": 477, "height": 201}]
                ),
-                "neume_names": "Punctum_Clivis_Punctum",
+                "neume_names": "punctum_clivis_punctum",
+                "pitch_names": "d_d_c_f",
+                "contour": "r_d_u",
+                "semitone_intervals": "0_-2_5",
            }
-            self.assertEqual(neume_docs_3_5_grams[0], expected_neume_3gram)
-        with self.subTest("First neume component 3-gram"):
-            expected_first_neume_component_3gram = {
-                "ngram_unit": "neume_component",
+            self.assertEqual(expected_3gram, ngram_docs_3_5[1])
+            self.assertEqual(expected_3gram, ngram_docs_2_3[2])
+        with self.subTest("Pitch 3-gram: second three pitches"):
+            # This 3-gram is constructed from the second three
+            # pitches of the sample above.
+            pitch_3gram: NgramDocument = {
                "location": json.dumps(
-                    [{"ulx": 2608, "uly": 2396, "width": 257, "height": 97}]
+                    [{"ulx": 2725, "uly": 2292, "width": 360, "height": 201}]
                ),
-                "pitch_names": "d_d_c",
-                "intervals": "0_-2",
-                "contours": "s_d",
+                "pitch_names": "d_c_f",
+                "semitone_intervals": "-2_5",
+                "contour": "d_u",
+                "neume_names": "clivis_punctum",
            }
            self.assertEqual(
-                neume_component_docs_3_5_grams[0],
-                expected_first_neume_component_3gram,
+                pitch_3gram,
+                ngram_docs_2_3[4],
            )
-        with self.subTest("Second neume component 3-gram"):
-            expected_second_neume_component_3gram = {
-                "ngram_unit": "neume_component",
+            self.assertEqual(
+                pitch_3gram,
+                ngram_docs_3_5[4],
+            )
+        with self.subTest("Pitch 3-gram: last three pitches"):
+            # This 3-gram is constructed from the last three
+            # pitches of the test document.
+ pitch_3gram_1: NgramDocument = { "location": json.dumps( - [{"ulx": 2725, "uly": 2292, "width": 360, "height": 201}] + [{"ulx": 4811, "uly": 7724, "width": 364, "height": 150}] ), - "pitch_names": "d_c_f", - "intervals": "-2_5", - "contours": "d_u", + "pitch_names": "c_e_d", + "semitone_intervals": "4_-2", + "contour": "u_d", } - self.assertEqual( - neume_component_docs_3_5_grams[1], - expected_second_neume_component_3gram, + self.assertIn( + pitch_3gram_1, + ngram_docs_2_3, + ) + self.assertIn( + pitch_3gram_1, + ngram_docs_3_5, + ) + with self.subTest("Pitch 4-gram: last 4 pitches"): + # This 4-gram is constructed from the last four + # pitches of the test document. + pitch_4gram: NgramDocument = { + "location": json.dumps( + [{"ulx": 4750, "uly": 7724, "width": 425, "height": 150}] + ), + "pitch_names": "d_c_e_d", + "semitone_intervals": "-2_4_-2", + "contour": "d_u_d", + "neume_names": "clivis_clivis", + } + self.assertIn( + pitch_4gram, + ngram_docs_3_5, ) diff --git a/poetry.lock b/poetry.lock index ed4ad2a95..02d626e8c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -30,13 +30,13 @@ tests = ["mypy (>=0.800)", "pytest", "pytest-asyncio"] [[package]] name = "astroid" -version = "3.1.0" +version = "3.2.0" description = "An abstract syntax tree for Python with inference support." optional = false python-versions = ">=3.8.0" files = [ - {file = "astroid-3.1.0-py3-none-any.whl", hash = "sha256:951798f922990137ac090c53af473db7ab4e70c770e6d7fae0cec59f74411819"}, - {file = "astroid-3.1.0.tar.gz", hash = "sha256:ac248253bfa4bd924a0de213707e7ebeeb3138abeb48d798784ead1e56d419d4"}, + {file = "astroid-3.2.0-py3-none-any.whl", hash = "sha256:16ee8ca5c75ac828783028cc1f967777f0e507c6886a295ad143e0f405b975a2"}, + {file = "astroid-3.2.0.tar.gz", hash = "sha256:f7f829f8506ade59f1b3c6c93d8fac5b1ebc721685fa9af23e9794daf1d450a3"}, ] [[package]] @@ -398,6 +398,42 @@ files = [ [package.dependencies] Django = ">=3.2" +[[package]] +name = "django-stubs" +version = "4.2.7" +description = "Mypy stubs for Django" +optional = false +python-versions = ">=3.8" +files = [ + {file = "django-stubs-4.2.7.tar.gz", hash = "sha256:8ccd2ff4ee5adf22b9e3b7b1a516d2e1c2191e9d94e672c35cc2bc3dd61e0f6b"}, + {file = "django_stubs-4.2.7-py3-none-any.whl", hash = "sha256:4cf4de258fa71adc6f2799e983091b9d46cfc67c6eebc68fe111218c9a62b3b8"}, +] + +[package.dependencies] +django = "*" +django-stubs-ext = ">=4.2.7" +types-pytz = "*" +types-PyYAML = "*" +typing-extensions = "*" + +[package.extras] +compatible-mypy = ["mypy (>=1.7.0,<1.8.0)"] + +[[package]] +name = "django-stubs-ext" +version = "5.0.0" +description = "Monkey-patching and extensions for django-stubs" +optional = false +python-versions = ">=3.8" +files = [ + {file = "django_stubs_ext-5.0.0-py3-none-any.whl", hash = "sha256:8e1334fdf0c8bff87e25d593b33d4247487338aaed943037826244ff788b56a8"}, + {file = "django_stubs_ext-5.0.0.tar.gz", hash = "sha256:5bacfbb498a206d5938454222b843d81da79ea8b6fcd1a59003f529e775bc115"}, +] + +[package.dependencies] +django = "*" +typing-extensions = "*" + [[package]] name = "djangorestframework" version = "3.15.1" @@ -412,6 +448,29 @@ files = [ [package.dependencies] django = ">=3.0" +[[package]] +name = "djangorestframework-stubs" +version = "3.14.5" +description = "PEP-484 stubs for django-rest-framework" +optional = false +python-versions = ">=3.8" +files = [ + {file = "djangorestframework-stubs-3.14.5.tar.gz", hash = "sha256:5dd6f638aa5291fb7863e6166128a6ed20bf4986e2fc5cf334e6afc841797a09"}, + {file = 
"djangorestframework_stubs-3.14.5-py3-none-any.whl", hash = "sha256:43d788fd50cda49b922cd411e59c5b8cdc3f3de49c02febae12ce42139f0269b"}, +] + +[package.dependencies] +django-stubs = ">=4.2.7" +requests = ">=2.0.0" +types-PyYAML = ">=5.4.3" +types-requests = ">=0.1.12" +typing-extensions = ">=3.10.0" + +[package.extras] +compatible-mypy = ["django-stubs[compatible-mypy]", "mypy (>=1.7.0,<1.8.0)"] +coreapi = ["coreapi (>=2.0.0)"] +markdown = ["types-Markdown (>=0.1.5)"] + [[package]] name = "future" version = "1.0.0" @@ -600,6 +659,7 @@ files = [ {file = "lxml-5.2.1-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:9e2addd2d1866fe112bc6f80117bcc6bc25191c5ed1bfbcf9f1386a884252ae8"}, {file = "lxml-5.2.1-cp37-cp37m-win32.whl", hash = "sha256:f51969bac61441fd31f028d7b3b45962f3ecebf691a510495e5d2cd8c8092dbd"}, {file = "lxml-5.2.1-cp37-cp37m-win_amd64.whl", hash = "sha256:b0b58fbfa1bf7367dde8a557994e3b1637294be6cf2169810375caf8571a085c"}, + {file = "lxml-5.2.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:3e183c6e3298a2ed5af9d7a356ea823bccaab4ec2349dc9ed83999fd289d14d5"}, {file = "lxml-5.2.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:804f74efe22b6a227306dd890eecc4f8c59ff25ca35f1f14e7482bbce96ef10b"}, {file = "lxml-5.2.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:08802f0c56ed150cc6885ae0788a321b73505d2263ee56dad84d200cab11c07a"}, {file = "lxml-5.2.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f8c09ed18ecb4ebf23e02b8e7a22a05d6411911e6fabef3a36e4f371f4f2585"}, @@ -861,18 +921,19 @@ files = [ [[package]] name = "platformdirs" -version = "4.2.0" -description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +version = "4.2.1" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." 
optional = false python-versions = ">=3.8" files = [ - {file = "platformdirs-4.2.0-py3-none-any.whl", hash = "sha256:0614df2a2f37e1a662acbd8e2b25b92ccf8632929bc6d43467e17fe89c75e068"}, - {file = "platformdirs-4.2.0.tar.gz", hash = "sha256:ef0cc731df711022c174543cb70a9b5bd22e5a9337c8624ef2c2ceb8ddad8768"}, + {file = "platformdirs-4.2.1-py3-none-any.whl", hash = "sha256:17d5a1161b3fd67b390023cb2d3b026bbd40abde6fdb052dfbd3a29c3ba22ee1"}, + {file = "platformdirs-4.2.1.tar.gz", hash = "sha256:031cd18d4ec63ec53e82dceaac0417d218a6863f7745dfcc9efe7793b7039bdf"}, ] [package.extras] docs = ["furo (>=2023.9.10)", "proselint (>=0.13)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)"] +type = ["mypy (>=1.8)"] [[package]] name = "prompt-toolkit" @@ -913,17 +974,17 @@ test = ["anyio (>=3.6.2,<4.0)", "mypy (>=1.4.1)", "pproxy (>=2.7)", "pytest (>=6 [[package]] name = "pylint" -version = "3.1.0" +version = "3.2.0" description = "python code static checker" optional = false python-versions = ">=3.8.0" files = [ - {file = "pylint-3.1.0-py3-none-any.whl", hash = "sha256:507a5b60953874766d8a366e8e8c7af63e058b26345cfcb5f91f89d987fd6b74"}, - {file = "pylint-3.1.0.tar.gz", hash = "sha256:6a69beb4a6f63debebaab0a3477ecd0f559aa726af4954fc948c51f7a2549e23"}, + {file = "pylint-3.2.0-py3-none-any.whl", hash = "sha256:9f20c05398520474dac03d7abb21ab93181f91d4c110e1e0b32bc0d016c34fa4"}, + {file = "pylint-3.2.0.tar.gz", hash = "sha256:ad8baf17c8ea5502f23ae38d7c1b7ec78bd865ce34af9a0b986282e2611a8ff2"}, ] [package.dependencies] -astroid = ">=3.1.0,<=3.2.0-dev0" +astroid = ">=3.2.0,<=3.3.0-dev0" colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} dill = {version = ">=0.3.7", markers = "python_version >= \"3.12\""} isort = ">=4.2.5,<5.13.0 || >5.13.0,<6" @@ -935,6 +996,38 @@ tomlkit = ">=0.10.1" spelling = ["pyenchant (>=3.2,<4.0)"] testutils = ["gitpython (>3)"] +[[package]] +name = "pylint-django" +version = "2.5.5" +description = "A Pylint plugin to help Pylint understand the Django web framework" +optional = false +python-versions = ">=3.7,<4.0" +files = [ + {file = "pylint_django-2.5.5-py3-none-any.whl", hash = "sha256:5abd5c2228e0e5e2a4cb6d0b4fc1d1cef1e773d0be911412f4dd4fc1a1a440b7"}, + {file = "pylint_django-2.5.5.tar.gz", hash = "sha256:2f339e4bf55776958283395c5139c37700c91bd5ef1d8251ef6ac88b5abbba9b"}, +] + +[package.dependencies] +pylint = ">=2.0,<4" +pylint-plugin-utils = ">=0.8" + +[package.extras] +with-django = ["Django (>=2.2)"] + +[[package]] +name = "pylint-plugin-utils" +version = "0.8.2" +description = "Utilities and helpers for writing Pylint plugins" +optional = false +python-versions = ">=3.7,<4.0" +files = [ + {file = "pylint_plugin_utils-0.8.2-py3-none-any.whl", hash = "sha256:ae11664737aa2effbf26f973a9e0b6779ab7106ec0adc5fe104b0907ca04e507"}, + {file = "pylint_plugin_utils-0.8.2.tar.gz", hash = "sha256:d3cebf68a38ba3fba23a873809155562571386d4c1b03e5b4c4cc26c3eee93e4"}, +] + +[package.dependencies] +pylint = ">=1.7" + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -974,7 +1067,6 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -1073,15 +1165,51 @@ doc = ["sphinx"] [[package]] name = "tomlkit" -version = "0.12.4" +version = "0.12.5" description = "Style preserving TOML library" optional = false python-versions = ">=3.7" files = [ - {file = "tomlkit-0.12.4-py3-none-any.whl", hash = "sha256:5cd82d48a3dd89dee1f9d64420aa20ae65cfbd00668d6f094d7578a78efbb77b"}, - {file = "tomlkit-0.12.4.tar.gz", hash = "sha256:7ca1cfc12232806517a8515047ba66a19369e71edf2439d0f5824f91032b6cc3"}, + {file = "tomlkit-0.12.5-py3-none-any.whl", hash = "sha256:af914f5a9c59ed9d0762c7b64d3b5d5df007448eb9cd2edc8a46b1eafead172f"}, + {file = "tomlkit-0.12.5.tar.gz", hash = "sha256:eef34fba39834d4d6b73c9ba7f3e4d1c417a4e56f89a7e96e090dd0d24b8fb3c"}, +] + +[[package]] +name = "types-pytz" +version = "2024.1.0.20240417" +description = "Typing stubs for pytz" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-pytz-2024.1.0.20240417.tar.gz", hash = "sha256:6810c8a1f68f21fdf0f4f374a432487c77645a0ac0b31de4bf4690cf21ad3981"}, + {file = "types_pytz-2024.1.0.20240417-py3-none-any.whl", hash = "sha256:8335d443310e2db7b74e007414e74c4f53b67452c0cb0d228ca359ccfba59659"}, +] + +[[package]] +name = "types-pyyaml" +version = "6.0.12.20240311" +description = "Typing stubs for PyYAML" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-PyYAML-6.0.12.20240311.tar.gz", hash = "sha256:a9e0f0f88dc835739b0c1ca51ee90d04ca2a897a71af79de9aec5f38cb0a5342"}, + {file = "types_PyYAML-6.0.12.20240311-py3-none-any.whl", hash = "sha256:b845b06a1c7e54b8e5b4c683043de0d9caf205e7434b3edc678ff2411979b8f6"}, ] +[[package]] +name = "types-requests" +version = "2.31.0.20240406" +description = "Typing stubs for requests" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-requests-2.31.0.20240406.tar.gz", hash = "sha256:4428df33c5503945c74b3f42e82b181e86ec7b724620419a2966e2de604ce1a1"}, + {file = "types_requests-2.31.0.20240406-py3-none-any.whl", hash = "sha256:6216cdac377c6b9a040ac1c0404f7284bd13199c0e1bb235f4324627e8898cf5"}, +] + +[package.dependencies] +urllib3 = ">=2" + [[package]] name = "typing-extensions" version = "4.11.0" @@ -1163,4 +1291,4 @@ watchdog = ["watchdog (>=2.3)"] [metadata] lock-version = "2.0" python-versions = "^3.12" -content-hash = "a62687b2b399b6549d26d179a7900297de68f3ea3a840da3f2b6b5814c3d20e6" +content-hash = "8959a6bca173000d1c97b85d5b45c4142bc927e78f93e6b5672c6d63fb8648bb" diff --git a/pyproject.toml b/pyproject.toml index ec8dc2387..fa2c7542e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,9 @@ black = "^24.4.2" mypy = "^1.10.0" pylint = "^3.1.0" lxml-stubs = "^0.5.1" +django-stubs = "^4.2.7" +pylint-django = "^2.5.5" +djangorestframework-stubs = "^3.14.5" [build-system] requires = ["poetry-core"]