From ab2b45e180398ad9fb6d894679848dba7baf0915 Mon Sep 17 00:00:00 2001 From: Andrew Paseltiner Date: Sat, 13 Jul 2024 12:43:09 -0400 Subject: [PATCH] Fix m-070 - Treat glossary values as string literals, not patterns - Search for glossary values surrounded by word boundaries --- se/se_epub_lint.py | 2 +- tests/lint/metadata/m-070/golden/m-070-out.txt | 2 +- tests/lint/metadata/m-070/in/src/epub/text/chapter-1.xhtml | 2 -- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/se/se_epub_lint.py b/se/se_epub_lint.py index 89b66c8e..dd9ca2a6 100644 --- a/se/se_epub_lint.py +++ b/se/se_epub_lint.py @@ -3619,7 +3619,7 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N nodes = dom.xpath("/html/body//dd[contains(@epub:type, 'glossdef')]") source_text = " ".join([node.inner_text() for node in nodes]) for glossary_index, glossary_value in enumerate(glossary_usage): - if glossary_value[1] is False and regex.search(glossary_value[0], source_text, flags=regex.IGNORECASE): + if glossary_value[1] is False and regex.search(r"\b\L<val>\b", source_text, flags=regex.IGNORECASE, val=[glossary_value[0]]): glossary_usage[glossary_index] = (glossary_value[0], True) # Test against word boundaries to not match `halftitlepage` diff --git a/tests/lint/metadata/m-070/golden/m-070-out.txt b/tests/lint/metadata/m-070/golden/m-070-out.txt index 3992efad..6034b8e7 100644 --- a/tests/lint/metadata/m-070/golden/m-070-out.txt +++ b/tests/lint/metadata/m-070/golden/m-070-out.txt @@ -1,2 +1,2 @@ m-070 [Error] glossary-search-key-map.xml Glossary entry not found in the text. - R+L=J + foo diff --git a/tests/lint/metadata/m-070/in/src/epub/text/chapter-1.xhtml b/tests/lint/metadata/m-070/in/src/epub/text/chapter-1.xhtml index 35ad0f62..c4c65576 100644 --- a/tests/lint/metadata/m-070/in/src/epub/text/chapter-1.xhtml +++ b/tests/lint/metadata/m-070/in/src/epub/text/chapter-1.xhtml @@ -8,9 +8,7 @@

I

-

He ate some food.

-

A common theory was R+L=J.