From 17a9b29bde1741b50f39e8e469798a7474eac919 Mon Sep 17 00:00:00 2001
From: Andrew Paseltiner
Date: Sat, 22 Jun 2024 14:54:45 -0400
Subject: [PATCH] Use += with lists to avoid unnecessary temporary list
 creation

Splitting long chained concatenations into separate += statements also
makes those lines easier to read and to measure with a line-based
profiler.
---
se/commands/build_ids.py | 2 +-
se/formatting.py | 2 +-
se/se_epub.py | 14 +++++-----
se/se_epub_lint.py | 60 +++++++++++++++++++++-------------------
4 files changed, 41 insertions(+), 37 deletions(-)
diff --git a/se/commands/build_ids.py b/se/commands/build_ids.py
index 2726e92d..d8a418bf 100644
--- a/se/commands/build_ids.py
+++ b/se/commands/build_ids.py
@@ -69,7 +69,7 @@ def build_ids(plain_output: bool) -> int:
id_counter = id_counter + 1
# Now, get a list of what we expect all eligible IDs to be.
- replacements = replacements + se.formatting.find_unexpected_ids(dom)
+ replacements += se.formatting.find_unexpected_ids(dom)
# Write our wiped file, we'll update it later
with open(filename, "w", encoding="utf-8") as file:
diff --git a/se/formatting.py b/se/formatting.py
index 06b8888f..b53fa700 100644
--- a/se/formatting.py
+++ b/se/formatting.py
@@ -1533,7 +1533,7 @@ def _get_flattened_children(node: EasyXmlElement, allow_header: bool) -> List[Ea
if child.tag not in sectioning_elements and not is_endnote and not is_glossdef:
result.append(child)
- result = result + _get_flattened_children(child, allow_header)
+ result += _get_flattened_children(child, allow_header)
return result
diff --git a/se/se_epub.py b/se/se_epub.py
index a34780a3..4390a3cf 100644
--- a/se/se_epub.py
+++ b/se/se_epub.py
@@ -553,7 +553,7 @@ def recompose(self, output_xhtml5: bool, extra_css_file: Union[Path,None] = None
for filepath in css_filenames:
file_css = self.get_file(filepath)
- namespaces = namespaces + regex.findall(r"@namespace.+?;", file_css)
+ namespaces += regex.findall(r"@namespace.+?;", file_css)
file_css = regex.sub(r"\s*@(charset|namespace).+?;\s*", "\n", file_css).strip()
@@ -1248,15 +1248,15 @@ def generate_spine(self) -> se.easy_xml.EasyXmlElement:
halftitlepage, frontmatter = self.__add_to_spine([], frontmatter, "halftitlepage")
# Add any remaining frontmatter
- spine = spine + natsorted([file_path.name for file_path in frontmatter])
+ spine += natsorted([file_path.name for file_path in frontmatter])
# The half title page is always the last front matter
- spine = spine + halftitlepage
+ spine += halftitlepage
# Add bodymatter
spine, bodymatter = self.__add_to_spine(spine, bodymatter, "prologue")
- spine = spine + natsorted([file_path.name for file_path in bodymatter])
+ spine += natsorted([file_path.name for file_path in bodymatter])
# Add backmatter
spine, backmatter = self.__add_to_spine(spine, backmatter, "afterword")
@@ -1270,11 +1270,11 @@ def generate_spine(self) -> se.easy_xml.EasyXmlElement:
copyright_page, backmatter = self.__add_to_spine([], backmatter, "copyright-page")
# Add any remaining backmatter
- spine = spine + natsorted([file_path.name for file_path in backmatter])
+ spine += natsorted([file_path.name for file_path in backmatter])
# Colophon and copyright page are always last
- spine = spine + colophon
- spine = spine + copyright_page
+ spine += colophon
+ spine += copyright_page
# Now build the spine output
spine_xml = "\n"
diff --git a/se/se_epub_lint.py b/se/se_epub_lint.py
index 22309d4e..53a924c9 100644
--- a/se/se_epub_lint.py
+++ b/se/se_epub_lint.py
@@ -693,7 +693,8 @@ def _lint_metadata_checks(self) -> list:
messages.append(LintMessage("m-015", f"Metadata long description is not valid XHTML. LXML says: {ex}", se.MESSAGE_TYPE_ERROR, self.metadata_file_path))
# Check for apostrophes outside links in long description
- matches = regex.findall(r"’s", long_description) + regex.findall(r"s’", long_description)
+ matches = regex.findall(r"’s", long_description)
+ matches += regex.findall(r"s’", long_description)
if matches:
messages.append(LintMessage("m-044", "Possessive [text]’[/] or [text]’s[/] outside of [xhtml][/] element in long description.", se.MESSAGE_TYPE_ERROR, self.metadata_file_path, matches))
@@ -897,7 +898,7 @@ def _lint_metadata_checks(self) -> list:
# Check for common typos in description
for node in self.metadata_dom.xpath("/package/metadata/dc:description") + self.metadata_dom.xpath("/package/metadata/meta[@property='se:long-description']"):
matches = regex.findall(r"(?[/].", se.MESSAGE_TYPE_ERROR, filename, nodes))
@@ -2284,10 +2285,10 @@ def _lint_xhtml_typography_checks(filename: Path, dom: se.easy_xml.EasyXmlTree,
matches = [match for match in matches if "
" not in match and "" not in match]
# xpath to check for opening quote in p, without a next child p that starts with an opening quote or an opening bracket (for editorial insertions within paragraphs of quotation); or that consists of only an ellipses (like an elided part of a longer quotation)
# Matching s can't have a poem/verse ancestor as formatting is often special for those.
- matches = matches + [regex.findall(r"“[^”]+
", node.to_string())[0] for node in dom.xpath("/html/body//p[re:test(., '“[^‘”]+$')][not(ancestor::*[re:test(@epub:type, 'z3998:(verse|poem|song|hymn|lyrics)')])][(following-sibling::*[1])[name()='p'][not(re:test(normalize-space(.), '^[“\\[]') or re:test(normalize-space(.), '^…$'))]]")]
+ matches += [regex.findall(r"“[^”]+", node.to_string())[0] for node in dom.xpath("/html/body//p[re:test(., '“[^‘”]+$')][not(ancestor::*[re:test(@epub:type, 'z3998:(verse|poem|song|hymn|lyrics)')])][(following-sibling::*[1])[name()='p'][not(re:test(normalize-space(.), '^[“\\[]') or re:test(normalize-space(.), '^…$'))]]")]
# Additionally, match short tags (< 100 chars) that lack closing quote, and whose direct siblings do have closing quotes (to exclude runs of same-speaker dialog), and that is not within a blockquote, verse, or letter
- matches = matches + [regex.findall(r"“[^”]+
", node.to_string())[0] for node in dom.xpath("/html/body//p[re:test(., '“[^‘”]+$') and not(re:test(., '[…:]$')) and string-length(normalize-space(.)) <=100][(following-sibling::*[1])[not(re:test(., '“[^”]+$'))] and (preceding-sibling::*[1])[not(re:test(., '“[^”]+$'))]][not(ancestor::*[re:test(@epub:type, 'z3998:(verse|poem|song|hymn|lyrics)')]) and not(ancestor::blockquote) and not (ancestor::*[contains(@epub:type, 'z3998:letter')])][(following-sibling::*[1])[name()='p'][re:test(normalize-space(.), '^[“\\[]') and not(contains(., 'continued'))]]")]
+ matches += [regex.findall(r"“[^”]+", node.to_string())[0] for node in dom.xpath("/html/body//p[re:test(., '“[^‘”]+$') and not(re:test(., '[…:]$')) and string-length(normalize-space(.)) <=100][(following-sibling::*[1])[not(re:test(., '“[^”]+$'))] and (preceding-sibling::*[1])[not(re:test(., '“[^”]+$'))]][not(ancestor::*[re:test(@epub:type, 'z3998:(verse|poem|song|hymn|lyrics)')]) and not(ancestor::blockquote) and not (ancestor::*[contains(@epub:type, 'z3998:letter')])][(following-sibling::*[1])[name()='p'][re:test(normalize-space(.), '^[“\\[]') and not(contains(., 'continued'))]]")]
if matches:
messages.append(LintMessage("t-003", "[text]“[/] missing matching [text]”[/]. Note: When dialog from the same speaker spans multiple [xhtml][/] elements, it’s correct grammar to omit closing [text]”[/] until the last [xhtml]
[/] of dialog.", se.MESSAGE_TYPE_WARNING, filename, matches))
@@ -2313,7 +2314,9 @@ def _lint_xhtml_typography_checks(filename: Path, dom: se.easy_xml.EasyXmlTree,
# Check for repeated punctuation, but first remove `&` so we don't match `&,`
# Remove tds with repeated ” as they are probably ditto marks
- matches = regex.findall(r"[,;]{2,}.{0,20}", file_contents.replace("&", "")) + regex.findall(r"(?:“\s*“|”\s*”|’ ’|‘\s*‘).{0,20}", regex.sub(r"
[”\s]+?(.+?)? | ", "", file_contents)) + regex.findall(r"[\p{Letter}][,\.:;]\s[,\.:;]\s?[\p{Letter}<].{0,20}", file_contents)
+ matches = regex.findall(r"[,;]{2,}.{0,20}", file_contents.replace("&", ""))
+ matches += regex.findall(r"(?:“\s*“|”\s*”|’ ’|‘\s*‘).{0,20}", regex.sub(r"[”\s]+?(.+?)? | ", "", file_contents))
+ matches += regex.findall(r"[\p{Letter}][,\.:;]\s[,\.:;]\s?[\p{Letter}<].{0,20}", file_contents)
if matches:
messages.append(LintMessage("t-008", "Repeated punctuation.", se.MESSAGE_TYPE_WARNING, filename, matches))
@@ -2385,12 +2388,12 @@ def _lint_xhtml_typography_checks(filename: Path, dom: se.easy_xml.EasyXmlTree,
matches = [node.to_string() for node in dom.xpath("(//b | //i)[contains(@epub:type, 'se:name') and not(contains(@epub:type, 'z3998:stage-direction'))][(text()[last()])[re:test(., '[\\.,!\\?;:]$')]]")]
# Match b or i elements that are not stage directions, and that end in a comma followed by a lowercase letter
- matches = matches + [node.to_string() for node in dom.xpath("(//b | //i)[not(contains(@epub:type, 'z3998:stage-direction'))][(text()[last()])[re:test(., ',$')] and following-sibling::node()[re:test(., '^\\s*[a-z]')] ]")]
+ matches += [node.to_string() for node in dom.xpath("(//b | //i)[not(contains(@epub:type, 'z3998:stage-direction'))][(text()[last()])[re:test(., ',$')] and following-sibling::node()[re:test(., '^\\s*[a-z]')] ]")]
# ...and also check for ending punctuation inside em tags, if it looks like a *part* of a clause
# instead of a whole clause. If the is preceded by an em dash or quotes, or if there's punctuation
# and a space before it, then it's presumed to be a whole clause.
- matches = matches + [match.strip() for match in regex.findall(r"(?]|[!\.\?…;:]\s)(?:\w+?\s*)+[\.,\!\?;]", file_contents) if match.islower()]
+ matches += [match.strip() for match in regex.findall(r"(?]|[!\.\?…;:]\s)(?:\w+?\s*)+[\.,\!\?;]", file_contents) if match.islower()]
if matches:
messages.append(LintMessage("t-017", "Ending punctuation inside formatting like bold, small caps, or italics. Ending punctuation is only allowed within formatting if the phrase is an independent clause.", se.MESSAGE_TYPE_WARNING, filename, list(set(matches))))
@@ -2404,7 +2407,8 @@ def _lint_xhtml_typography_checks(filename: Path, dom: se.easy_xml.EasyXmlTree,
# Outer wrapping match is so that .findall returns the entire match and not the subgroup
# The first regex also matches the first few characters before the first double quote; we use those for more sophisticated
# checks below, to give fewer false positives like `with its downy red hairs and its “doigts de faune.”`
- matches = regex.findall(r"((?:.{1,2}\s)?“<(i|em)[^>]*?>[^<]+?\2>[\!\?\.])", file_contents) + regex.findall(r"([\.\!\?] <(i|em)[^>]*?>[^<]+?\2>[\!\?\.])", file_contents)
+ matches = regex.findall(r"((?:.{1,2}\s)?“<(i|em)[^>]*?>[^<]+?\2>[\!\?\.])", file_contents)
+ matches += regex.findall(r"([\.\!\?] <(i|em)[^>]*?>[^<]+?\2>[\!\?\.])", file_contents)
# But, if we've matched a name of something, don't include that as an error. For example, `He said, “The Decameron.”`
# We also exclude the match from the list if:
@@ -2496,7 +2500,8 @@ def _lint_xhtml_typography_checks(filename: Path, dom: se.easy_xml.EasyXmlTree,
messages.append(LintMessage("s-004", "[xhtml]img[/] element missing [attr]alt[/] attribute.", se.MESSAGE_TYPE_ERROR, filename, img_no_alt))
# Check for low-hanging misquoted fruit
- matches = regex.findall(r"[\p{Letter}]+[“‘]", file_contents) + regex.findall(r"[^>]+(?:em|i|b|span)>‘[\p{Lowercase_Letter}]+", file_contents)
+ matches = regex.findall(r"[\p{Letter}]+[“‘]", file_contents)
+ matches += regex.findall(r"[^>]+(?:em|i|b|span)>‘[\p{Lowercase_Letter}]+", file_contents)
if matches:
messages.append(LintMessage("t-028", "Possible mis-curled quotation mark.", se.MESSAGE_TYPE_WARNING, filename, matches))
@@ -2884,7 +2889,7 @@ def _lint_xhtml_typo_checks(filename: Path, dom: se.easy_xml.EasyXmlTree, file_c
if special_file != "titlepage":
# Don't check the titlepage because it has a standard format and may raise false positives
typos = regex.findall(r"(?= 2
ebook_flags["has_other_sources"] = other_source_count > 0
- messages = messages + _lint_metadata_checks(self)
+ messages += _lint_metadata_checks(self)
# Check for double spacing (done here so double_spaced_files doesn't have to be passed to function)
if self.metadata_dom.xpath(f"/package/metadata/*[re:test(., '[{se.NO_BREAK_SPACE}{se.HAIR_SPACE} ]{{2,}}')]"):
double_spaced_files.append(self.metadata_file_path)
# Check for malformed URLs
- messages = messages + _get_malformed_urls(self.metadata_dom, self.metadata_file_path)
+ messages += _get_malformed_urls(self.metadata_dom, self.metadata_file_path)
# Make sure some static files are unchanged
if self.is_se_ebook:
@@ -3472,7 +3477,7 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N
if filename.suffix in BINARY_EXTENSIONS or filename.name == "core.css":
if filename.suffix in (".jpg", ".jpeg", ".tif", ".tiff", ".png"):
- messages = messages + _lint_image_checks(self, filename)
+ messages += _lint_image_checks(self, filename)
continue
# Read the file and start doing some serious checks!
@@ -3495,7 +3500,7 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N
if filename.suffix == ".svg":
svg_dom = self.get_dom(filename)
- messages = messages + _lint_svg_checks(self, filename, file_contents, svg_dom, root)
+ messages += _lint_svg_checks(self, filename, file_contents, svg_dom, root)
if self.cover_path and filename.name == self.cover_path.name:
# For later comparison with titlepage
cover_svg_title = svg_dom.xpath("/svg/title/text()", True).replace("The cover for ", "") # can appear on any element in SVG, but we only want to check the root one
@@ -3537,10 +3542,10 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N
css_filename = (filename.parent / node.get_attr("href")).resolve()
dom.apply_css(self.get_file(css_filename), str(css_filename))
- messages = messages + _get_malformed_urls(dom, filename)
+ messages += _get_malformed_urls(dom, filename)
# Extract ID attributes for later checks
- id_attrs = id_attrs + dom.xpath("//*[name() != 'section' and name() != 'article' and name() != 'figure' and name() != 'nav']/@id")
+ id_attrs += dom.xpath("//*[name() != 'section' and name() != 'article' and name() != 'figure' and name() != 'nav']/@id")
# Add to the short story count for later checks
short_story_count += len(dom.xpath("/html/body//article[contains(@epub:type, 'se:short-story')]"))
@@ -3654,23 +3659,22 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N
special_file = None
if special_file in SPECIAL_FILES:
- messages = messages + _lint_special_file_checks(self, filename, dom, file_contents, ebook_flags, special_file)
+ messages += _lint_special_file_checks(self, filename, dom, file_contents, ebook_flags, special_file)
- missing_styles = missing_styles + _update_missing_styles(filename, dom, local_css)
+ missing_styles += _update_missing_styles(filename, dom, local_css)
- messages = messages + _lint_xhtml_css_checks(filename, dom, local_css_path)
+ messages += _lint_xhtml_css_checks(filename, dom, local_css_path)
- messages = messages + _lint_xhtml_metadata_checks(self, filename, dom)
+ messages += _lint_xhtml_metadata_checks(self, filename, dom)
- messages = messages + _lint_xhtml_syntax_checks(self, filename, dom, file_contents, ebook_flags, language, section_tree)
+ messages += _lint_xhtml_syntax_checks(self, filename, dom, file_contents, ebook_flags, language, section_tree)
(typography_messages, missing_files) = _lint_xhtml_typography_checks(filename, dom, file_contents, special_file, ebook_flags, missing_files, self)
- if typography_messages:
- messages = messages + typography_messages
+ messages += typography_messages
- messages = messages + _lint_xhtml_xhtml_checks(filename, dom, file_contents, local_css_path)
+ messages += _lint_xhtml_xhtml_checks(filename, dom, file_contents, local_css_path)
- messages = messages + _lint_xhtml_typo_checks(filename, dom, file_contents, special_file)
+ messages += _lint_xhtml_typo_checks(filename, dom, file_contents, special_file)
if self.cover_path and cover_svg_title != titlepage_svg_title:
messages.append(LintMessage("s-028", f"[path][link=file://{self.cover_path}]{self.cover_path.name}[/][/] and [path][link=file://{self.path / 'images/titlepage.svg'}]titlepage.svg[/][/] [xhtml][/] elements don’t match.", se.MESSAGE_TYPE_ERROR, self.cover_path))
@@ -3810,7 +3814,7 @@ def lint(self, skip_lint_ignore: bool, allowed_messages: Optional[List[str]] = N
if f"[epub|type~=\"{value}\"]" not in self.local_css:
missing_styles.append(element.to_tag_string())
- messages = messages + _lint_image_metadata_checks(self, ebook_flags["has_images"])
+ messages += _lint_image_metadata_checks(self, ebook_flags["has_images"])
if missing_styles:
messages.append(LintMessage("c-006", f"Semantic found, but missing corresponding style in [path][link=file://{local_css_path}]local.css[/][/].", se.MESSAGE_TYPE_ERROR, local_css_path, sorted(set(missing_styles))))