add a test to cover raise_unicode_errors parameter

jsvine · Nov 22, 2024 · c80c78d · c80c78d
1 parent 1e4b48a
commit c80c78d
Show file tree

Hide file tree

Showing 3 changed files with 25 additions and 2 deletions.
diff --git a/pdfplumber/page.py b/pdfplumber/page.py
@@ -313,8 +313,8 @@ def parse(annot: T_obj) -> T_obj:
                             if self.pdf.raise_unicode_errors:
                                 raise
                             warn(
-                                f"Could not decode {k} for annotation."
-                                " {k} will be missing."
+                                f"Could not decode {k} of annotation."
+                                f" {k} will be missing."
                             )
 
             parsed = {

diff --git a/tests/pdfs/annotations-unicode-issues.pdf b/tests/pdfs/annotations-unicode-issues.pdf
diff --git a/tests/test_issues.py b/tests/test_issues.py
@@ -9,6 +9,8 @@
     resource = None
 import unittest
 
+import pytest
+
 import pdfplumber
 
 logging.disable(logging.ERROR)
@@ -332,3 +334,24 @@ def test_issue_1181(self):
                 ["Bar10", "Bar11", "Bar12"],
                 ["", "", ""],
             ]
+
+    def test_pr_1195(self):
+        """
+        In certain scenarios, annotations may include invalid or extraneous data that can
+        obstruct the annotation processing workflow.
+        To mitigate this, the raise_unicode_errors parameter in the PDF initializer
+        and the .open() method provides a configurable option to bypass these errors and
+        generate warnings instead, ensuring smoother handling of such anomalies.
+
+        The following tests verify the functionality of the raise_unicode_errors parameter.
+        """
+        path = os.path.join(HERE, "pdfs/annotations-unicode-issues.pdf")
+        with pdfplumber.open(path) as pdf, pytest.raises(UnicodeDecodeError):
+            for _ in pdf.annots:
+                pass
+
+        with pdfplumber.open(path, raise_unicode_errors=False) as pdf, pytest.warns(
+            UserWarning
+        ):
+            for _ in pdf.annots:
+                pass