Skip to content

Commit

Permalink
add a test to cover raise_unicode_errors parameter
Browse files Browse the repository at this point in the history
  • Loading branch information
stolarczyk committed Nov 22, 2024
1 parent 1e4b48a commit c80c78d
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 2 deletions.
4 changes: 2 additions & 2 deletions pdfplumber/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,8 +313,8 @@ def parse(annot: T_obj) -> T_obj:
if self.pdf.raise_unicode_errors:
raise
warn(
f"Could not decode {k} for annotation."
" {k} will be missing."
f"Could not decode {k} of annotation."
f" {k} will be missing."
)

parsed = {
Expand Down
Binary file added tests/pdfs/annotations-unicode-issues.pdf
Binary file not shown.
23 changes: 23 additions & 0 deletions tests/test_issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
resource = None
import unittest

import pytest

import pdfplumber

logging.disable(logging.ERROR)
Expand Down Expand Up @@ -332,3 +334,24 @@ def test_issue_1181(self):
["Bar10", "Bar11", "Bar12"],
["", "", ""],
]

def test_pr_1195(self):
    """
    Check both modes of the ``raise_unicode_errors`` option of
    ``pdfplumber.open()``.

    The fixture PDF carries annotation data that cannot be decoded.
    With default arguments, walking ``pdf.annots`` must raise
    ``UnicodeDecodeError``; with ``raise_unicode_errors=False`` the same
    walk must emit a ``UserWarning`` instead.
    """
    fixture = os.path.join(HERE, "pdfs/annotations-unicode-issues.pdf")

    # Default behaviour: the decode failure propagates to the caller.
    # The PDF is opened outside the ``raises`` context so that only the
    # annotation access is expected to fail.
    with pdfplumber.open(fixture) as pdf:
        with pytest.raises(UnicodeDecodeError):
            # Exhaust the annotations to force every one to be parsed.
            list(pdf.annots)

    # Opt-out behaviour: the failure is downgraded to a warning.
    with pdfplumber.open(fixture, raise_unicode_errors=False) as pdf:
        with pytest.warns(UserWarning):
            list(pdf.annots)

0 comments on commit c80c78d

Please sign in to comment.