Skip to content

Commit

Permalink
chore: <breaking> changed 'force_ocr' to a keyword-only argument
Browse files Browse the repository at this point in the history
  • Loading branch information
Goldziher committed Feb 11, 2025
1 parent 5cc770a commit 29731fc
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
4 changes: 2 additions & 2 deletions kreuzberg/extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ class ExtractionResult(NamedTuple):
async def extract_bytes(
content: bytes,
mime_type: str,
force_ocr: bool = False,
*,
force_ocr: bool = False,
config: Config | None = None,
) -> ExtractionResult:
"""Extract the textual content from a given byte string representing a file's contents.
Expand Down Expand Up @@ -159,8 +159,8 @@ def extract_file_sync(
async def extract_file(
file_path: Path | str,
mime_type: str | None = None,
force_ocr: bool = False,
*,
force_ocr: bool = False,
config: Config | None = None,
) -> ExtractionResult:
"""Extract the textual content from a given file.
Expand Down
4 changes: 2 additions & 2 deletions tests/extraction_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ async def test_extract_bytes_pdf(pdf_document: Path) -> None:

async def test_extract_bytes_force_ocr_pdf(non_ascii_pdf: Path) -> None:
content = non_ascii_pdf.read_bytes()
result = await extract_bytes(content, PDF_MIME_TYPE, True)
result = await extract_bytes(content, PDF_MIME_TYPE, force_ocr=True)
assert result.mime_type == PLAIN_TEXT_MIME_TYPE
assert result.content.startswith("AMTSBLATT")
assert isinstance(result.content, str)
Expand Down Expand Up @@ -104,7 +104,7 @@ async def test_extract_file_pdf(pdf_document: Path) -> None:


async def test_extract_file_force_ocr_pdf(non_ascii_pdf: Path) -> None:
result = await extract_file(non_ascii_pdf, PDF_MIME_TYPE, True)
result = await extract_file(non_ascii_pdf, PDF_MIME_TYPE, force_ocr=True)
assert result.mime_type == PLAIN_TEXT_MIME_TYPE
assert result.content.startswith("AMTSBLATT")
assert isinstance(result.content, str)
Expand Down

0 comments on commit 29731fc

Please sign in to comment.