Merge master

VikParuchuri · Feb 6, 2025 · d8350de
2 parents 499ec89 + 06a3cc6
commit d8350de
Show file tree

Hide file tree

Showing 4 changed files with 24 additions and 5 deletions.
diff --git a/surya/detection/util.py b/surya/detection/util.py
@@ -27,10 +27,10 @@ def split_image(img, height):
             if bottom > img_height:
                 bottom = img_height
             cropped = img.crop((0, top, img.size[0], bottom))
-            height = bottom - top
-            if height < height:
+            chunk_height = bottom - top
+            if chunk_height < height:
                 cropped = ImageOps.pad(cropped, (img.size[0], height), color=255, centering=(0, 0))
             splits.append(cropped)
-            split_heights.append(height)
+            split_heights.append(chunk_height)
         return splits, split_heights
     return [img.copy()], [img_height]
diff --git a/surya/settings.py b/surya/settings.py
@@ -49,7 +49,7 @@ def TORCH_DEVICE_MODEL(self) -> str:
     DETECTOR_BENCH_DATASET_NAME: str = "vikp/doclaynet_bench"
     DETECTOR_IMAGE_CHUNK_HEIGHT: int = 1400 # Height at which to slice images vertically
     DETECTOR_TEXT_THRESHOLD: float = 0.6 # Threshold for text detection (above this is considered text)
-    DETECTOR_BLANK_THRESHOLD: float = 0.35 # Threshold for blank space (below this is considered blank)
+    DETECTOR_BLANK_THRESHOLD: float = 0.425 # Threshold for blank space (below this is considered blank)
     DETECTOR_POSTPROCESSING_CPU_WORKERS: int = min(8, os.cpu_count()) # Number of workers for postprocessing
     DETECTOR_MIN_PARALLEL_THRESH: int = 3 # Minimum number of images before we parallelize
     COMPILE_DETECTOR: bool = False

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -64,3 +64,11 @@ def test_image():
               font_size=24)
     return image
 
+@pytest.fixture()
+def test_image_tall():
+    image = Image.new("RGB", (4096, 4096), "white")
+    draw = ImageDraw.Draw(image)
+    draw.text((10, 10), "Hello World", fill="black", font_size=72)
+    draw.text((4000, 4000), "This is a sentence of text.\n\nNow it is a paragraph.\n\nA three-line one.", fill="black",  font_size=24)
+    return image
+
diff --git a/tests/test_detection.py b/tests/test_detection.py
@@ -5,4 +5,15 @@ def test_detection(detection_predictor, test_image):
     assert detection_results[0].image_bbox == [0, 0, 1024, 1024]
 
     bboxes = detection_results[0].bboxes
-    assert len(bboxes) == 4
+    assert len(bboxes) == 4
+
+
+def test_detection_chunking(detection_predictor, test_image_tall):
+    detection_results = detection_predictor([test_image_tall])
+
+    assert len(detection_results) == 1
+    assert detection_results[0].image_bbox == [0, 0, 4096, 4096]
+
+    bboxes = detection_results[0].bboxes
+    assert len(bboxes) >= 3 # Sometimes merges into 3
+    assert abs(4000 - bboxes[1].polygon[0][0]) < 50