From d385aaf7fb49a482ef5c9f1bf32c5e0c751f1f8c Mon Sep 17 00:00:00 2001 From: Stijn Vermeeren Date: Mon, 27 Jan 2025 13:19:07 +0100 Subject: [PATCH] Sort lines with identical vertical position by horizontal position in DataExtractor --- src/stratigraphy/data_extractor/data_extractor.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/stratigraphy/data_extractor/data_extractor.py b/src/stratigraphy/data_extractor/data_extractor.py index 8ab6578..f8aec8b 100644 --- a/src/stratigraphy/data_extractor/data_extractor.py +++ b/src/stratigraphy/data_extractor/data_extractor.py @@ -195,8 +195,12 @@ def get_lines_near_key(self, lines, key_line: TextLine) -> list[TextLine]: feature_lines.insert(0, key_line) feature_lines = list(dict.fromkeys(feature_lines)) - # Sort by vertical distance between the top of the feature line and the top of key_line - feature_lines_sorted = sorted(feature_lines, key=lambda line: abs(line.rect.y0 - key_line.rect.y0)) + # Sort by + # - vertical distance between the top of the feature line and the top of key_line + # - horizontal position (left-first) for lines with identical vertical position + feature_lines_sorted = sorted( + feature_lines, key=lambda line: (abs(line.rect.y0 - key_line.rect.y0), line.rect.x0) + ) return feature_lines_sorted