From 472e1a242612ae3f7f4ce3e26738e94f09b92d65 Mon Sep 17 00:00:00 2001 From: Stijn Vermeeren Date: Wed, 20 Nov 2024 08:30:47 +0100 Subject: [PATCH 1/8] LGVISIUM-102: merge AToBInterval and AToBDepthColumnEntry --- .../depthcolumn/depthcolumnentry.py | 150 ------------------ src/stratigraphy/depthcolumnentry/__init__.py | 6 + .../depthcolumnentry/depthcolumnentry.py | 35 ++++ .../depthcolumnentry_extractor.py | 45 ++++++ src/stratigraphy/depthcolumnentry/util.py | 10 ++ src/stratigraphy/layer/layer.py | 2 +- src/stratigraphy/sidebar/a_above_b_sidebar.py | 2 +- .../sidebar/a_above_b_sidebar_extractor.py | 4 +- .../sidebar/a_above_b_sidebar_validator.py | 2 +- src/stratigraphy/sidebar/a_to_b_sidebar.py | 5 +- .../sidebar/a_to_b_sidebar_extractor.py | 7 +- src/stratigraphy/sidebar/sidebar.py | 2 +- .../util/a_to_b_interval_extractor.py | 82 ++++++++++ src/stratigraphy/util/interval.py | 50 +----- tests/test_depthcolumn.py | 2 +- tests/test_find_sidebar.py | 8 +- tests/test_interval.py | 5 +- 17 files changed, 205 insertions(+), 212 deletions(-) delete mode 100644 src/stratigraphy/depthcolumn/depthcolumnentry.py create mode 100644 src/stratigraphy/depthcolumnentry/__init__.py create mode 100644 src/stratigraphy/depthcolumnentry/depthcolumnentry.py create mode 100644 src/stratigraphy/depthcolumnentry/depthcolumnentry_extractor.py create mode 100644 src/stratigraphy/depthcolumnentry/util.py create mode 100644 src/stratigraphy/util/a_to_b_interval_extractor.py diff --git a/src/stratigraphy/depthcolumn/depthcolumnentry.py b/src/stratigraphy/depthcolumn/depthcolumnentry.py deleted file mode 100644 index f20200f..0000000 --- a/src/stratigraphy/depthcolumn/depthcolumnentry.py +++ /dev/null @@ -1,150 +0,0 @@ -"""Contains dataclasses for entries in a depth column.""" - -from __future__ import annotations - -import re -from dataclasses import dataclass -from typing import Any - -import fitz -from stratigraphy.lines.line import TextWord - - -@dataclass -class DepthColumnEntry: # noqa: D101 - """Class to represent a depth column entry.""" - - rect: fitz.Rect - value: float - - def __repr__(self) -> str: - return str(self.value) - - def to_json(self) -> dict[str, Any]: - """Convert the depth column entry to a JSON serializable format.""" - return {"value": self.value, "rect": [self.rect.x0, self.rect.y0, self.rect.x1, self.rect.y1]} - - @classmethod - def from_json(cls, data: dict) -> DepthColumnEntry: - """Converts a dictionary to an object. - - Args: - data (dict): A dictionary representing the depth column entry. - - Returns: - DepthColumnEntry: The depth column entry object. - """ - return cls(rect=fitz.Rect(data["rect"]), value=data["value"]) - - @classmethod - def find_in_words(cls, all_words: list[TextWord], include_splits: bool) -> list[DepthColumnEntry]: - """Find all depth column entries given a list of TextWord objects. - - Note: Only depths up to two digits before the decimal point are supported. - - Args: - all_words (list[TextWord]): List of text words to extract depth column entries from. - include_splits (bool): Whether to include split entries. - - Returns: - list[DepthColumnEntry]: The extracted depth column entries. - """ - entries = [] - for word in sorted(all_words, key=lambda word: word.rect.y0): - try: - input_string = word.text.strip().replace(",", ".") - regex = re.compile(r"^-?\.?([0-9]+(\.[0-9]+)?)[müMN\\.]*$") - # numbers such as '.40' are not supported. The reason is that sometimes the OCR - # recognizes a '-' as a '.' and we just ommit the leading '.' to avoid this issue. - match = regex.match(input_string) - if match: - value = value_as_float(match.group(1)) - entries.append(DepthColumnEntry(word.rect, value)) - elif include_splits: - # support for e.g. "1.10-1.60m" extracted as a single word - a_to_b_depth_column_entry = AToBDepthColumnEntry.from_text(input_string, word.rect) - entries.extend( - [a_to_b_depth_column_entry.start, a_to_b_depth_column_entry.end] - if a_to_b_depth_column_entry - else [] - ) - except ValueError: - pass - return entries - - -@dataclass -class AToBDepthColumnEntry: # noqa: D101 - """Class to represent a depth column entry of the form "1m - 3m".""" - - # TODO do we need both this class as well as AToBInterval, or can we combine the two classes? - - start: DepthColumnEntry - end: DepthColumnEntry - - def __repr__(self) -> str: - return f"{self.start.value}-{self.end.value}" - - @property - def rect(self) -> fitz.Rect: - """Get the rectangle of the layer depth column entry.""" - return fitz.Rect(self.start.rect).include_rect(self.end.rect) - - def to_json(self) -> dict[str, Any]: - """Convert the layer depth column entry to a JSON serializable format.""" - return {"start": self.start.to_json(), "end": self.end.to_json()} - - @classmethod - def from_json(cls, data: dict) -> AToBDepthColumnEntry: - """Converts a dictionary to an object. - - Args: - data (dict): A dictionary representing the layer depth column entry. - - Returns: - AToBDepthColumnEntry: The A-to-B depth column entry object. - """ - start = DepthColumnEntry.from_json(data["start"]) - end = DepthColumnEntry.from_json(data["end"]) - return cls(start, end) - - @classmethod - def from_text( - cls, text: str, rect: fitz.Rect, require_start_of_string: bool = True - ) -> AToBDepthColumnEntry | None: - """Attempts to extract a AToBDepthColumnEntry from a string. - - Args: - text (str): The string to extract the depth interval from. - rect (fitz.Rect): The rectangle of the text. - require_start_of_string (bool, optional): Whether the number to extract needs to be - at the start of a string. Defaults to True. - - Returns: - AToBDepthColumnEntry | None: The extracted LayerDepthColumnEntry or None if none is found. - """ - input_string = text.strip().replace(",", ".") - - query = r"-?([0-9]+(\.[0-9]+)?)[müMN\]*[\s-]+([0-9]+(\.[0-9]+)?)[müMN\\.]*" - if not require_start_of_string: - query = r".*?" + query - regex = re.compile(query) - match = regex.match(input_string) - if match: - value1 = value_as_float(match.group(1)) - first_half_rect = fitz.Rect(rect.x0, rect.y0, rect.x1 - rect.width / 2, rect.y1) - - value2 = value_as_float(match.group(3)) - second_half_rect = fitz.Rect(rect.x0 + rect.width / 2, rect.y0, rect.x1, rect.y1) - return AToBDepthColumnEntry( - DepthColumnEntry(first_half_rect, value1), - DepthColumnEntry(second_half_rect, value2), - ) - return None - - -def value_as_float(string_value: str) -> float: # noqa: D103 - """Converts a string to a float.""" - # OCR sometimes tends to miss the decimal comma - parsed_text = re.sub(r"^-?([0-9]+)([0-9]{2})", r"\1.\2", string_value) - return abs(float(parsed_text)) diff --git a/src/stratigraphy/depthcolumnentry/__init__.py b/src/stratigraphy/depthcolumnentry/__init__.py new file mode 100644 index 0000000..6bba7e8 --- /dev/null +++ b/src/stratigraphy/depthcolumnentry/__init__.py @@ -0,0 +1,6 @@ +"""Modules for depth column entries, values that indicate the measured depth of an interface between layers.""" + +from .depthcolumnentry import DepthColumnEntry +from .depthcolumnentry_extractor import DepthColumnEntryExtractor + +__all__ = ["DepthColumnEntry", "DepthColumnEntryExtractor"] diff --git a/src/stratigraphy/depthcolumnentry/depthcolumnentry.py b/src/stratigraphy/depthcolumnentry/depthcolumnentry.py new file mode 100644 index 0000000..45de275 --- /dev/null +++ b/src/stratigraphy/depthcolumnentry/depthcolumnentry.py @@ -0,0 +1,35 @@ +"""Contains a dataclass for depth column entries, which indicate the measured depth of an interface between layers.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + +import fitz + + +@dataclass +class DepthColumnEntry: # noqa: D101 + """Class to represent a depth column entry.""" + + rect: fitz.Rect + value: float + + def __repr__(self) -> str: + return str(self.value) + + def to_json(self) -> dict[str, Any]: + """Convert the depth column entry to a JSON serializable format.""" + return {"value": self.value, "rect": [self.rect.x0, self.rect.y0, self.rect.x1, self.rect.y1]} + + @classmethod + def from_json(cls, data: dict) -> DepthColumnEntry: + """Converts a dictionary to an object. + + Args: + data (dict): A dictionary representing the depth column entry. + + Returns: + DepthColumnEntry: The depth column entry object. + """ + return cls(rect=fitz.Rect(data["rect"]), value=data["value"]) diff --git a/src/stratigraphy/depthcolumnentry/depthcolumnentry_extractor.py b/src/stratigraphy/depthcolumnentry/depthcolumnentry_extractor.py new file mode 100644 index 0000000..c9d8ebf --- /dev/null +++ b/src/stratigraphy/depthcolumnentry/depthcolumnentry_extractor.py @@ -0,0 +1,45 @@ +"""Contains logic for finding depth column entries in text.""" + +import re + +from stratigraphy.depthcolumnentry import DepthColumnEntry +from stratigraphy.depthcolumnentry.util import value_as_float +from stratigraphy.lines.line import TextWord +from stratigraphy.util.a_to_b_interval_extractor import AToBIntervalExtractor + + +class DepthColumnEntryExtractor: + """Methods for finding depth column entries in a text.""" + + @classmethod + def find_in_words(cls, all_words: list[TextWord], include_splits: bool) -> list[DepthColumnEntry]: + """Find all depth column entries given a list of TextWord objects. + + Note: Only depths up to two digits before the decimal point are supported. + + Args: + all_words (list[TextWord]): List of text words to extract depth column entries from. + include_splits (bool): Whether to include split entries. + + Returns: + list[DepthColumnEntry]: The extracted depth column entries. + """ + entries = [] + for word in sorted(all_words, key=lambda word: word.rect.y0): + try: + input_string = word.text.strip().replace(",", ".") + regex = re.compile(r"^-?\.?([0-9]+(\.[0-9]+)?)[müMN\\.]*$") + # numbers such as '.40' are not supported. The reason is that sometimes the OCR + # recognizes a '-' as a '.' and we just ommit the leading '.' to avoid this issue. + match = regex.match(input_string) + if match: + value = value_as_float(match.group(1)) + entries.append(DepthColumnEntry(word.rect, value)) + + elif include_splits: + # support for e.g. "1.10-1.60m" extracted as a single word + a_to_b_interval = AToBIntervalExtractor.from_text(input_string, word.rect) + entries.extend([a_to_b_interval.start, a_to_b_interval.end] if a_to_b_interval else []) + except ValueError: + pass + return entries diff --git a/src/stratigraphy/depthcolumnentry/util.py b/src/stratigraphy/depthcolumnentry/util.py new file mode 100644 index 0000000..eaaeaa7 --- /dev/null +++ b/src/stratigraphy/depthcolumnentry/util.py @@ -0,0 +1,10 @@ +"""Contains utility functions for depth column entries.""" + +import re + + +def value_as_float(string_value: str) -> float: # noqa: D103 + """Converts a string to a float.""" + # OCR sometimes tends to miss the decimal comma + parsed_text = re.sub(r"^-?([0-9]+)([0-9]{2})", r"\1.\2", string_value) + return abs(float(parsed_text)) diff --git a/src/stratigraphy/layer/layer.py b/src/stratigraphy/layer/layer.py index a653d31..58a798a 100644 --- a/src/stratigraphy/layer/layer.py +++ b/src/stratigraphy/layer/layer.py @@ -4,7 +4,7 @@ import fitz from stratigraphy.data_extractor.data_extractor import ExtractedFeature, FeatureOnPage -from stratigraphy.depthcolumn.depthcolumnentry import DepthColumnEntry +from stratigraphy.depthcolumnentry import DepthColumnEntry from stratigraphy.text.textblock import MaterialDescription, TextBlock from stratigraphy.util.interval import AAboveBInterval, Interval from stratigraphy.util.util import parse_text diff --git a/src/stratigraphy/sidebar/a_above_b_sidebar.py b/src/stratigraphy/sidebar/a_above_b_sidebar.py index 7a13ad5..be9f127 100644 --- a/src/stratigraphy/sidebar/a_above_b_sidebar.py +++ b/src/stratigraphy/sidebar/a_above_b_sidebar.py @@ -7,7 +7,7 @@ import fitz import numpy as np -from stratigraphy.depthcolumn.depthcolumnentry import DepthColumnEntry +from stratigraphy.depthcolumnentry import DepthColumnEntry from stratigraphy.lines.line import TextLine from stratigraphy.text.find_description import get_description_blocks from stratigraphy.util.dataclasses import Line diff --git a/src/stratigraphy/sidebar/a_above_b_sidebar_extractor.py b/src/stratigraphy/sidebar/a_above_b_sidebar_extractor.py index a8391e0..5ce9131 100644 --- a/src/stratigraphy/sidebar/a_above_b_sidebar_extractor.py +++ b/src/stratigraphy/sidebar/a_above_b_sidebar_extractor.py @@ -2,7 +2,7 @@ import fitz -from stratigraphy.depthcolumn.depthcolumnentry import DepthColumnEntry +from stratigraphy.depthcolumnentry import DepthColumnEntryExtractor from stratigraphy.lines.line import TextWord from stratigraphy.sidebar.a_above_b_sidebar import AAboveBSidebar from stratigraphy.sidebar.a_above_b_sidebar_validator import AAboveBSidebarValidator @@ -27,7 +27,7 @@ def find_in_words( """ entries = [ entry - for entry in DepthColumnEntry.find_in_words(all_words, include_splits=False) + for entry in DepthColumnEntryExtractor.find_in_words(all_words, include_splits=False) if entry.rect not in used_entry_rects ] diff --git a/src/stratigraphy/sidebar/a_above_b_sidebar_validator.py b/src/stratigraphy/sidebar/a_above_b_sidebar_validator.py index 3dbd012..ed65cc4 100644 --- a/src/stratigraphy/sidebar/a_above_b_sidebar_validator.py +++ b/src/stratigraphy/sidebar/a_above_b_sidebar_validator.py @@ -2,7 +2,7 @@ import dataclasses -from stratigraphy.depthcolumn.depthcolumnentry import DepthColumnEntry +from stratigraphy.depthcolumnentry import DepthColumnEntry from stratigraphy.lines.line import TextWord from .a_above_b_sidebar import AAboveBSidebar diff --git a/src/stratigraphy/sidebar/a_to_b_sidebar.py b/src/stratigraphy/sidebar/a_to_b_sidebar.py index d1e1fe1..995cfe7 100644 --- a/src/stratigraphy/sidebar/a_to_b_sidebar.py +++ b/src/stratigraphy/sidebar/a_to_b_sidebar.py @@ -6,7 +6,6 @@ import fitz -from stratigraphy.depthcolumn.depthcolumnentry import AToBDepthColumnEntry from stratigraphy.lines.line import TextLine from stratigraphy.util.dataclasses import Line from stratigraphy.util.interval import AToBInterval @@ -16,7 +15,7 @@ @dataclass -class AToBSidebar(Sidebar[AToBDepthColumnEntry]): +class AToBSidebar(Sidebar[AToBInterval]): """Represents a sidebar where the upper and lower depths of each layer are explicitly specified. Example:: @@ -26,7 +25,7 @@ class AToBSidebar(Sidebar[AToBDepthColumnEntry]): ... """ - entries: list[AToBDepthColumnEntry] + entries: list[AToBInterval] def __repr__(self): """Converts the object to a string. diff --git a/src/stratigraphy/sidebar/a_to_b_sidebar_extractor.py b/src/stratigraphy/sidebar/a_to_b_sidebar_extractor.py index 2751e4d..c972ae8 100644 --- a/src/stratigraphy/sidebar/a_to_b_sidebar_extractor.py +++ b/src/stratigraphy/sidebar/a_to_b_sidebar_extractor.py @@ -2,9 +2,10 @@ import re -from stratigraphy.depthcolumn.depthcolumnentry import AToBDepthColumnEntry, DepthColumnEntry +from stratigraphy.depthcolumnentry import DepthColumnEntry, DepthColumnEntryExtractor from stratigraphy.lines.line import TextWord from stratigraphy.sidebar import AToBSidebar +from stratigraphy.util.interval import AToBInterval class AToBSidebarExtractor: @@ -27,7 +28,7 @@ def find_in_words(all_words: list[TextWord]) -> list[AToBSidebar]: Returns: list[AToBSidebar]: List of all AToBSidebars identified. """ - entries = DepthColumnEntry.find_in_words(all_words, include_splits=True) + entries = DepthColumnEntryExtractor.find_in_words(all_words, include_splits=True) def find_pair(entry: DepthColumnEntry) -> DepthColumnEntry | None: # noqa: D103 min_y0 = entry.rect.y0 - entry.rect.height / 2 @@ -57,7 +58,7 @@ def find_pair(entry: DepthColumnEntry) -> DepthColumnEntry | None: # noqa: D103 sidebars = [] for first, second in pairs: if second is not None: - entry = AToBDepthColumnEntry(first, second) + entry = AToBInterval(first, second) is_matched = False for sidebar in sidebars: column_rect = sidebar.rect() diff --git a/src/stratigraphy/sidebar/sidebar.py b/src/stratigraphy/sidebar/sidebar.py index b53781a..0b0f256 100644 --- a/src/stratigraphy/sidebar/sidebar.py +++ b/src/stratigraphy/sidebar/sidebar.py @@ -8,7 +8,7 @@ import fitz -from stratigraphy.depthcolumn.depthcolumnentry import DepthColumnEntry +from stratigraphy.depthcolumnentry import DepthColumnEntry from stratigraphy.lines.line import TextLine, TextWord from stratigraphy.sidebar.interval_block_group import IntervalBlockGroup from stratigraphy.util.dataclasses import Line diff --git a/src/stratigraphy/util/a_to_b_interval_extractor.py b/src/stratigraphy/util/a_to_b_interval_extractor.py new file mode 100644 index 0000000..445b3e1 --- /dev/null +++ b/src/stratigraphy/util/a_to_b_interval_extractor.py @@ -0,0 +1,82 @@ +"""Contains logic for finding AToBInterval instances in a text.""" + +import re + +import fitz + +from stratigraphy.depthcolumnentry import DepthColumnEntry +from stratigraphy.depthcolumnentry.util import value_as_float +from stratigraphy.lines.line import TextLine +from stratigraphy.util.interval import AToBInterval + + +class AToBIntervalExtractor: + """Methods for finding AToBInterval instances (e.g. "0.5m - 1.8m") in a text.""" + + @classmethod + def get_depth_interval_from_lines(cls, lines: list[TextLine]) -> AToBInterval | None: + """Extract depth interval from text lines. + + For borehole profiles in the Deriaz layout, the depth interval is usually found in the text of the material + description. Often, these text descriptions contain a further separation into multiple sub layers. + These sub layers have their own depth intervals. This function extracts the overall depth interval, + spanning across all mentioned sub layers. + + Args: + lines (list[TextLine]): The lines to extract the depth interval from. + + Returns: + AToBInterval | None: The depth interval (if any) or None (if no depth interval was found). + """ + depth_entries = [] + for line in lines: + try: + a_to_b_depth_entry = AToBIntervalExtractor.from_text( + line.text, line.rect, require_start_of_string=False + ) + # require_start_of_string = False because the depth interval may not always start at the beginning + # of the line e.g. "Remblais Heterogene: 0.00 - 0.5m" + if a_to_b_depth_entry: + depth_entries.append(a_to_b_depth_entry) + except ValueError: + pass + + if depth_entries: + # Merge the sub layers into one depth interval. + start = min([entry.start for entry in depth_entries], key=lambda start_entry: start_entry.value) + end = max([entry.end for entry in depth_entries], key=lambda end_entry: end_entry.value) + return AToBInterval(start, end) + else: + return None + + @classmethod + def from_text(cls, text: str, rect: fitz.Rect, require_start_of_string: bool = True) -> AToBInterval | None: + """Attempts to extract a AToBInterval from a string. + + Args: + text (str): The string to extract the depth interval from. + rect (fitz.Rect): The rectangle of the text. + require_start_of_string (bool, optional): Whether the number to extract needs to be + at the start of a string. Defaults to True. + + Returns: + AToBInterval | None: The extracted AToBInterval or None if none is found. + """ + input_string = text.strip().replace(",", ".") + + query = r"-?([0-9]+(\.[0-9]+)?)[müMN\]*[\s-]+([0-9]+(\.[0-9]+)?)[müMN\\.]*" + if not require_start_of_string: + query = r".*?" + query + regex = re.compile(query) + match = regex.match(input_string) + if match: + value1 = value_as_float(match.group(1)) + first_half_rect = fitz.Rect(rect.x0, rect.y0, rect.x1 - rect.width / 2, rect.y1) + + value2 = value_as_float(match.group(3)) + second_half_rect = fitz.Rect(rect.x0 + rect.width / 2, rect.y0, rect.x1, rect.y1) + return AToBInterval( + DepthColumnEntry(first_half_rect, value1), + DepthColumnEntry(second_half_rect, value2), + ) + return None diff --git a/src/stratigraphy/util/interval.py b/src/stratigraphy/util/interval.py index 479e8ff..3aebe0a 100644 --- a/src/stratigraphy/util/interval.py +++ b/src/stratigraphy/util/interval.py @@ -6,10 +6,7 @@ import fitz -from stratigraphy.depthcolumn.depthcolumnentry import ( - AToBDepthColumnEntry, - DepthColumnEntry, -) +from stratigraphy.depthcolumnentry import DepthColumnEntry from stratigraphy.lines.line import TextLine from stratigraphy.text.textblock import TextBlock @@ -143,9 +140,13 @@ def matching_blocks( class AToBInterval(Interval): """Class for intervals that are defined in a single line like "1.00 - 2.30m".""" - def __init__(self, layer_depth_column_entry: AToBDepthColumnEntry): - self.entry = layer_depth_column_entry - super().__init__(layer_depth_column_entry.start, layer_depth_column_entry.end) + def __init__(self, start: DepthColumnEntry, end: DepthColumnEntry): + super().__init__(start, end) + + @property + def rect(self) -> fitz.Rect: + """Get the rectangle surrounding the interval.""" + return fitz.Rect(self.start.rect).include_rect(self.end.rect) @property def line_anchor(self) -> fitz.Point | None: @@ -177,38 +178,3 @@ def matching_blocks( return [TextBlock(matched_lines)] else: return [] - - @classmethod - def get_depth_interval_from_lines(cls, lines: list[TextLine]) -> AToBInterval | None: - """Extract depth interval from text lines. - - For borehole profiles in the Deriaz layout, the depth interval is usually found in the text of the material - description. Often, these text descriptions contain a further separation into multiple sub layers. - These sub layers have their own depth intervals. This function extracts the overall depth interval, - spanning across all mentioned sub layers. - - Args: - lines (list[TextLine]): The lines to extract the depth interval from. - - Returns: - AToBInterval | None: The depth interval (if any) or None (if no depth interval was found). - """ - depth_entries = [] - for line in lines: - try: - layer_depth_entry = AToBDepthColumnEntry.from_text(line.text, line.rect, require_start_of_string=False) - # require_start_of_string = False because the depth interval may not always start at the beginning - # of the line e.g. "Remblais Heterogene: 0.00 - 0.5m" - if layer_depth_entry: - depth_entries.append(layer_depth_entry) - except ValueError: - pass - - if depth_entries: - # Merge the sub layers into one depth interval. - start = min([entry.start for entry in depth_entries], key=lambda start_entry: start_entry.value) - end = max([entry.end for entry in depth_entries], key=lambda end_entry: end_entry.value) - - return AToBInterval(AToBDepthColumnEntry(start, end)) - else: - return None diff --git a/tests/test_depthcolumn.py b/tests/test_depthcolumn.py index 2ca9817..099946c 100644 --- a/tests/test_depthcolumn.py +++ b/tests/test_depthcolumn.py @@ -1,7 +1,7 @@ """Test suite for the find_depth_columns module.""" import fitz -from stratigraphy.depthcolumn.depthcolumnentry import DepthColumnEntry +from stratigraphy.depthcolumnentry.depthcolumnentry import DepthColumnEntry from stratigraphy.sidebar import AAboveBSidebar diff --git a/tests/test_find_sidebar.py b/tests/test_find_sidebar.py index 51178ec..c7b2ff7 100644 --- a/tests/test_find_sidebar.py +++ b/tests/test_find_sidebar.py @@ -2,7 +2,7 @@ import fitz import pytest -from stratigraphy.depthcolumn.depthcolumnentry import DepthColumnEntry +from stratigraphy.depthcolumnentry import DepthColumnEntryExtractor from stratigraphy.lines.line import TextWord from stratigraphy.sidebar import AAboveBSidebarExtractor, AToBSidebarExtractor @@ -17,7 +17,7 @@ def test_depth_column_entries(): # noqa: D103 TextWord(fitz.Rect(0, 4, 5, 5), "30.0m", PAGE_NUMBER), TextWord(fitz.Rect(0, 6, 5, 7), "40.0m", PAGE_NUMBER), ] - entries = DepthColumnEntry.find_in_words(all_words, include_splits=False) + entries = DepthColumnEntryExtractor.find_in_words(all_words, include_splits=False) assert len(entries) == 4, "There should be 4 entries" assert pytest.approx(entries[0].value) == 10.0, "The first entry should have a value of 10.0" assert pytest.approx(entries[1].value) == 20.0, "The second entry should have a value of 20.0" @@ -31,7 +31,7 @@ def test_depth_column_entries_with_splits(): # noqa: D103 TextWord(fitz.Rect(0, 0, 10, 1), "10.00-20.0m", PAGE_NUMBER), TextWord(fitz.Rect(0, 2, 10, 3), "30.0-40.0m", PAGE_NUMBER), ] - entries = DepthColumnEntry.find_in_words(all_words, include_splits=True) + entries = DepthColumnEntryExtractor.find_in_words(all_words, include_splits=True) assert len(entries) == 4, "There should be 4 entries" assert entries[0].value == 10.0, "The first entry should have a value of 10.0" assert entries[1].value == 20.0, "The second entry should have a value of 20.0" @@ -47,7 +47,7 @@ def test_depth_column_entries_with_leading_character(): # noqa: D103 TextWord(fitz.Rect(0, 4, 5, 5), "-3.0m", PAGE_NUMBER), TextWord(fitz.Rect(0, 6, 5, 7), ".4.2m", PAGE_NUMBER), ] - entries = DepthColumnEntry.find_in_words(all_words, include_splits=True) + entries = DepthColumnEntryExtractor.find_in_words(all_words, include_splits=True) assert len(entries) == 4, "There should be 4 entries" assert entries[0].value == 0.0, "The first entry should have a value of 0" assert entries[1].value == 2.0, "The second entry should have a value of 2.0" diff --git a/tests/test_interval.py b/tests/test_interval.py index 7d6d411..5bc8492 100644 --- a/tests/test_interval.py +++ b/tests/test_interval.py @@ -1,7 +1,7 @@ """Test suite for the interval module.""" import fitz -from stratigraphy.depthcolumn.depthcolumnentry import AToBDepthColumnEntry, DepthColumnEntry +from stratigraphy.depthcolumnentry.depthcolumnentry import DepthColumnEntry from stratigraphy.util.interval import AAboveBInterval, AToBInterval @@ -27,8 +27,7 @@ def test_line_anchor(): # noqa: D103 start = DepthColumnEntry(fitz.Rect(0, 0, 1, 1), 5) end = DepthColumnEntry(fitz.Rect(2, 0, 3, 1), 10) - entry = AToBDepthColumnEntry(start, end) - layer_interval = AToBInterval(entry) + layer_interval = AToBInterval(start, end) assert layer_interval.line_anchor == fitz.Point( 3, 0.5 ), "The 'line anchor' for a LayerInterval should be the midpoint of the right-hand-side of the end rect." From 9dace26f5e11307b588c4b29e0efe06f1bbfd782 Mon Sep 17 00:00:00 2001 From: Stijn Vermeeren Date: Wed, 20 Nov 2024 12:09:24 +0100 Subject: [PATCH 2/8] LGVISIUM-102: rename intervals in tests --- tests/test_interval.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/tests/test_interval.py b/tests/test_interval.py index 5bc8492..2c27ad0 100644 --- a/tests/test_interval.py +++ b/tests/test_interval.py @@ -6,42 +6,42 @@ def test_line_anchor(): # noqa: D103 - """Test the line anchor property of the BoundaryInterval and LayerInterval classes.""" + """Test the line anchor property of the AAboveBInterval and AToBInterval classes.""" start = DepthColumnEntry(fitz.Rect(0, 0, 1, 1), 5) end = DepthColumnEntry(fitz.Rect(0, 2, 1, 3), 10) - boundary_interval = AAboveBInterval(start, end) - assert boundary_interval.line_anchor == fitz.Point(1, 1.5), ( - "The 'line anchor' for a BoundaryInterval should be halfway between the bottom-right of the start depth and " + a_above_b_interval = AAboveBInterval(start, end) + assert a_above_b_interval.line_anchor == fitz.Point(1, 1.5), ( + "The 'line anchor' for an AAboveBInterval should be halfway between the bottom-right of the start depth and " "the top-right of the end depth." ) - boundary_interval = AAboveBInterval(start, end=None) - assert boundary_interval.line_anchor == fitz.Point( + a_above_b_interval = AAboveBInterval(start, end=None) + assert a_above_b_interval.line_anchor == fitz.Point( 1, 1 - ), "The 'line anchor' for a BoundaryInterval without end should be the bottom-right of the start depth." + ), "The 'line anchor' for an AAboveBInterval without end should be the bottom-right of the start depth." - boundary_interval = AAboveBInterval(start=None, end=end) - assert boundary_interval.line_anchor == fitz.Point( + a_above_b_interval = AAboveBInterval(start=None, end=end) + assert a_above_b_interval.line_anchor == fitz.Point( 1, 2 - ), "The 'line anchor' for a BoundaryInterval without start should be the top-right of the end depth." + ), "The 'line anchor' for a AAboveBInterval without start should be the top-right of the end depth." start = DepthColumnEntry(fitz.Rect(0, 0, 1, 1), 5) end = DepthColumnEntry(fitz.Rect(2, 0, 3, 1), 10) - layer_interval = AToBInterval(start, end) - assert layer_interval.line_anchor == fitz.Point( + a_to_b_interval = AToBInterval(start, end) + assert a_to_b_interval.line_anchor == fitz.Point( 3, 0.5 - ), "The 'line anchor' for a LayerInterval should be the midpoint of the right-hand-side of the end rect." + ), "The 'line anchor' for an AToBInterval should be the midpoint of the right-hand-side of the end rect." def test_background_rect(): # noqa: D103 - """Test the background_rect property of the BoundaryInterval class.""" + """Test the background_rect property of the AAboveBInterval class.""" start = DepthColumnEntry(fitz.Rect(0, 0, 1, 1), 5) end = DepthColumnEntry(fitz.Rect(0, 2, 1, 3), 10) - boundary_interval = AAboveBInterval(start, end) - assert boundary_interval.background_rect == fitz.Rect( + a_above_b_interval = AAboveBInterval(start, end) + assert a_above_b_interval.background_rect == fitz.Rect( start.rect.x0, start.rect.y1, start.rect.x1, end.rect.y0 ), "The background rect should be (0, 1, 1, 2)" - assert boundary_interval.background_rect == fitz.Rect(0, 1, 1, 2), "The background rect should be (0, 1, 1, 2)" + assert a_above_b_interval.background_rect == fitz.Rect(0, 1, 1, 2), "The background rect should be (0, 1, 1, 2)" -# TODO: add tests for BoundaryInterval.matching_blocks +# TODO: add tests for AAboveBInterval.matching_blocks From 78110b2a1324801a488ad9b7fb992694bd192613 Mon Sep 17 00:00:00 2001 From: Stijn Vermeeren Date: Wed, 20 Nov 2024 13:14:27 +0100 Subject: [PATCH 3/8] LGVISIUM-102: fix AToBIntervalExtractor method call --- src/stratigraphy/sidebar/layer_identifier_sidebar.py | 4 ++-- src/stratigraphy/util/a_to_b_interval_extractor.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/stratigraphy/sidebar/layer_identifier_sidebar.py b/src/stratigraphy/sidebar/layer_identifier_sidebar.py index e2467cf..d33b2f0 100644 --- a/src/stratigraphy/sidebar/layer_identifier_sidebar.py +++ b/src/stratigraphy/sidebar/layer_identifier_sidebar.py @@ -8,7 +8,7 @@ from stratigraphy.text.textblock import TextBlock from stratigraphy.util.dataclasses import Line -from ..util.interval import AToBInterval +from ..util.a_to_b_interval_extractor import AToBIntervalExtractor from .interval_block_group import IntervalBlockGroup from .sidebar import Sidebar @@ -70,7 +70,7 @@ def identify_groups( result = [] for block in blocks: depth_intervals = [] - depth_interval = AToBInterval.get_depth_interval_from_lines(block.lines) + depth_interval = AToBIntervalExtractor.from_lines(block.lines) if depth_interval: depth_intervals.append(depth_interval) result.append(IntervalBlockGroup(depth_intervals=depth_intervals, blocks=[block])) diff --git a/src/stratigraphy/util/a_to_b_interval_extractor.py b/src/stratigraphy/util/a_to_b_interval_extractor.py index 445b3e1..a2cf05d 100644 --- a/src/stratigraphy/util/a_to_b_interval_extractor.py +++ b/src/stratigraphy/util/a_to_b_interval_extractor.py @@ -14,7 +14,7 @@ class AToBIntervalExtractor: """Methods for finding AToBInterval instances (e.g. "0.5m - 1.8m") in a text.""" @classmethod - def get_depth_interval_from_lines(cls, lines: list[TextLine]) -> AToBInterval | None: + def from_lines(cls, lines: list[TextLine]) -> AToBInterval | None: """Extract depth interval from text lines. For borehole profiles in the Deriaz layout, the depth interval is usually found in the text of the material From ec693c107a2b5966df52f4aac9854f5c3108777b Mon Sep 17 00:00:00 2001 From: Stijn Vermeeren Date: Wed, 20 Nov 2024 13:16:08 +0100 Subject: [PATCH 4/8] LGVISIUM-102: fix AToBInterval method call --- src/stratigraphy/sidebar/a_to_b_sidebar.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/stratigraphy/sidebar/a_to_b_sidebar.py b/src/stratigraphy/sidebar/a_to_b_sidebar.py index 995cfe7..5586268 100644 --- a/src/stratigraphy/sidebar/a_to_b_sidebar.py +++ b/src/stratigraphy/sidebar/a_to_b_sidebar.py @@ -35,9 +35,6 @@ def __repr__(self): """ return "AToBSidebar({})".format(", ".join([str(entry) for entry in self.entries])) - def depth_intervals(self) -> list[AToBInterval]: - return [AToBInterval(entry) for entry in self.entries] - def break_on_mismatch(self) -> list[AToBSidebar]: """Breaks the sidebar into segments where the depths are not in an arithmetic progression. @@ -95,17 +92,15 @@ def identify_groups( Returns: list[IntervalBlockGroup]: A list of groups, where each group is a IntervalBlockGroup. """ - depth_intervals = self.depth_intervals() - groups = [] line_index = 0 - for interval_index, interval in enumerate(depth_intervals): + for interval_index, interval in enumerate(self.entries): # don't allow a layer above depth 0 if interval.start is None and interval.end.value == 0: continue - next_interval = depth_intervals[interval_index + 1] if interval_index + 1 < len(depth_intervals) else None + next_interval = self.entries[interval_index + 1] if interval_index + 1 < len(self.entries) else None matched_blocks = interval.matching_blocks(description_lines, line_index, next_interval) line_index += sum([len(block.lines) for block in matched_blocks]) From d4d92e4e65a41b0a2e170adf6cf954a8a0e5ce58 Mon Sep 17 00:00:00 2001 From: Stijn Vermeeren Date: Wed, 20 Nov 2024 13:26:05 +0100 Subject: [PATCH 5/8] LGVISIUM-102: group DepthColumnEntry and Interval into a single "depth" package --- .../{util => depth}/a_to_b_interval_extractor.py | 7 ++++--- .../{depthcolumnentry => depth}/depthcolumnentry.py | 0 .../depthcolumnentry_extractor.py | 7 ++++--- src/stratigraphy/{util => depth}/interval.py | 3 ++- src/stratigraphy/{depthcolumnentry => depth}/util.py | 0 src/stratigraphy/evaluation/utility.py | 2 +- src/stratigraphy/extract.py | 2 +- src/stratigraphy/layer/layer.py | 3 +-- src/stratigraphy/sidebar/a_above_b_sidebar.py | 3 +-- src/stratigraphy/sidebar/a_above_b_sidebar_extractor.py | 7 ++++--- src/stratigraphy/sidebar/a_above_b_sidebar_validator.py | 2 +- src/stratigraphy/sidebar/a_to_b_sidebar.py | 2 +- src/stratigraphy/sidebar/a_to_b_sidebar_extractor.py | 3 +-- src/stratigraphy/sidebar/interval_block_group.py | 2 +- src/stratigraphy/sidebar/layer_identifier_sidebar.py | 2 +- .../sidebar/layer_identifier_sidebar_extractor.py | 4 ++-- src/stratigraphy/sidebar/sidebar.py | 2 +- tests/test_depthcolumn.py | 2 +- tests/test_find_sidebar.py | 2 +- tests/test_interval.py | 3 +-- 20 files changed, 29 insertions(+), 29 deletions(-) rename src/stratigraphy/{util => depth}/a_to_b_interval_extractor.py (95%) rename src/stratigraphy/{depthcolumnentry => depth}/depthcolumnentry.py (100%) rename src/stratigraphy/{depthcolumnentry => depth}/depthcolumnentry_extractor.py (90%) rename src/stratigraphy/{util => depth}/interval.py (99%) rename src/stratigraphy/{depthcolumnentry => depth}/util.py (100%) diff --git a/src/stratigraphy/util/a_to_b_interval_extractor.py b/src/stratigraphy/depth/a_to_b_interval_extractor.py similarity index 95% rename from src/stratigraphy/util/a_to_b_interval_extractor.py rename to src/stratigraphy/depth/a_to_b_interval_extractor.py index a2cf05d..0acee46 100644 --- a/src/stratigraphy/util/a_to_b_interval_extractor.py +++ b/src/stratigraphy/depth/a_to_b_interval_extractor.py @@ -4,10 +4,11 @@ import fitz -from stratigraphy.depthcolumnentry import DepthColumnEntry -from stratigraphy.depthcolumnentry.util import value_as_float +from stratigraphy.depth import DepthColumnEntry +from stratigraphy.depth.util import value_as_float from stratigraphy.lines.line import TextLine -from stratigraphy.util.interval import AToBInterval + +from .interval import AToBInterval class AToBIntervalExtractor: diff --git a/src/stratigraphy/depthcolumnentry/depthcolumnentry.py b/src/stratigraphy/depth/depthcolumnentry.py similarity index 100% rename from src/stratigraphy/depthcolumnentry/depthcolumnentry.py rename to src/stratigraphy/depth/depthcolumnentry.py diff --git a/src/stratigraphy/depthcolumnentry/depthcolumnentry_extractor.py b/src/stratigraphy/depth/depthcolumnentry_extractor.py similarity index 90% rename from src/stratigraphy/depthcolumnentry/depthcolumnentry_extractor.py rename to src/stratigraphy/depth/depthcolumnentry_extractor.py index c9d8ebf..757b579 100644 --- a/src/stratigraphy/depthcolumnentry/depthcolumnentry_extractor.py +++ b/src/stratigraphy/depth/depthcolumnentry_extractor.py @@ -2,10 +2,11 @@ import re -from stratigraphy.depthcolumnentry import DepthColumnEntry -from stratigraphy.depthcolumnentry.util import value_as_float +from stratigraphy.depth import DepthColumnEntry +from stratigraphy.depth.util import value_as_float from stratigraphy.lines.line import TextWord -from stratigraphy.util.a_to_b_interval_extractor import AToBIntervalExtractor + +from .a_to_b_interval_extractor import AToBIntervalExtractor class DepthColumnEntryExtractor: diff --git a/src/stratigraphy/util/interval.py b/src/stratigraphy/depth/interval.py similarity index 99% rename from src/stratigraphy/util/interval.py rename to src/stratigraphy/depth/interval.py index 3aebe0a..7c7e7e9 100644 --- a/src/stratigraphy/util/interval.py +++ b/src/stratigraphy/depth/interval.py @@ -6,10 +6,11 @@ import fitz -from stratigraphy.depthcolumnentry import DepthColumnEntry from stratigraphy.lines.line import TextLine from stratigraphy.text.textblock import TextBlock +from .depthcolumnentry import DepthColumnEntry + class Interval(metaclass=abc.ABCMeta): """Abstract class for (depth) intervals.""" diff --git a/src/stratigraphy/depthcolumnentry/util.py b/src/stratigraphy/depth/util.py similarity index 100% rename from src/stratigraphy/depthcolumnentry/util.py rename to src/stratigraphy/depth/util.py diff --git a/src/stratigraphy/evaluation/utility.py b/src/stratigraphy/evaluation/utility.py index 5862df5..7fda85d 100644 --- a/src/stratigraphy/evaluation/utility.py +++ b/src/stratigraphy/evaluation/utility.py @@ -2,8 +2,8 @@ from collections import Counter +from stratigraphy.depth import Interval from stratigraphy.evaluation.evaluation_dataclasses import Metrics -from stratigraphy.util.interval import Interval def count_against_ground_truth(values: list[str], ground_truth: list[str]) -> Metrics: diff --git a/src/stratigraphy/extract.py b/src/stratigraphy/extract.py index d8beeea..7c17460 100644 --- a/src/stratigraphy/extract.py +++ b/src/stratigraphy/extract.py @@ -6,6 +6,7 @@ import fitz from stratigraphy.data_extractor.data_extractor import FeatureOnPage +from stratigraphy.depth import AAboveBInterval, Interval from stratigraphy.depths_materials_column_pairs.bounding_boxes import BoundingBox, BoundingBoxes from stratigraphy.depths_materials_column_pairs.material_description_rect_with_sidebar import ( MaterialDescriptionRectWithSidebar, @@ -24,7 +25,6 @@ ) from stratigraphy.text.textblock import MaterialDescription, MaterialDescriptionLine, TextBlock, block_distance from stratigraphy.util.dataclasses import Line -from stratigraphy.util.interval import AAboveBInterval, Interval from stratigraphy.util.util import ( x_overlap, x_overlap_significant_smallest, diff --git a/src/stratigraphy/layer/layer.py b/src/stratigraphy/layer/layer.py index 58a798a..4d7cc9e 100644 --- a/src/stratigraphy/layer/layer.py +++ b/src/stratigraphy/layer/layer.py @@ -4,9 +4,8 @@ import fitz from stratigraphy.data_extractor.data_extractor import ExtractedFeature, FeatureOnPage -from stratigraphy.depthcolumnentry import DepthColumnEntry +from stratigraphy.depth import AAboveBInterval, DepthColumnEntry, Interval from stratigraphy.text.textblock import MaterialDescription, TextBlock -from stratigraphy.util.interval import AAboveBInterval, Interval from stratigraphy.util.util import parse_text diff --git a/src/stratigraphy/sidebar/a_above_b_sidebar.py b/src/stratigraphy/sidebar/a_above_b_sidebar.py index be9f127..d808275 100644 --- a/src/stratigraphy/sidebar/a_above_b_sidebar.py +++ b/src/stratigraphy/sidebar/a_above_b_sidebar.py @@ -7,11 +7,10 @@ import fitz import numpy as np -from stratigraphy.depthcolumnentry import DepthColumnEntry +from stratigraphy.depth import AAboveBInterval, DepthColumnEntry from stratigraphy.lines.line import TextLine from stratigraphy.text.find_description import get_description_blocks from stratigraphy.util.dataclasses import Line -from stratigraphy.util.interval import AAboveBInterval from .interval_block_group import IntervalBlockGroup from .sidebar import Sidebar diff --git a/src/stratigraphy/sidebar/a_above_b_sidebar_extractor.py b/src/stratigraphy/sidebar/a_above_b_sidebar_extractor.py index 5ce9131..c63c480 100644 --- a/src/stratigraphy/sidebar/a_above_b_sidebar_extractor.py +++ b/src/stratigraphy/sidebar/a_above_b_sidebar_extractor.py @@ -2,10 +2,11 @@ import fitz -from stratigraphy.depthcolumnentry import DepthColumnEntryExtractor +from stratigraphy.depth import DepthColumnEntryExtractor from stratigraphy.lines.line import TextWord -from stratigraphy.sidebar.a_above_b_sidebar import AAboveBSidebar -from stratigraphy.sidebar.a_above_b_sidebar_validator import AAboveBSidebarValidator + +from .a_above_b_sidebar import AAboveBSidebar +from .a_above_b_sidebar_validator import AAboveBSidebarValidator class AAboveBSidebarExtractor: diff --git a/src/stratigraphy/sidebar/a_above_b_sidebar_validator.py b/src/stratigraphy/sidebar/a_above_b_sidebar_validator.py index ed65cc4..7ca4ca3 100644 --- a/src/stratigraphy/sidebar/a_above_b_sidebar_validator.py +++ b/src/stratigraphy/sidebar/a_above_b_sidebar_validator.py @@ -2,7 +2,7 @@ import dataclasses -from stratigraphy.depthcolumnentry import DepthColumnEntry +from stratigraphy.depth import DepthColumnEntry from stratigraphy.lines.line import TextWord from .a_above_b_sidebar import AAboveBSidebar diff --git a/src/stratigraphy/sidebar/a_to_b_sidebar.py b/src/stratigraphy/sidebar/a_to_b_sidebar.py index 5586268..ec5334b 100644 --- a/src/stratigraphy/sidebar/a_to_b_sidebar.py +++ b/src/stratigraphy/sidebar/a_to_b_sidebar.py @@ -6,9 +6,9 @@ import fitz +from stratigraphy.depth import AToBInterval from stratigraphy.lines.line import TextLine from stratigraphy.util.dataclasses import Line -from stratigraphy.util.interval import AToBInterval from .interval_block_group import IntervalBlockGroup from .sidebar import Sidebar diff --git a/src/stratigraphy/sidebar/a_to_b_sidebar_extractor.py b/src/stratigraphy/sidebar/a_to_b_sidebar_extractor.py index c972ae8..2148a48 100644 --- a/src/stratigraphy/sidebar/a_to_b_sidebar_extractor.py +++ b/src/stratigraphy/sidebar/a_to_b_sidebar_extractor.py @@ -2,10 +2,9 @@ import re -from stratigraphy.depthcolumnentry import DepthColumnEntry, DepthColumnEntryExtractor +from stratigraphy.depth import AToBInterval, DepthColumnEntry, DepthColumnEntryExtractor from stratigraphy.lines.line import TextWord from stratigraphy.sidebar import AToBSidebar -from stratigraphy.util.interval import AToBInterval class AToBSidebarExtractor: diff --git a/src/stratigraphy/sidebar/interval_block_group.py b/src/stratigraphy/sidebar/interval_block_group.py index 6911519..c3766b6 100644 --- a/src/stratigraphy/sidebar/interval_block_group.py +++ b/src/stratigraphy/sidebar/interval_block_group.py @@ -2,8 +2,8 @@ from dataclasses import dataclass +from stratigraphy.depth import Interval from stratigraphy.text.textblock import TextBlock -from stratigraphy.util.interval import Interval @dataclass diff --git a/src/stratigraphy/sidebar/layer_identifier_sidebar.py b/src/stratigraphy/sidebar/layer_identifier_sidebar.py index d33b2f0..6f04571 100644 --- a/src/stratigraphy/sidebar/layer_identifier_sidebar.py +++ b/src/stratigraphy/sidebar/layer_identifier_sidebar.py @@ -4,11 +4,11 @@ import fitz +from stratigraphy.depth import AToBIntervalExtractor from stratigraphy.lines.line import TextLine from stratigraphy.text.textblock import TextBlock from stratigraphy.util.dataclasses import Line -from ..util.a_to_b_interval_extractor import AToBIntervalExtractor from .interval_block_group import IntervalBlockGroup from .sidebar import Sidebar diff --git a/src/stratigraphy/sidebar/layer_identifier_sidebar_extractor.py b/src/stratigraphy/sidebar/layer_identifier_sidebar_extractor.py index d880b46..63c2dab 100644 --- a/src/stratigraphy/sidebar/layer_identifier_sidebar_extractor.py +++ b/src/stratigraphy/sidebar/layer_identifier_sidebar_extractor.py @@ -3,8 +3,8 @@ import re from stratigraphy.lines.line import TextLine -from stratigraphy.sidebar import LayerIdentifierSidebar -from stratigraphy.sidebar.layer_identifier_sidebar import LayerIdentifierEntry + +from .layer_identifier_sidebar import LayerIdentifierEntry, LayerIdentifierSidebar class LayerIdentifierSidebarExtractor: diff --git a/src/stratigraphy/sidebar/sidebar.py b/src/stratigraphy/sidebar/sidebar.py index 0b0f256..fcedaf8 100644 --- a/src/stratigraphy/sidebar/sidebar.py +++ b/src/stratigraphy/sidebar/sidebar.py @@ -8,7 +8,7 @@ import fitz -from stratigraphy.depthcolumnentry import DepthColumnEntry +from stratigraphy.depth import DepthColumnEntry from stratigraphy.lines.line import TextLine, TextWord from stratigraphy.sidebar.interval_block_group import IntervalBlockGroup from stratigraphy.util.dataclasses import Line diff --git a/tests/test_depthcolumn.py b/tests/test_depthcolumn.py index 099946c..435dfe0 100644 --- a/tests/test_depthcolumn.py +++ b/tests/test_depthcolumn.py @@ -1,7 +1,7 @@ """Test suite for the find_depth_columns module.""" import fitz -from stratigraphy.depthcolumnentry.depthcolumnentry import DepthColumnEntry +from stratigraphy.depth.depthcolumnentry import DepthColumnEntry from stratigraphy.sidebar import AAboveBSidebar diff --git a/tests/test_find_sidebar.py b/tests/test_find_sidebar.py index c7b2ff7..1b76469 100644 --- a/tests/test_find_sidebar.py +++ b/tests/test_find_sidebar.py @@ -2,7 +2,7 @@ import fitz import pytest -from stratigraphy.depthcolumnentry import DepthColumnEntryExtractor +from stratigraphy.depth import DepthColumnEntryExtractor from stratigraphy.lines.line import TextWord from stratigraphy.sidebar import AAboveBSidebarExtractor, AToBSidebarExtractor diff --git a/tests/test_interval.py b/tests/test_interval.py index 2c27ad0..fc55113 100644 --- a/tests/test_interval.py +++ b/tests/test_interval.py @@ -1,8 +1,7 @@ """Test suite for the interval module.""" import fitz -from stratigraphy.depthcolumnentry.depthcolumnentry import DepthColumnEntry -from stratigraphy.util.interval import AAboveBInterval, AToBInterval +from stratigraphy.depth import AAboveBInterval, AToBInterval, DepthColumnEntry def test_line_anchor(): # noqa: D103 From 8348daebd19e6548b39fca5817714fd364985728 Mon Sep 17 00:00:00 2001 From: Stijn Vermeeren Date: Wed, 20 Nov 2024 13:27:07 +0100 Subject: [PATCH 6/8] LGVISIUM-102: group DepthColumnEntry and Interval into a single "depth" package --- src/stratigraphy/depth/__init__.py | 15 +++++++++++++++ src/stratigraphy/depthcolumnentry/__init__.py | 6 ------ 2 files changed, 15 insertions(+), 6 deletions(-) create mode 100644 src/stratigraphy/depth/__init__.py delete mode 100644 src/stratigraphy/depthcolumnentry/__init__.py diff --git a/src/stratigraphy/depth/__init__.py b/src/stratigraphy/depth/__init__.py new file mode 100644 index 0000000..e9a3cf9 --- /dev/null +++ b/src/stratigraphy/depth/__init__.py @@ -0,0 +1,15 @@ +"""Modules for extracting values indicating some measured depth below the surface.""" + +from .a_to_b_interval_extractor import AToBIntervalExtractor +from .depthcolumnentry import DepthColumnEntry +from .depthcolumnentry_extractor import DepthColumnEntryExtractor +from .interval import AAboveBInterval, AToBInterval, Interval + +__all__ = [ + "AAboveBInterval", + "AToBInterval", + "AToBIntervalExtractor", + "DepthColumnEntry", + "DepthColumnEntryExtractor", + "Interval", +] diff --git a/src/stratigraphy/depthcolumnentry/__init__.py b/src/stratigraphy/depthcolumnentry/__init__.py deleted file mode 100644 index 6bba7e8..0000000 --- a/src/stratigraphy/depthcolumnentry/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -"""Modules for depth column entries, values that indicate the measured depth of an interface between layers.""" - -from .depthcolumnentry import DepthColumnEntry -from .depthcolumnentry_extractor import DepthColumnEntryExtractor - -__all__ = ["DepthColumnEntry", "DepthColumnEntryExtractor"] From fb2affc8cb3b4aca4b6ce01a8bd36cc09655ef34 Mon Sep 17 00:00:00 2001 From: Stijn Vermeeren Date: Wed, 20 Nov 2024 13:36:37 +0100 Subject: [PATCH 7/8] LGVISIUM-102: fix import --- src/stratigraphy/depth/a_to_b_interval_extractor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/stratigraphy/depth/a_to_b_interval_extractor.py b/src/stratigraphy/depth/a_to_b_interval_extractor.py index 0acee46..63b57c2 100644 --- a/src/stratigraphy/depth/a_to_b_interval_extractor.py +++ b/src/stratigraphy/depth/a_to_b_interval_extractor.py @@ -4,11 +4,11 @@ import fitz -from stratigraphy.depth import DepthColumnEntry -from stratigraphy.depth.util import value_as_float from stratigraphy.lines.line import TextLine +from .depthcolumnentry import DepthColumnEntry from .interval import AToBInterval +from .util import value_as_float class AToBIntervalExtractor: From 8ff58a426b8672dfe9b89306e17c7314f4f19d1a Mon Sep 17 00:00:00 2001 From: Stijn Vermeeren Date: Fri, 22 Nov 2024 10:07:38 +0100 Subject: [PATCH 8/8] LGVISIUM-102: rename AToBIntervalExtractor.from_material_description_lines() + add example to docs --- src/stratigraphy/depth/a_to_b_interval_extractor.py | 13 +++++++++++-- .../sidebar/layer_identifier_sidebar.py | 2 +- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/stratigraphy/depth/a_to_b_interval_extractor.py b/src/stratigraphy/depth/a_to_b_interval_extractor.py index 63b57c2..08d3de7 100644 --- a/src/stratigraphy/depth/a_to_b_interval_extractor.py +++ b/src/stratigraphy/depth/a_to_b_interval_extractor.py @@ -15,14 +15,23 @@ class AToBIntervalExtractor: """Methods for finding AToBInterval instances (e.g. "0.5m - 1.8m") in a text.""" @classmethod - def from_lines(cls, lines: list[TextLine]) -> AToBInterval | None: - """Extract depth interval from text lines. + def from_material_description_lines(cls, lines: list[TextLine]) -> AToBInterval | None: + """Extract depth interval from text lines from a material description. For borehole profiles in the Deriaz layout, the depth interval is usually found in the text of the material description. Often, these text descriptions contain a further separation into multiple sub layers. These sub layers have their own depth intervals. This function extracts the overall depth interval, spanning across all mentioned sub layers. + For example (from GeoQuat 12306): + 1) REMBLAIS HETEROGENES + 0.00 - 0.08 m : Revêtement bitumineux + 0.08- 0.30 m : Grave d'infrastructure + 0.30 - 1.40 m : Grave dans importante matrice de sable + moyen, brun beige, pulvérulent. + From this material description, this method will extract a single depth interval that starts at 0m and ends + at 1.40m. + Args: lines (list[TextLine]): The lines to extract the depth interval from. diff --git a/src/stratigraphy/sidebar/layer_identifier_sidebar.py b/src/stratigraphy/sidebar/layer_identifier_sidebar.py index 6f04571..4ac4dc0 100644 --- a/src/stratigraphy/sidebar/layer_identifier_sidebar.py +++ b/src/stratigraphy/sidebar/layer_identifier_sidebar.py @@ -70,7 +70,7 @@ def identify_groups( result = [] for block in blocks: depth_intervals = [] - depth_interval = AToBIntervalExtractor.from_lines(block.lines) + depth_interval = AToBIntervalExtractor.from_material_description_lines(block.lines) if depth_interval: depth_intervals.append(depth_interval) result.append(IntervalBlockGroup(depth_intervals=depth_intervals, blocks=[block]))