Skip to content

Commit

Permalink
feat: trim flanking whitespace when reading a metric
Browse files Browse the repository at this point in the history
Fixes #216

Also, strips any flanking whitespace when formatting a float into
a string.
  • Loading branch information
nh13 committed Jan 28, 2025
1 parent f1f89e9 commit de3b0c7
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 3 deletions.
11 changes: 8 additions & 3 deletions fgpyo/util/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,9 @@ def _parsers(cls) -> Dict[type, Callable[[str], Any]]:
return {}

@classmethod
def read(cls, path: Path, ignore_extra_fields: bool = True) -> Iterator[Any]:
def read(
cls, path: Path, ignore_extra_fields: bool = True, strip_whitespace: bool = True
) -> Iterator[Any]:
"""Reads in zero or more metrics from the given path.
The metric file must contain a matching header.
Expand All @@ -221,6 +223,7 @@ def read(cls, path: Path, ignore_extra_fields: bool = True) -> Iterator[Any]:
Args:
path: the path to the metrics file.
ignore_extra_fields: True to ignore any extra columns, False to raise an exception.
strip_whitespace: True to strip leading and trailing whitespace, False to keep as-is.
"""
parsers = cls._parsers()
with io.to_reader(path) as reader:
Expand Down Expand Up @@ -263,6 +266,8 @@ def read(cls, path: Path, ignore_extra_fields: bool = True) -> Iterator[Any]:
for lineno, line in enumerate(reader, 2):
# parse the raw values
values: List[str] = line.rstrip("\r\n").split("\t")
if strip_whitespace:
values = [v.strip() for v in values]

# raise an exception if there aren't the same number of values as the header
if len(header) != len(values):
Expand Down Expand Up @@ -353,11 +358,11 @@ def format_value(cls, value: Any) -> str: # noqa: C901
+ "}"
)
elif isinstance(value, float):
return str(round(value, 5))
return f"{round(value, 5)}".strip()
elif value is None:
return ""
else:
return str(value)
return f"{value}"

@classmethod
def to_list(cls, value: str) -> List[Any]:
Expand Down
32 changes: 32 additions & 0 deletions tests/fgpyo/util/test_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,11 @@ class PersonDefault(Metric["PersonDefault"]):
name: str
age: int = 0

# Test metric class with an optional string `name` and an optional float `age`.
@make_dataclass(use_attr=use_attr)
class PersonAgeFloat(Metric["PersonAgeFloat"]):
name: Optional[str]
age: Optional[float]

@make_dataclass(use_attr=use_attr)
class ListPerson(Metric["ListPerson"]):
name: List[Optional[str]]
Expand Down Expand Up @@ -403,6 +408,33 @@ def test_metric_values(data_and_classes: DataBuilder) -> None:
assert list(data_and_classes.Person(name="name", age=42).values()) == ["name", 42]


@pytest.mark.parametrize("data_and_classes", (attr_data_and_classes, dataclasses_data_and_classes))
def test_metric_round_floats(data_and_classes: DataBuilder) -> None:
    """`metric.formatted_values()` should round floats to five decimal places."""
    # NOTE(review): `age` is given a float here; confirm whether `Person.age` is int-typed
    # and whether `PersonAgeFloat` is the class this test was meant to exercise.
    person = data_and_classes.Person(name="John Doe", age=42.123456)
    formatted = list(person.formatted_values())
    assert formatted == ["John Doe", "42.12346"]


@pytest.mark.parametrize("data_and_classes", (attr_data_and_classes, dataclasses_data_and_classes))
def test_metric_strips_trailing_whitespace(tmp_path: Path, data_and_classes: DataBuilder) -> None:
    """`Metric.read()` should, by default, strip whitespace flanking each value."""
    test_tsv = tmp_path / "test.tsv"
    rows = [
        "name\tage\n",
        " John Doe \t42\n",  # whitespace around name
        "Jane Doe\t 35 \n",  # whitespace around age
        " Someone Else \t 47 \n",  # whitespace around both
    ]
    with test_tsv.open("w") as fout:
        fout.writelines(rows)

    persons = list(data_and_classes.Person.read(test_tsv))
    expected = [("John Doe", 42), ("Jane Doe", 35), ("Someone Else", 47)]

    assert len(persons) == 3
    assert [(person.name, person.age) for person in persons] == expected


@pytest.mark.parametrize("data_and_classes", (attr_data_and_classes, dataclasses_data_and_classes))
def test_metric_items(data_and_classes: DataBuilder) -> None:
"""`metric.items()` should return a list of (key, value) tuples."""
Expand Down

0 comments on commit de3b0c7

Please sign in to comment.