diff --git a/polaris/benchmark/_base.py b/polaris/benchmark/_base.py index 25a2004b..3c480c30 100644 --- a/polaris/benchmark/_base.py +++ b/polaris/benchmark/_base.py @@ -171,7 +171,7 @@ def _convert(m: str | dict | Metric) -> Metric: unique_names = {m.name for m in unique_metrics} if len(unique_names) != len(unique_metrics): raise InvalidBenchmarkError( - "The benchmark has similarly named metrics. Specify a custom name with Metric(custom_name=...)" + "The metrics of a benchmark need to have unique names. Specify a custom name with Metric(custom_name=...)" ) return unique_metrics diff --git a/polaris/evaluate/_metric.py b/polaris/evaluate/_metric.py index b958ee48..6b97348a 100644 --- a/polaris/evaluate/_metric.py +++ b/polaris/evaluate/_metric.py @@ -285,7 +285,7 @@ class Metric(BaseModel): label: MetricLabel config: GroupedMetricConfig | None = None - custom_name: str | None = Field(None, exclude=True) + custom_name: str | None = Field(None, exclude=True, alias="name") # Frozen metadata fn: Callable = Field(frozen=True, exclude=True) diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index dfc3b4e7..9cdf6398 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -233,7 +233,7 @@ def test_benchmark_duplicate_metrics(test_single_task_benchmark): m["main_metric"] = m["metrics"][0] SingleTaskBenchmarkSpecification(**m) - with pytest.raises(ValidationError, match="The benchmark has similarly named metrics"): + with pytest.raises(ValidationError, match="The metrics of a benchmark need to have unique names."): m["metrics"][0].config.group_by = "MULTICLASS_calc" SingleTaskBenchmarkSpecification(**m)