diff --git a/polaris/benchmark/_base.py b/polaris/benchmark/_base.py
index 25a2004b..3c480c30 100644
--- a/polaris/benchmark/_base.py
+++ b/polaris/benchmark/_base.py
@@ -171,7 +171,7 @@ def _convert(m: str | dict | Metric) -> Metric:
         unique_names = {m.name for m in unique_metrics}
         if len(unique_names) != len(unique_metrics):
             raise InvalidBenchmarkError(
-                "The benchmark has similarly named metrics. Specify a custom name with Metric(custom_name=...)"
+                "The metrics of a benchmark need to have unique names. Specify a custom name with Metric(custom_name=...)"
             )
 
         return unique_metrics
diff --git a/polaris/evaluate/_metric.py b/polaris/evaluate/_metric.py
index b958ee48..6b97348a 100644
--- a/polaris/evaluate/_metric.py
+++ b/polaris/evaluate/_metric.py
@@ -285,7 +285,7 @@ class Metric(BaseModel):
 
     label: MetricLabel
     config: GroupedMetricConfig | None = None
-    custom_name: str | None = Field(None, exclude=True)
+    custom_name: str | None = Field(None, exclude=True, alias="name")
 
     # Frozen metadata
     fn: Callable = Field(frozen=True, exclude=True)
diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py
index dfc3b4e7..9cdf6398 100644
--- a/tests/test_benchmark.py
+++ b/tests/test_benchmark.py
@@ -233,7 +233,7 @@ def test_benchmark_duplicate_metrics(test_single_task_benchmark):
         m["main_metric"] = m["metrics"][0]
         SingleTaskBenchmarkSpecification(**m)
 
-    with pytest.raises(ValidationError, match="The benchmark has similarly named metrics"):
+    with pytest.raises(ValidationError, match="The metrics of a benchmark need to have unique names."):
         m["metrics"][0].config.group_by = "MULTICLASS_calc"
         SingleTaskBenchmarkSpecification(**m)