Update aggregate_metrics_df to work with None values #522

Merged
12 commits merged on Dec 17, 2024
2 changes: 1 addition & 1 deletion CHANGELOG.md
@@ -33,7 +33,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add parameter `missing_mode` into `MAE` metric ([#523](https://github.com/etna-team/etna/pull/523))
- Add parameter `missing_mode` into `MAPE` and `SMAPE` metrics ([#524](https://github.com/etna-team/etna/pull/524))
-
-
- Update `aggregate_metrics_df` to work with `None` values ([#522](https://github.com/etna-team/etna/pull/522))
-
-
-
12 changes: 10 additions & 2 deletions etna/auto/auto.py
@@ -484,7 +484,11 @@
        for metric in aggregated_metrics:
            trial.set_user_attr(metric, aggregated_metrics[metric])

        return aggregated_metrics[f"{target_metric.name}_{metric_aggregation}"]
        result_value = aggregated_metrics[f"{target_metric.name}_{metric_aggregation}"]
        if result_value is None:
            raise ValueError("Metric value is None! It should be float for optimization.")

Codecov / codecov/patch warning on etna/auto/auto.py#L489: added line was not covered by tests

        return result_value

    return _objective

@@ -809,7 +813,11 @@
        for metric in aggregated_metrics:
            trial.set_user_attr(metric, aggregated_metrics[metric])

        return aggregated_metrics[f"{target_metric.name}_{metric_aggregation}"]
        result_value = aggregated_metrics[f"{target_metric.name}_{metric_aggregation}"]
        if result_value is None:
            raise ValueError("Metric value is None! It should be float for optimization.")

        return result_value

    return _objective

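For context, a minimal sketch of the failure mode the new guard catches (not taken from the PR; the one-key dict below is a made-up stand-in for a real aggregated backtest result): Optuna requires the objective to return a float, but after this change an aggregated metric can be None.

# Hypothetical toy input: a single aggregated metric whose folds were all NaN.
aggregated_metrics = {"MAE_mean": None}

try:
    result_value = aggregated_metrics["MAE_mean"]
    if result_value is None:
        raise ValueError("Metric value is None! It should be float for optimization.")
except ValueError as err:
    print(err)  # Metric value is None! It should be float for optimization.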
99 changes: 83 additions & 16 deletions etna/metrics/utils.py
@@ -1,3 +1,4 @@
import warnings
from typing import Callable
from typing import Dict
from typing import List
@@ -37,32 +38,97 @@ def compute_metrics(
    return metrics_values


def mean_agg():
    """Mean for pandas agg."""

    def func(x: pd.Series):
        with warnings.catch_warnings():
            # suppress the warning raised when all values are NaN
            warnings.filterwarnings(
                message="Mean of empty slice",
                action="ignore",
            )
            return np.nanmean(a=x.values)

    func.__name__ = "mean"
    return func


def median_agg():
    """Median for pandas agg."""

    def func(x: pd.Series):
        with warnings.catch_warnings():
            # suppress the warning raised when all values are NaN
            warnings.filterwarnings(
                message="All-NaN slice encountered",
                action="ignore",
            )
            return np.nanmedian(a=x.values)

    func.__name__ = "median"
    return func


def std_agg():
    """Std for pandas agg."""

    def func(x: pd.Series):
        with warnings.catch_warnings():
            # suppress the warning raised when all values are NaN
            warnings.filterwarnings(
                message="Degrees of freedom <= 0",
                action="ignore",
            )
            return np.nanstd(a=x.values)

    func.__name__ = "std"
    return func


def notna_size_agg():
    """Size of not-na elements for pandas agg."""

    def func(x: pd.Series):
        return len(x) - pd.isna(x.values).sum()

    func.__name__ = "notna_size"
    return func


def percentile(n: int):
    """Percentile for pandas agg."""

    def percentile_(x):
        return np.nanpercentile(a=x.values, q=n)
    def func(x: pd.Series):
        with warnings.catch_warnings():
            # suppress the warning raised when all values are NaN
            warnings.filterwarnings(
                message="All-NaN slice encountered",
                action="ignore",
            )
            return np.nanpercentile(a=x.values, q=n)

    percentile_.__name__ = f"percentile_{n}"
    return percentile_
    func.__name__ = f"percentile_{n}"
    return func


MetricAggregationStatistics = Literal[
    "median", "mean", "std", "percentile_5", "percentile_25", "percentile_75", "percentile_95"
    "median", "mean", "std", "notna_size", "percentile_5", "percentile_25", "percentile_75", "percentile_95"
]

METRICS_AGGREGATION_MAP: Dict[MetricAggregationStatistics, Union[str, Callable]] = {
    "median": "median",
    "mean": "mean",
    "std": "std",
    "median": median_agg(),
    "mean": mean_agg(),
    "std": std_agg(),
    "notna_size": notna_size_agg(),
    "percentile_5": percentile(5),
    "percentile_25": percentile(25),
    "percentile_75": percentile(75),
    "percentile_95": percentile(95),
}


def aggregate_metrics_df(metrics_df: pd.DataFrame) -> Dict[str, float]:
def aggregate_metrics_df(metrics_df: pd.DataFrame) -> Dict[str, Optional[float]]:
"""Aggregate metrics in :py:meth:`log_backtest_metrics` method.

Parameters
@@ -74,7 +140,7 @@ def aggregate_metrics_df(metrics_df: pd.DataFrame) -> Dict[str, float]:
if "fold_number" in metrics_df.columns:
metrics_dict = (
metrics_df.groupby("segment")
.mean()
.mean(numeric_only=False)
.reset_index()
.drop(["segment", "fold_number"], axis=1)
.apply(list(METRICS_AGGREGATION_MAP.values()))
@@ -85,10 +151,11 @@ def aggregate_metrics_df(metrics_df: pd.DataFrame) -> Dict[str, float]:
    else:
        metrics_dict = metrics_df.drop(["segment"], axis=1).apply(list(METRICS_AGGREGATION_MAP.values())).to_dict()

    metrics_dict_wide = {
        f"{metrics_key}_{statistics_key}": value
        for metrics_key, values in metrics_dict.items()
        for statistics_key, value in values.items()
    }
    cur_dict = {}
    for metrics_key, values in metrics_dict.items():
        for statistics_key, value in values.items():
            new_key = f"{metrics_key}_{statistics_key}"
            new_value = value if not pd.isna(value) else None
            cur_dict[new_key] = new_value

    return metrics_dict_wide
    return cur_dict
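To illustrate the new behavior, a small sketch (the import path mirrors the file being edited, but the toy data is made up and does not appear in the PR): a metric that is NaN for one segment is skipped by the nan-aware aggregators, and a statistic that cannot be computed at all comes back as None rather than NaN.

import numpy as np
import pandas as pd

from etna.metrics.utils import aggregate_metrics_df

# Toy per-segment metrics without a fold_number column.
metrics_df = pd.DataFrame(
    {
        "segment": ["segment_1", "segment_2"],
        "MAE": [1.5, np.nan],  # segment_2 produced no usable points
    }
)

result = aggregate_metrics_df(metrics_df)
print(result["MAE_mean"])        # 1.5 -- np.nanmean skips the NaN segment
print(result["MAE_notna_size"])  # 1 -- number of segments with a value

# With every segment NaN, each statistic becomes None instead of NaN.
all_nan = pd.DataFrame({"segment": ["segment_1", "segment_2"], "MAE": [np.nan, np.nan]})
print(aggregate_metrics_df(all_nan)["MAE_median"])  # None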
4 changes: 3 additions & 1 deletion etna/pipeline/base.py
@@ -856,7 +856,9 @@ def _get_backtest_metrics(self, aggregate_metrics: bool = False) -> pd.DataFrame
        metrics_df.sort_values(["segment", self._fold_column], inplace=True)

        if aggregate_metrics:
            metrics_df = metrics_df.groupby("segment").mean().reset_index().drop(self._fold_column, axis=1)
            metrics_df = (
                metrics_df.groupby("segment").mean(numeric_only=False).reset_index().drop(self._fold_column, axis=1)
            )

        return metrics_df

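A short sketch of what `numeric_only=False` changes (toy frame, not from the PR): a group whose folds are all NaN aggregates to NaN, which downstream code can map to None, and non-numeric metric columns are kept in the group-wise mean instead of being silently dropped.

import numpy as np
import pandas as pd

df = pd.DataFrame(
    {
        "segment": ["a", "a", "b", "b"],
        "fold_number": [0, 1, 0, 1],
        "MAE": [1.0, 2.0, np.nan, np.nan],
    }
)

out = df.groupby("segment").mean(numeric_only=False).reset_index()
print(out)
#   segment  fold_number  MAE
# 0       a          0.5  1.5
# 1       b          0.5  NaN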
102 changes: 102 additions & 0 deletions tests/test_auto/conftest.py
@@ -1,11 +1,14 @@
from os import unlink

import numpy as np
import pandas as pd
import pytest
from optuna.storages import RDBStorage
from optuna.trial import TrialState
from typing_extensions import NamedTuple

from etna.auto.utils import config_hash
from etna.datasets import TSDataset
from etna.models import NaiveModel
from etna.pipeline import Pipeline

@@ -35,3 +38,102 @@ class Trial(NamedTuple):
    fail_trials = [Trial(user_attrs={}, state=TrialState.FAIL)]

    return complete_trials + complete_trials[:3] + fail_trials


@pytest.fixture
def ts_with_fold_missing_tail(random_seed) -> TSDataset:
    periods = 100
    df1 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
    df1["segment"] = "segment_1"
    df1["target"] = np.random.uniform(10, 20, size=periods)
    df1.loc[df1.index[-7:], "target"] = np.NaN

    df2 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
    df2["segment"] = "segment_2"
    df2["target"] = np.random.uniform(-15, 5, size=periods)
    df2.loc[df2.index[-7:], "target"] = np.NaN

    df = pd.concat([df1, df2]).reset_index(drop=True)
    df = TSDataset.to_dataset(df)
    tsds = TSDataset(df, freq="D")

    return tsds


@pytest.fixture
def ts_with_fold_missing_middle(random_seed) -> TSDataset:
    periods = 100
    df1 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
    df1["segment"] = "segment_1"
    df1["target"] = np.random.uniform(10, 20, size=periods)
    df1.loc[df1.index[-14:-7], "target"] = np.NaN

    df2 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
    df2["segment"] = "segment_2"
    df2["target"] = np.random.uniform(-15, 5, size=periods)
    df2.loc[df2.index[-14:-7], "target"] = np.NaN

    df = pd.concat([df1, df2]).reset_index(drop=True)
    df = TSDataset.to_dataset(df)
    tsds = TSDataset(df, freq="D")

    return tsds


@pytest.fixture
def ts_with_all_folds_missing_one_segment(random_seed) -> TSDataset:
    periods = 100
    df1 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
    df1["segment"] = "segment_1"
    df1["target"] = np.random.uniform(10, 20, size=periods)
    df1.loc[df1.index[-40:], "target"] = np.NaN

    df2 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
    df2["segment"] = "segment_2"
    df2["target"] = np.random.uniform(-15, 5, size=periods)

    df = pd.concat([df1, df2]).reset_index(drop=True)
    df = TSDataset.to_dataset(df)
    tsds = TSDataset(df, freq="D")

    return tsds


@pytest.fixture
def ts_with_all_folds_missing_all_segments(random_seed) -> TSDataset:
    periods = 100
    df1 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
    df1["segment"] = "segment_1"
    df1["target"] = np.random.uniform(10, 20, size=periods)
    df1.loc[df1.index[-40:], "target"] = np.NaN

    df2 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
    df2["segment"] = "segment_2"
    df2["target"] = np.random.uniform(-15, 5, size=periods)
    df2.loc[df2.index[-40:], "target"] = np.NaN

    df = pd.concat([df1, df2]).reset_index(drop=True)
    df = TSDataset.to_dataset(df)
    tsds = TSDataset(df, freq="D")

    return tsds


@pytest.fixture
def ts_with_few_missing(random_seed) -> TSDataset:
    periods = 100
    df1 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
    df1["segment"] = "segment_1"
    df1["target"] = np.random.uniform(10, 20, size=periods)
    df1.loc[df1.index[-4:-2], "target"] = np.NaN

    df2 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
    df2["segment"] = "segment_2"
    df2["target"] = np.random.uniform(-15, 5, size=periods)
    df2.loc[df2.index[-12:-10], "target"] = np.NaN

    df = pd.concat([df1, df2]).reset_index(drop=True)
    df = TSDataset.to_dataset(df)
    tsds = TSDataset(df, freq="D")

    return tsds
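As a usage sketch only (this test is not part of the PR; the backtest return signature and MAE's missing_mode value are assumptions based on the changelog entries above), here is how one of these fixtures might be exercised:

# Hypothetical test showing how a fixture above could be used.
from etna.metrics import MAE
from etna.models import NaiveModel
from etna.pipeline import Pipeline


def test_backtest_with_missing_tail(ts_with_fold_missing_tail):
    pipeline = Pipeline(model=NaiveModel(lag=1), horizon=7)  # horizon matches the 7-point NaN tail
    # Assumed return shape: (metrics_df, forecast_df, fold_info_df).
    metrics_df, _, _ = pipeline.backtest(
        ts=ts_with_fold_missing_tail,
        metrics=[MAE(missing_mode="ignore")],  # missing_mode added in #523; value assumed
        n_folds=2,
    )
    assert "MAE" in metrics_df.columns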