Update aggregate_metrics_df to work with None values #522

Merged (12 commits) on Dec 17, 2024
12 changes: 6 additions & 6 deletions etna/metrics/utils.py
@@ -86,13 +86,13 @@ def func(x):
     return func


-def size_agg():
-    """Size for pandas agg."""
+def notna_size_agg():
+    """Size of not-na elements for pandas agg."""

     def func(x):
         return len(x) - pd.isna(x.values).sum()

-    func.__name__ = "size"
+    func.__name__ = "notna_size"
     return func
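Note (an aside, not part of the diff): the rename makes the semantics explicit. The aggregator counts only non-NA entries, which matters once metrics created with missing_mode="ignore" can return None. A minimal standalone sketch of the behavior, mirroring the function above:

    import numpy as np
    import pandas as pd

    def notna_size_agg():
        """Size of not-na elements for pandas agg (copied from the diff above)."""

        def func(x):
            return len(x) - pd.isna(x.values).sum()

        func.__name__ = "notna_size"
        return func

    func = notna_size_agg()
    scores = pd.Series([0.12, np.nan, 0.34])  # one segment's metric came back as None/NaN
    print(func(scores))  # 2 -- the missing value is not counted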


@@ -113,14 +113,14 @@ def func(x):


 MetricAggregationStatistics = Literal[
-    "median", "mean", "std", "size", "percentile_5", "percentile_25", "percentile_75", "percentile_95"
+    "median", "mean", "std", "notna_size", "percentile_5", "percentile_25", "percentile_75", "percentile_95"
 ]

 METRICS_AGGREGATION_MAP: Dict[MetricAggregationStatistics, Union[str, Callable]] = {
     "median": median_agg(),
     "mean": mean_agg(),
     "std": std_agg(),
-    "size": size_agg(),
+    "notna_size": notna_size_agg(),
     "percentile_5": percentile(5),
     "percentile_25": percentile(25),
     "percentile_75": percentile(75),
@@ -140,7 +140,7 @@ def aggregate_metrics_df(metrics_df: pd.DataFrame) -> Dict[str, Optional[float]]:
if "fold_number" in metrics_df.columns:
metrics_dict = (
metrics_df.groupby("segment")
.apply(lambda x: x.mean(skipna=False, numeric_only=True))
.mean(numeric_only=False)
.reset_index()
.drop(["segment", "fold_number"], axis=1)
.apply(list(METRICS_AGGREGATION_MAP.values()))
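Note (illustration, not code from the PR): the groupby step above feeds per-fold metric values into the per-segment aggregation. With pandas' default NaN handling, a segment missing its metric in some folds still gets a mean over the remaining folds, and a segment missing it in every fold aggregates to NaN instead of raising. A small sketch with a hypothetical metrics table:

    import numpy as np
    import pandas as pd

    # hypothetical per-fold backtest metrics; MAE is NaN for segment_2 in every fold
    metrics_df = pd.DataFrame(
        {
            "segment": ["segment_1", "segment_1", "segment_2", "segment_2"],
            "fold_number": [0, 1, 0, 1],
            "MAE": [1.0, 3.0, np.nan, np.nan],
        }
    )

    per_segment = metrics_df.groupby("segment").mean(numeric_only=False)
    print(per_segment["MAE"])
    # segment_1    2.0   <- mean over folds
    # segment_2    NaN   <- all folds missing stays NaN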
5 changes: 1 addition & 4 deletions etna/pipeline/base.py
@@ -857,10 +857,7 @@ def _get_backtest_metrics(self, aggregate_metrics: bool = False) -> pd.DataFrame:

         if aggregate_metrics:
             metrics_df = (
-                metrics_df.groupby("segment")
-                .apply(lambda x: x.mean(skipna=False, numeric_only=True))
-                .reset_index()
-                .drop(self._fold_column, axis=1)
+                metrics_df.groupby("segment").mean(numeric_only=False).reset_index().drop(self._fold_column, axis=1)
             )

         return metrics_df
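For context (my reading of the change, not text from the PR): the removed apply(lambda x: x.mean(skipna=False, numeric_only=True)) turned a segment's aggregate into NaN as soon as any single fold was NaN, while plain groupby(...).mean(...) skips missing folds and yields NaN only when every fold is missing. A comparison under that assumption:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"segment": ["s1", "s1", "s1"], "fold": [0, 1, 2], "MAE": [1.0, np.nan, 3.0]})

    strict = df.groupby("segment").apply(lambda x: x.mean(skipna=False, numeric_only=True))
    lenient = df.groupby("segment").mean(numeric_only=False)

    print(strict["MAE"])   # s1: NaN -- one missing fold poisons the whole segment
    print(lenient["MAE"])  # s1: 2.0 -- missing folds are skipped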
19 changes: 19 additions & 0 deletions tests/test_auto/conftest.py
@@ -80,6 +80,25 @@ def ts_with_fold_missing_middle(random_seed) -> TSDataset:
     return tsds


+@pytest.fixture
+def ts_with_all_folds_missing_one_segment(random_seed) -> TSDataset:
+    periods = 100
+    df1 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
+    df1["segment"] = "segment_1"
+    df1["target"] = np.random.uniform(10, 20, size=periods)
+    df1.loc[df1.index[-21:], "target"] = np.NaN
+
+    df2 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
+    df2["segment"] = "segment_2"
+    df2["target"] = np.random.uniform(-15, 5, size=periods)
+
+    df = pd.concat([df1, df2]).reset_index(drop=True)
+    df = TSDataset.to_dataset(df)
+    tsds = TSDataset(df, freq="D")
+
+    return tsds
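Why 21 trailing NaNs (my inference, the diff does not state it): with the horizon of 7 used throughout these tests and three backtest folds, 3 * 7 = 21 missing points make every test window of segment_1 entirely NaN, so each fold's metric for that segment is None. A quick self-check under those assumed settings:

    import numpy as np

    horizon, n_folds, periods = 7, 3, 100  # assumed backtest setup; 21 = n_folds * horizon
    target = np.ones(periods)
    target[-21:] = np.nan  # mirrors segment_1 in the fixture

    for fold in range(n_folds):
        start = periods - (n_folds - fold) * horizon
        assert np.isnan(target[start : start + horizon]).all()  # each test window is all-NaN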


 @pytest.fixture
 def ts_with_few_missing(random_seed) -> TSDataset:
     periods = 100
42 changes: 27 additions & 15 deletions tests/test_auto/test_auto.py
@@ -11,7 +11,6 @@
 from etna.auto.auto import _Callback
 from etna.auto.auto import _Initializer
 from etna.metrics import MAE
-from etna.metrics import MSE
 from etna.models import LinearPerSegmentModel
 from etna.models import MovingAverageModel
 from etna.models import NaiveModel
@@ -48,23 +47,35 @@ def pool_list():
     ]


+@pytest.mark.parametrize(
+    "ts_name",
+    [
+        "example_tsds",
+        "ts_with_few_missing",
+        "ts_with_fold_missing_tail",
+        "ts_with_fold_missing_middle",
+    ],
+)
 def test_objective(
-    example_tsds,
-    target_metric=MAE(),
+    ts_name,
+    request,
+    target_metric=MAE(missing_mode="ignore"),
     metric_aggregation: Literal["mean"] = "mean",
-    metrics=[MAE()],
+    metrics=[MAE(missing_mode="ignore")],
     backtest_params={},
     initializer=MagicMock(spec=_Initializer),
     callback=MagicMock(spec=_Callback),
     relative_params={
         "_target_": "etna.pipeline.Pipeline",
         "horizon": 7,
         "model": {"_target_": "etna.models.NaiveModel", "lag": 1},
+        "transforms": [{"_target_": "etna.transforms.TimeSeriesImputerTransform"}],
     },
 ):
+    ts = request.getfixturevalue(ts_name)
+    initializer = MagicMock(spec=_Initializer)
+    callback = MagicMock(spec=_Callback)
     trial = MagicMock(relative_params=relative_params)
     _objective = Auto.objective(
-        ts=example_tsds,
+        ts=ts,
         target_metric=target_metric,
         metric_aggregation=metric_aggregation,
         metrics=metrics,
@@ -79,13 +90,13 @@ def test_objective(
     callback.assert_called_once()
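A note on why fresh mocks are assigned inside the body (my reading, not stated in the PR): default arguments are evaluated once at definition time, so under parametrize a MagicMock default would accumulate calls across cases and assert_called_once() would fail from the second case on. A minimal sketch of the pitfall:

    from unittest.mock import MagicMock

    def run_case(callback=MagicMock()):  # default mock is created once and shared
        callback()
        return callback.call_count

    assert run_case() == 1
    assert run_case() == 2  # same mock again; hence fresh mocks inside the test body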


@pytest.mark.parametrize("ts_name", ["ts_with_fold_missing_tail", "ts_with_fold_missing_middle"])
@pytest.mark.parametrize("ts_name", ["ts_with_all_folds_missing_one_segment"])
 def test_objective_fail_none(
     ts_name,
     request,
-    target_metric=MSE(missing_mode="ignore"),
+    target_metric=MAE(missing_mode="ignore"),
     metric_aggregation: Literal["mean"] = "mean",
-    metrics=[MSE(missing_mode="ignore")],
+    metrics=[MAE(missing_mode="ignore")],
     backtest_params={},
     initializer=MagicMock(spec=_Initializer),
     callback=MagicMock(spec=_Callback),
@@ -108,7 +119,8 @@ def test_objective_fail_none(
         callback=callback,
     )

with pytest.raises(ValueError, match="Metric value is None"):
# TODO: discuss the error here
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should discuss this: it fails before we go into metrics computation.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like we should support this behavior now. We can leave it as is for this task and create a separate issue to change this. What do you think?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or we can try to mock here the part that blocks us from testing intended behavior.

with pytest.raises(ValueError, match="Last train timestamp should be not later"):
_ = _objective(trial)


@@ -188,8 +200,8 @@ def test_fit_without_tuning_list(ts_name, optuna_storage, pool, request):
     ts = request.getfixturevalue(ts_name)
     pool = request.getfixturevalue(pool)
     auto = Auto(
-        MSE(missing_mode="ignore"),
-        metrics=[MSE(missing_mode="ignore")],
+        MAE(missing_mode="ignore"),
+        metrics=[MAE(missing_mode="ignore")],
         pool=pool,
         metric_aggregation="median",
         horizon=7,
@@ -228,8 +240,8 @@ def test_fit_with_tuning(
 ):
     ts = request.getfixturevalue(ts_name)
     auto = Auto(
-        MSE(missing_mode="ignore"),
-        metrics=[MSE(missing_mode="ignore")],
+        MAE(missing_mode="ignore"),
+        metrics=[MAE(missing_mode="ignore")],
         pool=pool,
         metric_aggregation="median",
         horizon=7,
40 changes: 26 additions & 14 deletions tests/test_auto/test_tune.py
@@ -13,7 +13,6 @@
 from etna.distributions import FloatDistribution
 from etna.distributions import IntDistribution
 from etna.metrics import MAE
-from etna.metrics import MSE
 from etna.models import NaiveModel
 from etna.models import SimpleExpSmoothingModel
 from etna.pipeline import AutoRegressivePipeline
@@ -25,20 +24,31 @@
 from etna.transforms import TimeSeriesImputerTransform


+@pytest.mark.parametrize(
+    "ts_name",
+    [
+        "example_tsds",
+        "ts_with_few_missing",
+        "ts_with_fold_missing_tail",
+        "ts_with_fold_missing_middle",
+    ],
+)
 def test_objective(
-    example_tsds,
-    target_metric=MAE(),
+    ts_name,
+    request,
+    target_metric=MAE(missing_mode="ignore"),
     metric_aggregation: Literal["mean"] = "mean",
-    metrics=[MAE()],
+    metrics=[MAE(missing_mode="ignore")],
     backtest_params={},
     initializer=MagicMock(spec=_Initializer),
     callback=MagicMock(spec=_Callback),
-    pipeline=Pipeline(NaiveModel()),
+    pipeline=Pipeline(model=NaiveModel(), transforms=[TimeSeriesImputerTransform()], horizon=7),
     params_to_tune={},
 ):
+    ts = request.getfixturevalue(ts_name)
+    initializer = MagicMock(spec=_Initializer)
+    callback = MagicMock(spec=_Callback)
     trial = MagicMock()
     _objective = Tune.objective(
-        ts=example_tsds,
+        ts=ts,
         pipeline=pipeline,
         params_to_tune=params_to_tune,
         target_metric=target_metric,
@@ -55,13 +65,13 @@ def test_objective(
     callback.assert_called_once()


@pytest.mark.parametrize("ts_name", ["ts_with_fold_missing_tail", "ts_with_fold_missing_middle"])
@pytest.mark.parametrize("ts_name", ["ts_with_all_folds_missing_one_segment"])
 def test_objective_fail_none(
     ts_name,
     request,
-    target_metric=MSE(missing_mode="ignore"),
+    target_metric=MAE(missing_mode="ignore"),
     metric_aggregation: Literal["mean"] = "mean",
-    metrics=[MSE(missing_mode="ignore")],
+    metrics=[MAE(missing_mode="ignore")],
     backtest_params={},
     initializer=MagicMock(spec=_Initializer),
     callback=MagicMock(spec=_Callback),
@@ -81,7 +91,9 @@ def test_objective_fail_none(
         initializer=initializer,
         callback=callback,
     )
with pytest.raises(ValueError, match="Metric value is None"):

# TODO: discuss the error here
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should discuss this: it fails before we go into metrics computation.

with pytest.raises(ValueError, match="Last train timestamp should be not later"):
_ = _objective(trial)


@@ -215,8 +227,8 @@ def test_tune_run(ts_name, optuna_storage, pipeline, request):
     ts = request.getfixturevalue(ts_name)
     tune = Tune(
         pipeline=pipeline,
-        target_metric=MSE(missing_mode="ignore"),
-        metrics=[MSE(missing_mode="ignore")],
+        target_metric=MAE(missing_mode="ignore"),
+        metrics=[MAE(missing_mode="ignore")],
         metric_aggregation="median",
         horizon=7,
         storage=optuna_storage,