From c30430f0e8c144948b81d82bdeddeb71b8b876a6 Mon Sep 17 00:00:00 2001
From: Dmitry Bunin
Date: Tue, 10 Dec 2024 16:00:37 +0300
Subject: [PATCH 1/9] feature: add task files

---
 etna/auto/auto.py                         |  12 +-
 etna/metrics/utils.py                     |  99 +++++++++++---
 etna/pipeline/base.py                     |   7 +-
 tests/test_auto/conftest.py               |  63 +++++++++
 tests/test_auto/test_auto.py              |  76 +++++++++--
 tests/test_auto/test_tune.py              |  51 ++++++-
 tests/test_metrics/test_metrics_utils.py  | 162 +++++++++++++++++++++++
 7 files changed, 433 insertions(+), 37 deletions(-)

diff --git a/etna/auto/auto.py b/etna/auto/auto.py
index 47101bc6d..525987f3e 100644
--- a/etna/auto/auto.py
+++ b/etna/auto/auto.py
@@ -484,7 +484,11 @@ def _objective(trial: Trial) -> float:
             for metric in aggregated_metrics:
                 trial.set_user_attr(metric, aggregated_metrics[metric])
 
-            return aggregated_metrics[f"{target_metric.name}_{metric_aggregation}"]
+            result_value = aggregated_metrics[f"{target_metric.name}_{metric_aggregation}"]
+            if result_value is None:
+                raise ValueError("Metric value is None! It should be a float for optimization.")
+
+            return result_value
 
         return _objective
 
@@ -809,7 +813,11 @@ def _objective(trial: Trial) -> float:
             for metric in aggregated_metrics:
                 trial.set_user_attr(metric, aggregated_metrics[metric])
 
-            return aggregated_metrics[f"{target_metric.name}_{metric_aggregation}"]
+            result_value = aggregated_metrics[f"{target_metric.name}_{metric_aggregation}"]
+            if result_value is None:
+                raise ValueError("Metric value is None! It should be a float for optimization.")
+
+            return result_value
 
         return _objective
 
diff --git a/etna/metrics/utils.py b/etna/metrics/utils.py
index 5e31c5d78..0d424b0cb 100644
--- a/etna/metrics/utils.py
+++ b/etna/metrics/utils.py
@@ -1,3 +1,4 @@
+import warnings
 from typing import Callable
 from typing import Dict
 from typing import List
@@ -37,24 +38,89 @@ def compute_metrics(
     return metrics_values
 
 
+def mean_agg():
+    """Mean for pandas agg."""
+
+    def func(x):
+        with warnings.catch_warnings():
+            # this helps to prevent warning in case of all nans
+            warnings.filterwarnings(
+                message="Mean of empty slice",
+                action="ignore",
+            )
+            return np.nanmean(a=x.values)
+
+    func.__name__ = "mean"
+    return func
+
+
+def median_agg():
+    """Median for pandas agg."""
+
+    def func(x):
+        with warnings.catch_warnings():
+            # this helps to prevent warning in case of all nans
+            warnings.filterwarnings(
+                message="All-NaN slice encountered",
+                action="ignore",
+            )
+            return np.nanmedian(a=x.values)
+
+    func.__name__ = "median"
+    return func
+
+
+def std_agg():
+    """Std for pandas agg."""
+
+    def func(x):
+        with warnings.catch_warnings():
+            # this helps to prevent warning in case of all nans
+            warnings.filterwarnings(
+                message="Degrees of freedom <=",
+                action="ignore",
+            )
+            return np.nanstd(a=x.values)
+
+    func.__name__ = "std"
+    return func
+
+
+def size_agg():
+    """Size for pandas agg."""
+
+    def func(x):
+        return len(x) - pd.isna(x.values).sum()
+
+    func.__name__ = "size"
+    return func
+
+
 def percentile(n: int):
     """Percentile for pandas agg."""
 
-    def percentile_(x):
-        return np.nanpercentile(a=x.values, q=n)
+    def func(x):
+        with warnings.catch_warnings():
+            # this helps to prevent warning in case of all nans
+            warnings.filterwarnings(
+                message="All-NaN slice encountered",
+                action="ignore",
+            )
+            return np.nanpercentile(a=x.values, q=n)
 
-    percentile_.__name__ = f"percentile_{n}"
-    return percentile_
+    func.__name__ = f"percentile_{n}"
+    return func
 
 
 MetricAggregationStatistics = Literal[
-    "median", "mean", "std", "percentile_5", "percentile_25", "percentile_75", "percentile_95"
"percentile_25", "percentile_75", "percentile_95" + "median", "mean", "std", "size", "percentile_5", "percentile_25", "percentile_75", "percentile_95" ] METRICS_AGGREGATION_MAP: Dict[MetricAggregationStatistics, Union[str, Callable]] = { - "median": "median", - "mean": "mean", - "std": "std", + "median": mean_agg(), + "mean": median_agg(), + "std": std_agg(), + "size": size_agg(), "percentile_5": percentile(5), "percentile_25": percentile(25), "percentile_75": percentile(75), @@ -62,7 +128,7 @@ def percentile_(x): } -def aggregate_metrics_df(metrics_df: pd.DataFrame) -> Dict[str, float]: +def aggregate_metrics_df(metrics_df: pd.DataFrame) -> Dict[str, Optional[float]]: """Aggregate metrics in :py:meth:`log_backtest_metrics` method. Parameters @@ -74,7 +140,7 @@ def aggregate_metrics_df(metrics_df: pd.DataFrame) -> Dict[str, float]: if "fold_number" in metrics_df.columns: metrics_dict = ( metrics_df.groupby("segment") - .mean() + .apply(lambda x: x.mean(skipna=False, numeric_only=False)) .reset_index() .drop(["segment", "fold_number"], axis=1) .apply(list(METRICS_AGGREGATION_MAP.values())) @@ -85,10 +151,11 @@ def aggregate_metrics_df(metrics_df: pd.DataFrame) -> Dict[str, float]: else: metrics_dict = metrics_df.drop(["segment"], axis=1).apply(list(METRICS_AGGREGATION_MAP.values())).to_dict() - metrics_dict_wide = { - f"{metrics_key}_{statistics_key}": value - for metrics_key, values in metrics_dict.items() - for statistics_key, value in values.items() - } + cur_dict = {} + for metrics_key, values in metrics_dict.items(): + for statistics_key, value in values.items(): + new_key = f"{metrics_key}_{statistics_key}" + new_value = value if not pd.isna(value) else None + cur_dict[new_key] = new_value - return metrics_dict_wide + return cur_dict diff --git a/etna/pipeline/base.py b/etna/pipeline/base.py index d6728ee6c..3166a77ac 100644 --- a/etna/pipeline/base.py +++ b/etna/pipeline/base.py @@ -856,7 +856,12 @@ def _get_backtest_metrics(self, aggregate_metrics: bool = False) -> pd.DataFrame metrics_df.sort_values(["segment", self._fold_column], inplace=True) if aggregate_metrics: - metrics_df = metrics_df.groupby("segment").mean().reset_index().drop(self._fold_column, axis=1) + metrics_df = ( + metrics_df.groupby("segment") + .apply(lambda x: x.mean(skipna=False, numeric_only=False)) + .reset_index() + .drop(self._fold_column, axis=1) + ) return metrics_df diff --git a/tests/test_auto/conftest.py b/tests/test_auto/conftest.py index 18168e8c3..fe6c03433 100644 --- a/tests/test_auto/conftest.py +++ b/tests/test_auto/conftest.py @@ -1,11 +1,14 @@ from os import unlink +import numpy as np +import pandas as pd import pytest from optuna.storages import RDBStorage from optuna.trial import TrialState from typing_extensions import NamedTuple from etna.auto.utils import config_hash +from etna.datasets import TSDataset from etna.models import NaiveModel from etna.pipeline import Pipeline @@ -35,3 +38,63 @@ class Trial(NamedTuple): fail_trials = [Trial(user_attrs={}, state=TrialState.FAIL)] return complete_trials + complete_trials[:3] + fail_trials + + +@pytest.fixture +def ts_with_fold_missing_tail(random_seed) -> TSDataset: + periods = 100 + df1 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)}) + df1["segment"] = "segment_1" + df1["target"] = np.random.uniform(10, 20, size=periods) + df1.loc[df1.index[-7:], "target"] = np.NaN + + df2 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)}) + df2["segment"] = "segment_2" + df2["target"] = np.random.uniform(-15, 
+    df2.loc[df2.index[-7:], "target"] = np.NaN
+
+    df = pd.concat([df1, df2]).reset_index(drop=True)
+    df = TSDataset.to_dataset(df)
+    tsds = TSDataset(df, freq="D")
+
+    return tsds
+
+
+@pytest.fixture
+def ts_with_fold_missing_middle(random_seed) -> TSDataset:
+    periods = 100
+    df1 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
+    df1["segment"] = "segment_1"
+    df1["target"] = np.random.uniform(10, 20, size=periods)
+    df1.loc[df1.index[-14:-7], "target"] = np.NaN
+
+    df2 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
+    df2["segment"] = "segment_2"
+    df2["target"] = np.random.uniform(-15, 5, size=periods)
+    df2.loc[df2.index[-14:-7], "target"] = np.NaN
+
+    df = pd.concat([df1, df2]).reset_index(drop=True)
+    df = TSDataset.to_dataset(df)
+    tsds = TSDataset(df, freq="D")
+
+    return tsds
+
+
+@pytest.fixture
+def ts_with_few_missing(random_seed) -> TSDataset:
+    periods = 100
+    df1 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
+    df1["segment"] = "segment_1"
+    df1["target"] = np.random.uniform(10, 20, size=periods)
+    df1.loc[df1.index[-4:-2], "target"] = np.NaN
+
+    df2 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
+    df2["segment"] = "segment_2"
+    df2["target"] = np.random.uniform(-15, 5, size=periods)
+    df2.loc[df2.index[-12:-10], "target"] = np.NaN
+
+    df = pd.concat([df1, df2]).reset_index(drop=True)
+    df = TSDataset.to_dataset(df)
+    tsds = TSDataset(df, freq="D")
+
+    return tsds
diff --git a/tests/test_auto/test_auto.py b/tests/test_auto/test_auto.py
index e93b56581..12251e8fd 100644
--- a/tests/test_auto/test_auto.py
+++ b/tests/test_auto/test_auto.py
@@ -11,11 +11,13 @@
 from etna.auto.auto import _Callback
 from etna.auto.auto import _Initializer
 from etna.metrics import MAE
+from etna.metrics import MSE
 from etna.models import LinearPerSegmentModel
 from etna.models import MovingAverageModel
 from etna.models import NaiveModel
 from etna.pipeline import Pipeline
 from etna.transforms import LagTransform
+from etna.transforms import TimeSeriesImputerTransform
 
 
 @pytest.fixture()
@@ -23,13 +25,15 @@ def pool_generator():
     pool = [
         {
             "_target_": "etna.pipeline.Pipeline",
-            "horizon": "${__aux__.horizon}",
             "model": {"_target_": "etna.models.MovingAverageModel", "window": "${mult:${horizon},1}"},
+            "transforms": [{"_target_": "etna.transforms.TimeSeriesImputerTransform"}],
+            "horizon": "${__aux__.horizon}",
         },
         {
             "_target_": "etna.pipeline.Pipeline",
-            "horizon": "${__aux__.horizon}",
             "model": {"_target_": "etna.models.NaiveModel", "lag": 1},
+            "transforms": [{"_target_": "etna.transforms.TimeSeriesImputerTransform"}],
+            "horizon": "${__aux__.horizon}",
         },
     ]
     pool_generator = PoolGenerator(pool)
@@ -38,7 +42,10 @@ def pool_generator():
 
 @pytest.fixture()
 def pool_list():
-    return [Pipeline(MovingAverageModel(7), horizon=7), Pipeline(NaiveModel(1), horizon=7)]
+    return [
+        Pipeline(MovingAverageModel(7), transforms=[TimeSeriesImputerTransform()], horizon=7),
+        Pipeline(NaiveModel(1), transforms=[TimeSeriesImputerTransform()], horizon=7),
+    ]
 
 
 def test_objective(
@@ -72,6 +79,39 @@ def test_objective(
     callback.assert_called_once()
 
 
+@pytest.mark.parametrize("ts_name", ["ts_with_fold_missing_tail", "ts_with_fold_missing_middle"])
+def test_objective_fail_none(
+    ts_name,
+    request,
+    target_metric=MSE(missing_mode="ignore"),
+    metric_aggregation: Literal["mean"] = "mean",
+    metrics=[MSE(missing_mode="ignore")],
+    backtest_params={},
+    initializer=MagicMock(spec=_Initializer),
+    callback=MagicMock(spec=_Callback),
+    relative_params={
+        "_target_": "etna.pipeline.Pipeline",
+        "horizon": 7,
+        "model": {"_target_": "etna.models.NaiveModel", "lag": 1},
+        "transforms": [{"_target_": "etna.transforms.TimeSeriesImputerTransform"}],
+    },
+):
+    ts = request.getfixturevalue(ts_name)
+    trial = MagicMock(relative_params=relative_params)
+    _objective = Auto.objective(
+        ts=ts,
+        target_metric=target_metric,
+        metric_aggregation=metric_aggregation,
+        metrics=metrics,
+        backtest_params=backtest_params,
+        initializer=initializer,
+        callback=callback,
+    )
+
+    with pytest.raises(ValueError, match="Metric value is None"):
+        _ = _objective(trial)
+
+
 @pytest.mark.parametrize("tune_size", [0, 2])
 def test_fit_called_tuning_pool(
     tune_size,
@@ -142,17 +182,20 @@ def test_init_optuna(
     )
 
 
+@pytest.mark.parametrize("ts_name", ["example_tsds", "ts_with_few_missing"])
 @pytest.mark.parametrize("pool", ["pool_list", "pool_generator"])
-def test_fit_without_tuning_list(example_tsds, optuna_storage, pool, request):
+def test_fit_without_tuning_list(ts_name, optuna_storage, pool, request):
+    ts = request.getfixturevalue(ts_name)
     pool = request.getfixturevalue(pool)
     auto = Auto(
-        MAE(),
+        MSE(missing_mode="ignore"),
+        metrics=[MSE(missing_mode="ignore")],
         pool=pool,
         metric_aggregation="median",
         horizon=7,
         storage=optuna_storage,
     )
-    auto.fit(ts=example_tsds, n_trials=2)
+    auto.fit(ts=ts, n_trials=2)
 
     assert len(auto._pool_optuna.study.trials) == 2
     assert len(auto.summary()) == 2
@@ -163,27 +206,36 @@ def test_fit_without_tuning_list(example_tsds, optuna_storage, pool, request):
     assert auto.top_k(k=1)[0].to_dict() == pool[0].to_dict()
 
 
+@pytest.mark.parametrize("ts_name", ["example_tsds", "ts_with_few_missing"])
 @pytest.mark.parametrize("tune_size", [1, 2])
 def test_fit_with_tuning(
+    ts_name,
     tune_size,
-    example_tsds,
+    request,
     optuna_storage,
     pool=(
-        Pipeline(MovingAverageModel(5), horizon=7),
-        Pipeline(NaiveModel(1), horizon=7),
+        Pipeline(MovingAverageModel(5), transforms=[TimeSeriesImputerTransform(strategy="forward_fill")], horizon=7),
+        Pipeline(NaiveModel(1), transforms=[TimeSeriesImputerTransform(strategy="forward_fill")], horizon=7),
         Pipeline(
-            LinearPerSegmentModel(), transforms=[LagTransform(in_column="target", lags=list(range(7, 21)))], horizon=7
+            LinearPerSegmentModel(),
+            transforms=[
+                TimeSeriesImputerTransform(strategy="forward_fill"),
+                LagTransform(in_column="target", lags=list(range(7, 21))),
+            ],
+            horizon=7,
         ),
     ),
 ):
+    ts = request.getfixturevalue(ts_name)
     auto = Auto(
-        MAE(),
+        MSE(missing_mode="ignore"),
+        metrics=[MSE(missing_mode="ignore")],
         pool=pool,
         metric_aggregation="median",
         horizon=7,
         storage=optuna_storage,
     )
-    auto.fit(ts=example_tsds, n_trials=11, tune_size=tune_size)
+    auto.fit(ts=ts, n_trials=11, tune_size=tune_size)
 
     assert len(auto._pool_optuna.study.trials) == 3
     assert len(auto.summary()) == 11
diff --git a/tests/test_auto/test_tune.py b/tests/test_auto/test_tune.py
index 0a1b972a0..175d05c13 100644
--- a/tests/test_auto/test_tune.py
+++ b/tests/test_auto/test_tune.py
@@ -13,6 +13,7 @@
 from etna.distributions import FloatDistribution
 from etna.distributions import IntDistribution
 from etna.metrics import MAE
+from etna.metrics import MSE
 from etna.models import NaiveModel
 from etna.models import SimpleExpSmoothingModel
 from etna.pipeline import AutoRegressivePipeline
@@ -21,6 +22,7 @@
 from etna.reconciliation import BottomUpReconciliator
 from etna.transforms import AddConstTransform
 from etna.transforms import DateFlagsTransform
+from etna.transforms import TimeSeriesImputerTransform
 
 
 def test_objective(
@@ -53,6 +55,36 @@ def test_objective(
     callback.assert_called_once()
 
 
+@pytest.mark.parametrize("ts_name", ["ts_with_fold_missing_tail", "ts_with_fold_missing_middle"])
+def test_objective_fail_none(
+    ts_name,
+    request,
+    target_metric=MSE(missing_mode="ignore"),
+    metric_aggregation: Literal["mean"] = "mean",
+    metrics=[MSE(missing_mode="ignore")],
+    backtest_params={},
+    initializer=MagicMock(spec=_Initializer),
+    callback=MagicMock(spec=_Callback),
+    pipeline=Pipeline(model=NaiveModel(), transforms=[TimeSeriesImputerTransform()], horizon=7),
+    params_to_tune={},
+):
+    ts = request.getfixturevalue(ts_name)
+    trial = MagicMock()
+    _objective = Tune.objective(
+        ts=ts,
+        pipeline=pipeline,
+        params_to_tune=params_to_tune,
+        target_metric=target_metric,
+        metric_aggregation=metric_aggregation,
+        metrics=metrics,
+        backtest_params=backtest_params,
+        initializer=initializer,
+        callback=callback,
+    )
+    with pytest.raises(ValueError, match="Metric value is None"):
+        _ = _objective(trial)
+
+
 def test_fit_called_tune(
     ts=MagicMock(),
     tune=MagicMock(),
@@ -165,23 +197,30 @@ def test_top_k(
     assert [pipeline.model.lag for pipeline in top_k] == [i for i in range(expected_k)]  # noqa C416
 
 
+@pytest.mark.parametrize("ts_name", ["example_tsds", "ts_with_few_missing"])
 @pytest.mark.parametrize(
     "pipeline",
     [
-        (Pipeline(NaiveModel(1), horizon=7)),
-        (AutoRegressivePipeline(model=NaiveModel(1), horizon=7, transforms=[])),
-        (AutoRegressivePipeline(model=NaiveModel(1), horizon=7, transforms=[DateFlagsTransform()])),
+        (Pipeline(NaiveModel(1), transforms=[TimeSeriesImputerTransform()], horizon=7)),
+        (AutoRegressivePipeline(model=NaiveModel(1), transforms=[TimeSeriesImputerTransform()], horizon=7)),
+        (
+            AutoRegressivePipeline(
+                model=NaiveModel(1), transforms=[DateFlagsTransform(), TimeSeriesImputerTransform()], horizon=7
+            )
+        ),
     ],
 )
-def test_tune_run(example_tsds, optuna_storage, pipeline):
+def test_tune_run(ts_name, optuna_storage, pipeline, request):
+    ts = request.getfixturevalue(ts_name)
     tune = Tune(
         pipeline=pipeline,
-        target_metric=MAE(),
+        target_metric=MSE(missing_mode="ignore"),
+        metrics=[MSE(missing_mode="ignore")],
         metric_aggregation="median",
         horizon=7,
         storage=optuna_storage,
     )
-    tune.fit(ts=example_tsds, n_trials=2)
+    tune.fit(ts=ts, n_trials=2)
 
     assert len(tune._optuna.study.trials) == 2
     assert len(tune.summary()) == 2
diff --git a/tests/test_metrics/test_metrics_utils.py b/tests/test_metrics/test_metrics_utils.py
index 8872ad7af..0bceec965 100644
--- a/tests/test_metrics/test_metrics_utils.py
+++ b/tests/test_metrics/test_metrics_utils.py
@@ -1,11 +1,16 @@
+from typing import Any
+from typing import Dict
 from typing import Tuple
 
 import numpy as np
+import pandas as pd
+import pytest
 
 from etna.datasets import TSDataset
 from etna.metrics import MAE
 from etna.metrics import MAPE
 from etna.metrics import MSE
+from etna.metrics.utils import aggregate_metrics_df
 from etna.metrics.utils import compute_metrics
 
 
@@ -21,3 +26,160 @@ def test_compute_metrics(train_test_dfs: Tuple[TSDataset, TSDataset]):
     ]
     result = compute_metrics(metrics=metrics, y_true=true_df, y_pred=forecast_df)
     np.testing.assert_array_equal(sorted(expected_keys), sorted(result.keys()))
+
+
+@pytest.fixture
+def metrics_df_with_folds() -> pd.DataFrame:
+    df = pd.DataFrame(
+        {
+            "segment": ["segment_0"] * 3 + ["segment_1"] * 3 + ["segment_2"] * 3,
+            "MAE": [1.0, 2.0, 3.0, 2.0, 3.0, 4.0, 3.0, 4.0, 5.0],
+            "MSE": [2.0, 3.0, 4.0, 3.0, 4.0, 5.0, 5.0, 6.0, 7.0],
+            "fold_number": [0, 1, 2, 0, 1, 2, 0, 1, 2],
"fold_number": [0, 1, 2, 0, 1, 2, 0, 1, 2], + } + ) + return df + + +@pytest.fixture +def metrics_df_no_folds(metrics_df_with_folds) -> pd.DataFrame: + df = metrics_df_with_folds + df = df.groupby("segment").mean().reset_index().drop("fold_number", axis=1) + return df + + +@pytest.fixture +def aggregated_metrics_df() -> Dict[str, Any]: + result = { + "MAE_median": 3.0, + "MAE_mean": 3.0, + "MAE_std": 0.816496580927726, + "MAE_size": 3.0, + "MAE_percentile_5": 2.1, + "MAE_percentile_25": 2.5, + "MAE_percentile_75": 3.5, + "MAE_percentile_95": 3.9, + "MSE_median": 4.0, + "MSE_mean": 4.333333333333333, + "MSE_std": 1.247219128924647, + "MSE_size": 3.0, + "MSE_percentile_5": 3.1, + "MSE_percentile_25": 3.5, + "MSE_percentile_75": 5.0, + "MSE_percentile_95": 5.8, + } + return result + + +@pytest.fixture +def metrics_df_with_folds_with_missing() -> pd.DataFrame: + df = pd.DataFrame( + { + "segment": ["segment_0"] * 3 + ["segment_1"] * 3 + ["segment_2"] * 3, + "MAE": [None, 2.0, 3.0, 2.0, 3.0, 4.0, 3.0, 4.0, 5.0], + "MSE": [2.0, 3.0, 4.0, 3.0, 4.0, 5.0, 5.0, 6.0, 7.0], + "fold_number": [0, 1, 2, 0, 1, 2, 0, 1, 2], + } + ) + return df + + +@pytest.fixture +def metrics_df_no_folds_with_missing(metrics_df_with_folds_with_missing) -> pd.DataFrame: + df = metrics_df_with_folds_with_missing + df = ( + df.groupby("segment") + .apply(lambda x: x.mean(skipna=False, numeric_only=False)) + .reset_index() + .drop("fold_number", axis=1) + ) + return df + + +@pytest.fixture +def aggregated_metrics_df_with_missing() -> Dict[str, Any]: + result = { + "MAE_mean": 3.5, + "MAE_median": 3.5, + "MAE_std": 0.5, + "MAE_size": 2.0, + "MAE_percentile_5": 3.05, + "MAE_percentile_25": 3.25, + "MAE_percentile_75": 3.75, + "MAE_percentile_95": 3.95, + "MSE_mean": 4.333333333333333, + "MSE_median": 4.0, + "MSE_std": 1.247219128924647, + "MSE_size": 3.0, + "MSE_percentile_5": 3.1, + "MSE_percentile_25": 3.5, + "MSE_percentile_75": 5.0, + "MSE_percentile_95": 5.8, + } + return result + + +@pytest.fixture +def metrics_df_with_folds_with_full_missing() -> pd.DataFrame: + df = pd.DataFrame( + { + "segment": ["segment_0"] * 3 + ["segment_1"] * 3 + ["segment_2"] * 3, + "MAE": [None, 2.0, 3.0, 2.0, None, 4.0, 3.0, 4.0, None], + "MSE": [2.0, 3.0, 4.0, 3.0, 4.0, 5.0, 5.0, 6.0, 7.0], + "fold_number": [0, 1, 2, 0, 1, 2, 0, 1, 2], + } + ) + return df + + +@pytest.fixture +def metrics_df_no_folds_with_full_missing(metrics_df_with_folds_with_full_missing) -> pd.DataFrame: + df = metrics_df_with_folds_with_full_missing + df = ( + df.groupby("segment") + .apply(lambda x: x.mean(skipna=False, numeric_only=False)) + .reset_index() + .drop("fold_number", axis=1) + ) + return df + + +@pytest.fixture +def aggregated_metrics_df_with_full_missing() -> Dict[str, Any]: + result = { + "MAE_mean": None, + "MAE_median": None, + "MAE_std": None, + "MAE_size": 0.0, + "MAE_percentile_5": None, + "MAE_percentile_25": None, + "MAE_percentile_75": None, + "MAE_percentile_95": None, + "MSE_mean": 4.333333333333333, + "MSE_median": 4.0, + "MSE_std": 1.247219128924647, + "MSE_size": 3.0, + "MSE_percentile_5": 3.1, + "MSE_percentile_25": 3.5, + "MSE_percentile_75": 5.0, + "MSE_percentile_95": 5.8, + } + return result + + +@pytest.mark.parametrize( + "df_name, answer_name", + [ + ("metrics_df_with_folds", "aggregated_metrics_df"), + ("metrics_df_no_folds", "aggregated_metrics_df"), + ("metrics_df_with_folds_with_missing", "aggregated_metrics_df_with_missing"), + ("metrics_df_no_folds_with_missing", "aggregated_metrics_df_with_missing"), + 
("metrics_df_with_folds_with_full_missing", "aggregated_metrics_df_with_full_missing"), + ("metrics_df_no_folds_with_full_missing", "aggregated_metrics_df_with_full_missing"), + ], +) +def test_aggregate_metrics_df(df_name, answer_name, request): + metrics_df = request.getfixturevalue(df_name) + answer = request.getfixturevalue(answer_name) + result = aggregate_metrics_df(metrics_df) + assert result == answer From 3a4d08fb474e0779da534c7a03219bb1f1462de1 Mon Sep 17 00:00:00 2001 From: Dmitry Bunin Date: Wed, 11 Dec 2024 16:07:47 +0300 Subject: [PATCH 2/9] fix: set numeric_only=True --- etna/metrics/utils.py | 2 +- etna/pipeline/base.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/etna/metrics/utils.py b/etna/metrics/utils.py index 0d424b0cb..ca69152dd 100644 --- a/etna/metrics/utils.py +++ b/etna/metrics/utils.py @@ -140,7 +140,7 @@ def aggregate_metrics_df(metrics_df: pd.DataFrame) -> Dict[str, Optional[float]] if "fold_number" in metrics_df.columns: metrics_dict = ( metrics_df.groupby("segment") - .apply(lambda x: x.mean(skipna=False, numeric_only=False)) + .apply(lambda x: x.mean(skipna=False, numeric_only=True)) .reset_index() .drop(["segment", "fold_number"], axis=1) .apply(list(METRICS_AGGREGATION_MAP.values())) diff --git a/etna/pipeline/base.py b/etna/pipeline/base.py index 3166a77ac..46b5a7d70 100644 --- a/etna/pipeline/base.py +++ b/etna/pipeline/base.py @@ -858,7 +858,7 @@ def _get_backtest_metrics(self, aggregate_metrics: bool = False) -> pd.DataFrame if aggregate_metrics: metrics_df = ( metrics_df.groupby("segment") - .apply(lambda x: x.mean(skipna=False, numeric_only=False)) + .apply(lambda x: x.mean(skipna=False, numeric_only=True)) .reset_index() .drop(self._fold_column, axis=1) ) From 2ba6c87761eb194a7fbedc5c32ba77fb6a0184ce Mon Sep 17 00:00:00 2001 From: Dmitry Bunin Date: Wed, 11 Dec 2024 17:30:45 +0300 Subject: [PATCH 3/9] fix: fix file logger tests --- tests/test_loggers/test_file_logger.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/test_loggers/test_file_logger.py b/tests/test_loggers/test_file_logger.py index 8f3385435..1e120534f 100644 --- a/tests/test_loggers/test_file_logger.py +++ b/tests/test_loggers/test_file_logger.py @@ -154,6 +154,7 @@ def test_base_file_logger_log_backtest_run(example_tsds: TSDataset): "median", "mean", "std", + "size", "percentile_5", "percentile_25", "percentile_75", @@ -213,7 +214,16 @@ def test_base_file_logger_log_backtest_metrics(example_tsds: TSDataset, aggregat with open(crossval_results_folder.joinpath("metrics_summary.json"), "r") as inf: metrics_summary = json.load(inf) - statistic_keys = ["median", "mean", "std", "percentile_5", "percentile_25", "percentile_75", "percentile_95"] + statistic_keys = [ + "median", + "mean", + "std", + "size", + "percentile_5", + "percentile_25", + "percentile_75", + "percentile_95", + ] assert len(metrics_summary.keys()) == len(metrics) * len(statistic_keys) tslogger.remove(idx) From 594d15e306b0764714519c3b526133c8af6a2e42 Mon Sep 17 00:00:00 2001 From: Dmitry Bunin Date: Wed, 11 Dec 2024 18:45:00 +0300 Subject: [PATCH 4/9] fix: add ignoring overflow warning in test --- etna/metrics/utils.py | 2 +- tests/test_auto/test_tune.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/etna/metrics/utils.py b/etna/metrics/utils.py index ca69152dd..8cc17a63c 100644 --- a/etna/metrics/utils.py +++ b/etna/metrics/utils.py @@ -77,7 +77,7 @@ def func(x): with warnings.catch_warnings(): # this helps to prevent warning in case of 
             warnings.filterwarnings(
-                message="Degrees of freedom <=",
+                message="Degrees of freedom <= 0",
                 action="ignore",
             )
             return np.nanstd(a=x.values)
diff --git a/tests/test_auto/test_tune.py b/tests/test_auto/test_tune.py
index 175d05c13..52dc48db9 100644
--- a/tests/test_auto/test_tune.py
+++ b/tests/test_auto/test_tune.py
@@ -124,6 +124,7 @@ def test_init_optuna(
     )
 
 
+@pytest.mark.filterwarnings("ignore: overflow encountered in multiply")
 @pytest.mark.parametrize(
     "params, model",
     [

From 594d15e306b0764714519c3b526133c8af6a2e42 Mon Sep 17 00:00:00 2001
From: Dmitry Bunin
Date: Fri, 13 Dec 2024 14:44:50 +0300
Subject: [PATCH 5/9] fix: rework handling missing values by folds, rework tests

---
 etna/metrics/utils.py                     |  12 +-
 etna/pipeline/base.py                     |   5 +-
 tests/test_auto/conftest.py               |  19 +++
 tests/test_auto/test_auto.py              |  41 ++++---
 tests/test_auto/test_tune.py              |  39 ++++--
 tests/test_metrics/test_metrics_utils.py  | 145 ++++++-----------------
 6 files changed, 115 insertions(+), 146 deletions(-)

diff --git a/etna/metrics/utils.py b/etna/metrics/utils.py
index 8cc17a63c..1830a8ba9 100644
--- a/etna/metrics/utils.py
+++ b/etna/metrics/utils.py
@@ -86,13 +86,13 @@ def func(x):
     return func
 
 
-def size_agg():
-    """Size for pandas agg."""
+def notna_size_agg():
+    """Size of not-na elements for pandas agg."""
 
     def func(x):
         return len(x) - pd.isna(x.values).sum()
 
-    func.__name__ = "size"
+    func.__name__ = "notna_size"
     return func
 
 
@@ -113,14 +113,14 @@ def func(x):
 
 
 MetricAggregationStatistics = Literal[
-    "median", "mean", "std", "size", "percentile_5", "percentile_25", "percentile_75", "percentile_95"
+    "median", "mean", "std", "notna_size", "percentile_5", "percentile_25", "percentile_75", "percentile_95"
 ]
 
 METRICS_AGGREGATION_MAP: Dict[MetricAggregationStatistics, Union[str, Callable]] = {
     "median": median_agg(),
     "mean": mean_agg(),
     "std": std_agg(),
-    "size": size_agg(),
+    "notna_size": notna_size_agg(),
     "percentile_5": percentile(5),
     "percentile_25": percentile(25),
     "percentile_75": percentile(75),
@@ -140,7 +140,7 @@ def aggregate_metrics_df(metrics_df: pd.DataFrame) -> Dict[str, Optional[float]]
     if "fold_number" in metrics_df.columns:
         metrics_dict = (
             metrics_df.groupby("segment")
-            .apply(lambda x: x.mean(skipna=False, numeric_only=True))
+            .mean(numeric_only=False)
             .reset_index()
             .drop(["segment", "fold_number"], axis=1)
             .apply(list(METRICS_AGGREGATION_MAP.values()))
diff --git a/etna/pipeline/base.py b/etna/pipeline/base.py
index 46b5a7d70..c5b5dba3f 100644
--- a/etna/pipeline/base.py
+++ b/etna/pipeline/base.py
@@ -857,10 +857,7 @@ def _get_backtest_metrics(self, aggregate_metrics: bool = False) -> pd.DataFrame
 
         if aggregate_metrics:
             metrics_df = (
-                metrics_df.groupby("segment")
-                .apply(lambda x: x.mean(skipna=False, numeric_only=True))
-                .reset_index()
-                .drop(self._fold_column, axis=1)
+                metrics_df.groupby("segment").mean(numeric_only=False).reset_index().drop(self._fold_column, axis=1)
             )
 
         return metrics_df
diff --git a/tests/test_auto/conftest.py b/tests/test_auto/conftest.py
index fe6c03433..8688468e1 100644
--- a/tests/test_auto/conftest.py
+++ b/tests/test_auto/conftest.py
@@ -80,6 +80,25 @@ def ts_with_fold_missing_middle(random_seed) -> TSDataset:
     return tsds
 
 
+@pytest.fixture
+def ts_with_all_folds_missing_one_segment(random_seed) -> TSDataset:
+    periods = 100
+    df1 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
+    df1["segment"] = "segment_1"
+    df1["target"] = np.random.uniform(10, 20, size=periods)
+    df1.loc[df1.index[-21:], "target"] = np.NaN
+
+    df2 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)}) + df2["segment"] = "segment_2" + df2["target"] = np.random.uniform(-15, 5, size=periods) + + df = pd.concat([df1, df2]).reset_index(drop=True) + df = TSDataset.to_dataset(df) + tsds = TSDataset(df, freq="D") + + return tsds + + @pytest.fixture def ts_with_few_missing(random_seed) -> TSDataset: periods = 100 diff --git a/tests/test_auto/test_auto.py b/tests/test_auto/test_auto.py index 12251e8fd..371741f57 100644 --- a/tests/test_auto/test_auto.py +++ b/tests/test_auto/test_auto.py @@ -48,23 +48,35 @@ def pool_list(): ] +@pytest.mark.parametrize( + "ts_name", + [ + "example_tsds", + "ts_with_few_missing", + "ts_with_fold_missing_tail", + "ts_with_fold_missing_middle", + ], +) def test_objective( - example_tsds, - target_metric=MAE(), + ts_name, + request, + target_metric=MAE(missing_mode="ignore"), metric_aggregation: Literal["mean"] = "mean", - metrics=[MAE()], + metrics=[MAE(missing_mode="ignore")], backtest_params={}, - initializer=MagicMock(spec=_Initializer), - callback=MagicMock(spec=_Callback), relative_params={ "_target_": "etna.pipeline.Pipeline", "horizon": 7, "model": {"_target_": "etna.models.NaiveModel", "lag": 1}, + "transforms": [{"_target_": "etna.transforms.TimeSeriesImputerTransform"}], }, ): + ts = request.getfixturevalue(ts_name) + initializer = MagicMock(spec=_Initializer) + callback = MagicMock(spec=_Callback) trial = MagicMock(relative_params=relative_params) _objective = Auto.objective( - ts=example_tsds, + ts=ts, target_metric=target_metric, metric_aggregation=metric_aggregation, metrics=metrics, @@ -79,13 +91,13 @@ def test_objective( callback.assert_called_once() -@pytest.mark.parametrize("ts_name", ["ts_with_fold_missing_tail", "ts_with_fold_missing_middle"]) +@pytest.mark.parametrize("ts_name", ["ts_with_all_folds_missing_one_segment"]) def test_objective_fail_none( ts_name, request, - target_metric=MSE(missing_mode="ignore"), + target_metric=MAE(missing_mode="ignore"), metric_aggregation: Literal["mean"] = "mean", - metrics=[MSE(missing_mode="ignore")], + metrics=[MAE(missing_mode="ignore")], backtest_params={}, initializer=MagicMock(spec=_Initializer), callback=MagicMock(spec=_Callback), @@ -108,7 +120,8 @@ def test_objective_fail_none( callback=callback, ) - with pytest.raises(ValueError, match="Metric value is None"): + # TODO: discuss the error here + with pytest.raises(ValueError, match="Last train timestamp should be not later"): _ = _objective(trial) @@ -188,8 +201,8 @@ def test_fit_without_tuning_list(ts_name, optuna_storage, pool, request): ts = request.getfixturevalue(ts_name) pool = request.getfixturevalue(pool) auto = Auto( - MSE(missing_mode="ignore"), - metrics=[MSE(missing_mode="ignore")], + MAE(missing_mode="ignore"), + metrics=[MAE(missing_mode="ignore")], pool=pool, metric_aggregation="median", horizon=7, @@ -228,8 +241,8 @@ def test_fit_with_tuning( ): ts = request.getfixturevalue(ts_name) auto = Auto( - MSE(missing_mode="ignore"), - metrics=[MSE(missing_mode="ignore")], + MAE(missing_mode="ignore"), + metrics=[MAE(missing_mode="ignore")], pool=pool, metric_aggregation="median", horizon=7, diff --git a/tests/test_auto/test_tune.py b/tests/test_auto/test_tune.py index 52dc48db9..ac857b3d3 100644 --- a/tests/test_auto/test_tune.py +++ b/tests/test_auto/test_tune.py @@ -25,20 +25,31 @@ from etna.transforms import TimeSeriesImputerTransform +@pytest.mark.parametrize( + "ts_name", + [ + "example_tsds", + "ts_with_few_missing", + "ts_with_fold_missing_tail", + 
"ts_with_fold_missing_middle", + ], +) def test_objective( - example_tsds, - target_metric=MAE(), + ts_name, + request, + target_metric=MAE(missing_mode="ignore"), metric_aggregation: Literal["mean"] = "mean", - metrics=[MAE()], + metrics=[MAE(missing_mode="ignore")], backtest_params={}, - initializer=MagicMock(spec=_Initializer), - callback=MagicMock(spec=_Callback), - pipeline=Pipeline(NaiveModel()), + pipeline=Pipeline(model=NaiveModel(), transforms=[TimeSeriesImputerTransform()], horizon=7), params_to_tune={}, ): + ts = request.getfixturevalue(ts_name) + initializer = MagicMock(spec=_Initializer) + callback = MagicMock(spec=_Callback) trial = MagicMock() _objective = Tune.objective( - ts=example_tsds, + ts=ts, pipeline=pipeline, params_to_tune=params_to_tune, target_metric=target_metric, @@ -55,13 +66,13 @@ def test_objective( callback.assert_called_once() -@pytest.mark.parametrize("ts_name", ["ts_with_fold_missing_tail", "ts_with_fold_missing_middle"]) +@pytest.mark.parametrize("ts_name", ["ts_with_all_folds_missing_one_segment"]) def test_objective_fail_none( ts_name, request, - target_metric=MSE(missing_mode="ignore"), + target_metric=MAE(missing_mode="ignore"), metric_aggregation: Literal["mean"] = "mean", - metrics=[MSE(missing_mode="ignore")], + metrics=[MAE(missing_mode="ignore")], backtest_params={}, initializer=MagicMock(spec=_Initializer), callback=MagicMock(spec=_Callback), @@ -81,7 +92,9 @@ def test_objective_fail_none( initializer=initializer, callback=callback, ) - with pytest.raises(ValueError, match="Metric value is None"): + + # TODO: discuss the error here + with pytest.raises(ValueError, match="Last train timestamp should be not later"): _ = _objective(trial) @@ -215,8 +228,8 @@ def test_tune_run(ts_name, optuna_storage, pipeline, request): ts = request.getfixturevalue(ts_name) tune = Tune( pipeline=pipeline, - target_metric=MSE(missing_mode="ignore"), - metrics=[MSE(missing_mode="ignore")], + target_metric=MAE(missing_mode="ignore"), + metrics=[MAE(missing_mode="ignore")], metric_aggregation="median", horizon=7, storage=optuna_storage, diff --git a/tests/test_metrics/test_metrics_utils.py b/tests/test_metrics/test_metrics_utils.py index 9bf259599..38b74baef 100644 --- a/tests/test_metrics/test_metrics_utils.py +++ b/tests/test_metrics/test_metrics_utils.py @@ -34,7 +34,10 @@ def metrics_df_with_folds() -> pd.DataFrame: { "segment": ["segment_0"] * 3 + ["segment_1"] * 3 + ["segment_2"] * 3, "MAE": [1.0, 2.0, 3.0, 2.0, 3.0, 4.0, 3.0, 4.0, 5.0], - "MSE": [2.0, 3.0, 4.0, 3.0, 4.0, 5.0, 5.0, 6.0, 7.0], + "MSE": [None, 3.0, 4.0, 3.0, 4.0, 5.0, 5.0, 6.0, 7.0], + "MAPE": [None, None, None, 20.0, 30.0, 40.0, 30.0, 40.0, 50.0], + "SMAPE": [None, None, None, None, None, None, 50.0, 60.0, 70.0], + "RMSE": [None, None, None, None, None, None, None, None, None], "fold_number": [0, 1, 2, 0, 1, 2, 0, 1, 2], } ) @@ -44,125 +47,53 @@ def metrics_df_with_folds() -> pd.DataFrame: @pytest.fixture def metrics_df_no_folds(metrics_df_with_folds) -> pd.DataFrame: df = metrics_df_with_folds - df = df.groupby("segment").mean().reset_index().drop("fold_number", axis=1) + df = df.groupby("segment").mean(numeric_only=False).reset_index().drop("fold_number", axis=1) return df @pytest.fixture def aggregated_metrics_df() -> Dict[str, Any]: result = { - "MAE_median": 3.0, "MAE_mean": 3.0, + "MAE_median": 3.0, "MAE_std": 0.816496580927726, - "MAE_size": 3.0, + "MAE_notna_size": 3.0, "MAE_percentile_5": 2.1, "MAE_percentile_25": 2.5, "MAE_percentile_75": 3.5, "MAE_percentile_95": 3.9, + "MSE_mean": 
         "MSE_median": 4.0,
-        "MSE_mean": 4.333333333333333,
-        "MSE_std": 1.247219128924647,
-        "MSE_size": 3.0,
-        "MSE_percentile_5": 3.1,
-        "MSE_percentile_25": 3.5,
+        "MSE_std": 1.0801234497346435,
+        "MSE_notna_size": 3.0,
+        "MSE_percentile_5": 3.55,
+        "MSE_percentile_25": 3.75,
         "MSE_percentile_75": 5.0,
         "MSE_percentile_95": 5.8,
+        "MAPE_mean": 35.0,
+        "MAPE_median": 35.0,
+        "MAPE_std": 5.0,
+        "MAPE_notna_size": 2.0,
+        "MAPE_percentile_5": 30.5,
+        "MAPE_percentile_25": 32.5,
+        "MAPE_percentile_75": 37.5,
+        "MAPE_percentile_95": 39.5,
+        "SMAPE_mean": 60.0,
+        "SMAPE_median": 60.0,
+        "SMAPE_std": 0.0,
+        "SMAPE_notna_size": 1.0,
+        "SMAPE_percentile_5": 60.0,
+        "SMAPE_percentile_25": 60.0,
+        "SMAPE_percentile_75": 60.0,
+        "SMAPE_percentile_95": 60.0,
+        "RMSE_mean": None,
+        "RMSE_median": None,
+        "RMSE_std": None,
+        "RMSE_notna_size": 0.0,
+        "RMSE_percentile_5": None,
+        "RMSE_percentile_25": None,
+        "RMSE_percentile_75": None,
+        "RMSE_percentile_95": None,
     }
     return result
-
-
-@pytest.fixture
-def metrics_df_with_folds_with_missing() -> pd.DataFrame:
-    df = pd.DataFrame(
-        {
-            "segment": ["segment_0"] * 3 + ["segment_1"] * 3 + ["segment_2"] * 3,
-            "MAE": [None, 2.0, 3.0, 2.0, 3.0, 4.0, 3.0, 4.0, 5.0],
-            "MSE": [2.0, 3.0, 4.0, 3.0, 4.0, 5.0, 5.0, 6.0, 7.0],
-            "fold_number": [0, 1, 2, 0, 1, 2, 0, 1, 2],
-        }
-    )
-    return df
-
-
-@pytest.fixture
-def metrics_df_no_folds_with_missing(metrics_df_with_folds_with_missing) -> pd.DataFrame:
-    df = metrics_df_with_folds_with_missing
-    df = (
-        df.groupby("segment")
-        .apply(lambda x: x.mean(skipna=False, numeric_only=False))
-        .reset_index()
-        .drop("fold_number", axis=1)
-    )
-    return df
-
-
-@pytest.fixture
-def aggregated_metrics_df_with_missing() -> Dict[str, Any]:
-    result = {
-        "MAE_mean": 3.5,
-        "MAE_median": 3.5,
-        "MAE_std": 0.5,
-        "MAE_size": 2.0,
-        "MAE_percentile_5": 3.05,
-        "MAE_percentile_25": 3.25,
-        "MAE_percentile_75": 3.75,
-        "MAE_percentile_95": 3.95,
-        "MSE_mean": 4.333333333333333,
-        "MSE_median": 4.0,
-        "MSE_std": 1.247219128924647,
-        "MSE_size": 3.0,
-        "MSE_percentile_5": 3.1,
-        "MSE_percentile_25": 3.5,
-        "MSE_percentile_75": 5.0,
-        "MSE_percentile_95": 5.8,
-    }
-    return result
-
-
-@pytest.fixture
-def metrics_df_with_folds_with_full_missing() -> pd.DataFrame:
-    df = pd.DataFrame(
-        {
-            "segment": ["segment_0"] * 3 + ["segment_1"] * 3 + ["segment_2"] * 3,
-            "MAE": [None, 2.0, 3.0, 2.0, None, 4.0, 3.0, 4.0, None],
-            "MSE": [2.0, 3.0, 4.0, 3.0, 4.0, 5.0, 5.0, 6.0, 7.0],
-            "fold_number": [0, 1, 2, 0, 1, 2, 0, 1, 2],
-        }
-    )
-    return df
-
-
-@pytest.fixture
-def metrics_df_no_folds_with_full_missing(metrics_df_with_folds_with_full_missing) -> pd.DataFrame:
-    df = metrics_df_with_folds_with_full_missing
-    df = (
-        df.groupby("segment")
-        .apply(lambda x: x.mean(skipna=False, numeric_only=False))
-        .reset_index()
-        .drop("fold_number", axis=1)
-    )
-    return df
-
-
-@pytest.fixture
-def aggregated_metrics_df_with_full_missing() -> Dict[str, Any]:
-    result = {
-        "MAE_mean": None,
-        "MAE_median": None,
-        "MAE_std": None,
-        "MAE_size": 0.0,
-        "MAE_percentile_5": None,
-        "MAE_percentile_25": None,
-        "MAE_percentile_75": None,
-        "MAE_percentile_95": None,
-        "MSE_mean": 4.333333333333333,
-        "MSE_median": 4.0,
-        "MSE_std": 1.247219128924647,
-        "MSE_size": 3.0,
-        "MSE_percentile_5": 3.1,
-        "MSE_percentile_25": 3.5,
-        "MSE_percentile_75": 5.0,
-        "MSE_percentile_95": 5.8,
-    }
-    return result
 
 
 @pytest.mark.parametrize(
@@ -172,10 +103,6 @@ def aggregated_metrics_df_with_full_missing() -> Dict[str, Any]:
     "df_name, answer_name",
     [
         ("metrics_df_with_folds", "aggregated_metrics_df"),
         ("metrics_df_no_folds", "aggregated_metrics_df"),
"aggregated_metrics_df"), - ("metrics_df_with_folds_with_missing", "aggregated_metrics_df_with_missing"), - ("metrics_df_no_folds_with_missing", "aggregated_metrics_df_with_missing"), - ("metrics_df_with_folds_with_full_missing", "aggregated_metrics_df_with_full_missing"), - ("metrics_df_no_folds_with_full_missing", "aggregated_metrics_df_with_full_missing"), ], ) def test_aggregate_metrics_df(df_name, answer_name, request): From 1cf6d7b9f6a03c5887a6d2b0d419a3ee698ec2c0 Mon Sep 17 00:00:00 2001 From: Dmitry Bunin Date: Fri, 13 Dec 2024 14:47:32 +0300 Subject: [PATCH 6/9] style: fix styling --- tests/test_auto/test_auto.py | 1 - tests/test_auto/test_tune.py | 1 - 2 files changed, 2 deletions(-) diff --git a/tests/test_auto/test_auto.py b/tests/test_auto/test_auto.py index 371741f57..8d180ebb4 100644 --- a/tests/test_auto/test_auto.py +++ b/tests/test_auto/test_auto.py @@ -11,7 +11,6 @@ from etna.auto.auto import _Callback from etna.auto.auto import _Initializer from etna.metrics import MAE -from etna.metrics import MSE from etna.models import LinearPerSegmentModel from etna.models import MovingAverageModel from etna.models import NaiveModel diff --git a/tests/test_auto/test_tune.py b/tests/test_auto/test_tune.py index ac857b3d3..efbf443b7 100644 --- a/tests/test_auto/test_tune.py +++ b/tests/test_auto/test_tune.py @@ -13,7 +13,6 @@ from etna.distributions import FloatDistribution from etna.distributions import IntDistribution from etna.metrics import MAE -from etna.metrics import MSE from etna.models import NaiveModel from etna.models import SimpleExpSmoothingModel from etna.pipeline import AutoRegressivePipeline From 04aa3013ab73cab61ab38f1d36ebab23f5fa2f39 Mon Sep 17 00:00:00 2001 From: Dmitry Bunin Date: Mon, 16 Dec 2024 14:17:06 +0300 Subject: [PATCH 7/9] fix: add signature --- etna/metrics/utils.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/etna/metrics/utils.py b/etna/metrics/utils.py index 1830a8ba9..e563010a7 100644 --- a/etna/metrics/utils.py +++ b/etna/metrics/utils.py @@ -41,7 +41,7 @@ def compute_metrics( def mean_agg(): """Mean for pandas agg.""" - def func(x): + def func(x: pd.Series): with warnings.catch_warnings(): # this helps to prevent warning in case of all nans warnings.filterwarnings( @@ -57,7 +57,7 @@ def func(x): def median_agg(): """Median for pandas agg.""" - def func(x): + def func(x: pd.Series): with warnings.catch_warnings(): # this helps to prevent warning in case of all nans warnings.filterwarnings( @@ -73,7 +73,7 @@ def func(x): def std_agg(): """Std for pandas agg.""" - def func(x): + def func(x: pd.Series): with warnings.catch_warnings(): # this helps to prevent warning in case of all nans warnings.filterwarnings( @@ -89,7 +89,7 @@ def func(x): def notna_size_agg(): """Size of not-na elements for pandas agg.""" - def func(x): + def func(x: pd.Series): return len(x) - pd.isna(x.values).sum() func.__name__ = "notna_size" @@ -99,7 +99,7 @@ def func(x): def percentile(n: int): """Percentile for pandas agg.""" - def func(x): + def func(x: pd.Series): with warnings.catch_warnings(): # this helps to prevent warning in case of all nans warnings.filterwarnings( From 3c92b14fc25bf020ee439f52009185e4403394ba Mon Sep 17 00:00:00 2001 From: Dmitry Bunin Date: Mon, 16 Dec 2024 15:24:26 +0300 Subject: [PATCH 8/9] fix: rework tests to raise valid error --- tests/test_auto/conftest.py | 22 +++++++++++++++++++++- tests/test_auto/test_auto.py | 10 +++++++--- tests/test_auto/test_tune.py | 10 +++++++--- 3 files changed, 35 insertions(+), 7 

diff --git a/tests/test_auto/conftest.py b/tests/test_auto/conftest.py
index 8688468e1..66584e926 100644
--- a/tests/test_auto/conftest.py
+++ b/tests/test_auto/conftest.py
@@ -86,7 +86,7 @@ def ts_with_all_folds_missing_one_segment(random_seed) -> TSDataset:
     df1 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
     df1["segment"] = "segment_1"
     df1["target"] = np.random.uniform(10, 20, size=periods)
-    df1.loc[df1.index[-21:], "target"] = np.NaN
+    df1.loc[df1.index[-40:], "target"] = np.NaN
 
     df2 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
     df2["segment"] = "segment_2"
@@ -99,6 +99,26 @@ def ts_with_all_folds_missing_one_segment(random_seed) -> TSDataset:
     return tsds
 
 
+@pytest.fixture
+def ts_with_all_folds_missing_all_segments(random_seed) -> TSDataset:
+    periods = 100
+    df1 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
+    df1["segment"] = "segment_1"
+    df1["target"] = np.random.uniform(10, 20, size=periods)
+    df1.loc[df1.index[-40:], "target"] = np.NaN
+
+    df2 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
+    df2["segment"] = "segment_2"
+    df2["target"] = np.random.uniform(-15, 5, size=periods)
+    df2.loc[df2.index[-40:], "target"] = np.NaN
+
+    df = pd.concat([df1, df2]).reset_index(drop=True)
+    df = TSDataset.to_dataset(df)
+    tsds = TSDataset(df, freq="D")
+
+    return tsds
+
+
 @pytest.fixture
 def ts_with_few_missing(random_seed) -> TSDataset:
     periods = 100
diff --git a/tests/test_auto/test_auto.py b/tests/test_auto/test_auto.py
index 8d180ebb4..76f8e44e4 100644
--- a/tests/test_auto/test_auto.py
+++ b/tests/test_auto/test_auto.py
@@ -47,6 +47,7 @@ def pool_list():
     ]
 
 
+@patch("etna.pipeline.FoldMask.validate_on_dataset", return_value=MagicMock())  # TODO: remove after fix
 @pytest.mark.parametrize(
     "ts_name",
     [
@@ -54,9 +55,11 @@ def pool_list():
         "ts_with_few_missing",
         "ts_with_fold_missing_tail",
         "ts_with_fold_missing_middle",
+        "ts_with_all_folds_missing_one_segment",
     ],
 )
 def test_objective(
+    validate_on_dataset_mock,
     ts_name,
     request,
     target_metric=MAE(missing_mode="ignore"),
@@ -90,8 +93,10 @@ def test_objective(
     callback.assert_called_once()
 
 
-@pytest.mark.parametrize("ts_name", ["ts_with_all_folds_missing_one_segment"])
+@patch("etna.pipeline.FoldMask.validate_on_dataset", return_value=MagicMock())  # TODO: remove after fix
+@pytest.mark.parametrize("ts_name", ["ts_with_all_folds_missing_all_segments"])
 def test_objective_fail_none(
+    validate_on_dataset_mock,
     ts_name,
     request,
     target_metric=MAE(missing_mode="ignore"),
@@ -119,8 +124,7 @@ def test_objective_fail_none(
         callback=callback,
     )
 
-    # TODO: discuss the error here
-    with pytest.raises(ValueError, match="Last train timestamp should be not later"):
+    with pytest.raises(ValueError, match="Metric value is None"):
         _ = _objective(trial)
 
 
diff --git a/tests/test_auto/test_tune.py b/tests/test_auto/test_tune.py
index efbf443b7..41562994d 100644
--- a/tests/test_auto/test_tune.py
+++ b/tests/test_auto/test_tune.py
@@ -24,6 +24,7 @@
 from etna.transforms import TimeSeriesImputerTransform
 
 
+@patch("etna.pipeline.FoldMask.validate_on_dataset", return_value=MagicMock())  # TODO: remove after fix
 @pytest.mark.parametrize(
     "ts_name",
     [
@@ -31,9 +32,11 @@
         "ts_with_few_missing",
         "ts_with_fold_missing_tail",
         "ts_with_fold_missing_middle",
+        "ts_with_all_folds_missing_one_segment",
     ],
 )
 def test_objective(
+    validate_on_dataset_mock,
     ts_name,
     request,
     target_metric=MAE(missing_mode="ignore"),
@@ -65,8 +68,10 @@
callback.assert_called_once() -@pytest.mark.parametrize("ts_name", ["ts_with_all_folds_missing_one_segment"]) +@patch("etna.pipeline.FoldMask.validate_on_dataset", return_value=MagicMock()) # TODO: remove after fix +@pytest.mark.parametrize("ts_name", ["ts_with_all_folds_missing_all_segments"]) def test_objective_fail_none( + validate_on_dataset_mock, ts_name, request, target_metric=MAE(missing_mode="ignore"), @@ -92,8 +97,7 @@ def test_objective_fail_none( callback=callback, ) - # TODO: discuss the error here - with pytest.raises(ValueError, match="Last train timestamp should be not later"): + with pytest.raises(ValueError, match="Metric value is None"): _ = _objective(trial) From 69b782a33ee3cab8e6c5861e7b902ee22b8f87af Mon Sep 17 00:00:00 2001 From: Dmitry Bunin Date: Tue, 17 Dec 2024 11:32:00 +0300 Subject: [PATCH 9/9] chore: update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d505bdeb..6e3d5c379 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,7 +33,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add parameter `missing_mode` into `MAE` metric ([#523](https://github.com/etna-team/etna/pull/523)) - Add parameter `missing_mode` into `MAPE` and `SMAPE` metrics ([#524](https://github.com/etna-team/etna/pull/524)) - -- +- Update `aggregate_metrics_df` to work with `None` values ([#522](https://github.com/etna-team/etna/pull/522)) - - -
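
Note: a minimal usage sketch of the behaviour this series gives `aggregate_metrics_df` (the input values below are illustrative, not taken from the patches). Per-fold values are averaged per segment with pandas' default skipna, so a segment with no defined values contributes NaN; the NaN-aware statistics then skip it, `notna_size` counts the segments that do have a value, and statistics over all-missing columns come back as `None`.

    import pandas as pd

    from etna.metrics.utils import aggregate_metrics_df

    # segment_0 has no defined MAE in any fold; segment_1 has values in both folds
    metrics_df = pd.DataFrame(
        {
            "segment": ["segment_0", "segment_0", "segment_1", "segment_1"],
            "MAE": [None, None, 2.0, 4.0],
            "fold_number": [0, 1, 0, 1],
        }
    )

    result = aggregate_metrics_df(metrics_df)
    # Per-segment means are [NaN, 3.0]; NaN-aware statistics ignore segment_0.
    print(result["MAE_mean"])        # 3.0 -- nanmean over the one non-missing segment
    print(result["MAE_notna_size"])  # 1   -- number of segments with a defined value
    print(result["MAE_std"])         # 0.0 -- nanstd over a single value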