From 82c1be282a8cb13c97b4ecc1c1d82ba2c50aa644 Mon Sep 17 00:00:00 2001
From: d-a-bunin <142778107+d-a-bunin@users.noreply.github.com>
Date: Wed, 11 Dec 2024 15:17:16 +0300
Subject: [PATCH] Add `MissingCounter` metric (#520)

---
 CHANGELOG.md                                  |  4 ++
 docs/source/api_reference/metrics.rst         |  1 +
 etna/metrics/__init__.py                      |  1 +
 etna/metrics/functional_metrics.py            | 53 +++++++++++++-
 etna/metrics/metrics.py                       | 58 ++++++++++++++-
 tests/test_metrics/test_functional_metrics.py | 70 ++++++++++++++++++-
 tests/test_metrics/test_metrics.py            | 61 ++++++++++------
 7 files changed, 224 insertions(+), 24 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6f19df43d..69f84d398 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,6 +17,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 -
 -
 -
+- Add `MissingCounter` metric ([#520](https://github.com/etna-team/etna/pull/520))
+-
+-
+-
 -
 -
 -
diff --git a/docs/source/api_reference/metrics.rst b/docs/source/api_reference/metrics.rst
index fa5607010..5e41bf400 100644
--- a/docs/source/api_reference/metrics.rst
+++ b/docs/source/api_reference/metrics.rst
@@ -47,6 +47,7 @@ Scalar metrics:
     MaxDeviation
     MedAE
     Sign
+    MissingCounter
 
 Interval metrics:
 
diff --git a/etna/metrics/__init__.py b/etna/metrics/__init__.py
index 38235e2fd..508f50c04 100644
--- a/etna/metrics/__init__.py
+++ b/etna/metrics/__init__.py
@@ -32,5 +32,6 @@ from etna.metrics.metrics import WAPE
 from etna.metrics.metrics import MaxDeviation
 from etna.metrics.metrics import MedAE
+from etna.metrics.metrics import MissingCounter
 from etna.metrics.metrics import Sign
 from etna.metrics.utils import compute_metrics
diff --git a/etna/metrics/functional_metrics.py b/etna/metrics/functional_metrics.py
index 7feb319ad..e3966c597 100644
--- a/etna/metrics/functional_metrics.py
+++ b/etna/metrics/functional_metrics.py
@@ -296,4 +296,55 @@ def wape(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Ar
     return np.sum(np.abs(y_true_array - y_pred_array), axis=axis) / np.sum(np.abs(y_true_array), axis=axis)  # type: ignore
 
 
-__all__ = ["mae", "mse", "msle", "medae", "r2_score", "mape", "smape", "sign", "max_deviation", "rmse", "wape"]
+def count_missing_values(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike:
+    """Count missing values in ``y_true``.
+
+    .. math::
+        MissingCounter(y\_true, y\_pred) = \\sum_{i=1}^{n}{isnan(y\_true_i)}
+
+    Parameters
+    ----------
+    y_true:
+        array-like of shape (n_samples,) or (n_samples, n_outputs)
+
+        Ground truth (correct) target values.
+
+    y_pred:
+        array-like of shape (n_samples,) or (n_samples, n_outputs)
+
+        Estimated target values.
+
+    multioutput:
+        Defines aggregating of multiple output values
+        (see :py:class:`~etna.metrics.functional_metrics.FunctionalMetricMultioutput`).
+
+    Returns
+    -------
+    :
+        A floating point value, or an array of floating point values,
+        one for each individual target.
+    """
+    y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred)
+
+    if len(y_true_array.shape) != len(y_pred_array.shape):
+        raise ValueError("Shapes of the labels must be the same")
+
+    axis = _get_axis_by_multioutput(multioutput)
+
+    return np.sum(np.isnan(y_true_array), axis=axis).astype(float)
+
+
+__all__ = [
+    "mae",
+    "mse",
+    "msle",
+    "medae",
+    "r2_score",
+    "mape",
+    "smape",
+    "sign",
+    "max_deviation",
+    "rmse",
+    "wape",
+    "count_missing_values",
+]
diff --git a/etna/metrics/metrics.py b/etna/metrics/metrics.py
index 67088a811..e6b3e82bd 100644
--- a/etna/metrics/metrics.py
+++ b/etna/metrics/metrics.py
@@ -2,6 +2,7 @@
 
 from etna.metrics.base import Metric
 from etna.metrics.base import MetricWithMissingHandling
+from etna.metrics.functional_metrics import count_missing_values
 from etna.metrics.functional_metrics import mae
 from etna.metrics.functional_metrics import mape
 from etna.metrics.functional_metrics import max_deviation
@@ -417,4 +418,59 @@ def greater_is_better(self) -> bool:
         return False
 
 
-__all__ = ["MAE", "MSE", "RMSE", "R2", "MSLE", "MAPE", "SMAPE", "MedAE", "Sign", "MaxDeviation", "WAPE"]
+class MissingCounter(MetricWithMissingHandling):
+    """Missing values counter with multi-segment computation support.
+
+    .. math::
+        MissingCounter(y\_true, y\_pred) = \\sum_{i=1}^{n}{isnan(y\_true_i)}
+
+    Notes
+    -----
+    You can read more about logic of multi-segment metrics in Metric docs.
+    """
+
+    def __init__(self, mode: str = "per-segment", **kwargs):
+        """Init metric.
+
+        Parameters
+        ----------
+        mode:
+            "macro" or "per-segment", way to aggregate metric values over segments:
+
+            * if "macro" computes average value
+
+            * if "per-segment" -- does not aggregate metrics
+
+            See :py:class:`~etna.metrics.base.MetricAggregationMode`.
+
+        kwargs:
+            metric's computation arguments
+        """
+        count_missing_values_per_output = partial(count_missing_values, multioutput="raw_values")
+        super().__init__(
+            mode=mode,
+            metric_fn=count_missing_values_per_output,
+            metric_fn_signature="matrix_to_array",
+            missing_mode="ignore",
+            **kwargs,
+        )
+
+    @property
+    def greater_is_better(self) -> None:
+        """Whether higher metric value is better."""
+        return None
+
+
+__all__ = [
+    "MAE",
+    "MSE",
+    "RMSE",
+    "R2",
+    "MSLE",
+    "MAPE",
+    "SMAPE",
+    "MedAE",
+    "Sign",
+    "MaxDeviation",
+    "WAPE",
+    "MissingCounter",
+]
diff --git a/tests/test_metrics/test_functional_metrics.py b/tests/test_metrics/test_functional_metrics.py
index ba0e2436c..b4dca0017 100644
--- a/tests/test_metrics/test_functional_metrics.py
+++ b/tests/test_metrics/test_functional_metrics.py
@@ -13,6 +13,7 @@
 from etna.metrics import sign
 from etna.metrics import smape
 from etna.metrics import wape
+from etna.metrics.functional_metrics import count_missing_values
 
 
 @pytest.fixture()
@@ -43,6 +44,7 @@ def y_pred_1d():
         (sign, -1),
         (max_deviation, 2),
         (wape, 1 / 2),
+        (count_missing_values, 0),
     ),
 )
 def test_all_1d_metrics(metric, right_metrics_value, y_true_1d, y_pred_1d):
@@ -65,6 +67,7 @@ def test_mle_metric_exception(y_true_1d, y_pred_1d):
         sign,
         max_deviation,
         wape,
+        count_missing_values,
     ),
 )
 def test_all_wrong_mode(metric, y_true_1d, y_pred_1d):
@@ -95,6 +98,7 @@ def y_pred_2d():
         (sign, 0),
         (max_deviation, 2),
         (wape, 1 / 6),
+        (count_missing_values, 0),
     ),
 )
 def test_all_2d_metrics_joint(metric, right_metrics_value, y_true_2d, y_pred_2d):
@@ -114,6 +118,7 @@ def test_all_2d_metrics_joint(metric, right_metrics_value, y_true_2d, y_pred_2d)
         (sign, {"multioutput": "raw_values"}, [0, 0]),
         (max_deviation, {"multioutput": "raw_values"}, [1, 1]),
         (wape, {"multioutput": "raw_values"}, [0.0952381, 2 / 3]),
+        (count_missing_values, {"multioutput": "raw_values"}, [0, 0]),
     ),
 )
 def test_all_2d_metrics_per_output(metric, params, right_metrics_value, y_true_2d, y_pred_2d):
@@ -177,6 +182,69 @@ def test_all_2d_metrics_per_output(metric, params, right_metrics_value, y_true_2
         ),
     ],
 )
-def test_values_ok(y_true, y_pred, multioutput, expected):
+def test_mse_ok(y_true, y_pred, multioutput, expected):
     result = mse(y_true=y_true, y_pred=y_pred, multioutput=multioutput)
     npt.assert_allclose(result, expected)
+
+
+@pytest.mark.parametrize(
+    "y_true, y_pred, multioutput, expected",
+    [
+        # 1d
+        (np.array([1.0]), np.array([1.0]), "joint", 0.0),
+        (np.array([1.0, 2.0, 3.0]), np.array([3.0, 1.0, 2.0]), "joint", 0.0),
+        (np.array([1.0, np.NaN, 3.0]), np.array([3.0, 1.0, 2.0]), "joint", 1.0),
+        (np.array([1.0, 2.0, 3.0]), np.array([3.0, np.NaN, 2.0]), "joint", 0.0),
+        (np.array([1.0, np.NaN, 3.0]), np.array([3.0, np.NaN, 2.0]), "joint", 1.0),
+        (np.array([1.0, np.NaN, 3.0]), np.array([3.0, 1.0, np.NaN]), "joint", 1.0),
+        (np.array([1.0, np.NaN, np.NaN]), np.array([np.NaN, np.NaN, 2.0]), "joint", 2.0),
+        (np.array([np.NaN, np.NaN, np.NaN]), np.array([3.0, 1.0, 2.0]), "joint", 3.0),
+        # 2d
+        (np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T, np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T, "joint", 0.0),
+        (
+            np.array([[1.0, np.NaN, 3.0], [3.0, 4.0, np.NaN]]).T,
+            np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T,
+            "joint",
+            2.0,
+        ),
+        (
+            np.array([[np.NaN, np.NaN, np.NaN], [3.0, 4.0, 5.0]]).T,
+            np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T,
+            "joint",
+            3.0,
+        ),
+        (
+            np.array([[np.NaN, np.NaN, np.NaN], [np.NaN, np.NaN, np.NaN]]).T,
+            np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T,
+            "joint",
+            6.0,
+        ),
+        (
+            np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T,
+            np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T,
+            "raw_values",
+            np.array([0.0, 0.0]),
+        ),
+        (
+            np.array([[1.0, np.NaN, 3.0], [3.0, 4.0, np.NaN]]).T,
+            np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T,
+            "raw_values",
+            np.array([1.0, 1.0]),
+        ),
+        (
+            np.array([[np.NaN, np.NaN, np.NaN], [3.0, 4.0, 5.0]]).T,
+            np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T,
+            "raw_values",
+            np.array([3.0, 0.0]),
+        ),
+        (
+            np.array([[np.NaN, np.NaN, np.NaN], [np.NaN, np.NaN, np.NaN]]).T,
+            np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T,
+            "raw_values",
+            np.array([3.0, 3.0]),
+        ),
+    ],
+)
+def test_count_missing_values_ok(y_true, y_pred, multioutput, expected):
+    result = count_missing_values(y_true=y_true, y_pred=y_pred, multioutput=multioutput)
+    npt.assert_allclose(result, expected)
diff --git a/tests/test_metrics/test_metrics.py b/tests/test_metrics/test_metrics.py
index 58074ead9..f22835175 100644
--- a/tests/test_metrics/test_metrics.py
+++ b/tests/test_metrics/test_metrics.py
@@ -19,6 +19,7 @@ from etna.metrics import wape
 from etna.metrics.base import Metric
 from etna.metrics.base import MetricAggregationMode
+from etna.metrics.functional_metrics import count_missing_values
 from etna.metrics.metrics import MAE
 from etna.metrics.metrics import MAPE
 from etna.metrics.metrics import MSE
@@ -29,6 +30,7 @@ from etna.metrics.metrics import WAPE
 from etna.metrics.metrics import MaxDeviation
 from etna.metrics.metrics import MedAE
+from etna.metrics.metrics import MissingCounter
 from etna.metrics.metrics import Sign
 from tests.utils import DummyMetric
 from tests.utils import create_dummy_functional_metric
@@ -51,6 +53,7 @@
         (MaxDeviation(), "MaxDeviation(mode = 'per-segment', )"),
         (DummyMetric(), "DummyMetric(mode = 'per-segment', alpha = 1.0, )"),
         (WAPE(), "WAPE(mode = 'per-segment', )"),
+        (MissingCounter(), "MissingCounter(mode = 'per-segment', )"),
     ),
 )
 def test_repr(metric, expected_repr):
@@ -61,7 +64,7 @@ def test_repr(metric, expected_repr):
 
 @pytest.mark.parametrize(
     "metric_class",
-    (MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation, WAPE),
+    (MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation, WAPE, MissingCounter),
 )
 def test_name_class_name(metric_class):
     """Check metrics name property without changing its during inheritance"""
@@ -85,7 +88,9 @@ def test_name_repr(metric_class):
     assert metric_name == true_name
 
 
-@pytest.mark.parametrize("metric_class", (MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation, WAPE))
+@pytest.mark.parametrize(
+    "metric_class", (MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation, WAPE, MissingCounter)
+)
 def test_metrics_macro(metric_class, train_test_dfs):
     """Check metrics interface in 'macro' mode"""
     forecast_df, true_df = train_test_dfs
@@ -95,7 +100,8 @@ def test_metrics_macro(metric_class, train_test_dfs):
 
 
 @pytest.mark.parametrize(
-    "metric_class", (MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation, DummyMetric, WAPE)
+    "metric_class",
+    (MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation, DummyMetric, WAPE, MissingCounter),
 )
 def test_metrics_per_segment(metric_class, train_test_dfs):
     """Check metrics interface in 'per-segment' mode"""
     forecast_df, true_df = train_test_dfs
@@ -108,7 +114,8 @@ def test_metrics_per_segment(metric_class, train_test_dfs):
 
 
 @pytest.mark.parametrize(
-    "metric_class", (MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation, DummyMetric, WAPE)
+    "metric_class",
+    (MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation, DummyMetric, WAPE, MissingCounter),
 )
 def test_metrics_invalid_aggregation(metric_class):
     """Check metrics behavior in case of invalid aggregation multioutput"""
@@ -117,7 +124,8 @@ def test_metrics_invalid_aggregation(metric_class):
 
 
 @pytest.mark.parametrize(
-    "metric_class", (MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation, DummyMetric, WAPE)
+    "metric_class",
+    (MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation, DummyMetric, WAPE, MissingCounter),
 )
 def test_invalid_segments(metric_class, two_dfs_with_different_segments_sets):
     """Check metrics behavior in case of invalid segments sets"""
@@ -128,7 +136,8 @@ def test_invalid_segments(metric_class, two_dfs_with_different_segments_sets):
 
 
 @pytest.mark.parametrize(
-    "metric_class", (MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation, DummyMetric, WAPE)
+    "metric_class",
+    (MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation, DummyMetric, WAPE, MissingCounter),
 )
 def test_invalid_target_columns(metric_class, train_test_dfs):
     """Check metrics behavior in case of no target column in segment"""
@@ -142,7 +151,8 @@ def test_invalid_target_columns(metric_class, train_test_dfs):
 
 
 @pytest.mark.parametrize(
-    "metric_class", (MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation, DummyMetric, WAPE)
+    "metric_class",
+    (MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation, DummyMetric, WAPE, MissingCounter),
 )
 def test_invalid_index(metric_class, two_dfs_with_different_timestamps):
     """Check metrics behavior in case of invalid index"""
@@ -153,7 +163,8 @@ def test_invalid_index(metric_class, two_dfs_with_different_timestamps):
 
 
 @pytest.mark.parametrize(
-    "metric_class", (MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation, DummyMetric, WAPE)
+    "metric_class",
+    (MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation, DummyMetric, WAPE, MissingCounter),
 )
 def test_invalid_nans_pred(metric_class, train_test_dfs):
     """Check metrics behavior in case of nans in prediction."""
@@ -191,7 +202,7 @@ def test_invalid_nans_true(metric, train_test_dfs):
 
 @pytest.mark.parametrize(
     "metric",
-    (MSE(missing_mode="ignore"),),
+    (MSE(missing_mode="ignore"), MissingCounter()),
 )
 def test_invalid_single_nan_ignore(metric, train_test_dfs):
     """Check metrics behavior in case of ignoring one nan in true values."""
@@ -205,25 +216,29 @@ def test_invalid_single_nan_ignore(metric, train_test_dfs):
 
 
 @pytest.mark.parametrize(
-    "metric",
-    (MSE(mode="per-segment", missing_mode="ignore"),),
+    "metric, expected_type",
+    ((MSE(mode="per-segment", missing_mode="ignore"), type(None)), (MissingCounter(mode="per-segment"), float)),
 )
-def test_invalid_segment_nans_ignore_per_segment(metric, train_test_dfs):
+def test_invalid_segment_nans_ignore_per_segment(metric, expected_type, train_test_dfs):
     """Check per-segment metrics behavior in case of ignoring segment of all nans in true values."""
     forecast_df, true_df = train_test_dfs
     true_df.df.iloc[:, 0] = np.NaN
     value = metric(y_true=true_df, y_pred=forecast_df)
+    assert isinstance(value, dict)
     segments = set(forecast_df.df.columns.get_level_values("segment").unique().tolist())
-    assert value.keys() == segments
     empty_segment = true_df.df.columns.get_level_values("segment").unique()[0]
-    assert all(isinstance(cur_value, float) for cur_segment, cur_value in value.items() if cur_segment != empty_segment)
-    assert value[empty_segment] is None
+    assert value.keys() == segments
+    for cur_segment, cur_value in value.items():
+        if cur_segment == empty_segment:
+            assert isinstance(cur_value, expected_type)
+        else:
+            assert isinstance(cur_value, float)
 
 
 @pytest.mark.parametrize(
     "metric",
-    (MSE(mode="macro", missing_mode="ignore"),),
+    (MSE(mode="macro", missing_mode="ignore"), MissingCounter(mode="macro")),
 )
 def test_invalid_segment_nans_ignore_macro(metric, train_test_dfs):
     """Check macro metrics behavior in case of ignoring segment of all nans in true values."""
     forecast_df, true_df = train_test_dfs
@@ -234,15 +249,15 @@ def test_invalid_segment_nans_ignore_macro(metric, train_test_dfs):
 
 
 @pytest.mark.parametrize(
-    "metric",
-    (MSE(mode="macro", missing_mode="ignore"),),
+    "metric, expected_type",
+    ((MSE(mode="macro", missing_mode="ignore"), type(None)), (MissingCounter(mode="macro"), float)),
 )
-def test_invalid_all_nans_ignore_macro(metric, train_test_dfs):
+def test_invalid_all_nans_ignore_macro(metric, expected_type, train_test_dfs):
     """Check macro metrics behavior in case of all nan values in true values."""
     forecast_df, true_df = train_test_dfs
     true_df.df.iloc[:, :] = np.NaN
     value = metric(y_true=true_df, y_pred=forecast_df)
-    assert value is None
+    assert isinstance(value, expected_type)
@@ -260,6 +275,7 @@ def test_invalid_all_nans_ignore_macro(metric, train_test_dfs):
         (MaxDeviation, max_deviation),
         (DummyMetric, create_dummy_functional_metric()),
         (WAPE, wape),
+        (MissingCounter, count_missing_values),
     ),
 )
 def test_metrics_values(metric_class, metric_fn, train_test_dfs):
@@ -310,6 +326,7 @@ def init(self, mode):
         (sign, {"multioutput": "raw_values"}, None),
         (max_deviation, {"multioutput": "raw_values"}, False),
         (wape, {"multioutput": "raw_values"}, False),
+        (count_missing_values, {"multioutput": "raw_values"}, None),
     ),
 )
 def test_metrics_equivalence_of_signatures(metric_fn, matrix_to_array_params, greater_is_better, train_test_dfs):
@@ -332,7 +349,8 @@ def test_metrics_equivalence_of_signatures(metric_fn, matrix_to_array_params, gr
 
 
 @pytest.mark.parametrize(
-    "metric_class", (MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation, DummyMetric, WAPE)
+    "metric_class",
+    (MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation, DummyMetric, WAPE, MissingCounter),
 )
 def test_metric_values_with_changed_segment_order(metric_class, train_test_dfs):
     forecast_df, true_df = train_test_dfs
@@ -366,6 +384,7 @@ def test_metric_values_with_changed_segment_order(metric_class, train_test_dfs):
         (MaxDeviation(), False),
         (DummyMetric(), False),
         (WAPE(), False),
+        (MissingCounter(), None),
     ),
 )
 def test_metrics_greater_is_better(metric, greater_is_better):
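
---

Reviewer's note: a quick local sanity check, mirroring the parametrized cases in `test_count_missing_values_ok` above. This is a minimal sketch assuming the patch is applied to an etna checkout; the imports are exactly the ones the patch itself adds, and the printed values come from the test table.

```python
import numpy as np

from etna.metrics import MissingCounter
from etna.metrics.functional_metrics import count_missing_values

# NaNs are counted in y_true only; y_pred matters just for the shape check.
y_true = np.array([[1.0, np.nan, 3.0], [3.0, 4.0, np.nan]]).T  # two columns, one NaN each
y_pred = np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T

print(count_missing_values(y_true=y_true, y_pred=y_pred, multioutput="joint"))       # 2.0
print(count_missing_values(y_true=y_true, y_pred=y_pred, multioutput="raw_values"))  # [1. 1.]

# The class form applies the same function per segment of a TSDataset and never
# raises on NaNs in y_true, since it is constructed with missing_mode="ignore".
metric = MissingCounter(mode="per-segment")
print(metric.greater_is_better)  # None -- diagnostic metric, neither direction is "better"
```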