From bb6e41edc7f33a7259d7ec717ceeb8cfd44f36b0 Mon Sep 17 00:00:00 2001 From: Dmitry Bunin Date: Thu, 28 Nov 2024 17:31:30 +0300 Subject: [PATCH 1/8] feature: add task files --- etna/libs/sklearn/__init__.py | 1 + etna/libs/sklearn/metrics.py | 117 +++++++++++++++++++++++++++++ etna/metrics/functional_metrics.py | 3 +- 3 files changed, 120 insertions(+), 1 deletion(-) create mode 100644 etna/libs/sklearn/__init__.py create mode 100644 etna/libs/sklearn/metrics.py diff --git a/etna/libs/sklearn/__init__.py b/etna/libs/sklearn/__init__.py new file mode 100644 index 000000000..0b6eb21db --- /dev/null +++ b/etna/libs/sklearn/__init__.py @@ -0,0 +1 @@ +from etna.libs.sklearn.metrics import mean_squared_error diff --git a/etna/libs/sklearn/metrics.py b/etna/libs/sklearn/metrics.py new file mode 100644 index 000000000..596c6f16b --- /dev/null +++ b/etna/libs/sklearn/metrics.py @@ -0,0 +1,117 @@ +""" +BSD 3-Clause License + +Copyright (c) 2007-2024 The scikit-learn developers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" +# Note: Copied from scikit-learn repository (https://github.com/scikit-learn/scikit-learn/blob/1.0.2/sklearn/metrics/_regression.py#L378) + +import numpy as np + +from sklearn.utils.validation import check_consistent_length +from sklearn.metrics._regression import _check_reg_targets + + +def mean_squared_error( + y_true, y_pred, *, sample_weight=None, multioutput="uniform_average", squared=True +): + """Mean squared error regression loss. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) + Ground truth (correct) target values. + + y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) + Estimated target values. + + sample_weight : array-like of shape (n_samples,), default=None + Sample weights. + + multioutput : {'raw_values', 'uniform_average'} or array-like of shape \ + (n_outputs,), default='uniform_average' + Defines aggregating of multiple output values. + Array-like value defines weights used to average errors. + + 'raw_values' : + Returns a full set of errors in case of multioutput input. + + 'uniform_average' : + Errors of all outputs are averaged with uniform weight. + + squared : bool, default=True + If True returns MSE value, if False returns RMSE value. + + Returns + ------- + loss : float or ndarray of floats + A non-negative floating point value (the best value is 0.0), or an + array of floating point values, one for each individual target. + + Examples + -------- + >>> from sklearn.metrics import mean_squared_error + >>> y_true = [3, -0.5, 2, 7] + >>> y_pred = [2.5, 0.0, 2, 8] + >>> mean_squared_error(y_true, y_pred) + 0.375 + >>> y_true = [3, -0.5, 2, 7] + >>> y_pred = [2.5, 0.0, 2, 8] + >>> mean_squared_error(y_true, y_pred, squared=False) + 0.612... + >>> y_true = [[0.5, 1],[-1, 1],[7, -6]] + >>> y_pred = [[0, 2],[-1, 2],[8, -5]] + >>> mean_squared_error(y_true, y_pred) + 0.708... + >>> mean_squared_error(y_true, y_pred, squared=False) + 0.822... + >>> mean_squared_error(y_true, y_pred, multioutput='raw_values') + array([0.41666667, 1. ]) + >>> mean_squared_error(y_true, y_pred, multioutput=[0.3, 0.7]) + 0.825... + """ + y_type, y_true, y_pred, multioutput = _check_reg_targets( + y_true, y_pred, multioutput + ) + check_consistent_length(y_true, y_pred, sample_weight) + # here we changed using `np.average` -> `np.nanmean` + output_errors = np.nanmean((y_true - y_pred) ** 2, axis=0, weights=sample_weight) + + if not squared: + output_errors = np.sqrt(output_errors) + + if isinstance(multioutput, str): + if multioutput == "raw_values": + return output_errors + elif multioutput == "uniform_average": + # pass None as weights to np.average: uniform mean + multioutput = None + + return np.average(output_errors, weights=multioutput) diff --git a/etna/metrics/functional_metrics.py b/etna/metrics/functional_metrics.py index 404f1a757..fad58ed40 100644 --- a/etna/metrics/functional_metrics.py +++ b/etna/metrics/functional_metrics.py @@ -6,12 +6,13 @@ import numpy as np from sklearn.metrics import mean_absolute_error as mae -from sklearn.metrics import mean_squared_error as mse from sklearn.metrics import mean_squared_log_error as msle from sklearn.metrics import median_absolute_error as medae from sklearn.metrics import r2_score from typing_extensions import assert_never +from etna.libs.sklearn import mean_squared_error as mse + ArrayLike = Union[float, Sequence[float], Sequence[Sequence[float]]] From 5767624891834890e21ad9645372d1fefefd60e8 Mon Sep 17 00:00:00 2001 From: Dmitry Bunin Date: Fri, 29 Nov 2024 12:11:15 +0300 Subject: [PATCH 2/8] feature: rework adding new functional metric --- etna/libs/sklearn/__init__.py | 1 - etna/libs/sklearn/metrics.py | 117 ------------------ etna/metrics/functional_metrics.py | 42 ++++++- tests/test_metrics/test_functional_metrics.py | 69 +++++++++++ 4 files changed, 109 insertions(+), 120 deletions(-) delete mode 100644 etna/libs/sklearn/__init__.py delete mode 100644 etna/libs/sklearn/metrics.py diff --git a/etna/libs/sklearn/__init__.py b/etna/libs/sklearn/__init__.py deleted file mode 100644 index 0b6eb21db..000000000 --- a/etna/libs/sklearn/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from etna.libs.sklearn.metrics import mean_squared_error diff --git a/etna/libs/sklearn/metrics.py b/etna/libs/sklearn/metrics.py deleted file mode 100644 index 596c6f16b..000000000 --- a/etna/libs/sklearn/metrics.py +++ /dev/null @@ -1,117 +0,0 @@ -""" -BSD 3-Clause License - -Copyright (c) 2007-2024 The scikit-learn developers. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -""" -# Note: Copied from scikit-learn repository (https://github.com/scikit-learn/scikit-learn/blob/1.0.2/sklearn/metrics/_regression.py#L378) - -import numpy as np - -from sklearn.utils.validation import check_consistent_length -from sklearn.metrics._regression import _check_reg_targets - - -def mean_squared_error( - y_true, y_pred, *, sample_weight=None, multioutput="uniform_average", squared=True -): - """Mean squared error regression loss. - - Read more in the :ref:`User Guide `. - - Parameters - ---------- - y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) - Ground truth (correct) target values. - - y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) - Estimated target values. - - sample_weight : array-like of shape (n_samples,), default=None - Sample weights. - - multioutput : {'raw_values', 'uniform_average'} or array-like of shape \ - (n_outputs,), default='uniform_average' - Defines aggregating of multiple output values. - Array-like value defines weights used to average errors. - - 'raw_values' : - Returns a full set of errors in case of multioutput input. - - 'uniform_average' : - Errors of all outputs are averaged with uniform weight. - - squared : bool, default=True - If True returns MSE value, if False returns RMSE value. - - Returns - ------- - loss : float or ndarray of floats - A non-negative floating point value (the best value is 0.0), or an - array of floating point values, one for each individual target. - - Examples - -------- - >>> from sklearn.metrics import mean_squared_error - >>> y_true = [3, -0.5, 2, 7] - >>> y_pred = [2.5, 0.0, 2, 8] - >>> mean_squared_error(y_true, y_pred) - 0.375 - >>> y_true = [3, -0.5, 2, 7] - >>> y_pred = [2.5, 0.0, 2, 8] - >>> mean_squared_error(y_true, y_pred, squared=False) - 0.612... - >>> y_true = [[0.5, 1],[-1, 1],[7, -6]] - >>> y_pred = [[0, 2],[-1, 2],[8, -5]] - >>> mean_squared_error(y_true, y_pred) - 0.708... - >>> mean_squared_error(y_true, y_pred, squared=False) - 0.822... - >>> mean_squared_error(y_true, y_pred, multioutput='raw_values') - array([0.41666667, 1. ]) - >>> mean_squared_error(y_true, y_pred, multioutput=[0.3, 0.7]) - 0.825... - """ - y_type, y_true, y_pred, multioutput = _check_reg_targets( - y_true, y_pred, multioutput - ) - check_consistent_length(y_true, y_pred, sample_weight) - # here we changed using `np.average` -> `np.nanmean` - output_errors = np.nanmean((y_true - y_pred) ** 2, axis=0, weights=sample_weight) - - if not squared: - output_errors = np.sqrt(output_errors) - - if isinstance(multioutput, str): - if multioutput == "raw_values": - return output_errors - elif multioutput == "uniform_average": - # pass None as weights to np.average: uniform mean - multioutput = None - - return np.average(output_errors, weights=multioutput) diff --git a/etna/metrics/functional_metrics.py b/etna/metrics/functional_metrics.py index fad58ed40..b0f0ae58b 100644 --- a/etna/metrics/functional_metrics.py +++ b/etna/metrics/functional_metrics.py @@ -6,13 +6,12 @@ import numpy as np from sklearn.metrics import mean_absolute_error as mae +from sklearn.metrics import mean_squared_error as mse from sklearn.metrics import mean_squared_log_error as msle from sklearn.metrics import median_absolute_error as medae from sklearn.metrics import r2_score from typing_extensions import assert_never -from etna.libs.sklearn import mean_squared_error as mse - ArrayLike = Union[float, Sequence[float], Sequence[Sequence[float]]] @@ -42,6 +41,45 @@ def _get_axis_by_multioutput(multioutput: str) -> Optional[int]: assert_never(multioutput_enum) +def mse_with_missing_handling(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike: + """Mean squared error with missing values handling. + + `Wikipedia entry on the Mean squared error + `_ + + The nans are ignored during computation. + + Parameters + ---------- + y_true: + array-like of shape (n_samples,) or (n_samples, n_outputs) + + Ground truth (correct) target values. + + y_pred: + array-like of shape (n_samples,) or (n_samples, n_outputs) + + Estimated target values. + + multioutput: + Defines aggregating of multiple output values + (see :py:class:`~etna.metrics.functional_metrics.FunctionalMetricMultioutput`). + + Returns + ------- + : + A non-negative floating point value (the best value is 0.0), or an array of floating point values, + one for each individual target. + """ + y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred) + + if len(y_true_array.shape) != len(y_pred_array.shape): + raise ValueError("Shapes of the labels must be the same") + + axis = _get_axis_by_multioutput(multioutput) + return np.nanmean((y_true_array - y_pred_array) ** 2, axis=axis) + + def mape(y_true: ArrayLike, y_pred: ArrayLike, eps: float = 1e-15, multioutput: str = "joint") -> ArrayLike: """Mean absolute percentage error. diff --git a/tests/test_metrics/test_functional_metrics.py b/tests/test_metrics/test_functional_metrics.py index f9198cf11..5657983b9 100644 --- a/tests/test_metrics/test_functional_metrics.py +++ b/tests/test_metrics/test_functional_metrics.py @@ -1,3 +1,4 @@ +import numpy as np import numpy.testing as npt import pytest @@ -12,6 +13,7 @@ from etna.metrics import sign from etna.metrics import smape from etna.metrics import wape +from etna.metrics.functional_metrics import mse_with_missing_handling @pytest.fixture() @@ -34,6 +36,7 @@ def y_pred_1d(): ( (mae, 1), (mse, 1), + (mse_with_missing_handling, 1), (rmse, 1), (mape, 66 + 2 / 3), (smape, 47.6190476), @@ -58,6 +61,7 @@ def test_mle_metric_exception(y_true_1d, y_pred_1d): @pytest.mark.parametrize( "metric", ( + mse_with_missing_handling, mape, smape, sign, @@ -85,6 +89,7 @@ def y_pred_2d(): ( (mae, 1), (mse, 1), + (mse_with_missing_handling, 1), (rmse, 1), (mape, 42 + 3 / 11), (smape, 38.0952380), @@ -104,6 +109,7 @@ def test_all_2d_metrics_joint(metric, right_metrics_value, y_true_2d, y_pred_2d) ( (mae, {"multioutput": "raw_values"}, [1, 1]), (mse, {"multioutput": "raw_values"}, [1, 1]), + (mse_with_missing_handling, {"multioutput": "raw_values"}, [1, 1]), (rmse, {"multioutput": "raw_values"}, [1, 1]), (mape, {"multioutput": "raw_values"}, [9.5454545, 75]), (smape, {"multioutput": "raw_values"}, [9.5238095, 66 + 2 / 3]), @@ -116,3 +122,66 @@ def test_all_2d_metrics_joint(metric, right_metrics_value, y_true_2d, y_pred_2d) ) def test_all_2d_metrics_per_output(metric, params, right_metrics_value, y_true_2d, y_pred_2d): npt.assert_almost_equal(metric(y_true_2d, y_pred_2d, **params), right_metrics_value) + + +@pytest.mark.filterwarnings("ignore: Mean of empty slice") +@pytest.mark.parametrize( + "y_true, y_pred, multioutput, expected", + [ + # 1d + (np.array([1.0]), np.array([1.0]), "joint", 0.0), + (np.array([1.0, 2.0, 3.0]), np.array([3.0, 1.0, 2.0]), "joint", 2.0), + (np.array([1.0, np.NaN, 3.0]), np.array([3.0, 1.0, 2.0]), "joint", 2.5), + (np.array([1.0, 2.0, 3.0]), np.array([3.0, np.NaN, 2.0]), "joint", 2.5), + (np.array([1.0, np.NaN, 3.0]), np.array([3.0, np.NaN, 2.0]), "joint", 2.5), + (np.array([1.0, np.NaN, 3.0]), np.array([3.0, 1.0, np.NaN]), "joint", 4.0), + (np.array([1.0, np.NaN, np.NaN]), np.array([np.NaN, np.NaN, 2.0]), "joint", np.NaN), + # 2d + (np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T, np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T, "joint", 2.5), + ( + np.array([[1.0, np.NaN, 3.0], [3.0, 4.0, np.NaN]]).T, + np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T, + "joint", + 4.0, + ), + ( + np.array([[np.NaN, np.NaN, np.NaN], [3.0, 4.0, 5.0]]).T, + np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T, + "joint", + 2.5, + ), + ( + np.array([[np.NaN, np.NaN, np.NaN], [np.NaN, np.NaN, np.NaN]]).T, + np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T, + "joint", + np.NaN, + ), + ( + np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T, + np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T, + "raw_values", + np.array([2.0, 3.0]), + ), + ( + np.array([[1.0, np.NaN, 3.0], [3.0, 4.0, np.NaN]]).T, + np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T, + "raw_values", + np.array([4.0, 4.0]), + ), + ( + np.array([[np.NaN, np.NaN, np.NaN], [3.0, 4.0, 5.0]]).T, + np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T, + "raw_values", + np.array([np.NaN, 2.5]), + ), + ( + np.array([[np.NaN, np.NaN, np.NaN], [np.NaN, np.NaN, np.NaN]]).T, + np.array([[3.0, 1.0, np.NaN], [5.0, np.NaN, 4.0]]).T, + "raw_values", + np.array([np.NaN, np.NaN]), + ), + ], +) +def test_values_ok(y_true, y_pred, multioutput, expected): + result = mse_with_missing_handling(y_true=y_true, y_pred=y_pred, multioutput=multioutput) + npt.assert_allclose(result, expected) From 157d361c1fc64e97f7c7dd9c9ce15dc96fe376ef Mon Sep 17 00:00:00 2001 From: Dmitry Bunin Date: Wed, 4 Dec 2024 14:06:39 +0300 Subject: [PATCH 3/8] feature: rework MSE to handle_missing, rework tests --- etna/metrics/base.py | 13 +- etna/metrics/functional_metrics.py | 10 +- etna/metrics/intervals_metrics.py | 24 ++- etna/metrics/metrics.py | 140 +++++++++++++----- tests/test_metrics/test_functional_metrics.py | 1 - tests/test_metrics/test_metrics.py | 112 +++++++++++--- tests/utils.py | 2 +- 7 files changed, 231 insertions(+), 71 deletions(-) diff --git a/etna/metrics/base.py b/etna/metrics/base.py index 0dfc0304e..45c17cbd3 100644 --- a/etna/metrics/base.py +++ b/etna/metrics/base.py @@ -128,7 +128,7 @@ class Metric(AbstractMetric, BaseMixin): def __init__( self, metric_fn: MetricFunction, - mode: str = MetricAggregationMode.per_segment, + mode: str = MetricAggregationMode.per_segment.value, metric_fn_signature: str = "array_to_scalar", **kwargs, ): @@ -146,6 +146,8 @@ def __init__( * if "per-segment" -- does not aggregate metrics + See :py:class:`~etna.metrics.base.MetricAggregationMode`. + metric_fn_signature: type of signature of ``metric_fn`` (see :py:class:`~etna.metrics.base.MetricFunctionSignature`) kwargs: @@ -385,7 +387,7 @@ class MetricWithMissingHandling(Metric): def __init__( self, metric_fn: MetricFunction, - mode: str = MetricAggregationMode.per_segment, + mode: str = MetricAggregationMode.per_segment.value, metric_fn_signature: str = "array_to_scalar", missing_mode: str = "error", **kwargs, @@ -404,6 +406,8 @@ def __init__( * if "per-segment" -- does not aggregate metrics + See :py:class:`~etna.metrics.base.MetricAggregationMode`. + metric_fn_signature: type of signature of ``metric_fn`` (see :py:class:`~etna.metrics.base.MetricFunctionSignature`) missing_mode: @@ -421,7 +425,8 @@ def __init__( If non-existent ``missing_mode`` is used. """ super().__init__(metric_fn=metric_fn, mode=mode, metric_fn_signature=metric_fn_signature, **kwargs) - self.missing_mode = MetricMissingMode(missing_mode) + self.missing_mode = missing_mode + self._missing_mode_enum = MetricMissingMode(missing_mode) def _validate_nans(self, y_true: TSDataset, y_pred: TSDataset): """Check that ``y_true`` and ``y_pred`` doesn't have NaNs depending on ``missing_mode``. @@ -442,7 +447,7 @@ def _validate_nans(self, y_true: TSDataset, y_pred: TSDataset): df_pred = y_pred.df.loc[:, pd.IndexSlice[:, "target"]] df_true_isna_sum = df_true.isna().sum() - if self.missing_mode is MetricMissingMode.error and (df_true_isna_sum > 0).any(): + if self._missing_mode_enum is MetricMissingMode.error and (df_true_isna_sum > 0).any(): error_segments = set(df_true_isna_sum[df_true_isna_sum > 0].index.droplevel("feature").tolist()) raise ValueError(f"There are NaNs in y_true! Segments with NaNs: {reprlib.repr(error_segments)}.") diff --git a/etna/metrics/functional_metrics.py b/etna/metrics/functional_metrics.py index b0f0ae58b..b517231e9 100644 --- a/etna/metrics/functional_metrics.py +++ b/etna/metrics/functional_metrics.py @@ -1,3 +1,4 @@ +import warnings from enum import Enum from functools import partial from typing import Optional @@ -77,7 +78,14 @@ def mse_with_missing_handling(y_true: ArrayLike, y_pred: ArrayLike, multioutput: raise ValueError("Shapes of the labels must be the same") axis = _get_axis_by_multioutput(multioutput) - return np.nanmean((y_true_array - y_pred_array) ** 2, axis=axis) + with warnings.catch_warnings(): + # this helps to prevent warning in case of all nans + warnings.filterwarnings( + message="Mean of empty slice", + action="ignore", + ) + result = np.nanmean((y_true_array - y_pred_array) ** 2, axis=axis) + return result def mape(y_true: ArrayLike, y_pred: ArrayLike, eps: float = 1e-15, multioutput: str = "joint") -> ArrayLike: diff --git a/etna/metrics/intervals_metrics.py b/etna/metrics/intervals_metrics.py index 6e70525af..8e1847489 100644 --- a/etna/metrics/intervals_metrics.py +++ b/etna/metrics/intervals_metrics.py @@ -56,7 +56,7 @@ class Coverage(Metric, _IntervalsMetricMixin): def __init__( self, quantiles: Optional[Tuple[float, float]] = None, - mode: str = MetricAggregationMode.per_segment, + mode: str = MetricAggregationMode.per_segment.value, upper_name: Optional[str] = None, lower_name: Optional[str] = None, **kwargs, @@ -67,8 +67,14 @@ def __init__( ---------- quantiles: lower and upper quantiles - mode: 'macro' or 'per-segment' - metrics aggregation mode + mode: + "macro" or "per-segment", way to aggregate metric values over segments: + + * if "macro" computes average value + + * if "per-segment" -- does not aggregate metrics + + See :py:class:`~etna.metrics.base.MetricAggregationMode`. upper_name: name of column with upper border of the interval lower_name: @@ -169,7 +175,7 @@ class Width(Metric, _IntervalsMetricMixin): def __init__( self, quantiles: Optional[Tuple[float, float]] = None, - mode: str = MetricAggregationMode.per_segment, + mode: str = MetricAggregationMode.per_segment.value, upper_name: Optional[str] = None, lower_name: Optional[str] = None, **kwargs, @@ -180,8 +186,14 @@ def __init__( ---------- quantiles: lower and upper quantiles - mode: 'macro' or 'per-segment' - metrics aggregation mode + mode: + "macro" or "per-segment", way to aggregate metric values over segments: + + * if "macro" computes average value + + * if "per-segment" -- does not aggregate metrics + + See :py:class:`~etna.metrics.base.MetricAggregationMode`. upper_name: name of column with upper border of the interval lower_name: diff --git a/etna/metrics/metrics.py b/etna/metrics/metrics.py index d5388d7ce..c6e0da774 100644 --- a/etna/metrics/metrics.py +++ b/etna/metrics/metrics.py @@ -2,11 +2,12 @@ from etna.metrics.base import Metric from etna.metrics.base import MetricAggregationMode +from etna.metrics.base import MetricWithMissingHandling from etna.metrics.functional_metrics import mae from etna.metrics.functional_metrics import mape from etna.metrics.functional_metrics import max_deviation from etna.metrics.functional_metrics import medae -from etna.metrics.functional_metrics import mse +from etna.metrics.functional_metrics import mse_with_missing_handling from etna.metrics.functional_metrics import msle from etna.metrics.functional_metrics import r2_score from etna.metrics.functional_metrics import rmse @@ -26,7 +27,7 @@ class MAE(Metric): You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs): + def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs): """Init metric. Parameters @@ -45,7 +46,7 @@ def greater_is_better(self) -> bool: return False -class MSE(Metric): +class MSE(MetricWithMissingHandling): """Mean squared error metric with multi-segment computation support. .. math:: @@ -56,18 +57,33 @@ class MSE(Metric): You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs): + def __init__(self, mode: str = MetricAggregationMode.per_segment.value, missing_mode: str = "error", **kwargs): """Init metric. Parameters ---------- - mode: 'macro' or 'per-segment' - metrics aggregation mode + mode: + "macro" or "per-segment", way to aggregate metric values over segments: + + * if "macro" computes average value + + * if "per-segment" -- does not aggregate metrics + + See :py:class:`~etna.metrics.base.MetricAggregationMode`. + + missing_mode: + mode of handling missing values (see :py:class:`~etna.metrics.base.MetricMissingMode`) kwargs: metric's computation arguments """ - mse_per_output = partial(mse, multioutput="raw_values") - super().__init__(mode=mode, metric_fn=mse_per_output, metric_fn_signature="matrix_to_array", **kwargs) + mse_per_output = partial(mse_with_missing_handling, multioutput="raw_values") + super().__init__( + mode=mode, + metric_fn=mse_per_output, + missing_mode=missing_mode, + metric_fn_signature="matrix_to_array", + **kwargs, + ) @property def greater_is_better(self) -> bool: @@ -86,13 +102,19 @@ class RMSE(Metric): You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs): + def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs): """Init metric. Parameters ---------- - mode: 'macro' or 'per-segment' - metrics aggregation mode + mode: + "macro" or "per-segment", way to aggregate metric values over segments: + + * if "macro" computes average value + + * if "per-segment" -- does not aggregate metrics + + See :py:class:`~etna.metrics.base.MetricAggregationMode`. kwargs: metric's computation arguments """ @@ -115,13 +137,19 @@ class R2(Metric): You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs): + def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs): """Init metric. Parameters ---------- - mode: 'macro' or 'per-segment' - metrics aggregation mode + mode: + "macro" or "per-segment", way to aggregate metric values over segments: + + * if "macro" computes average value + + * if "per-segment" -- does not aggregate metrics + + See :py:class:`~etna.metrics.base.MetricAggregationMode`. kwargs: metric's computation arguments """ @@ -145,13 +173,19 @@ class MAPE(Metric): You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs): + def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs): """Init metric. Parameters ---------- - mode: 'macro' or 'per-segment' - metrics aggregation mode + mode: + "macro" or "per-segment", way to aggregate metric values over segments: + + * if "macro" computes average value + + * if "per-segment" -- does not aggregate metrics + + See :py:class:`~etna.metrics.base.MetricAggregationMode`. kwargs: metric's computation arguments """ @@ -175,13 +209,19 @@ class SMAPE(Metric): You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs): + def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs): """Init metric. Parameters ---------- - mode: 'macro' or 'per-segment' - metrics aggregation mode + mode: + "macro" or "per-segment", way to aggregate metric values over segments: + + * if "macro" computes average value + + * if "per-segment" -- does not aggregate metrics + + See :py:class:`~etna.metrics.base.MetricAggregationMode`. kwargs: metric's computation arguments """ @@ -205,13 +245,19 @@ class MedAE(Metric): You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs): + def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs): """Init metric. Parameters ---------- - mode: 'macro' or 'per-segment' - metrics aggregation mode + mode: + "macro" or "per-segment", way to aggregate metric values over segments: + + * if "macro" computes average value + + * if "per-segment" -- does not aggregate metrics + + See :py:class:`~etna.metrics.base.MetricAggregationMode`. kwargs: metric's computation arguments """ @@ -235,13 +281,19 @@ class MSLE(Metric): You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs): + def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs): """Init metric. Parameters ---------- - mode: 'macro' or 'per-segment' - metrics aggregation mode + mode: + "macro" or "per-segment", way to aggregate metric values over segments: + + * if "macro" computes average value + + * if "per-segment" -- does not aggregate metrics + + See :py:class:`~etna.metrics.base.MetricAggregationMode`. kwargs: metric's computation arguments @@ -266,13 +318,19 @@ class Sign(Metric): You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs): + def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs): """Init metric. Parameters ---------- - mode: 'macro' or 'per-segment' - metrics aggregation mode + mode: + "macro" or "per-segment", way to aggregate metric values over segments: + + * if "macro" computes average value + + * if "per-segment" -- does not aggregate metrics + + See :py:class:`~etna.metrics.base.MetricAggregationMode`. kwargs: metric's computation arguments """ @@ -296,13 +354,19 @@ class MaxDeviation(Metric): You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs): + def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs): """Init metric. Parameters ---------- - mode: 'macro' or 'per-segment' - metrics aggregation mode + mode: + "macro" or "per-segment", way to aggregate metric values over segments: + + * if "macro" computes average value + + * if "per-segment" -- does not aggregate metrics + + See :py:class:`~etna.metrics.base.MetricAggregationMode`. kwargs: metric's computation arguments """ @@ -325,13 +389,19 @@ class WAPE(Metric): You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs): + def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs): """Init metric. Parameters ---------- - mode: 'macro' or 'per-segment' - metrics aggregation mode + mode: + "macro" or "per-segment", way to aggregate metric values over segments: + + * if "macro" computes average value + + * if "per-segment" -- does not aggregate metrics + + See :py:class:`~etna.metrics.base.MetricAggregationMode`. kwargs: metric's computation arguments """ diff --git a/tests/test_metrics/test_functional_metrics.py b/tests/test_metrics/test_functional_metrics.py index 5657983b9..0158bdf96 100644 --- a/tests/test_metrics/test_functional_metrics.py +++ b/tests/test_metrics/test_functional_metrics.py @@ -124,7 +124,6 @@ def test_all_2d_metrics_per_output(metric, params, right_metrics_value, y_true_2 npt.assert_almost_equal(metric(y_true_2d, y_pred_2d, **params), right_metrics_value) -@pytest.mark.filterwarnings("ignore: Mean of empty slice") @pytest.mark.parametrize( "y_true, y_pred, multioutput, expected", [ diff --git a/tests/test_metrics/test_metrics.py b/tests/test_metrics/test_metrics.py index 1d02d5b98..58074ead9 100644 --- a/tests/test_metrics/test_metrics.py +++ b/tests/test_metrics/test_metrics.py @@ -35,31 +35,28 @@ @pytest.mark.parametrize( - "metric_class, metric_class_repr, metric_params, param_repr", + "metric, expected_repr", ( - (MAE, "MAE", {}, ""), - (MSE, "MSE", {}, ""), - (RMSE, "RMSE", {}, ""), - (MedAE, "MedAE", {}, ""), - (MSLE, "MSLE", {}, ""), - (MAPE, "MAPE", {}, ""), - (SMAPE, "SMAPE", {}, ""), - (R2, "R2", {}, ""), - (Sign, "Sign", {}, ""), - (MaxDeviation, "MaxDeviation", {}, ""), - (DummyMetric, "DummyMetric", {"alpha": 1.0}, "alpha = 1.0, "), - (WAPE, "WAPE", {}, ""), + (MAE(), "MAE(mode = 'per-segment', )"), + (MAE(mode="macro"), "MAE(mode = 'macro', )"), + (MSE(), "MSE(mode = 'per-segment', missing_mode = 'error', )"), + (MSE(missing_mode="ignore"), "MSE(mode = 'per-segment', missing_mode = 'ignore', )"), + (RMSE(), "RMSE(mode = 'per-segment', )"), + (MedAE(), "MedAE(mode = 'per-segment', )"), + (MSLE(), "MSLE(mode = 'per-segment', )"), + (MAPE(), "MAPE(mode = 'per-segment', )"), + (SMAPE(), "SMAPE(mode = 'per-segment', )"), + (R2(), "R2(mode = 'per-segment', )"), + (Sign(), "Sign(mode = 'per-segment', )"), + (MaxDeviation(), "MaxDeviation(mode = 'per-segment', )"), + (DummyMetric(), "DummyMetric(mode = 'per-segment', alpha = 1.0, )"), + (WAPE(), "WAPE(mode = 'per-segment', )"), ), ) -def test_repr(metric_class, metric_class_repr, metric_params, param_repr): +def test_repr(metric, expected_repr): """Check metrics __repr__ method""" - metric_mode = "per-segment" - kwargs = {**metric_params, "kwarg_1": "value_1", "kwarg_2": "value_2"} - kwargs_repr = param_repr + "kwarg_1 = 'value_1', kwarg_2 = 'value_2'" - metric = metric_class(mode=metric_mode, **kwargs) metric_repr = metric.__repr__() - true_repr = f"{metric_class_repr}(mode = '{metric_mode}', {kwargs_repr}, )" - assert metric_repr == true_repr + assert metric_repr == expected_repr @pytest.mark.parametrize( @@ -168,17 +165,86 @@ def test_invalid_nans_pred(metric_class, train_test_dfs): @pytest.mark.parametrize( - "metric_class", (MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation, DummyMetric, WAPE) + "metric", + ( + MAE(), + MSE(missing_mode="error"), + RMSE(), + MedAE(), + MSLE(), + MAPE(), + SMAPE(), + R2(), + Sign(), + MaxDeviation(), + DummyMetric(), + WAPE(), + ), ) -def test_invalid_nans_true(metric_class, train_test_dfs): +def test_invalid_nans_true(metric, train_test_dfs): """Check metrics behavior in case of nans in true values.""" forecast_df, true_df = train_test_dfs true_df.df.iloc[0, 0] = np.NaN - metric = metric_class() with pytest.raises(ValueError, match="There are NaNs in y_true"): _ = metric(y_true=true_df, y_pred=forecast_df) +@pytest.mark.parametrize( + "metric", + (MSE(missing_mode="ignore"),), +) +def test_invalid_single_nan_ignore(metric, train_test_dfs): + """Check metrics behavior in case of ignoring one nan in true values.""" + forecast_df, true_df = train_test_dfs + true_df.df.iloc[0, 0] = np.NaN + value = metric(y_true=true_df, y_pred=forecast_df) + assert isinstance(value, dict) + segments = set(forecast_df.df.columns.get_level_values("segment").unique().tolist()) + assert value.keys() == segments + assert all(isinstance(cur_value, float) for cur_value in value.values()) + + +@pytest.mark.parametrize( + "metric", + (MSE(mode="per-segment", missing_mode="ignore"),), +) +def test_invalid_segment_nans_ignore_per_segment(metric, train_test_dfs): + """Check per-segment metrics behavior in case of ignoring segment of all nans in true values.""" + forecast_df, true_df = train_test_dfs + true_df.df.iloc[:, 0] = np.NaN + value = metric(y_true=true_df, y_pred=forecast_df) + assert isinstance(value, dict) + segments = set(forecast_df.df.columns.get_level_values("segment").unique().tolist()) + assert value.keys() == segments + empty_segment = true_df.df.columns.get_level_values("segment").unique()[0] + assert all(isinstance(cur_value, float) for cur_segment, cur_value in value.items() if cur_segment != empty_segment) + assert value[empty_segment] is None + + +@pytest.mark.parametrize( + "metric", + (MSE(mode="macro", missing_mode="ignore"),), +) +def test_invalid_segment_nans_ignore_macro(metric, train_test_dfs): + """Check macro metrics behavior in case of ignoring segment of all nans in true values.""" + forecast_df, true_df = train_test_dfs + true_df.df.iloc[:, 0] = np.NaN + value = metric(y_true=true_df, y_pred=forecast_df) + assert isinstance(value, float) + + +@pytest.mark.parametrize( + "metric", + (MSE(mode="macro", missing_mode="ignore"),), +) +def test_invalid_all_nans_ignore_macro(metric, train_test_dfs): + """Check macro metrics behavior in case of all nan values in true values.""" + forecast_df, true_df = train_test_dfs + true_df.df.iloc[:, :] = np.NaN + value = metric(y_true=true_df, y_pred=forecast_df) + assert value is None + + @pytest.mark.parametrize( "metric_class, metric_fn", ( diff --git a/tests/utils.py b/tests/utils.py index e9c6f0d5e..85fa5b06c 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -92,7 +92,7 @@ class DummyMetric(Metric): We change the name property here. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment, alpha: float = 1.0, **kwargs): + def __init__(self, mode: str = MetricAggregationMode.per_segment.value, alpha: float = 1.0, **kwargs): self.alpha = alpha super().__init__(mode=mode, metric_fn=create_dummy_functional_metric(alpha), **kwargs) From 22aa421795043a07ab476de99d6c3661e8805f8f Mon Sep 17 00:00:00 2001 From: Dmitry Bunin Date: Wed, 4 Dec 2024 16:03:45 +0300 Subject: [PATCH 4/8] fix: update test on compute_metrics --- tests/test_metrics/test_metrics_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_metrics/test_metrics_utils.py b/tests/test_metrics/test_metrics_utils.py index d8123081e..8872ad7af 100644 --- a/tests/test_metrics/test_metrics_utils.py +++ b/tests/test_metrics/test_metrics_utils.py @@ -16,7 +16,7 @@ def test_compute_metrics(train_test_dfs: Tuple[TSDataset, TSDataset]): expected_keys = [ "MAE(mode = 'per-segment', )", "MAE(mode = 'macro', )", - "MSE(mode = 'per-segment', )", + "MSE(mode = 'per-segment', missing_mode = 'error', )", "MAPE(mode = 'macro', eps = 1e-05, )", ] result = compute_metrics(metrics=metrics, y_true=true_df, y_pred=forecast_df) From d21df7b0b999494f03eae366a91600eb67fe2532 Mon Sep 17 00:00:00 2001 From: Dmitry Bunin Date: Fri, 6 Dec 2024 10:52:53 +0300 Subject: [PATCH 5/8] fix: remove sklearn mse and replace it our own mse --- etna/metrics/__init__.py | 1 - etna/metrics/functional_metrics.py | 6 +++--- etna/metrics/metrics.py | 4 ++-- tests/test_metrics/test_functional_metrics.py | 8 ++------ 4 files changed, 7 insertions(+), 12 deletions(-) diff --git a/etna/metrics/__init__.py b/etna/metrics/__init__.py index 24f2e679a..38235e2fd 100644 --- a/etna/metrics/__init__.py +++ b/etna/metrics/__init__.py @@ -1,7 +1,6 @@ """Module with metrics of forecasting quality.""" from sklearn.metrics import mean_absolute_error as mae -from sklearn.metrics import mean_squared_error as mse from sklearn.metrics import mean_squared_log_error as msle from sklearn.metrics import median_absolute_error as medae from sklearn.metrics import r2_score diff --git a/etna/metrics/functional_metrics.py b/etna/metrics/functional_metrics.py index b517231e9..036e4ae70 100644 --- a/etna/metrics/functional_metrics.py +++ b/etna/metrics/functional_metrics.py @@ -7,7 +7,7 @@ import numpy as np from sklearn.metrics import mean_absolute_error as mae -from sklearn.metrics import mean_squared_error as mse +from sklearn.metrics import mean_squared_error as mse_sklearn from sklearn.metrics import mean_squared_log_error as msle from sklearn.metrics import median_absolute_error as medae from sklearn.metrics import r2_score @@ -42,7 +42,7 @@ def _get_axis_by_multioutput(multioutput: str) -> Optional[int]: assert_never(multioutput_enum) -def mse_with_missing_handling(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike: +def mse(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike: """Mean squared error with missing values handling. `Wikipedia entry on the Mean squared error @@ -253,7 +253,7 @@ def max_deviation(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "join return np.max(np.abs(prefix_error_sum), axis=axis) -rmse = partial(mse, squared=False) +rmse = partial(mse_sklearn, squared=False) def wape(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike: diff --git a/etna/metrics/metrics.py b/etna/metrics/metrics.py index c6e0da774..24db102b6 100644 --- a/etna/metrics/metrics.py +++ b/etna/metrics/metrics.py @@ -7,7 +7,7 @@ from etna.metrics.functional_metrics import mape from etna.metrics.functional_metrics import max_deviation from etna.metrics.functional_metrics import medae -from etna.metrics.functional_metrics import mse_with_missing_handling +from etna.metrics.functional_metrics import mse from etna.metrics.functional_metrics import msle from etna.metrics.functional_metrics import r2_score from etna.metrics.functional_metrics import rmse @@ -76,7 +76,7 @@ def __init__(self, mode: str = MetricAggregationMode.per_segment.value, missing_ kwargs: metric's computation arguments """ - mse_per_output = partial(mse_with_missing_handling, multioutput="raw_values") + mse_per_output = partial(mse, multioutput="raw_values") super().__init__( mode=mode, metric_fn=mse_per_output, diff --git a/tests/test_metrics/test_functional_metrics.py b/tests/test_metrics/test_functional_metrics.py index 0158bdf96..ba0e2436c 100644 --- a/tests/test_metrics/test_functional_metrics.py +++ b/tests/test_metrics/test_functional_metrics.py @@ -13,7 +13,6 @@ from etna.metrics import sign from etna.metrics import smape from etna.metrics import wape -from etna.metrics.functional_metrics import mse_with_missing_handling @pytest.fixture() @@ -36,7 +35,6 @@ def y_pred_1d(): ( (mae, 1), (mse, 1), - (mse_with_missing_handling, 1), (rmse, 1), (mape, 66 + 2 / 3), (smape, 47.6190476), @@ -61,7 +59,7 @@ def test_mle_metric_exception(y_true_1d, y_pred_1d): @pytest.mark.parametrize( "metric", ( - mse_with_missing_handling, + mse, mape, smape, sign, @@ -89,7 +87,6 @@ def y_pred_2d(): ( (mae, 1), (mse, 1), - (mse_with_missing_handling, 1), (rmse, 1), (mape, 42 + 3 / 11), (smape, 38.0952380), @@ -109,7 +106,6 @@ def test_all_2d_metrics_joint(metric, right_metrics_value, y_true_2d, y_pred_2d) ( (mae, {"multioutput": "raw_values"}, [1, 1]), (mse, {"multioutput": "raw_values"}, [1, 1]), - (mse_with_missing_handling, {"multioutput": "raw_values"}, [1, 1]), (rmse, {"multioutput": "raw_values"}, [1, 1]), (mape, {"multioutput": "raw_values"}, [9.5454545, 75]), (smape, {"multioutput": "raw_values"}, [9.5238095, 66 + 2 / 3]), @@ -182,5 +178,5 @@ def test_all_2d_metrics_per_output(metric, params, right_metrics_value, y_true_2 ], ) def test_values_ok(y_true, y_pred, multioutput, expected): - result = mse_with_missing_handling(y_true=y_true, y_pred=y_pred, multioutput=multioutput) + result = mse(y_true=y_true, y_pred=y_pred, multioutput=multioutput) npt.assert_allclose(result, expected) From 6ca83b217917694871510e7b5c803a3170a50a91 Mon Sep 17 00:00:00 2001 From: Dmitry Bunin Date: Fri, 6 Dec 2024 10:55:55 +0300 Subject: [PATCH 6/8] fix: change default value of metric mode to string per-segment --- etna/metrics/base.py | 4 ++-- etna/metrics/intervals_metrics.py | 5 ++--- etna/metrics/metrics.py | 23 +++++++++++------------ 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/etna/metrics/base.py b/etna/metrics/base.py index 45c17cbd3..32373a145 100644 --- a/etna/metrics/base.py +++ b/etna/metrics/base.py @@ -128,7 +128,7 @@ class Metric(AbstractMetric, BaseMixin): def __init__( self, metric_fn: MetricFunction, - mode: str = MetricAggregationMode.per_segment.value, + mode: str = "per-segment", metric_fn_signature: str = "array_to_scalar", **kwargs, ): @@ -387,7 +387,7 @@ class MetricWithMissingHandling(Metric): def __init__( self, metric_fn: MetricFunction, - mode: str = MetricAggregationMode.per_segment.value, + mode: str = "per-segment", metric_fn_signature: str = "array_to_scalar", missing_mode: str = "error", **kwargs, diff --git a/etna/metrics/intervals_metrics.py b/etna/metrics/intervals_metrics.py index 8e1847489..10284460e 100644 --- a/etna/metrics/intervals_metrics.py +++ b/etna/metrics/intervals_metrics.py @@ -9,7 +9,6 @@ from etna.datasets import TSDataset from etna.metrics.base import Metric -from etna.metrics.base import MetricAggregationMode from etna.metrics.functional_metrics import ArrayLike @@ -56,7 +55,7 @@ class Coverage(Metric, _IntervalsMetricMixin): def __init__( self, quantiles: Optional[Tuple[float, float]] = None, - mode: str = MetricAggregationMode.per_segment.value, + mode: str = "per-segment", upper_name: Optional[str] = None, lower_name: Optional[str] = None, **kwargs, @@ -175,7 +174,7 @@ class Width(Metric, _IntervalsMetricMixin): def __init__( self, quantiles: Optional[Tuple[float, float]] = None, - mode: str = MetricAggregationMode.per_segment.value, + mode: str = "per-segment", upper_name: Optional[str] = None, lower_name: Optional[str] = None, **kwargs, diff --git a/etna/metrics/metrics.py b/etna/metrics/metrics.py index 24db102b6..fe894bfdd 100644 --- a/etna/metrics/metrics.py +++ b/etna/metrics/metrics.py @@ -1,7 +1,6 @@ from functools import partial from etna.metrics.base import Metric -from etna.metrics.base import MetricAggregationMode from etna.metrics.base import MetricWithMissingHandling from etna.metrics.functional_metrics import mae from etna.metrics.functional_metrics import mape @@ -27,7 +26,7 @@ class MAE(Metric): You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs): + def __init__(self, mode: str = "per-segment", **kwargs): """Init metric. Parameters @@ -57,7 +56,7 @@ class MSE(MetricWithMissingHandling): You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment.value, missing_mode: str = "error", **kwargs): + def __init__(self, mode: str = "per-segment", missing_mode: str = "error", **kwargs): """Init metric. Parameters @@ -102,7 +101,7 @@ class RMSE(Metric): You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs): + def __init__(self, mode: str = "per-segment", **kwargs): """Init metric. Parameters @@ -137,7 +136,7 @@ class R2(Metric): You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs): + def __init__(self, mode: str = "per-segment", **kwargs): """Init metric. Parameters @@ -173,7 +172,7 @@ class MAPE(Metric): You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs): + def __init__(self, mode: str = "per-segment", **kwargs): """Init metric. Parameters @@ -209,7 +208,7 @@ class SMAPE(Metric): You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs): + def __init__(self, mode: str = "per-segment", **kwargs): """Init metric. Parameters @@ -245,7 +244,7 @@ class MedAE(Metric): You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs): + def __init__(self, mode: str = "per-segment", **kwargs): """Init metric. Parameters @@ -281,7 +280,7 @@ class MSLE(Metric): You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs): + def __init__(self, mode: str = "per-segment", **kwargs): """Init metric. Parameters @@ -318,7 +317,7 @@ class Sign(Metric): You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs): + def __init__(self, mode: str = "per-segment", **kwargs): """Init metric. Parameters @@ -354,7 +353,7 @@ class MaxDeviation(Metric): You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs): + def __init__(self, mode: str = "per-segment", **kwargs): """Init metric. Parameters @@ -389,7 +388,7 @@ class WAPE(Metric): You can read more about logic of multi-segment metrics in Metric docs. """ - def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs): + def __init__(self, mode: str = "per-segment", **kwargs): """Init metric. Parameters From 878313ee0c4013c4f641104d38e71aef4f40f383 Mon Sep 17 00:00:00 2001 From: Dmitry Bunin Date: Fri, 6 Dec 2024 11:08:11 +0300 Subject: [PATCH 7/8] docs: update docs for metrics --- etna/metrics/functional_metrics.py | 24 +++++++++++++----------- etna/metrics/metrics.py | 4 ++++ 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/etna/metrics/functional_metrics.py b/etna/metrics/functional_metrics.py index 036e4ae70..7feb319ad 100644 --- a/etna/metrics/functional_metrics.py +++ b/etna/metrics/functional_metrics.py @@ -45,10 +45,10 @@ def _get_axis_by_multioutput(multioutput: str) -> Optional[int]: def mse(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike: """Mean squared error with missing values handling. - `Wikipedia entry on the Mean squared error - `_ + .. math:: + MSE(y\_true, y\_pred) = \\frac{\\sum_{i=1}^{n}{(y\_true_i - y\_pred_i)^2}}{n} - The nans are ignored during computation. + The nans are ignored during computation. If all values are nans, the result is NaN. Parameters ---------- @@ -91,8 +91,10 @@ def mse(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Arr def mape(y_true: ArrayLike, y_pred: ArrayLike, eps: float = 1e-15, multioutput: str = "joint") -> ArrayLike: """Mean absolute percentage error. - `Wikipedia entry on the Mean absolute percentage error - `_ + .. math:: + MAPE(y\_true, y\_pred) = \\frac{1}{n} \\cdot \\sum_{i=1}^{n} \\frac{\\mid y\_true_i - y\_pred_i\\mid}{\\mid y\_true_i \\mid + \epsilon} + + `Scale-dependent errors `_ Parameters ---------- @@ -135,11 +137,8 @@ def mape(y_true: ArrayLike, y_pred: ArrayLike, eps: float = 1e-15, multioutput: def smape(y_true: ArrayLike, y_pred: ArrayLike, eps: float = 1e-15, multioutput: str = "joint") -> ArrayLike: """Symmetric mean absolute percentage error. - `Wikipedia entry on the Symmetric mean absolute percentage error - `_ - .. math:: - SMAPE = \dfrac{100}{n}\sum_{t=1}^{n}\dfrac{|ytrue_{t}-ypred_{t}|}{(|ypred_{t}|+|ytrue_{t}|) / 2} + SMAPE(y\_true, y\_pred) = \\frac{2 \\cdot 100 \\%}{n} \\cdot \\sum_{i=1}^{n} \\frac{\\mid y\_true_i - y\_pred_i\\mid}{\\mid y\_true_i \\mid + \\mid y\_pred_i \\mid} Parameters ---------- @@ -183,7 +182,7 @@ def sign(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Ar """Sign error metric. .. math:: - Sign(y\_true, y\_pred) = \\frac{1}{n}\\cdot\\sum_{i=0}^{n - 1}{sign(y\_true_i - y\_pred_i)} + Sign(y\_true, y\_pred) = \\frac{1}{n}\\cdot\\sum_{i=1}^{n}{sign(y\_true_i - y\_pred_i)} Parameters ---------- @@ -220,6 +219,9 @@ def sign(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Ar def max_deviation(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike: """Max Deviation metric. + .. math:: + MaxDeviation(y\_true, y\_pred) = \\max_{1 \\le j \\le n} | y_j |, where \\, y_j = \\sum_{i=1}^{j}{y\_pred_i - y\_true_i} + Parameters ---------- y_true: @@ -260,7 +262,7 @@ def wape(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Ar """Weighted average percentage Error metric. .. math:: - WAPE(y\_true, y\_pred) = \\frac{\\sum_{i=0}^{n} |y\_true_i - y\_pred_i|}{\\sum_{i=0}^{n}|y\\_true_i|} + WAPE(y\_true, y\_pred) = \\frac{\\sum_{i=1}^{n} |y\_true_i - y\_pred_i|}{\\sum_{i=1}^{n}|y\\_true_i|} Parameters ---------- diff --git a/etna/metrics/metrics.py b/etna/metrics/metrics.py index fe894bfdd..67088a811 100644 --- a/etna/metrics/metrics.py +++ b/etna/metrics/metrics.py @@ -51,6 +51,9 @@ class MSE(MetricWithMissingHandling): .. math:: MSE(y\_true, y\_pred) = \\frac{\\sum_{i=1}^{n}{(y\_true_i - y\_pred_i)^2}}{n} + This metric can handle missing values with parameter ``missing_mode``. + If there are too many of them in ``ignore`` mode, the result will be ``None``. + Notes ----- You can read more about logic of multi-segment metrics in Metric docs. @@ -383,6 +386,7 @@ class WAPE(Metric): .. math:: WAPE(y\_true, y\_pred) = \\frac{\\sum_{i=1}^{n} |y\_true_i - y\_pred_i|}{\\sum_{i=1}^{n}|y\\_true_i|} + Notes ----- You can read more about logic of multi-segment metrics in Metric docs. From e701e4972a7ac5aa38f47d99ca351eb4504e5a48 Mon Sep 17 00:00:00 2001 From: Dmitry Bunin Date: Fri, 6 Dec 2024 11:09:36 +0300 Subject: [PATCH 8/8] chore: update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e44d0ff32..dc8f73cc3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,7 +25,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - - - Add docstring warning about handling non-regressors (including target) to children of `WindowStatisticsTransform` ([#474](https://github.com/etna-team/etna/pull/474)) -- +- Add parameter `missing_mode` into `MSE` metric ([#515](https://github.com/etna-team/etna/pull/515)) - - -