From bb6e41edc7f33a7259d7ec717ceeb8cfd44f36b0 Mon Sep 17 00:00:00 2001
From: Dmitry Bunin <bunin260200@gmail.com>
Date: Thu, 28 Nov 2024 17:31:30 +0300
Subject: [PATCH 1/8] feature: add task files

---
 etna/libs/sklearn/__init__.py      |   1 +
 etna/libs/sklearn/metrics.py       | 117 +++++++++++++++++++++++++++++
 etna/metrics/functional_metrics.py |   3 +-
 3 files changed, 120 insertions(+), 1 deletion(-)
 create mode 100644 etna/libs/sklearn/__init__.py
 create mode 100644 etna/libs/sklearn/metrics.py

diff --git a/etna/libs/sklearn/__init__.py b/etna/libs/sklearn/__init__.py
new file mode 100644
index 000000000..0b6eb21db
--- /dev/null
+++ b/etna/libs/sklearn/__init__.py
@@ -0,0 +1 @@
+from etna.libs.sklearn.metrics import mean_squared_error
diff --git a/etna/libs/sklearn/metrics.py b/etna/libs/sklearn/metrics.py
new file mode 100644
index 000000000..596c6f16b
--- /dev/null
+++ b/etna/libs/sklearn/metrics.py
@@ -0,0 +1,117 @@
+"""
+BSD 3-Clause License
+
+Copyright (c) 2007-2024 The scikit-learn developers.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""
+# Note: Copied from scikit-learn repository (https://github.com/scikit-learn/scikit-learn/blob/1.0.2/sklearn/metrics/_regression.py#L378)
+
+import numpy as np
+
+from sklearn.utils.validation import check_consistent_length
+from sklearn.metrics._regression import _check_reg_targets
+
+
+def mean_squared_error(
+    y_true, y_pred, *, sample_weight=None, multioutput="uniform_average", squared=True
+):
+    """Mean squared error regression loss.
+
+    Read more in the :ref:`User Guide <mean_squared_error>`.
+
+    Parameters
+    ----------
+    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
+        Ground truth (correct) target values.
+
+    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
+        Estimated target values.
+
+    sample_weight : array-like of shape (n_samples,), default=None
+        Sample weights.
+
+    multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
+            (n_outputs,), default='uniform_average'
+        Defines aggregating of multiple output values.
+        Array-like value defines weights used to average errors.
+
+        'raw_values' :
+            Returns a full set of errors in case of multioutput input.
+
+        'uniform_average' :
+            Errors of all outputs are averaged with uniform weight.
+
+    squared : bool, default=True
+        If True returns MSE value, if False returns RMSE value.
+
+    Returns
+    -------
+    loss : float or ndarray of floats
+        A non-negative floating point value (the best value is 0.0), or an
+        array of floating point values, one for each individual target.
+
+    Examples
+    --------
+    >>> from sklearn.metrics import mean_squared_error
+    >>> y_true = [3, -0.5, 2, 7]
+    >>> y_pred = [2.5, 0.0, 2, 8]
+    >>> mean_squared_error(y_true, y_pred)
+    0.375
+    >>> y_true = [3, -0.5, 2, 7]
+    >>> y_pred = [2.5, 0.0, 2, 8]
+    >>> mean_squared_error(y_true, y_pred, squared=False)
+    0.612...
+    >>> y_true = [[0.5, 1],[-1, 1],[7, -6]]
+    >>> y_pred = [[0, 2],[-1, 2],[8, -5]]
+    >>> mean_squared_error(y_true, y_pred)
+    0.708...
+    >>> mean_squared_error(y_true, y_pred, squared=False)
+    0.822...
+    >>> mean_squared_error(y_true, y_pred, multioutput='raw_values')
+    array([0.41666667, 1.        ])
+    >>> mean_squared_error(y_true, y_pred, multioutput=[0.3, 0.7])
+    0.825...
+    """
+    y_type, y_true, y_pred, multioutput = _check_reg_targets(
+        y_true, y_pred, multioutput
+    )
+    check_consistent_length(y_true, y_pred, sample_weight)
+    # here we changed using `np.average` -> `np.nanmean`
+    output_errors = np.nanmean((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
+
+    if not squared:
+        output_errors = np.sqrt(output_errors)
+
+    if isinstance(multioutput, str):
+        if multioutput == "raw_values":
+            return output_errors
+        elif multioutput == "uniform_average":
+            # pass None as weights to np.average: uniform mean
+            multioutput = None
+
+    return np.average(output_errors, weights=multioutput)
diff --git a/etna/metrics/functional_metrics.py b/etna/metrics/functional_metrics.py
index 404f1a757..fad58ed40 100644
--- a/etna/metrics/functional_metrics.py
+++ b/etna/metrics/functional_metrics.py
@@ -6,12 +6,13 @@
 
 import numpy as np
 from sklearn.metrics import mean_absolute_error as mae
-from sklearn.metrics import mean_squared_error as mse
 from sklearn.metrics import mean_squared_log_error as msle
 from sklearn.metrics import median_absolute_error as medae
 from sklearn.metrics import r2_score
 from typing_extensions import assert_never
 
+from etna.libs.sklearn import mean_squared_error as mse
+
 ArrayLike = Union[float, Sequence[float], Sequence[Sequence[float]]]
 
 

From 5767624891834890e21ad9645372d1fefefd60e8 Mon Sep 17 00:00:00 2001
From: Dmitry Bunin <bunin260200@gmail.com>
Date: Fri, 29 Nov 2024 12:11:15 +0300
Subject: [PATCH 2/8] feature: rework adding new functional metric

---
 etna/libs/sklearn/__init__.py                 |   1 -
 etna/libs/sklearn/metrics.py                  | 117 ------------------
 etna/metrics/functional_metrics.py            |  42 ++++++-
 tests/test_metrics/test_functional_metrics.py |  69 +++++++++++
 4 files changed, 109 insertions(+), 120 deletions(-)
 delete mode 100644 etna/libs/sklearn/__init__.py
 delete mode 100644 etna/libs/sklearn/metrics.py

diff --git a/etna/libs/sklearn/__init__.py b/etna/libs/sklearn/__init__.py
deleted file mode 100644
index 0b6eb21db..000000000
--- a/etna/libs/sklearn/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from etna.libs.sklearn.metrics import mean_squared_error
diff --git a/etna/libs/sklearn/metrics.py b/etna/libs/sklearn/metrics.py
deleted file mode 100644
index 596c6f16b..000000000
--- a/etna/libs/sklearn/metrics.py
+++ /dev/null
@@ -1,117 +0,0 @@
-"""
-BSD 3-Clause License
-
-Copyright (c) 2007-2024 The scikit-learn developers.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-* Redistributions of source code must retain the above copyright notice, this
-  list of conditions and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright notice,
-  this list of conditions and the following disclaimer in the documentation
-  and/or other materials provided with the distribution.
-
-* Neither the name of the copyright holder nor the names of its
-  contributors may be used to endorse or promote products derived from
-  this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-"""
-# Note: Copied from scikit-learn repository (https://github.com/scikit-learn/scikit-learn/blob/1.0.2/sklearn/metrics/_regression.py#L378)
-
-import numpy as np
-
-from sklearn.utils.validation import check_consistent_length
-from sklearn.metrics._regression import _check_reg_targets
-
-
-def mean_squared_error(
-    y_true, y_pred, *, sample_weight=None, multioutput="uniform_average", squared=True
-):
-    """Mean squared error regression loss.
-
-    Read more in the :ref:`User Guide <mean_squared_error>`.
-
-    Parameters
-    ----------
-    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
-        Ground truth (correct) target values.
-
-    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
-        Estimated target values.
-
-    sample_weight : array-like of shape (n_samples,), default=None
-        Sample weights.
-
-    multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
-            (n_outputs,), default='uniform_average'
-        Defines aggregating of multiple output values.
-        Array-like value defines weights used to average errors.
-
-        'raw_values' :
-            Returns a full set of errors in case of multioutput input.
-
-        'uniform_average' :
-            Errors of all outputs are averaged with uniform weight.
-
-    squared : bool, default=True
-        If True returns MSE value, if False returns RMSE value.
-
-    Returns
-    -------
-    loss : float or ndarray of floats
-        A non-negative floating point value (the best value is 0.0), or an
-        array of floating point values, one for each individual target.
-
-    Examples
-    --------
-    >>> from sklearn.metrics import mean_squared_error
-    >>> y_true = [3, -0.5, 2, 7]
-    >>> y_pred = [2.5, 0.0, 2, 8]
-    >>> mean_squared_error(y_true, y_pred)
-    0.375
-    >>> y_true = [3, -0.5, 2, 7]
-    >>> y_pred = [2.5, 0.0, 2, 8]
-    >>> mean_squared_error(y_true, y_pred, squared=False)
-    0.612...
-    >>> y_true = [[0.5, 1],[-1, 1],[7, -6]]
-    >>> y_pred = [[0, 2],[-1, 2],[8, -5]]
-    >>> mean_squared_error(y_true, y_pred)
-    0.708...
-    >>> mean_squared_error(y_true, y_pred, squared=False)
-    0.822...
-    >>> mean_squared_error(y_true, y_pred, multioutput='raw_values')
-    array([0.41666667, 1.        ])
-    >>> mean_squared_error(y_true, y_pred, multioutput=[0.3, 0.7])
-    0.825...
-    """
-    y_type, y_true, y_pred, multioutput = _check_reg_targets(
-        y_true, y_pred, multioutput
-    )
-    check_consistent_length(y_true, y_pred, sample_weight)
-    # here we changed using `np.average` -> `np.nanmean`
-    output_errors = np.nanmean((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
-
-    if not squared:
-        output_errors = np.sqrt(output_errors)
-
-    if isinstance(multioutput, str):
-        if multioutput == "raw_values":
-            return output_errors
-        elif multioutput == "uniform_average":
-            # pass None as weights to np.average: uniform mean
-            multioutput = None
-
-    return np.average(output_errors, weights=multioutput)
diff --git a/etna/metrics/functional_metrics.py b/etna/metrics/functional_metrics.py
index fad58ed40..b0f0ae58b 100644
--- a/etna/metrics/functional_metrics.py
+++ b/etna/metrics/functional_metrics.py
@@ -6,13 +6,12 @@
 
 import numpy as np
 from sklearn.metrics import mean_absolute_error as mae
+from sklearn.metrics import mean_squared_error as mse
 from sklearn.metrics import mean_squared_log_error as msle
 from sklearn.metrics import median_absolute_error as medae
 from sklearn.metrics import r2_score
 from typing_extensions import assert_never
 
-from etna.libs.sklearn import mean_squared_error as mse
-
 ArrayLike = Union[float, Sequence[float], Sequence[Sequence[float]]]
 
 
@@ -42,6 +41,45 @@ def _get_axis_by_multioutput(multioutput: str) -> Optional[int]:
         assert_never(multioutput_enum)
 
 
+def mse_with_missing_handling(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike:
+    """Mean squared error with missing values handling.
+
+    `Wikipedia entry on the Mean squared error
+    <https://en.wikipedia.org/wiki/Mean_squared_error>`_
+
+    The nans are ignored during computation.
+
+    Parameters
+    ----------
+    y_true:
+        array-like of shape (n_samples,) or (n_samples, n_outputs)
+
+        Ground truth (correct) target values.
+
+    y_pred:
+        array-like of shape (n_samples,) or (n_samples, n_outputs)
+
+        Estimated target values.
+
+    multioutput:
+        Defines aggregating of multiple output values
+        (see :py:class:`~etna.metrics.functional_metrics.FunctionalMetricMultioutput`).
+
+    Returns
+    -------
+    :
+        A non-negative floating point value (the best value is 0.0), or an array of floating point values,
+        one for each individual target. The result is NaN where no pair of values is free of NaNs.
+    """
+    y_true_array, y_pred_array = np.asarray(y_true), np.asarray(y_pred)
+
+    if len(y_true_array.shape) != len(y_pred_array.shape):  # compares the number of dimensions only
+        raise ValueError("Shapes of the labels must be the same")
+
+    axis = _get_axis_by_multioutput(multioutput)
+    return np.nanmean((y_true_array - y_pred_array) ** 2, axis=axis)
+
+
 def mape(y_true: ArrayLike, y_pred: ArrayLike, eps: float = 1e-15, multioutput: str = "joint") -> ArrayLike:
     """Mean absolute percentage error.
 
diff --git a/tests/test_metrics/test_functional_metrics.py b/tests/test_metrics/test_functional_metrics.py
index f9198cf11..5657983b9 100644
--- a/tests/test_metrics/test_functional_metrics.py
+++ b/tests/test_metrics/test_functional_metrics.py
@@ -1,3 +1,4 @@
+import numpy as np
 import numpy.testing as npt
 import pytest
 
@@ -12,6 +13,7 @@
 from etna.metrics import sign
 from etna.metrics import smape
 from etna.metrics import wape
+from etna.metrics.functional_metrics import mse_with_missing_handling
 
 
 @pytest.fixture()
@@ -34,6 +36,7 @@ def y_pred_1d():
     (
         (mae, 1),
         (mse, 1),
+        (mse_with_missing_handling, 1),
         (rmse, 1),
         (mape, 66 + 2 / 3),
         (smape, 47.6190476),
@@ -58,6 +61,7 @@ def test_mle_metric_exception(y_true_1d, y_pred_1d):
 @pytest.mark.parametrize(
     "metric",
     (
+        mse_with_missing_handling,
         mape,
         smape,
         sign,
@@ -85,6 +89,7 @@ def y_pred_2d():
     (
         (mae, 1),
         (mse, 1),
+        (mse_with_missing_handling, 1),
         (rmse, 1),
         (mape, 42 + 3 / 11),
         (smape, 38.0952380),
@@ -104,6 +109,7 @@ def test_all_2d_metrics_joint(metric, right_metrics_value, y_true_2d, y_pred_2d)
     (
         (mae, {"multioutput": "raw_values"}, [1, 1]),
         (mse, {"multioutput": "raw_values"}, [1, 1]),
+        (mse_with_missing_handling, {"multioutput": "raw_values"}, [1, 1]),
         (rmse, {"multioutput": "raw_values"}, [1, 1]),
         (mape, {"multioutput": "raw_values"}, [9.5454545, 75]),
         (smape, {"multioutput": "raw_values"}, [9.5238095, 66 + 2 / 3]),
@@ -116,3 +122,66 @@ def test_all_2d_metrics_joint(metric, right_metrics_value, y_true_2d, y_pred_2d)
 )
 def test_all_2d_metrics_per_output(metric, params, right_metrics_value, y_true_2d, y_pred_2d):
     npt.assert_almost_equal(metric(y_true_2d, y_pred_2d, **params), right_metrics_value)
+
+
+@pytest.mark.filterwarnings("ignore: Mean of empty slice")
+@pytest.mark.parametrize(
+    "y_true, y_pred, multioutput, expected",
+    [
+        # 1d: positions where either array holds NaN are left out of the mean
+        (np.array([1.0]), np.array([1.0]), "joint", 0.0),
+        (np.array([1.0, 2.0, 3.0]), np.array([3.0, 1.0, 2.0]), "joint", 2.0),
+        (np.array([1.0, np.nan, 3.0]), np.array([3.0, 1.0, 2.0]), "joint", 2.5),
+        (np.array([1.0, 2.0, 3.0]), np.array([3.0, np.nan, 2.0]), "joint", 2.5),
+        (np.array([1.0, np.nan, 3.0]), np.array([3.0, np.nan, 2.0]), "joint", 2.5),
+        (np.array([1.0, np.nan, 3.0]), np.array([3.0, 1.0, np.nan]), "joint", 4.0),
+        (np.array([1.0, np.nan, np.nan]), np.array([np.nan, np.nan, 2.0]), "joint", np.nan),
+        # 2d: inputs are transposed to (n_samples, n_outputs); an all-NaN output yields NaN
+        (np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T, np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T, "joint", 2.5),
+        (
+            np.array([[1.0, np.nan, 3.0], [3.0, 4.0, np.nan]]).T,
+            np.array([[3.0, 1.0, np.nan], [5.0, np.nan, 4.0]]).T,
+            "joint",
+            4.0,
+        ),
+        (
+            np.array([[np.nan, np.nan, np.nan], [3.0, 4.0, 5.0]]).T,
+            np.array([[3.0, 1.0, np.nan], [5.0, np.nan, 4.0]]).T,
+            "joint",
+            2.5,
+        ),
+        (
+            np.array([[np.nan, np.nan, np.nan], [np.nan, np.nan, np.nan]]).T,
+            np.array([[3.0, 1.0, np.nan], [5.0, np.nan, 4.0]]).T,
+            "joint",
+            np.nan,
+        ),
+        (
+            np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]).T,
+            np.array([[3.0, 1.0, 2.0], [5.0, 2.0, 4.0]]).T,
+            "raw_values",
+            np.array([2.0, 3.0]),
+        ),
+        (
+            np.array([[1.0, np.nan, 3.0], [3.0, 4.0, np.nan]]).T,
+            np.array([[3.0, 1.0, np.nan], [5.0, np.nan, 4.0]]).T,
+            "raw_values",
+            np.array([4.0, 4.0]),
+        ),
+        (
+            np.array([[np.nan, np.nan, np.nan], [3.0, 4.0, 5.0]]).T,
+            np.array([[3.0, 1.0, np.nan], [5.0, np.nan, 4.0]]).T,
+            "raw_values",
+            np.array([np.nan, 2.5]),
+        ),
+        (
+            np.array([[np.nan, np.nan, np.nan], [np.nan, np.nan, np.nan]]).T,
+            np.array([[3.0, 1.0, np.nan], [5.0, np.nan, 4.0]]).T,
+            "raw_values",
+            np.array([np.nan, np.nan]),
+        ),
+    ],
+)
+def test_values_ok(y_true, y_pred, multioutput, expected):
+    result = mse_with_missing_handling(y_true=y_true, y_pred=y_pred, multioutput=multioutput)
+    npt.assert_allclose(result, expected)

From 157d361c1fc64e97f7c7dd9c9ce15dc96fe376ef Mon Sep 17 00:00:00 2001
From: Dmitry Bunin <bunin260200@gmail.com>
Date: Wed, 4 Dec 2024 14:06:39 +0300
Subject: [PATCH 3/8] feature: rework MSE to handle_missing, rework tests

---
 etna/metrics/base.py                          |  13 +-
 etna/metrics/functional_metrics.py            |  10 +-
 etna/metrics/intervals_metrics.py             |  24 ++-
 etna/metrics/metrics.py                       | 140 +++++++++++++-----
 tests/test_metrics/test_functional_metrics.py |   1 -
 tests/test_metrics/test_metrics.py            | 112 +++++++++++---
 tests/utils.py                                |   2 +-
 7 files changed, 231 insertions(+), 71 deletions(-)

diff --git a/etna/metrics/base.py b/etna/metrics/base.py
index 0dfc0304e..45c17cbd3 100644
--- a/etna/metrics/base.py
+++ b/etna/metrics/base.py
@@ -128,7 +128,7 @@ class Metric(AbstractMetric, BaseMixin):
     def __init__(
         self,
         metric_fn: MetricFunction,
-        mode: str = MetricAggregationMode.per_segment,
+        mode: str = MetricAggregationMode.per_segment.value,
         metric_fn_signature: str = "array_to_scalar",
         **kwargs,
     ):
@@ -146,6 +146,8 @@ def __init__(
 
             * if "per-segment" -- does not aggregate metrics
 
+            See :py:class:`~etna.metrics.base.MetricAggregationMode`.
+
         metric_fn_signature:
             type of signature of ``metric_fn`` (see :py:class:`~etna.metrics.base.MetricFunctionSignature`)
         kwargs:
@@ -385,7 +387,7 @@ class MetricWithMissingHandling(Metric):
     def __init__(
         self,
         metric_fn: MetricFunction,
-        mode: str = MetricAggregationMode.per_segment,
+        mode: str = MetricAggregationMode.per_segment.value,
         metric_fn_signature: str = "array_to_scalar",
         missing_mode: str = "error",
         **kwargs,
@@ -404,6 +406,8 @@ def __init__(
 
             * if "per-segment" -- does not aggregate metrics
 
+            See :py:class:`~etna.metrics.base.MetricAggregationMode`.
+
         metric_fn_signature:
             type of signature of ``metric_fn`` (see :py:class:`~etna.metrics.base.MetricFunctionSignature`)
         missing_mode:
@@ -421,7 +425,8 @@ def __init__(
             If non-existent ``missing_mode`` is used.
         """
         super().__init__(metric_fn=metric_fn, mode=mode, metric_fn_signature=metric_fn_signature, **kwargs)
-        self.missing_mode = MetricMissingMode(missing_mode)
+        self.missing_mode = missing_mode
+        self._missing_mode_enum = MetricMissingMode(missing_mode)
 
     def _validate_nans(self, y_true: TSDataset, y_pred: TSDataset):
         """Check that ``y_true`` and ``y_pred`` doesn't have NaNs depending on ``missing_mode``.
@@ -442,7 +447,7 @@ def _validate_nans(self, y_true: TSDataset, y_pred: TSDataset):
         df_pred = y_pred.df.loc[:, pd.IndexSlice[:, "target"]]
 
         df_true_isna_sum = df_true.isna().sum()
-        if self.missing_mode is MetricMissingMode.error and (df_true_isna_sum > 0).any():
+        if self._missing_mode_enum is MetricMissingMode.error and (df_true_isna_sum > 0).any():
             error_segments = set(df_true_isna_sum[df_true_isna_sum > 0].index.droplevel("feature").tolist())
             raise ValueError(f"There are NaNs in y_true! Segments with NaNs: {reprlib.repr(error_segments)}.")
 
diff --git a/etna/metrics/functional_metrics.py b/etna/metrics/functional_metrics.py
index b0f0ae58b..b517231e9 100644
--- a/etna/metrics/functional_metrics.py
+++ b/etna/metrics/functional_metrics.py
@@ -1,3 +1,4 @@
+import warnings
 from enum import Enum
 from functools import partial
 from typing import Optional
@@ -77,7 +78,14 @@ def mse_with_missing_handling(y_true: ArrayLike, y_pred: ArrayLike, multioutput:
         raise ValueError("Shapes of the labels must be the same")
 
     axis = _get_axis_by_multioutput(multioutput)
-    return np.nanmean((y_true_array - y_pred_array) ** 2, axis=axis)
+    with warnings.catch_warnings():
+        # this helps to prevent warning in case of all nans
+        warnings.filterwarnings(
+            message="Mean of empty slice",
+            action="ignore",
+        )
+        result = np.nanmean((y_true_array - y_pred_array) ** 2, axis=axis)
+    return result
 
 
 def mape(y_true: ArrayLike, y_pred: ArrayLike, eps: float = 1e-15, multioutput: str = "joint") -> ArrayLike:
diff --git a/etna/metrics/intervals_metrics.py b/etna/metrics/intervals_metrics.py
index 6e70525af..8e1847489 100644
--- a/etna/metrics/intervals_metrics.py
+++ b/etna/metrics/intervals_metrics.py
@@ -56,7 +56,7 @@ class Coverage(Metric, _IntervalsMetricMixin):
     def __init__(
         self,
         quantiles: Optional[Tuple[float, float]] = None,
-        mode: str = MetricAggregationMode.per_segment,
+        mode: str = MetricAggregationMode.per_segment.value,
         upper_name: Optional[str] = None,
         lower_name: Optional[str] = None,
         **kwargs,
@@ -67,8 +67,14 @@ def __init__(
         ----------
         quantiles:
             lower and upper quantiles
-        mode: 'macro' or 'per-segment'
-            metrics aggregation mode
+        mode:
+            "macro" or "per-segment", way to aggregate metric values over segments:
+
+            * if "macro" computes average value
+
+            * if "per-segment" -- does not aggregate metrics
+
+            See :py:class:`~etna.metrics.base.MetricAggregationMode`.
         upper_name:
             name of column with upper border of the interval
         lower_name:
@@ -169,7 +175,7 @@ class Width(Metric, _IntervalsMetricMixin):
     def __init__(
         self,
         quantiles: Optional[Tuple[float, float]] = None,
-        mode: str = MetricAggregationMode.per_segment,
+        mode: str = MetricAggregationMode.per_segment.value,
         upper_name: Optional[str] = None,
         lower_name: Optional[str] = None,
         **kwargs,
@@ -180,8 +186,14 @@ def __init__(
         ----------
         quantiles:
             lower and upper quantiles
-        mode: 'macro' or 'per-segment'
-            metrics aggregation mode
+        mode:
+            "macro" or "per-segment", way to aggregate metric values over segments:
+
+            * if "macro" computes average value
+
+            * if "per-segment" -- does not aggregate metrics
+
+            See :py:class:`~etna.metrics.base.MetricAggregationMode`.
         upper_name:
             name of column with upper border of the interval
         lower_name:
diff --git a/etna/metrics/metrics.py b/etna/metrics/metrics.py
index d5388d7ce..c6e0da774 100644
--- a/etna/metrics/metrics.py
+++ b/etna/metrics/metrics.py
@@ -2,11 +2,12 @@
 
 from etna.metrics.base import Metric
 from etna.metrics.base import MetricAggregationMode
+from etna.metrics.base import MetricWithMissingHandling
 from etna.metrics.functional_metrics import mae
 from etna.metrics.functional_metrics import mape
 from etna.metrics.functional_metrics import max_deviation
 from etna.metrics.functional_metrics import medae
-from etna.metrics.functional_metrics import mse
+from etna.metrics.functional_metrics import mse_with_missing_handling
 from etna.metrics.functional_metrics import msle
 from etna.metrics.functional_metrics import r2_score
 from etna.metrics.functional_metrics import rmse
@@ -26,7 +27,7 @@ class MAE(Metric):
     You can read more about logic of multi-segment metrics in Metric docs.
     """
 
-    def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs):
+    def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs):
         """Init metric.
 
         Parameters
@@ -45,7 +46,7 @@ def greater_is_better(self) -> bool:
         return False
 
 
-class MSE(Metric):
+class MSE(MetricWithMissingHandling):
     """Mean squared error metric with multi-segment computation support.
 
     .. math::
@@ -56,18 +57,33 @@ class MSE(Metric):
     You can read more about logic of multi-segment metrics in Metric docs.
     """
 
-    def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs):
+    def __init__(self, mode: str = MetricAggregationMode.per_segment.value, missing_mode: str = "error", **kwargs):
         """Init metric.
 
         Parameters
         ----------
-        mode: 'macro' or 'per-segment'
-            metrics aggregation mode
+        mode:
+            "macro" or "per-segment", way to aggregate metric values over segments:
+
+            * if "macro" computes average value
+
+            * if "per-segment" -- does not aggregate metrics
+
+            See :py:class:`~etna.metrics.base.MetricAggregationMode`.
+
+        missing_mode:
+            mode of handling missing values (see :py:class:`~etna.metrics.base.MetricMissingMode`)
         kwargs:
             metric's computation arguments
         """
-        mse_per_output = partial(mse, multioutput="raw_values")
-        super().__init__(mode=mode, metric_fn=mse_per_output, metric_fn_signature="matrix_to_array", **kwargs)
+        mse_per_output = partial(mse_with_missing_handling, multioutput="raw_values")
+        super().__init__(
+            mode=mode,
+            metric_fn=mse_per_output,
+            missing_mode=missing_mode,
+            metric_fn_signature="matrix_to_array",
+            **kwargs,
+        )
 
     @property
     def greater_is_better(self) -> bool:
@@ -86,13 +102,19 @@ class RMSE(Metric):
     You can read more about logic of multi-segment metrics in Metric docs.
     """
 
-    def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs):
+    def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs):
         """Init metric.
 
         Parameters
         ----------
-        mode: 'macro' or 'per-segment'
-            metrics aggregation mode
+        mode:
+            "macro" or "per-segment", way to aggregate metric values over segments:
+
+            * if "macro" computes average value
+
+            * if "per-segment" -- does not aggregate metrics
+
+            See :py:class:`~etna.metrics.base.MetricAggregationMode`.
         kwargs:
             metric's computation arguments
         """
@@ -115,13 +137,19 @@ class R2(Metric):
     You can read more about logic of multi-segment metrics in Metric docs.
     """
 
-    def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs):
+    def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs):
         """Init metric.
 
         Parameters
         ----------
-        mode: 'macro' or 'per-segment'
-            metrics aggregation mode
+        mode:
+            "macro" or "per-segment", way to aggregate metric values over segments:
+
+            * if "macro" computes average value
+
+            * if "per-segment" -- does not aggregate metrics
+
+            See :py:class:`~etna.metrics.base.MetricAggregationMode`.
         kwargs:
             metric's computation arguments
         """
@@ -145,13 +173,19 @@ class MAPE(Metric):
     You can read more about logic of multi-segment metrics in Metric docs.
     """
 
-    def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs):
+    def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs):
         """Init metric.
 
         Parameters
         ----------
-        mode: 'macro' or 'per-segment'
-            metrics aggregation mode
+        mode:
+            "macro" or "per-segment", way to aggregate metric values over segments:
+
+            * if "macro" computes average value
+
+            * if "per-segment" -- does not aggregate metrics
+
+            See :py:class:`~etna.metrics.base.MetricAggregationMode`.
         kwargs:
             metric's computation arguments
         """
@@ -175,13 +209,19 @@ class SMAPE(Metric):
     You can read more about logic of multi-segment metrics in Metric docs.
     """
 
-    def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs):
+    def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs):
         """Init metric.
 
         Parameters
         ----------
-        mode: 'macro' or 'per-segment'
-            metrics aggregation mode
+        mode:
+            "macro" or "per-segment", way to aggregate metric values over segments:
+
+            * if "macro" computes average value
+
+            * if "per-segment" -- does not aggregate metrics
+
+            See :py:class:`~etna.metrics.base.MetricAggregationMode`.
         kwargs:
             metric's computation arguments
         """
@@ -205,13 +245,19 @@ class MedAE(Metric):
     You can read more about logic of multi-segment metrics in Metric docs.
     """
 
-    def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs):
+    def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs):
         """Init metric.
 
         Parameters
         ----------
-        mode: 'macro' or 'per-segment'
-            metrics aggregation mode
+        mode:
+            "macro" or "per-segment", way to aggregate metric values over segments:
+
+            * if "macro" computes average value
+
+            * if "per-segment" -- does not aggregate metrics
+
+            See :py:class:`~etna.metrics.base.MetricAggregationMode`.
         kwargs:
             metric's computation arguments
         """
@@ -235,13 +281,19 @@ class MSLE(Metric):
     You can read more about logic of multi-segment metrics in Metric docs.
     """
 
-    def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs):
+    def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs):
         """Init metric.
 
         Parameters
         ----------
-        mode: 'macro' or 'per-segment'
-            metrics aggregation mode
+        mode:
+            "macro" or "per-segment", way to aggregate metric values over segments:
+
+            * if "macro" computes average value
+
+            * if "per-segment" -- does not aggregate metrics
+
+            See :py:class:`~etna.metrics.base.MetricAggregationMode`.
         kwargs:
             metric's computation arguments
 
@@ -266,13 +318,19 @@ class Sign(Metric):
     You can read more about logic of multi-segment metrics in Metric docs.
     """
 
-    def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs):
+    def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs):
         """Init metric.
 
         Parameters
         ----------
-        mode: 'macro' or 'per-segment'
-            metrics aggregation mode
+        mode:
+            "macro" or "per-segment", way to aggregate metric values over segments:
+
+            * if "macro" computes average value
+
+            * if "per-segment" -- does not aggregate metrics
+
+            See :py:class:`~etna.metrics.base.MetricAggregationMode`.
         kwargs:
             metric's computation arguments
         """
@@ -296,13 +354,19 @@ class MaxDeviation(Metric):
     You can read more about logic of multi-segment metrics in Metric docs.
     """
 
-    def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs):
+    def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs):
         """Init metric.
 
         Parameters
         ----------
-        mode: 'macro' or 'per-segment'
-            metrics aggregation mode
+        mode:
+            "macro" or "per-segment", way to aggregate metric values over segments:
+
+            * if "macro" computes average value
+
+            * if "per-segment" -- does not aggregate metrics
+
+            See :py:class:`~etna.metrics.base.MetricAggregationMode`.
         kwargs:
             metric's computation arguments
         """
@@ -325,13 +389,19 @@ class WAPE(Metric):
     You can read more about logic of multi-segment metrics in Metric docs.
     """
 
-    def __init__(self, mode: str = MetricAggregationMode.per_segment, **kwargs):
+    def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs):
         """Init metric.
 
         Parameters
         ----------
-        mode: 'macro' or 'per-segment'
-            metrics aggregation mode
+        mode:
+            "macro" or "per-segment", way to aggregate metric values over segments:
+
+            * if "macro" computes average value
+
+            * if "per-segment" -- does not aggregate metrics
+
+            See :py:class:`~etna.metrics.base.MetricAggregationMode`.
         kwargs:
             metric's computation arguments
         """
diff --git a/tests/test_metrics/test_functional_metrics.py b/tests/test_metrics/test_functional_metrics.py
index 5657983b9..0158bdf96 100644
--- a/tests/test_metrics/test_functional_metrics.py
+++ b/tests/test_metrics/test_functional_metrics.py
@@ -124,7 +124,6 @@ def test_all_2d_metrics_per_output(metric, params, right_metrics_value, y_true_2
     npt.assert_almost_equal(metric(y_true_2d, y_pred_2d, **params), right_metrics_value)
 
 
-@pytest.mark.filterwarnings("ignore: Mean of empty slice")
 @pytest.mark.parametrize(
     "y_true, y_pred, multioutput, expected",
     [
diff --git a/tests/test_metrics/test_metrics.py b/tests/test_metrics/test_metrics.py
index 1d02d5b98..58074ead9 100644
--- a/tests/test_metrics/test_metrics.py
+++ b/tests/test_metrics/test_metrics.py
@@ -35,31 +35,28 @@
 
 
 @pytest.mark.parametrize(
-    "metric_class, metric_class_repr, metric_params, param_repr",
+    "metric, expected_repr",
     (
-        (MAE, "MAE", {}, ""),
-        (MSE, "MSE", {}, ""),
-        (RMSE, "RMSE", {}, ""),
-        (MedAE, "MedAE", {}, ""),
-        (MSLE, "MSLE", {}, ""),
-        (MAPE, "MAPE", {}, ""),
-        (SMAPE, "SMAPE", {}, ""),
-        (R2, "R2", {}, ""),
-        (Sign, "Sign", {}, ""),
-        (MaxDeviation, "MaxDeviation", {}, ""),
-        (DummyMetric, "DummyMetric", {"alpha": 1.0}, "alpha = 1.0, "),
-        (WAPE, "WAPE", {}, ""),
+        (MAE(), "MAE(mode = 'per-segment', )"),
+        (MAE(mode="macro"), "MAE(mode = 'macro', )"),
+        (MSE(), "MSE(mode = 'per-segment', missing_mode = 'error', )"),
+        (MSE(missing_mode="ignore"), "MSE(mode = 'per-segment', missing_mode = 'ignore', )"),
+        (RMSE(), "RMSE(mode = 'per-segment', )"),
+        (MedAE(), "MedAE(mode = 'per-segment', )"),
+        (MSLE(), "MSLE(mode = 'per-segment', )"),
+        (MAPE(), "MAPE(mode = 'per-segment', )"),
+        (SMAPE(), "SMAPE(mode = 'per-segment', )"),
+        (R2(), "R2(mode = 'per-segment', )"),
+        (Sign(), "Sign(mode = 'per-segment', )"),
+        (MaxDeviation(), "MaxDeviation(mode = 'per-segment', )"),
+        (DummyMetric(), "DummyMetric(mode = 'per-segment', alpha = 1.0, )"),
+        (WAPE(), "WAPE(mode = 'per-segment', )"),
     ),
 )
-def test_repr(metric_class, metric_class_repr, metric_params, param_repr):
+def test_repr(metric, expected_repr):
     """Check metrics __repr__ method"""
-    metric_mode = "per-segment"
-    kwargs = {**metric_params, "kwarg_1": "value_1", "kwarg_2": "value_2"}
-    kwargs_repr = param_repr + "kwarg_1 = 'value_1', kwarg_2 = 'value_2'"
-    metric = metric_class(mode=metric_mode, **kwargs)
     metric_repr = metric.__repr__()
-    true_repr = f"{metric_class_repr}(mode = '{metric_mode}', {kwargs_repr}, )"
-    assert metric_repr == true_repr
+    assert metric_repr == expected_repr
 
 
 @pytest.mark.parametrize(
@@ -168,17 +165,86 @@ def test_invalid_nans_pred(metric_class, train_test_dfs):
 
 
 @pytest.mark.parametrize(
-    "metric_class", (MAE, MSE, RMSE, MedAE, MSLE, MAPE, SMAPE, R2, Sign, MaxDeviation, DummyMetric, WAPE)
+    "metric",
+    (
+        MAE(),
+        MSE(missing_mode="error"),
+        RMSE(),
+        MedAE(),
+        MSLE(),
+        MAPE(),
+        SMAPE(),
+        R2(),
+        Sign(),
+        MaxDeviation(),
+        DummyMetric(),
+        WAPE(),
+    ),
 )
-def test_invalid_nans_true(metric_class, train_test_dfs):
+def test_invalid_nans_true(metric, train_test_dfs):
     """Check metrics behavior in case of nans in true values."""
     forecast_df, true_df = train_test_dfs
     true_df.df.iloc[0, 0] = np.NaN
-    metric = metric_class()
     with pytest.raises(ValueError, match="There are NaNs in y_true"):
         _ = metric(y_true=true_df, y_pred=forecast_df)
 
 
+@pytest.mark.parametrize(
+    "metric",
+    (MSE(missing_mode="ignore"),),
+)
+def test_invalid_single_nan_ignore(metric, train_test_dfs):
+    """Check metrics behavior in case of ignoring one nan in true values."""
+    forecast_df, true_df = train_test_dfs
+    true_df.df.iloc[0, 0] = np.nan
+    value = metric(y_true=true_df, y_pred=forecast_df)
+    assert isinstance(value, dict)
+    segments = set(forecast_df.df.columns.get_level_values("segment").unique().tolist())
+    assert value.keys() == segments
+    assert all(isinstance(cur_value, float) for cur_value in value.values())
+
+
+@pytest.mark.parametrize(
+    "metric",
+    (MSE(mode="per-segment", missing_mode="ignore"),),
+)
+def test_invalid_segment_nans_ignore_per_segment(metric, train_test_dfs):
+    """Check per-segment metrics behavior in case of ignoring segment of all nans in true values."""
+    forecast_df, true_df = train_test_dfs
+    true_df.df.iloc[:, 0] = np.nan
+    value = metric(y_true=true_df, y_pred=forecast_df)
+    assert isinstance(value, dict)
+    segments = set(forecast_df.df.columns.get_level_values("segment").unique().tolist())
+    assert value.keys() == segments
+    empty_segment = true_df.df.columns.get_level_values("segment").unique()[0]
+    assert all(isinstance(cur_value, float) for cur_segment, cur_value in value.items() if cur_segment != empty_segment)
+    assert value[empty_segment] is None
+
+
+@pytest.mark.parametrize(
+    "metric",
+    (MSE(mode="macro", missing_mode="ignore"),),
+)
+def test_invalid_segment_nans_ignore_macro(metric, train_test_dfs):
+    """Check macro metrics behavior in case of ignoring segment of all nans in true values."""
+    forecast_df, true_df = train_test_dfs
+    true_df.df.iloc[:, 0] = np.nan
+    value = metric(y_true=true_df, y_pred=forecast_df)
+    assert isinstance(value, float)
+
+
+@pytest.mark.parametrize(
+    "metric",
+    (MSE(mode="macro", missing_mode="ignore"),),
+)
+def test_invalid_all_nans_ignore_macro(metric, train_test_dfs):
+    """Check macro metrics behavior in case of all nan values in true values."""
+    forecast_df, true_df = train_test_dfs
+    true_df.df.iloc[:, :] = np.nan
+    value = metric(y_true=true_df, y_pred=forecast_df)
+    assert value is None
+
+
 @pytest.mark.parametrize(
     "metric_class, metric_fn",
     (
diff --git a/tests/utils.py b/tests/utils.py
index e9c6f0d5e..85fa5b06c 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -92,7 +92,7 @@ class DummyMetric(Metric):
     We change the name property here.
     """
 
-    def __init__(self, mode: str = MetricAggregationMode.per_segment, alpha: float = 1.0, **kwargs):
+    def __init__(self, mode: str = MetricAggregationMode.per_segment.value, alpha: float = 1.0, **kwargs):
         self.alpha = alpha
         super().__init__(mode=mode, metric_fn=create_dummy_functional_metric(alpha), **kwargs)
 

From 22aa421795043a07ab476de99d6c3661e8805f8f Mon Sep 17 00:00:00 2001
From: Dmitry Bunin <bunin260200@gmail.com>
Date: Wed, 4 Dec 2024 16:03:45 +0300
Subject: [PATCH 4/8] fix: update test on compute_metrics

---
 tests/test_metrics/test_metrics_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_metrics/test_metrics_utils.py b/tests/test_metrics/test_metrics_utils.py
index d8123081e..8872ad7af 100644
--- a/tests/test_metrics/test_metrics_utils.py
+++ b/tests/test_metrics/test_metrics_utils.py
@@ -16,7 +16,7 @@ def test_compute_metrics(train_test_dfs: Tuple[TSDataset, TSDataset]):
     expected_keys = [
         "MAE(mode = 'per-segment', )",
         "MAE(mode = 'macro', )",
-        "MSE(mode = 'per-segment', )",
+        "MSE(mode = 'per-segment', missing_mode = 'error', )",
         "MAPE(mode = 'macro', eps = 1e-05, )",
     ]
     result = compute_metrics(metrics=metrics, y_true=true_df, y_pred=forecast_df)

From d21df7b0b999494f03eae366a91600eb67fe2532 Mon Sep 17 00:00:00 2001
From: Dmitry Bunin <bunin260200@gmail.com>
Date: Fri, 6 Dec 2024 10:52:53 +0300
Subject: [PATCH 5/8] fix: remove sklearn mse and replace it with our own mse

---
 etna/metrics/__init__.py                      | 1 -
 etna/metrics/functional_metrics.py            | 6 +++---
 etna/metrics/metrics.py                       | 4 ++--
 tests/test_metrics/test_functional_metrics.py | 8 ++------
 4 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/etna/metrics/__init__.py b/etna/metrics/__init__.py
index 24f2e679a..38235e2fd 100644
--- a/etna/metrics/__init__.py
+++ b/etna/metrics/__init__.py
@@ -1,7 +1,6 @@
 """Module with metrics of forecasting quality."""
 
 from sklearn.metrics import mean_absolute_error as mae
-from sklearn.metrics import mean_squared_error as mse
 from sklearn.metrics import mean_squared_log_error as msle
 from sklearn.metrics import median_absolute_error as medae
 from sklearn.metrics import r2_score
diff --git a/etna/metrics/functional_metrics.py b/etna/metrics/functional_metrics.py
index b517231e9..036e4ae70 100644
--- a/etna/metrics/functional_metrics.py
+++ b/etna/metrics/functional_metrics.py
@@ -7,7 +7,7 @@
 
 import numpy as np
 from sklearn.metrics import mean_absolute_error as mae
-from sklearn.metrics import mean_squared_error as mse
+from sklearn.metrics import mean_squared_error as mse_sklearn
 from sklearn.metrics import mean_squared_log_error as msle
 from sklearn.metrics import median_absolute_error as medae
 from sklearn.metrics import r2_score
@@ -42,7 +42,7 @@ def _get_axis_by_multioutput(multioutput: str) -> Optional[int]:
         assert_never(multioutput_enum)
 
 
-def mse_with_missing_handling(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike:
+def mse(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike:
     """Mean squared error with missing values handling.
 
     `Wikipedia entry on the Mean squared error
@@ -253,7 +253,7 @@ def max_deviation(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "join
     return np.max(np.abs(prefix_error_sum), axis=axis)
 
 
-rmse = partial(mse, squared=False)
+rmse = partial(mse_sklearn, squared=False)
 
 
 def wape(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike:
diff --git a/etna/metrics/metrics.py b/etna/metrics/metrics.py
index c6e0da774..24db102b6 100644
--- a/etna/metrics/metrics.py
+++ b/etna/metrics/metrics.py
@@ -7,7 +7,7 @@
 from etna.metrics.functional_metrics import mape
 from etna.metrics.functional_metrics import max_deviation
 from etna.metrics.functional_metrics import medae
-from etna.metrics.functional_metrics import mse_with_missing_handling
+from etna.metrics.functional_metrics import mse
 from etna.metrics.functional_metrics import msle
 from etna.metrics.functional_metrics import r2_score
 from etna.metrics.functional_metrics import rmse
@@ -76,7 +76,7 @@ def __init__(self, mode: str = MetricAggregationMode.per_segment.value, missing_
         kwargs:
             metric's computation arguments
         """
-        mse_per_output = partial(mse_with_missing_handling, multioutput="raw_values")
+        mse_per_output = partial(mse, multioutput="raw_values")
         super().__init__(
             mode=mode,
             metric_fn=mse_per_output,
diff --git a/tests/test_metrics/test_functional_metrics.py b/tests/test_metrics/test_functional_metrics.py
index 0158bdf96..ba0e2436c 100644
--- a/tests/test_metrics/test_functional_metrics.py
+++ b/tests/test_metrics/test_functional_metrics.py
@@ -13,7 +13,6 @@
 from etna.metrics import sign
 from etna.metrics import smape
 from etna.metrics import wape
-from etna.metrics.functional_metrics import mse_with_missing_handling
 
 
 @pytest.fixture()
@@ -36,7 +35,6 @@ def y_pred_1d():
     (
         (mae, 1),
         (mse, 1),
-        (mse_with_missing_handling, 1),
         (rmse, 1),
         (mape, 66 + 2 / 3),
         (smape, 47.6190476),
@@ -61,7 +59,7 @@ def test_mle_metric_exception(y_true_1d, y_pred_1d):
 @pytest.mark.parametrize(
     "metric",
     (
-        mse_with_missing_handling,
+        mse,
         mape,
         smape,
         sign,
@@ -89,7 +87,6 @@ def y_pred_2d():
     (
         (mae, 1),
         (mse, 1),
-        (mse_with_missing_handling, 1),
         (rmse, 1),
         (mape, 42 + 3 / 11),
         (smape, 38.0952380),
@@ -109,7 +106,6 @@ def test_all_2d_metrics_joint(metric, right_metrics_value, y_true_2d, y_pred_2d)
     (
         (mae, {"multioutput": "raw_values"}, [1, 1]),
         (mse, {"multioutput": "raw_values"}, [1, 1]),
-        (mse_with_missing_handling, {"multioutput": "raw_values"}, [1, 1]),
         (rmse, {"multioutput": "raw_values"}, [1, 1]),
         (mape, {"multioutput": "raw_values"}, [9.5454545, 75]),
         (smape, {"multioutput": "raw_values"}, [9.5238095, 66 + 2 / 3]),
@@ -182,5 +178,5 @@ def test_all_2d_metrics_per_output(metric, params, right_metrics_value, y_true_2
     ],
 )
 def test_values_ok(y_true, y_pred, multioutput, expected):
-    result = mse_with_missing_handling(y_true=y_true, y_pred=y_pred, multioutput=multioutput)
+    result = mse(y_true=y_true, y_pred=y_pred, multioutput=multioutput)
     npt.assert_allclose(result, expected)

From 6ca83b217917694871510e7b5c803a3170a50a91 Mon Sep 17 00:00:00 2001
From: Dmitry Bunin <bunin260200@gmail.com>
Date: Fri, 6 Dec 2024 10:55:55 +0300
Subject: [PATCH 6/8] fix: change default value of metric mode to string
 per-segment

---
 etna/metrics/base.py              |  4 ++--
 etna/metrics/intervals_metrics.py |  5 ++---
 etna/metrics/metrics.py           | 23 +++++++++++------------
 3 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/etna/metrics/base.py b/etna/metrics/base.py
index 45c17cbd3..32373a145 100644
--- a/etna/metrics/base.py
+++ b/etna/metrics/base.py
@@ -128,7 +128,7 @@ class Metric(AbstractMetric, BaseMixin):
     def __init__(
         self,
         metric_fn: MetricFunction,
-        mode: str = MetricAggregationMode.per_segment.value,
+        mode: str = "per-segment",
         metric_fn_signature: str = "array_to_scalar",
         **kwargs,
     ):
@@ -387,7 +387,7 @@ class MetricWithMissingHandling(Metric):
     def __init__(
         self,
         metric_fn: MetricFunction,
-        mode: str = MetricAggregationMode.per_segment.value,
+        mode: str = "per-segment",
         metric_fn_signature: str = "array_to_scalar",
         missing_mode: str = "error",
         **kwargs,
diff --git a/etna/metrics/intervals_metrics.py b/etna/metrics/intervals_metrics.py
index 8e1847489..10284460e 100644
--- a/etna/metrics/intervals_metrics.py
+++ b/etna/metrics/intervals_metrics.py
@@ -9,7 +9,6 @@
 
 from etna.datasets import TSDataset
 from etna.metrics.base import Metric
-from etna.metrics.base import MetricAggregationMode
 from etna.metrics.functional_metrics import ArrayLike
 
 
@@ -56,7 +55,7 @@ class Coverage(Metric, _IntervalsMetricMixin):
     def __init__(
         self,
         quantiles: Optional[Tuple[float, float]] = None,
-        mode: str = MetricAggregationMode.per_segment.value,
+        mode: str = "per-segment",
         upper_name: Optional[str] = None,
         lower_name: Optional[str] = None,
         **kwargs,
@@ -175,7 +174,7 @@ class Width(Metric, _IntervalsMetricMixin):
     def __init__(
         self,
         quantiles: Optional[Tuple[float, float]] = None,
-        mode: str = MetricAggregationMode.per_segment.value,
+        mode: str = "per-segment",
         upper_name: Optional[str] = None,
         lower_name: Optional[str] = None,
         **kwargs,
diff --git a/etna/metrics/metrics.py b/etna/metrics/metrics.py
index 24db102b6..fe894bfdd 100644
--- a/etna/metrics/metrics.py
+++ b/etna/metrics/metrics.py
@@ -1,7 +1,6 @@
 from functools import partial
 
 from etna.metrics.base import Metric
-from etna.metrics.base import MetricAggregationMode
 from etna.metrics.base import MetricWithMissingHandling
 from etna.metrics.functional_metrics import mae
 from etna.metrics.functional_metrics import mape
@@ -27,7 +26,7 @@ class MAE(Metric):
     You can read more about logic of multi-segment metrics in Metric docs.
     """
 
-    def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs):
+    def __init__(self, mode: str = "per-segment", **kwargs):
         """Init metric.
 
         Parameters
@@ -57,7 +56,7 @@ class MSE(MetricWithMissingHandling):
     You can read more about logic of multi-segment metrics in Metric docs.
     """
 
-    def __init__(self, mode: str = MetricAggregationMode.per_segment.value, missing_mode: str = "error", **kwargs):
+    def __init__(self, mode: str = "per-segment", missing_mode: str = "error", **kwargs):
         """Init metric.
 
         Parameters
@@ -102,7 +101,7 @@ class RMSE(Metric):
     You can read more about logic of multi-segment metrics in Metric docs.
     """
 
-    def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs):
+    def __init__(self, mode: str = "per-segment", **kwargs):
         """Init metric.
 
         Parameters
@@ -137,7 +136,7 @@ class R2(Metric):
     You can read more about logic of multi-segment metrics in Metric docs.
     """
 
-    def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs):
+    def __init__(self, mode: str = "per-segment", **kwargs):
         """Init metric.
 
         Parameters
@@ -173,7 +172,7 @@ class MAPE(Metric):
     You can read more about logic of multi-segment metrics in Metric docs.
     """
 
-    def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs):
+    def __init__(self, mode: str = "per-segment", **kwargs):
         """Init metric.
 
         Parameters
@@ -209,7 +208,7 @@ class SMAPE(Metric):
     You can read more about logic of multi-segment metrics in Metric docs.
     """
 
-    def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs):
+    def __init__(self, mode: str = "per-segment", **kwargs):
         """Init metric.
 
         Parameters
@@ -245,7 +244,7 @@ class MedAE(Metric):
     You can read more about logic of multi-segment metrics in Metric docs.
     """
 
-    def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs):
+    def __init__(self, mode: str = "per-segment", **kwargs):
         """Init metric.
 
         Parameters
@@ -281,7 +280,7 @@ class MSLE(Metric):
     You can read more about logic of multi-segment metrics in Metric docs.
     """
 
-    def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs):
+    def __init__(self, mode: str = "per-segment", **kwargs):
         """Init metric.
 
         Parameters
@@ -318,7 +317,7 @@ class Sign(Metric):
     You can read more about logic of multi-segment metrics in Metric docs.
     """
 
-    def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs):
+    def __init__(self, mode: str = "per-segment", **kwargs):
         """Init metric.
 
         Parameters
@@ -354,7 +353,7 @@ class MaxDeviation(Metric):
     You can read more about logic of multi-segment metrics in Metric docs.
     """
 
-    def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs):
+    def __init__(self, mode: str = "per-segment", **kwargs):
         """Init metric.
 
         Parameters
@@ -389,7 +388,7 @@ class WAPE(Metric):
     You can read more about logic of multi-segment metrics in Metric docs.
     """
 
-    def __init__(self, mode: str = MetricAggregationMode.per_segment.value, **kwargs):
+    def __init__(self, mode: str = "per-segment", **kwargs):
         """Init metric.
 
         Parameters

From 878313ee0c4013c4f641104d38e71aef4f40f383 Mon Sep 17 00:00:00 2001
From: Dmitry Bunin <bunin260200@gmail.com>
Date: Fri, 6 Dec 2024 11:08:11 +0300
Subject: [PATCH 7/8] docs: update docs for metrics

---
 etna/metrics/functional_metrics.py | 24 +++++++++++++-----------
 etna/metrics/metrics.py            |  4 ++++
 2 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/etna/metrics/functional_metrics.py b/etna/metrics/functional_metrics.py
index 036e4ae70..7feb319ad 100644
--- a/etna/metrics/functional_metrics.py
+++ b/etna/metrics/functional_metrics.py
@@ -45,10 +45,10 @@ def _get_axis_by_multioutput(multioutput: str) -> Optional[int]:
 def mse(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike:
     """Mean squared error with missing values handling.
 
-    `Wikipedia entry on the Mean squared error
-    <https://en.wikipedia.org/wiki/Mean_squared_error>`_
+    .. math::
+        MSE(y\_true, y\_pred) = \\frac{\\sum_{i=1}^{n}{(y\_true_i - y\_pred_i)^2}}{n}
 
-    The nans are ignored during computation.
+    The nans are ignored during computation. If all values are nans, the result is NaN.
 
     Parameters
     ----------
@@ -91,8 +91,10 @@ def mse(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Arr
 def mape(y_true: ArrayLike, y_pred: ArrayLike, eps: float = 1e-15, multioutput: str = "joint") -> ArrayLike:
     """Mean absolute percentage error.
 
-    `Wikipedia entry on the Mean absolute percentage error
-    <https://en.wikipedia.org/wiki/Mean_absolute_percentage_error>`_
+    .. math::
+       MAPE(y\_true, y\_pred) = \\frac{1}{n} \\cdot \\sum_{i=1}^{n} \\frac{\\mid y\_true_i - y\_pred_i\\mid}{\\mid y\_true_i \\mid + \epsilon}
+
+    `Percentage errors <https://otexts.com/fpp3/accuracy.html#percentage-errors>`_
 
     Parameters
     ----------
@@ -135,11 +137,8 @@ def mape(y_true: ArrayLike, y_pred: ArrayLike, eps: float = 1e-15, multioutput:
 def smape(y_true: ArrayLike, y_pred: ArrayLike, eps: float = 1e-15, multioutput: str = "joint") -> ArrayLike:
     """Symmetric mean absolute percentage error.
 
-    `Wikipedia entry on the Symmetric mean absolute percentage error
-    <https://en.wikipedia.org/wiki/Symmetric_mean_absolute_percentage_error>`_
-
     .. math::
-        SMAPE = \dfrac{100}{n}\sum_{t=1}^{n}\dfrac{|ytrue_{t}-ypred_{t}|}{(|ypred_{t}|+|ytrue_{t}|) / 2}
+       SMAPE(y\_true, y\_pred) = \\frac{2 \\cdot 100 \\%}{n} \\cdot \\sum_{i=1}^{n} \\frac{\\mid y\_true_i - y\_pred_i\\mid}{\\mid y\_true_i \\mid + \\mid y\_pred_i \\mid}
 
     Parameters
     ----------
@@ -183,7 +182,7 @@ def sign(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Ar
     """Sign error metric.
 
     .. math::
-        Sign(y\_true, y\_pred) = \\frac{1}{n}\\cdot\\sum_{i=0}^{n - 1}{sign(y\_true_i - y\_pred_i)}
+        Sign(y\_true, y\_pred) = \\frac{1}{n}\\cdot\\sum_{i=1}^{n}{sign(y\_true_i - y\_pred_i)}
 
     Parameters
     ----------
@@ -220,6 +219,9 @@ def sign(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Ar
 def max_deviation(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> ArrayLike:
     """Max Deviation metric.
 
+    .. math::
+        MaxDeviation(y\_true, y\_pred) = \\max_{1 \\le j \\le n} | y_j |, where \\, y_j = \\sum_{i=1}^{j}{y\_pred_i - y\_true_i}
+
     Parameters
     ----------
     y_true:
@@ -260,7 +262,7 @@ def wape(y_true: ArrayLike, y_pred: ArrayLike, multioutput: str = "joint") -> Ar
     """Weighted average percentage Error metric.
 
     .. math::
-        WAPE(y\_true, y\_pred) = \\frac{\\sum_{i=0}^{n} |y\_true_i - y\_pred_i|}{\\sum_{i=0}^{n}|y\\_true_i|}
+        WAPE(y\_true, y\_pred) = \\frac{\\sum_{i=1}^{n} |y\_true_i - y\_pred_i|}{\\sum_{i=1}^{n}|y\\_true_i|}
 
     Parameters
     ----------
diff --git a/etna/metrics/metrics.py b/etna/metrics/metrics.py
index fe894bfdd..67088a811 100644
--- a/etna/metrics/metrics.py
+++ b/etna/metrics/metrics.py
@@ -51,6 +51,9 @@ class MSE(MetricWithMissingHandling):
     .. math::
         MSE(y\_true, y\_pred) = \\frac{\\sum_{i=1}^{n}{(y\_true_i - y\_pred_i)^2}}{n}
 
+    This metric can handle missing values with parameter ``missing_mode``.
+    In ``ignore`` mode, a segment whose values are all missing gets ``None`` instead of a number.
+
     Notes
     -----
     You can read more about logic of multi-segment metrics in Metric docs.
@@ -383,6 +386,7 @@ class WAPE(Metric):
 
     .. math::
         WAPE(y\_true, y\_pred) = \\frac{\\sum_{i=1}^{n} |y\_true_i - y\_pred_i|}{\\sum_{i=1}^{n}|y\\_true_i|}
+
     Notes
     -----
     You can read more about logic of multi-segment metrics in Metric docs.

From e701e4972a7ac5aa38f47d99ca351eb4504e5a48 Mon Sep 17 00:00:00 2001
From: Dmitry Bunin <bunin260200@gmail.com>
Date: Fri, 6 Dec 2024 11:09:36 +0300
Subject: [PATCH 8/8] chore: update changelog

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e44d0ff32..dc8f73cc3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -25,7 +25,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - 
 - 
 - Add docstring warning about handling non-regressors (including target) to children of `WindowStatisticsTransform` ([#474](https://github.com/etna-team/etna/pull/474))
-- 
+- Add parameter `missing_mode` into `MSE` metric ([#515](https://github.com/etna-team/etna/pull/515))
 - 
 - 
 -