From 7cc68eb7fda6c94cabf21e8f6301b2128c863cf9 Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Mon, 24 Feb 2025 15:50:45 +0300 Subject: [PATCH 1/6] added test --- tests/test_datasets/test_dataset.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py index 4378fc7fa..06522cdda 100644 --- a/tests/test_datasets/test_dataset.py +++ b/tests/test_datasets/test_dataset.py @@ -1513,6 +1513,16 @@ def test_tsdataset_idx_slice_pass_prediction_intervals_to_output(ts_with_predict ) +def test_tsdataset_idx_slice_pass_hierarchical_structure_to_output(product_level_constant_forecast_with_quantiles): + ts = product_level_constant_forecast_with_quantiles + initial_hs = ts.hierarchical_structure + slice_hs = ts.tsdataset_idx_slice(start_idx=1, end_idx=2).hierarchical_structure + + assert slice_hs is not None + assert slice_hs.level_names == initial_hs.level_names + assert slice_hs.level_structure == initial_hs.level_structure + + def test_to_torch_dataset_without_drop(tsdf_with_exog): def make_samples(df): return [{"target": df.target.values, "segment": df["segment"].values[0]}] From 9a4cae3a8ff9fabfeab71061b3ba1016c1b91093 Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Mon, 24 Feb 2025 15:51:42 +0300 Subject: [PATCH 2/6] reworked `tsdataset_idx_slice` --- etna/datasets/tsdataset.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/etna/datasets/tsdataset.py b/etna/datasets/tsdataset.py index ba30b6580..db1c2b4a5 100644 --- a/etna/datasets/tsdataset.py +++ b/etna/datasets/tsdataset.py @@ -520,16 +520,11 @@ def tsdataset_idx_slice(self, start_idx: Optional[int] = None, end_idx: Optional : TSDataset based on indexing slice. """ - df_slice = self.df.iloc[start_idx:end_idx].copy(deep=True) - tsdataset_slice = TSDataset(df=df_slice, freq=self.freq) - # can't put known_future into constructor, _check_known_future fails with df_exog=None - tsdataset_slice.known_future = deepcopy(self.known_future) - tsdataset_slice._regressors = deepcopy(self.regressors) - if self.df_exog is not None: - tsdataset_slice.df_exog = self.df_exog.copy(deep=True) - tsdataset_slice._target_components_names = deepcopy(self._target_components_names) - tsdataset_slice._prediction_intervals_names = deepcopy(self._prediction_intervals_names) - return tsdataset_slice + ts_slice = deepcopy(self) + ts_slice.df = ts_slice.df.iloc[start_idx:end_idx].copy(deep=None) + ts_slice.raw_df = ts_slice.raw_df.iloc[start_idx:end_idx].copy(deep=None) + + return ts_slice @staticmethod def _check_known_future( From 199451e90390c0159db6b6208a4a5a5eb6bc4dac Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Mon, 24 Feb 2025 17:10:27 +0300 Subject: [PATCH 3/6] updated changelog --- CHANGELOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a506a6c30..f6cd1e140 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,7 +46,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **Breaking:** Bump minimum `optuna` version to 4.0 ([#599](https://github.com/etna-team/etna/pull/599)) - **Breaking:** Bump minimum `statsforecast` version to 2.0 ([#599](https://github.com/etna-team/etna/pull/599)) - Optimize performance of exogenous variables addition to the dataset ([#596](https://github.com/etna-team/etna/pull/596)) -- +- Update `TSDataset.tsdataset_idx_slice` method ([#618](https://github.com/etna-team/etna/pull/618)) +- ### Fixed - Fix possibility of silent handling of 
duplicate features when updating dataset with `TSDataset.update_columns_from_pandas` ([#522](https://github.com/etna-team/etna/pull/552)) @@ -59,6 +60,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **Breaking:** rename `DaylySeasonalitySSM` to `DailySeasonalitySSM` ([#615](https://github.com/etna-team/etna/pull/615)) - Fix `TSDataset.train_test_split` to pass all features to train and test parts ([#545](https://github.com/etna-team/etna/pull/545)) - Fix `ConfigSampler` to handle trials without hash ([#616](https://github.com/etna-team/etna/pull/616)) +- Fix method `TSDataset.tsdataset_idx_slice` loses hierarchical structure ([#618](https://github.com/etna-team/etna/pull/618)) +- ### Removed - **Breaking:** Remove `FutureMixin`, `OutliersTransform.outliers_timestamps` and `OutliersTransform.original_values` ([#577](https://github.com/etna-team/etna/pull/577)) From 443d9d43849a11c2fa58561b1cbfe49302a56286 Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Mon, 24 Feb 2025 18:41:02 +0300 Subject: [PATCH 4/6] updated implementation --- etna/datasets/tsdataset.py | 55 ++++++++++++++----------------- etna/datasets/utils.py | 17 ++++++++++ tests/test_datasets/test_utils.py | 15 +++++++++ 3 files changed, 57 insertions(+), 30 deletions(-) diff --git a/etna/datasets/tsdataset.py b/etna/datasets/tsdataset.py index db1c2b4a5..8dbab0835 100644 --- a/etna/datasets/tsdataset.py +++ b/etna/datasets/tsdataset.py @@ -26,6 +26,7 @@ from etna.datasets.utils import DataFrameFormat from etna.datasets.utils import _check_features_in_segments from etna.datasets.utils import _check_timestamp_param +from etna.datasets.utils import _slice_index_wide_dataframe from etna.datasets.utils import _TorchDataset from etna.datasets.utils import apply_alignment from etna.datasets.utils import get_level_dataframe @@ -520,9 +521,23 @@ def tsdataset_idx_slice(self, start_idx: Optional[int] = None, end_idx: Optional : TSDataset based on indexing slice. 
""" - ts_slice = deepcopy(self) - ts_slice.df = ts_slice.df.iloc[start_idx:end_idx].copy(deep=None) - ts_slice.raw_df = ts_slice.raw_df.iloc[start_idx:end_idx].copy(deep=None) + self_df = self.df + self_raw_df = self.raw_df + + try: + # we do this to avoid redundant copying of data + self.df = None + self.raw_df = None + + ts_slice = deepcopy(self) + ts_slice.df = _slice_index_wide_dataframe(df=self_df, start=start_idx, stop=end_idx, label_indexing=False) + ts_slice.raw_df = _slice_index_wide_dataframe( + df=self_raw_df, start=start_idx, stop=end_idx, label_indexing=False + ) + + finally: + self.df = self_df + self.raw_df = self_raw_df return ts_slice @@ -1255,36 +1270,16 @@ def train_test_split( # we do this to avoid redundant copying of data self.df = None self.raw_df = None - train = deepcopy(self) - # we want to make sure it makes only one copy - train_df = self_df.loc[train_start_defined:train_end_defined] - if train_df._is_view or train_df._is_copy is not None: - train.df = train_df.copy() - else: - train.df = train_df - - # we want to make sure it makes only one copy - train_raw_df = self_raw_df.loc[train_start_defined:train_end_defined] - if train_raw_df._is_view or train_raw_df._is_copy is not None: - train.raw_df = train_raw_df.copy() - else: - train.raw_df = train_raw_df + train = deepcopy(self) + train.df = _slice_index_wide_dataframe(df=self_df, start=train_start_defined, stop=train_end_defined) + train.raw_df = _slice_index_wide_dataframe( + df=self_raw_df, start=train_start_defined, stop=train_end_defined + ) - # we want to make sure it makes only one copy test = deepcopy(self) - test_df = self_df.loc[test_start_defined:test_end_defined] - if test_df._is_view or test_df._is_copy is not None: - test.df = test_df.copy() - else: - test.df = test_df - - # we want to make sure it makes only one copy - test_raw_df = self_raw_df.loc[train_start_defined:test_end_defined] - if test_raw_df._is_view or test_raw_df._is_copy is not None: - test.raw_df = test_raw_df.copy() - else: - test.raw_df = test_raw_df + test.df = _slice_index_wide_dataframe(df=self_df, start=test_start_defined, stop=test_end_defined) + test.raw_df = _slice_index_wide_dataframe(df=self_raw_df, start=train_start_defined, stop=test_end_defined) finally: self.df = self_df diff --git a/etna/datasets/utils.py b/etna/datasets/utils.py index e1bb8937c..ad6f4ed9b 100644 --- a/etna/datasets/utils.py +++ b/etna/datasets/utils.py @@ -756,3 +756,20 @@ def _check_features_in_segments(columns: pd.MultiIndex, segments: Optional[List[ raise ValueError( f"There is a mismatch in feature sets between segments '{compare_segment}' and '{segment}'!" 
) + + +def _slice_index_wide_dataframe( + df: pd.DataFrame, + start: Optional[Union[int, str, pd.Timestamp]] = None, + stop: Optional[Union[int, str, pd.Timestamp]] = None, + label_indexing: bool = True, +) -> pd.DataFrame: + """Slice index of the dataframe in the wide format with copy.""" + indexer = df.loc if label_indexing else df.iloc + + # we want to make sure it makes only one copy + df = indexer[start:stop] # type: ignore + if df._is_view or df._is_copy is not None: + df = df.copy(deep=None) + + return df diff --git a/tests/test_datasets/test_utils.py b/tests/test_datasets/test_utils.py index d7081f76e..c156c8515 100644 --- a/tests/test_datasets/test_utils.py +++ b/tests/test_datasets/test_utils.py @@ -9,6 +9,7 @@ from etna.datasets import generate_ar_df from etna.datasets.utils import DataFrameFormat from etna.datasets.utils import _check_features_in_segments +from etna.datasets.utils import _slice_index_wide_dataframe from etna.datasets.utils import _TorchDataset from etna.datasets.utils import apply_alignment from etna.datasets.utils import determine_freq @@ -1013,3 +1014,17 @@ def test_check_features_in_segments_ok(columns): ) def test_check_features_in_segments_ok_with_expected_segments(columns): _check_features_in_segments(columns=columns, segments=[1, 2]) + + +@pytest.mark.parametrize("start, stop", ((0, 4), (4, -1), (-5, -1), (None, 6), (5, None), (None, None))) +def test_slice_index_wide_dataframe_int_idx(df_aligned_datetime, start, stop): + res = _slice_index_wide_dataframe(df=df_aligned_datetime, start=start, stop=stop, label_indexing=False) + pd.testing.assert_frame_equal(res, df_aligned_datetime.iloc[start:stop]) + + +@pytest.mark.parametrize( + "start, stop", (("2020-01-01", "2020-01-04"), (None, "2020-01-10"), ("2020-01-09", None), (None, None)) +) +def test_slice_index_wide_dataframe_label_idx(df_aligned_datetime, start, stop): + res = _slice_index_wide_dataframe(df=df_aligned_datetime, start=start, stop=stop, label_indexing=True) + pd.testing.assert_frame_equal(res, df_aligned_datetime.loc[start:stop]) From ca0da019de55d6c20ba760c2e44f96fd288abafc Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Mon, 24 Feb 2025 18:42:59 +0300 Subject: [PATCH 5/6] fixed changelog --- CHANGELOG.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f6cd1e140..c553d611c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,7 +46,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **Breaking:** Bump minimum `optuna` version to 4.0 ([#599](https://github.com/etna-team/etna/pull/599)) - **Breaking:** Bump minimum `statsforecast` version to 2.0 ([#599](https://github.com/etna-team/etna/pull/599)) - Optimize performance of exogenous variables addition to the dataset ([#596](https://github.com/etna-team/etna/pull/596)) -- Update `TSDataset.tsdataset_idx_slice` method ([#618](https://github.com/etna-team/etna/pull/618)) - ### Fixed @@ -60,7 +59,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **Breaking:** rename `DaylySeasonalitySSM` to `DailySeasonalitySSM` ([#615](https://github.com/etna-team/etna/pull/615)) - Fix `TSDataset.train_test_split` to pass all features to train and test parts ([#545](https://github.com/etna-team/etna/pull/545)) - Fix `ConfigSampler` to handle trials without hash ([#616](https://github.com/etna-team/etna/pull/616)) -- Fix method `TSDataset.tsdataset_idx_slice` loses hierarchical structure ([#618](https://github.com/etna-team/etna/pull/618)) +- 
Fix method `TSDataset.tsdataset_idx_slice` to not lose hierarchical structure ([#618](https://github.com/etna-team/etna/pull/618)) - ### Removed From e36e09b2d130cb003e5c667ab5922fb7cf64306d Mon Sep 17 00:00:00 2001 From: brsnw250 Date: Tue, 25 Feb 2025 11:00:38 +0300 Subject: [PATCH 6/6] explicitly make deep copy --- etna/datasets/tsdataset.py | 4 ++-- etna/datasets/utils.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/etna/datasets/tsdataset.py b/etna/datasets/tsdataset.py index 8dbab0835..223e45092 100644 --- a/etna/datasets/tsdataset.py +++ b/etna/datasets/tsdataset.py @@ -144,7 +144,7 @@ def __init__( self.freq = freq self.df_exog = None self.raw_df = self._prepare_df(df=df, freq=freq) - self.df = self.raw_df.copy(deep=None) + self.df = self.raw_df.copy(deep=True) self.hierarchical_structure = hierarchical_structure self.current_df_level: Optional[str] = self._get_dataframe_level(df=self.df) @@ -1026,7 +1026,7 @@ def to_dataset(df: pd.DataFrame) -> pd.DataFrame: df.sort_index(axis=1, level=(0, 1), inplace=True) if df._is_view or df._is_copy is None: - df = df.copy(deep=None) + df = df.copy(deep=True) return df diff --git a/etna/datasets/utils.py b/etna/datasets/utils.py index ad6f4ed9b..64d7e7b70 100644 --- a/etna/datasets/utils.py +++ b/etna/datasets/utils.py @@ -770,6 +770,6 @@ def _slice_index_wide_dataframe( # we want to make sure it makes only one copy df = indexer[start:stop] # type: ignore if df._is_view or df._is_copy is not None: - df = df.copy(deep=None) + df = df.copy(deep=True) return df
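
Note on the pattern above (not part of the patch series): patches 2, 4 and 6 converge on a single idea: `deepcopy` the dataset only after detaching the wide dataframes, slice them with at most one explicit deep copy, and restore the originals in a `finally` block, so that metadata such as `hierarchical_structure` survives while the heavy frames are never copied twice. The sketch below illustrates that pattern in isolation; `Holder` and `_slice_wide` are hypothetical stand-ins for `TSDataset` and `_slice_index_wide_dataframe`, not the etna API.

```python
from copy import deepcopy
from typing import Optional

import pandas as pd


def _slice_wide(df: pd.DataFrame, start: Optional[int], stop: Optional[int]) -> pd.DataFrame:
    """Positional slice that is guaranteed to own its data, making at most one copy."""
    sliced = df.iloc[start:stop]
    if sliced._is_view or sliced._is_copy is not None:
        sliced = sliced.copy(deep=True)
    return sliced


class Holder:
    """Toy stand-in for TSDataset: one heavy frame plus cheap metadata (e.g. hierarchy)."""

    def __init__(self, df: pd.DataFrame, meta: dict):
        self.df = df
        self.meta = meta

    def idx_slice(self, start: Optional[int] = None, stop: Optional[int] = None) -> "Holder":
        original = self.df
        try:
            # detach the heavy frame so deepcopy duplicates only the cheap metadata
            self.df = None
            out = deepcopy(self)
            out.df = _slice_wide(original, start, stop)
        finally:
            # restore the original frame even if slicing raises
            self.df = original
        return out


holder = Holder(pd.DataFrame({"target": range(10)}), meta={"levels": ["total", "product"]})
part = holder.idx_slice(1, 4)
assert part.meta == holder.meta  # metadata survives the slice
assert len(part.df) == 3 and len(holder.df) == 10  # slice owns its data, source untouched
```

The `_is_view` / `_is_copy` check mirrors the "we want to make sure it makes only one copy" comment in the patches: the sliced frame is copied only when pandas returned a view or a frame that still references its parent, and patch 6 makes that copy explicit with `deep=True`.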