modin-project · dchigarev · Dec 5, 2023 · Nov 20, 2023 · Nov 22, 2023 · Dec 4, 2023
@@ -3272,6 +3272,27 @@ def broadcast_apply_full_axis(
                         kw["column_widths"] = self._column_widths_cache
                     elif len(new_columns) == 1 and new_partitions.shape[1] == 1:
                         kw["column_widths"] = [1]
+        else:
+            if (
+                axis == 0
+                and kw["row_lengths"] is None
+                and self._row_lengths_cache is not None
+                and ModinIndex.is_materialized_index(new_index)
+                and len(new_index) == sum(self._row_lengths_cache)
+                # to avoid problems that may arise when filtering empty dataframes
+                and all(r != 0 for r in self._row_lengths_cache)
+            ):
+                kw["row_lengths"] = self._row_lengths_cache
+            if (
+                axis == 1
+                and kw["column_widths"] is None
+                and self._column_widths_cache is not None
+                and ModinIndex.is_materialized_index(new_columns)
+                and len(new_columns) == sum(self._column_widths_cache)
+                # to avoid problems that may arise when filtering empty dataframes
+                and all(w != 0 for w in self._column_widths_cache)
+            ):
+                kw["column_widths"] = self._column_widths_cache
 
         result = self.__constructor__(
             new_partitions, index=new_index, columns=new_columns, **kw

@@ -1398,6 +1398,71 @@ def test_sort_values_cache():
     validate_partitions_cache(mf_initial, axis=1)
 
 
+def test_apply_full_axis_preserve_widths():
+    """Check the column widths cache after ``apply_full_axis`` with ``axis=1``."""
+    source = pandas.DataFrame(
+        {"a": [1, 2, 3, 4], "b": [3, 4, 5, 6], "c": [6, 7, 8, 9], "d": [0, 1, 2, 3]}
+    )
+    scheme = {"row_lengths": [2, 2], "column_widths": [2, 2]}
+    frame = construct_modin_df_by_scheme(source, scheme)._query_compiler._modin_frame
+
+    assert frame._row_lengths_cache == [2, 2]
+    assert frame._column_widths_cache == [2, 2]
+
+    def uneven_rows(df):
+        # Both outputs keep columns a-d; only the row count differs (3 vs. 1).
+        if df.iloc[0, 0] != 1:
+            return pandas.DataFrame({"a": [4], "b": [6], "c": [9], "d": [3]})
+        return pandas.DataFrame(
+            {"a": [1, 2, 3], "b": [3, 4, 5], "c": [6, 7, 8], "d": [0, 1, 2]}
+        )
+
+    res = frame.apply_full_axis(
+        func=uneven_rows,
+        axis=1,
+        new_index=[0, 1, 2, 3],
+        new_columns=["a", "b", "c", "d"],
+        keep_partitioning=True,
+    )
+    cached_widths = res._column_widths_cache
+    widths_from_parts = [part.width() for part in res._partitions[0]]
+
+    assert cached_widths == widths_from_parts
+    assert res._row_lengths_cache is None
+
+
+def test_apply_full_axis_preserve_lengths():
+    """Check the row lengths cache after ``apply_full_axis`` with ``axis=0``."""
+    source = pandas.DataFrame(
+        {"a": [1, 2, 3, 4], "b": [3, 4, 5, 6], "c": [6, 7, 8, 9], "d": [0, 1, 2, 3]}
+    )
+    scheme = {"row_lengths": [2, 2], "column_widths": [2, 2]}
+    frame = construct_modin_df_by_scheme(source, scheme)._query_compiler._modin_frame
+
+    assert frame._row_lengths_cache == [2, 2]
+    assert frame._column_widths_cache == [2, 2]
+
+    def pick_columns(df):
+        # Both outputs have four rows; only the number of columns differs (2 vs. 1).
+        if df.iloc[0, 0] != 1:
+            return pandas.DataFrame({"c": [9, 5, 6, 7]})
+        return pandas.DataFrame({"a": [3, 2, 3, 4], "b": [3, 4, 5, 6]})
+
+    res = frame.apply_full_axis(
+        func=pick_columns,
+        axis=0,
+        new_index=[0, 1, 2, 3],
+        new_columns=["a", "b", "c"],
+        keep_partitioning=True,
+    )
+
+    cached_lengths = res._row_lengths_cache
+    lengths_from_parts = [part.length() for part in res._partitions[:, 0]]
+
+    assert cached_lengths == lengths_from_parts
+    assert res._column_widths_cache is None
+
+
 class DummyFuture:
     """
     A dummy object emulating future's behaviour, this class is used in ``test_call_queue_serialization``.