Skip to content

Commit

Permalink
draft changes
Browse files Browse the repository at this point in the history
Signed-off-by: Anatoly Myachev <[email protected]>
  • Loading branch information
anmyachev committed Nov 30, 2023
1 parent 6eaec0e commit bef002d
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 2 deletions.
6 changes: 4 additions & 2 deletions modin/core/dataframe/pandas/dataframe/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -3269,7 +3269,8 @@ def broadcast_apply_full_axis(
kw["column_widths"] = [1]
else:
if (
kw["row_lengths"] is None
axis == 0
and kw["row_lengths"] is None
and new_index is not None
and self._row_lengths_cache is not None
and len(new_index) == sum(self._row_lengths_cache)
Expand All @@ -3278,7 +3279,8 @@ def broadcast_apply_full_axis(
):
kw["row_lengths"] = self._row_lengths_cache
if (
kw["column_widths"] is None
axis == 1
and kw["column_widths"] is None
and new_columns is not None
and self._column_widths_cache is not None
and len(new_columns) == sum(self._column_widths_cache)
Expand Down
66 changes: 66 additions & 0 deletions modin/test/storage_formats/pandas/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -1398,6 +1398,72 @@ def test_sort_values_cache():
validate_partitions_cache(mf_initial, axis=1)


def test_apply_full_axis_preserve_widths():
    """Check partition caches after ``apply_full_axis`` with ``axis=1``.

    A 4x4 frame split into 2x2 partitions is passed through a function whose
    outputs have 3 rows or 1 row, i.e. row counts that differ from the
    original ``[2, 2]`` row split. The result is expected to carry a column
    widths cache equal to the real partition widths and no row lengths cache.
    """
    source = pandas.DataFrame(
        {"a": [1, 2, 3, 4], "b": [3, 4, 5, 6], "c": [6, 7, 8, 9], "d": [0, 1, 2, 3]}
    )
    scheme = {"row_lengths": [2, 2], "column_widths": [2, 2]}
    frame = construct_modin_df_by_scheme(source, scheme)._query_compiler._modin_frame

    # Sanity check: the frame starts out with the requested partitioning cached.
    assert frame._row_lengths_cache == [2, 2]
    assert frame._column_widths_cache == [2, 2]

    def uneven_rows(df):
        # A chunk whose top-left value is 1 yields three rows; any other
        # chunk yields a single row.
        if df.iloc[0, 0] == 1:
            return pandas.DataFrame(
                {"a": [1, 2, 3], "b": [3, 4, 5], "c": [6, 7, 8], "d": [0, 1, 2]}
            )
        return pandas.DataFrame({"a": [4], "b": [6], "c": [9], "d": [3]})

    result = frame.apply_full_axis(
        func=uneven_rows,
        axis=1,
        new_index=[0, 1, 2, 3],
        new_columns=["a", "b", "c", "d"],
        keep_partitioning=True,
    )

    # Measure the widths directly from the first row of partitions.
    first_row_of_parts = result._partitions[0]
    measured_widths = [partition.width() for partition in first_row_of_parts]

    assert result._column_widths_cache == measured_widths
    assert result._row_lengths_cache is None


def test_apply_full_axis_preserve_lengths():
    """Check partition caches after ``apply_full_axis`` with ``axis=0``.

    A 4x4 frame split into 2x2 partitions is passed through a function that
    returns either a 3-row or a 1-row frame. The result is expected to carry
    a row lengths cache equal to the real partition lengths and no column
    widths cache.
    """
    source = pandas.DataFrame(
        {"a": [1, 2, 3, 4], "b": [3, 4, 5, 6], "c": [6, 7, 8, 9], "d": [0, 1, 2, 3]}
    )
    scheme = {"row_lengths": [2, 2], "column_widths": [2, 2]}
    frame = construct_modin_df_by_scheme(source, scheme)._query_compiler._modin_frame

    # Sanity check: the frame starts out with the requested partitioning cached.
    assert frame._row_lengths_cache == [2, 2]
    assert frame._column_widths_cache == [2, 2]

    def uneven_rows(df):
        # A chunk whose top-left value is 1 yields three rows; any other
        # chunk yields a single row.
        if df.iloc[0, 0] == 1:
            return pandas.DataFrame(
                {"a": [1, 2, 3], "b": [3, 4, 5], "c": [6, 7, 8], "d": [0, 1, 2]}
            )
        return pandas.DataFrame({"a": [4], "b": [6], "c": [9], "d": [3]})

    result = frame.apply_full_axis(
        func=uneven_rows,
        axis=0,
        new_index=[0, 1, 2, 3],
        new_columns=["a", "b", "c", "d"],
        keep_partitioning=True,
    )

    # Measure the lengths directly from the first column of partitions.
    first_column_of_parts = result._partitions[:, 0]
    measured_lengths = [partition.length() for partition in first_column_of_parts]

    assert result._row_lengths_cache == measured_lengths
    assert result._column_widths_cache is None


class DummyFuture:
"""
A dummy object emulating future's behaviour, this class is used in ``test_call_queue_serialization``.
Expand Down

0 comments on commit bef002d

Please sign in to comment.