Skip to content

Commit

Permalink
use old logic on duplicates in '.concat()'
Browse files Browse the repository at this point in the history
Signed-off-by: Dmitry Chigarev <[email protected]>
  • Loading branch information
dchigarev committed Nov 20, 2023
1 parent ffb32cf commit c17dacd
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 1 deletion.
11 changes: 10 additions & 1 deletion modin/core/dataframe/pandas/metadata/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -648,7 +648,16 @@ def concat(cls, values: list) -> "ModinDtypes":
else:
raise NotImplementedError(type(val))

desc = DtypesDescriptor.concat(preprocessed_vals)
try:
desc = DtypesDescriptor.concat(preprocessed_vals)
except NotImplementedError as e:
# 'DtypesDescriptor' doesn't support duplicated labels; however, if all values are pandas Series,
# we can still perform the concatenation using pure pandas
if "duplicated" not in e.args[0].lower() or not all(
isinstance(val, pandas.Series) for val in values
):
raise e
desc = pandas.concat(values)
return ModinDtypes(desc)

def set_index(self, new_index: Union[pandas.Index, "ModinIndex"]) -> "ModinDtypes":
Expand Down
18 changes: 18 additions & 0 deletions modin/test/storage_formats/pandas/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -1815,6 +1815,24 @@ def test_concat(self):
)
assert res.equals(exp)

def test_ModinDtypes_duplicated_concat(self):
    """
    Test ``ModinDtypes.concat`` on inputs that share the same column label.

    Two cases are checked: concatenating only pandas Series must produce a
    pandas Series that keeps the duplicated labels, and mixing a Series with a
    ``DtypesDescriptor`` that has unknown dtypes must raise ``NotImplementedError``.
    """
    # 'ModinDtypes' must be able to concatenate dtypes with duplicated labels
    # when every input is a pandas Series
    res = ModinDtypes.concat([pandas.Series([np.dtype(int)], index=["a"])] * 2)
    assert isinstance(res._value, pandas.Series)
    assert res._value.equals(
        pandas.Series([np.dtype(int), np.dtype(int)], index=["a", "a"])
    )

    # 'ModinDtypes.concat' with duplicated labels must raise when not all dtypes are materialized;
    # the call is expected to raise, so its result is intentionally not bound to a name
    with pytest.raises(NotImplementedError):
        ModinDtypes.concat(
            [
                pandas.Series([np.dtype(int)], index=["a"]),
                DtypesDescriptor(cols_with_unknown_dtypes=["a"]),
            ]
        )

def test_update_parent(self):
"""
Test that updating parents in ``DtypesDescriptor`` also propagates to stored lazy categoricals.
Expand Down

0 comments on commit c17dacd

Please sign in to comment.