Skip to content

Commit

Permalink
PERF-#6696: Use cached dtypes in fillna when possible. (#6697)
Browse files: browse the repository at this point in the history
Signed-off-by: Andrey Pavlenko <[email protected]>
  • Loading branch information
AndreyPavlenko authored Nov 13, 2023
1 parent 8a332c1 commit 41ecc92
Showing 1 changed file with 20 additions and 1 deletion.
21 changes: 20 additions & 1 deletion modin/core/storage_formats/pandas/query_compiler.py
Original file line number Diff line number Diff line change
@@ -2420,6 +2420,7 @@ def fillna(self, **kwargs):
method = kwargs.get("method", None)
limit = kwargs.get("limit", None)
full_axis = method is not None or limit is not None
new_dtypes = None
if isinstance(value, BaseQueryCompiler):
if squeeze_self:
# Self is a Series type object
@@ -2487,15 +2488,33 @@ def fillna(df):
}
return df.fillna(value=func_dict, **kwargs)

if self._modin_frame.has_materialized_dtypes:
dtypes = self._modin_frame.dtypes
value_dtypes = pandas.DataFrame(
{k: [v] for (k, v) in value.items()}
).dtypes
if all(
find_common_type([dtypes[col], dtype]) == dtypes[col]
for (col, dtype) in value_dtypes.items()
if col in dtypes
):
new_dtypes = dtypes

else:
if self._modin_frame.has_materialized_dtypes:
dtype = pandas.Series(value).dtype
if all(
find_common_type([t, dtype]) == t for t in self._modin_frame.dtypes
):
new_dtypes = self._modin_frame.dtypes

def fillna(df):
return df.fillna(value=value, **kwargs)

if full_axis:
new_modin_frame = self._modin_frame.fold(axis, fillna)
else:
new_modin_frame = self._modin_frame.map(fillna)
new_modin_frame = self._modin_frame.map(fillna, dtypes=new_dtypes)
return self.__constructor__(new_modin_frame)

def quantile_for_list_of_values(self, **kwargs):
Expand Down

0 comments on commit 41ecc92

Please sign in to comment.