Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REF: Delete code for pd < 1.2.2 #797

Merged
merged 11 commits into from
Aug 20, 2024
19 changes: 5 additions & 14 deletions python/xorbits/_mars/dataframe/base/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from ...core import OutputType, recursive_tile
from ...core.custom_log import redirect_custom_log
from ...serialization.serializables import AnyField, BoolField, DictField, TupleField
from ...utils import enter_current_session, pd_release_version, quiet_stdio
from ...utils import enter_current_session, quiet_stdio
from ..core import DATAFRAME_CHUNK_TYPE, DATAFRAME_TYPE
from ..operands import DataFrameOperand, DataFrameOperandMixin
from ..utils import (
Expand All @@ -33,8 +33,6 @@
validate_axis,
)

_with_convert_dtype = pd_release_version < (1, 2, 0)


class TransformOperand(DataFrameOperand, DataFrameOperandMixin):
_op_type_ = opcodes.TRANSFORM
Expand Down Expand Up @@ -246,17 +244,10 @@ def _infer_df_func_returns(self, df, dtypes):
if self.call_agg:
infer_df = test_df.agg(self._func, args=self.args, **self.kwds)
else:
if not _with_convert_dtype:
infer_df = test_df.transform(
self._func, *self.args, **self.kwds
)
else: # pragma: no cover
infer_df = test_df.transform(
self._func,
convert_dtype=self.convert_dtype,
args=self.args,
**self.kwds
)
infer_df = test_df.transform(
self._func, *self.args, **self.kwds
)

except: # noqa: E722
infer_df = None

Expand Down
30 changes: 9 additions & 21 deletions python/xorbits/_mars/dataframe/indexing/reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
StringField,
)
from ...tensor import tensor as astensor
from ...utils import lazy_import, pd_release_version
from ...utils import lazy_import
from ..core import INDEX_TYPE
from ..core import Index as DataFrameIndexType
from ..initializer import Index as asindex
Expand All @@ -42,9 +42,6 @@

cudf = lazy_import("cudf")

# under pandas<1.1, SparseArray ignores zeros on creation
_pd_sparse_miss_zero = pd_release_version[:2] < (1, 1)


class DataFrameReindex(DataFrameOperand, DataFrameOperandMixin):
_op_type_ = opcodes.REINDEX
Expand Down Expand Up @@ -274,23 +271,14 @@ def _sparse_reindex(cls, inp, index=None, columns=None):
)
# convert to SparseDtype(xxx, np.nan)
# to ensure that 0 in sparse_array is not converted to np.nan
if not _pd_sparse_miss_zero:
sparse_array = pd.arrays.SparseArray.from_spmatrix(spmatrix)
sparse_array = pd.arrays.SparseArray(
sparse_array.sp_values,
sparse_index=sparse_array.sp_index,
fill_value=np.nan,
dtype=pd.SparseDtype(sparse_array.dtype, np.nan),
)
else:
from pandas._libs.sparse import IntIndex

sparse_array = pd.arrays.SparseArray(
data,
sparse_index=IntIndex(index_shape, ind),
fill_value=np.nan,
dtype=pd.SparseDtype(data.dtype, np.nan),
)
sparse_array = pd.arrays.SparseArray.from_spmatrix(spmatrix)
sparse_array = pd.arrays.SparseArray(
sparse_array.sp_values,
sparse_index=sparse_array.sp_index,
fill_value=np.nan,
dtype=pd.SparseDtype(sparse_array.dtype, np.nan),
)

series = pd.Series(sparse_array, index=index)

i_to_columns[i] = series
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -311,9 +311,6 @@ def compute(data, **kwargs):

# behavior of 'skew', 'kurt' differs for cases with and without level
skip_funcs = ("skew", "kurt")
if pd_release_version <= (1, 2, 0):
# fails under pandas 1.2. see pandas-dev/pandas#38774 for more details
skip_funcs += ("sem",)

if func_name not in skip_funcs:
data_dict = dict((str(i), rs.rand(100)) for i in range(10))
Expand Down
14 changes: 1 addition & 13 deletions python/xorbits/_mars/dataframe/window/rolling/aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,12 @@
StringField,
TupleField,
)
from ....utils import calc_nsplits, has_unknown_shape, lazy_import, pd_release_version
from ....utils import calc_nsplits, has_unknown_shape, lazy_import
from ...core import DATAFRAME_TYPE
from ...operands import DataFrameOperand, DataFrameOperandMixin
from ...utils import build_empty_df, build_empty_series, parse_index

cudf = lazy_import("cudf")
_with_pandas_issue_38908 = pd_release_version == (1, 2, 0)


class DataFrameRollingAgg(DataFrameOperand, DataFrameOperandMixin):
Expand Down Expand Up @@ -485,17 +484,6 @@ def execute(cls, ctx, op: "DataFrameRollingAgg"):
else:
data = inp

# fix for pandas 1.2.0
# see: https://github.com/pandas-dev/pandas/issues/38908
# df.rolling().aggregate('skew') modified original data
# so we copy it first for skew only
if (
_with_pandas_issue_38908
and op.func in ["skew", "kurt"]
and op.outputs[0].index[0] == 0
):
data = data.copy()

r = data.rolling(
window=window,
min_periods=op.min_periods,
Expand Down
14 changes: 0 additions & 14 deletions python/xorbits/_mars/lib/groupby_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@

cudf = lazy_import("cudf")

_HAS_SQUEEZE = pd_release_version < (1, 1, 0)
_HAS_DROPNA = pd_release_version >= (1, 1, 0)
_GROUP_KEYS_NO_DEFAULT = pd_release_version >= (1, 5, 0)

Expand All @@ -47,7 +46,6 @@ def __init__(
as_index=True,
sort=True,
group_keys=_default_group_keys,
squeeze=False,
observed=False,
dropna=True,
grouper_cache=None,
Expand Down Expand Up @@ -79,7 +77,6 @@ def _is_frame_groupby(data: Any) -> bool:
self.as_index = fill_value(as_index, "as_index", "_as_index")
self.sort = fill_value(sort, "sort", "_sort")
self.group_keys = fill_value(group_keys, "group_keys", "_group_keys")
self.squeeze = fill_value(squeeze, "squeeze")
self.observed = fill_value(observed, "observed")
self.dropna = fill_value(dropna, "dropna", "_dropna")

Expand All @@ -92,12 +89,9 @@ def _is_frame_groupby(data: Any) -> bool:
exclusions=exclusions,
as_index=as_index,
group_keys=group_keys,
squeeze=squeeze,
observed=observed,
dropna=dropna,
)
if not _HAS_SQUEEZE: # pragma: no branch
groupby_kw.pop("squeeze")
if not _HAS_DROPNA: # pragma: no branch
groupby_kw.pop("dropna")

Expand Down Expand Up @@ -127,7 +121,6 @@ def __getitem__(self, item):
as_index=self.as_index,
sort=self.sort,
group_keys=self.group_keys,
squeeze=self.squeeze,
observed=self.observed,
dropna=self.dropna,
)
Expand Down Expand Up @@ -219,7 +212,6 @@ def to_tuple(self, truncate=False, pickle_function=False):
self.as_index,
self.sort,
self.group_keys,
self.squeeze,
self.observed,
self.dropna,
getattr(getattr(self.groupby_obj, "grouper", None), "_cache", dict()),
Expand All @@ -237,7 +229,6 @@ def from_tuple(cls, tp):
as_index,
sort,
group_keys,
squeeze,
observed,
dropna,
grouper_cache,
Expand All @@ -256,7 +247,6 @@ def from_tuple(cls, tp):
as_index=as_index,
sort=sort,
group_keys=group_keys,
squeeze=squeeze,
observed=observed,
dropna=dropna,
grouper_cache=grouper_cache,
Expand All @@ -271,7 +261,6 @@ def wrapped_groupby(
as_index=True,
sort=True,
group_keys=_default_group_keys,
squeeze=False,
observed=False,
dropna=True,
):
Expand All @@ -282,12 +271,9 @@ def wrapped_groupby(
as_index=as_index,
sort=sort,
group_keys=group_keys,
squeeze=squeeze,
observed=observed,
dropna=dropna,
)
if not _HAS_SQUEEZE: # pragma: no branch
groupby_kw.pop("squeeze")
if not _HAS_DROPNA: # pragma: no branch
groupby_kw.pop("dropna")
# cudf currently does not support observed,
Expand Down
1 change: 0 additions & 1 deletion python/xorbits/_mars/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -761,7 +761,6 @@ def merge_chunks(chunk_results: List[Tuple[Tuple[int], Any]]) -> Any:
as_index=v.as_index,
sort=v.sort,
group_keys=v.group_keys,
squeeze=v.squeeze,
observed=v.observed,
)
return grouped.groupby_obj
Expand Down
Loading