diff --git a/python/xorbits/_mars/dataframe/base/transform.py b/python/xorbits/_mars/dataframe/base/transform.py index 779406811..9627fd8c7 100644 --- a/python/xorbits/_mars/dataframe/base/transform.py +++ b/python/xorbits/_mars/dataframe/base/transform.py @@ -21,7 +21,7 @@ from ...core import OutputType, recursive_tile from ...core.custom_log import redirect_custom_log from ...serialization.serializables import AnyField, BoolField, DictField, TupleField -from ...utils import enter_current_session, pd_release_version, quiet_stdio +from ...utils import enter_current_session, quiet_stdio from ..core import DATAFRAME_CHUNK_TYPE, DATAFRAME_TYPE from ..operands import DataFrameOperand, DataFrameOperandMixin from ..utils import ( @@ -33,8 +33,6 @@ validate_axis, ) -_with_convert_dtype = pd_release_version < (1, 2, 0) - class TransformOperand(DataFrameOperand, DataFrameOperandMixin): _op_type_ = opcodes.TRANSFORM @@ -246,17 +244,10 @@ def _infer_df_func_returns(self, df, dtypes): if self.call_agg: infer_df = test_df.agg(self._func, args=self.args, **self.kwds) else: - if not _with_convert_dtype: - infer_df = test_df.transform( - self._func, *self.args, **self.kwds - ) - else: # pragma: no cover - infer_df = test_df.transform( - self._func, - convert_dtype=self.convert_dtype, - args=self.args, - **self.kwds - ) + infer_df = test_df.transform( + self._func, *self.args, **self.kwds + ) + except: # noqa: E722 infer_df = None diff --git a/python/xorbits/_mars/dataframe/indexing/reindex.py b/python/xorbits/_mars/dataframe/indexing/reindex.py index 6f896416a..4d1523ca9 100644 --- a/python/xorbits/_mars/dataframe/indexing/reindex.py +++ b/python/xorbits/_mars/dataframe/indexing/reindex.py @@ -32,7 +32,7 @@ StringField, ) from ...tensor import tensor as astensor -from ...utils import lazy_import, pd_release_version +from ...utils import lazy_import from ..core import INDEX_TYPE from ..core import Index as DataFrameIndexType from ..initializer import Index as asindex @@ -42,9 +42,6 @@ cudf = lazy_import("cudf") -# under pandas<1.1, SparseArray ignores zeros on creation -_pd_sparse_miss_zero = pd_release_version[:2] < (1, 1) - class DataFrameReindex(DataFrameOperand, DataFrameOperandMixin): _op_type_ = opcodes.REINDEX @@ -274,23 +271,14 @@ def _sparse_reindex(cls, inp, index=None, columns=None): ) # convert to SparseDtype(xxx, np.nan) # to ensure 0 in sparse_array not converted to np.nan - if not _pd_sparse_miss_zero: - sparse_array = pd.arrays.SparseArray.from_spmatrix(spmatrix) - sparse_array = pd.arrays.SparseArray( - sparse_array.sp_values, - sparse_index=sparse_array.sp_index, - fill_value=np.nan, - dtype=pd.SparseDtype(sparse_array.dtype, np.nan), - ) - else: - from pandas._libs.sparse import IntIndex - - sparse_array = pd.arrays.SparseArray( - data, - sparse_index=IntIndex(index_shape, ind), - fill_value=np.nan, - dtype=pd.SparseDtype(data.dtype, np.nan), - ) + sparse_array = pd.arrays.SparseArray.from_spmatrix(spmatrix) + sparse_array = pd.arrays.SparseArray( + sparse_array.sp_values, + sparse_index=sparse_array.sp_index, + fill_value=np.nan, + dtype=pd.SparseDtype(sparse_array.dtype, np.nan), + ) + series = pd.Series(sparse_array, index=index) i_to_columns[i] = series diff --git a/python/xorbits/_mars/dataframe/reduction/tests/test_reduction_execution.py b/python/xorbits/_mars/dataframe/reduction/tests/test_reduction_execution.py index 53b121805..0b4cbe7da 100644 --- a/python/xorbits/_mars/dataframe/reduction/tests/test_reduction_execution.py +++ b/python/xorbits/_mars/dataframe/reduction/tests/test_reduction_execution.py @@ -311,9 +311,6 @@ def compute(data, **kwargs): # behavior of 'skew', 'kurt' differs for cases with and without level skip_funcs = ("skew", "kurt") - if pd_release_version <= (1, 2, 0): - # fails under pandas 1.2. see pandas-dev/pandas#38774 for more details - skip_funcs += ("sem",) if func_name not in skip_funcs: data_dict = dict((str(i), rs.rand(100)) for i in range(10)) diff --git a/python/xorbits/_mars/dataframe/window/rolling/aggregation.py b/python/xorbits/_mars/dataframe/window/rolling/aggregation.py index e4eaa1fd0..299292606 100644 --- a/python/xorbits/_mars/dataframe/window/rolling/aggregation.py +++ b/python/xorbits/_mars/dataframe/window/rolling/aggregation.py @@ -30,13 +30,12 @@ StringField, TupleField, ) -from ....utils import calc_nsplits, has_unknown_shape, lazy_import, pd_release_version +from ....utils import calc_nsplits, has_unknown_shape, lazy_import from ...core import DATAFRAME_TYPE from ...operands import DataFrameOperand, DataFrameOperandMixin from ...utils import build_empty_df, build_empty_series, parse_index cudf = lazy_import("cudf") -_with_pandas_issue_38908 = pd_release_version == (1, 2, 0) class DataFrameRollingAgg(DataFrameOperand, DataFrameOperandMixin): @@ -485,17 +484,6 @@ def execute(cls, ctx, op: "DataFrameRollingAgg"): else: data = inp - # fix for pandas 1.2.0 - # see: https://github.com/pandas-dev/pandas/issues/38908 - # df.rolling().aggregate('skew') modified original data - # so we copy it first for skew only - if ( - _with_pandas_issue_38908 - and op.func in ["skew", "kurt"] - and op.outputs[0].index[0] == 0 - ): - data = data.copy() - r = data.rolling( window=window, min_periods=op.min_periods, diff --git a/python/xorbits/_mars/lib/groupby_wrapper.py b/python/xorbits/_mars/lib/groupby_wrapper.py index ab7b0d0b4..be68a724f 100644 --- a/python/xorbits/_mars/lib/groupby_wrapper.py +++ b/python/xorbits/_mars/lib/groupby_wrapper.py @@ -26,7 +26,6 @@ cudf = lazy_import("cudf") -_HAS_SQUEEZE = pd_release_version < (1, 1, 0) _HAS_DROPNA = pd_release_version >= (1, 1, 0) _GROUP_KEYS_NO_DEFAULT = pd_release_version >= (1, 5, 0) @@ -47,7 +46,6 @@ def __init__( as_index=True, sort=True, group_keys=_default_group_keys, - squeeze=False, observed=False, dropna=True, grouper_cache=None, @@ -79,7 +77,6 @@ def _is_frame_groupby(data: Any) -> bool: self.as_index = fill_value(as_index, "as_index", "_as_index") self.sort = fill_value(sort, "sort", "_sort") self.group_keys = fill_value(group_keys, "group_keys", "_group_keys") - self.squeeze = fill_value(squeeze, "squeeze") self.observed = fill_value(observed, "observed") self.dropna = fill_value(dropna, "dropna", "_dropna") @@ -92,12 +89,9 @@ def _is_frame_groupby(data: Any) -> bool: exclusions=exclusions, as_index=as_index, group_keys=group_keys, - squeeze=squeeze, observed=observed, dropna=dropna, ) - if not _HAS_SQUEEZE: # pragma: no branch - groupby_kw.pop("squeeze") if not _HAS_DROPNA: # pragma: no branch groupby_kw.pop("dropna") @@ -127,7 +121,6 @@ def __getitem__(self, item): as_index=self.as_index, sort=self.sort, group_keys=self.group_keys, - squeeze=self.squeeze, observed=self.observed, dropna=self.dropna, ) @@ -219,7 +212,6 @@ def to_tuple(self, truncate=False, pickle_function=False): self.as_index, self.sort, self.group_keys, - self.squeeze, self.observed, self.dropna, getattr(getattr(self.groupby_obj, "grouper", None), "_cache", dict()), @@ -237,7 +229,6 @@ def from_tuple(cls, tp): as_index, sort, group_keys, - squeeze, observed, dropna, grouper_cache, @@ -256,7 +247,6 @@ def from_tuple(cls, tp): as_index=as_index, sort=sort, group_keys=group_keys, - squeeze=squeeze, observed=observed, dropna=dropna, grouper_cache=grouper_cache, @@ -271,7 +261,6 @@ def wrapped_groupby( as_index=True, sort=True, group_keys=_default_group_keys, - squeeze=False, observed=False, dropna=True, ): @@ -282,12 +271,9 @@ def wrapped_groupby( as_index=as_index, sort=sort, group_keys=group_keys, - squeeze=squeeze, observed=observed, dropna=dropna, ) - if not _HAS_SQUEEZE: # pragma: no branch - groupby_kw.pop("squeeze") if not _HAS_DROPNA: # pragma: no branch groupby_kw.pop("dropna") # cudf currently not support observed, diff --git a/python/xorbits/_mars/utils.py b/python/xorbits/_mars/utils.py index 91f00ede4..b24031e7c 100644 --- a/python/xorbits/_mars/utils.py +++ b/python/xorbits/_mars/utils.py @@ -761,7 +761,6 @@ def merge_chunks(chunk_results: List[Tuple[Tuple[int], Any]]) -> Any: as_index=v.as_index, sort=v.sort, group_keys=v.group_keys, - squeeze=v.squeeze, observed=v.observed, ) return grouped.groupby_obj