diff --git a/.github/workflows/asv.yaml b/.github/workflows/asv.yaml
index 165912b38..1e1053965 100644
--- a/.github/workflows/asv.yaml
+++ b/.github/workflows/asv.yaml
@@ -37,8 +37,7 @@ jobs:
         id: build
         shell: bash -el {0}
         run: |
-          pip install -e "git+https://github.com/xorbitsai/xoscar.git@main#subdirectory=python&egg=xoscar"
-          pip install numpy scipy cython asv coverage
+          pip install numpy scipy cython asv==0.5.1 coverage
           cd python && pip install -e ".[dev,extra]"

      - name: Run ASV benchmarks
diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml
index b1c3cb8f6..a508a6f9a 100644
--- a/.github/workflows/python.yaml
+++ b/.github/workflows/python.yaml
@@ -172,7 +172,7 @@ jobs:
             ../CI/install-hadoop.sh
             echo "import coverage; coverage.process_startup()" > \
               $(python -c "import site; print(site.getsitepackages()[-1])")/coverage.pth
-            conda install --quiet --yes -c conda-forge skein libffi conda-pack
+            conda install --quiet --yes -c conda-forge skein libffi conda-pack grpcio=1.42.0
           fi
           if [[ "$MODULE" == "vineyard" ]]; then
             pip install "vineyard<0.16.1" -i https://pypi.org/simple
@@ -250,7 +250,7 @@ jobs:
       - name: Install on GPU
         if: ${{ matrix.module == 'gpu' }}
         run: |
-          pip install -e "git+https://github.com/xorbitsai/xoscar.git@main#subdirectory=python&egg=xoscar"
+          pip install -U xoscar
           python setup.py build_ext -i
         working-directory: ./python
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index a7239dfcb..c1ed9e8c5 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -32,7 +32,7 @@ repos:
       - id: prettier
         types_or: [html, javascript]
   - repo: https://github.com/codespell-project/codespell
-    rev: v2.2.5
+    rev: v2.2.6
     hooks:
       - id: codespell
         exclude: _mars/lib
diff --git a/asv/asv.conf.json b/asv/asv.conf.json
index f751dc5c7..5e240b4f1 100644
--- a/asv/asv.conf.json
+++ b/asv/asv.conf.json
@@ -88,7 +88,7 @@
         "numpy": [],
         "Cython": ["0.29.24"],
         "pandas": [],
-        "scipy": [],
+        "scipy": ["1.10.0"],
         "scikit-learn": [],
         "numexpr": [],
         "cloudpickle": [],
diff --git a/python/setup.cfg b/python/setup.cfg
index 949a19ef8..ec6576c65 100644
--- a/python/setup.cfg
+++ b/python/setup.cfg
@@ -41,6 +41,7 @@ install_requires =
     tqdm>=4.1.0
     uvloop>=0.14.0; sys_platform!="win32"
     pyarrow>=5.0.0
+    fsspec>=2022.7.1,!=2022.8.0

[options.packages.find]
exclude =
@@ -84,7 +85,6 @@ doc =
extra =
    pillow>=7.0.0
    lz4>=1.0.0
-    fsspec>=2022.7.1,!=2022.8.0
    numexpr>=2.6.4
jax =
    jax>=0.4.0; sys.platform != "win32"
@@ -96,14 +96,11 @@ ray =
vineyard =
    vineyard>=0.3; sys.platform != "win32"
aws =
-    fsspec>=2022.7.1,!=2022.8.0
    s3fs
azure =
-    fsspec>=2022.7.1,!=2022.8.0
    adlfs
datasets =
    datasets
-    fsspec>=2022.7.1,!=2022.8.0

[coverage:run]
branch = True
diff --git a/python/xorbits/__init__.py b/python/xorbits/__init__.py
index ff1566ba1..49df2c017 100644
--- a/python/xorbits/__init__.py
+++ b/python/xorbits/__init__.py
@@ -24,6 +24,7 @@ def _install():
     from .lightgbm import _install as _install_lightgbm
     from .numpy import _install as _install_numpy
     from .pandas import _install as _install_pandas
+    from .sklearn import _install as _install_sklearn
     from .web import _install as _install_web
     from .xgboost import _install as _install_xgboost

@@ -34,6 +35,7 @@ def _install():
     _install_xgboost()
     _install_datasets()
     _install_experimental()
+    _install_sklearn()


_install()
diff --git a/python/xorbits/_mars/config.py b/python/xorbits/_mars/config.py
index 505f7bed2..3dabfbe5c 100644
--- a/python/xorbits/_mars/config.py
+++ b/python/xorbits/_mars/config.py
@@ -342,9 +342,6 @@ def validate(x):
 default_options.register_option("serialize_method", "pickle")

 # dataframe-related options
-default_options.register_option(
-    "dataframe.mode.use_inf_as_na", False, validator=is_bool
-)
 default_options.register_option(
     "dataframe.use_arrow_dtype", None, validator=any_validator(is_null, is_bool)
 )
diff --git a/python/xorbits/_mars/core/base.py b/python/xorbits/_mars/core/base.py
index 1513a4cc8..dbc71959c 100644
--- a/python/xorbits/_mars/core/base.py
+++ b/python/xorbits/_mars/core/base.py
@@ -94,7 +94,7 @@ def __copy__(self):
         return self.copy()

     def copy(self):
-        return self.copy_to(type(self)(_key=self.key))
+        return self.copy_to(type(self)())

     def copy_to(self, target: "Base"):
         target_fields = target._FIELDS
diff --git a/python/xorbits/_mars/core/entity/tileables.py b/python/xorbits/_mars/core/entity/tileables.py
index 0a4feda6d..b43ade6ca 100644
--- a/python/xorbits/_mars/core/entity/tileables.py
+++ b/python/xorbits/_mars/core/entity/tileables.py
@@ -364,7 +364,14 @@ def __copy__(self):
     def _view(self):
         return super().copy()

-    def copy(self: TileableType) -> TileableType:
+    def copy(self: TileableType, **kw) -> TileableType:
+        from ...dataframe import Index
+        from ...deploy.oscar.session import SyncSession
+
+        new_name = None
+        if isinstance(self, Index):
+            new_name = kw.pop("name", None)
+
         new_op = self.op.copy()
         if new_op.create_view:
             # if the operand is a view, make it a copy
@@ -378,6 +385,24 @@ def copy(self: TileableType) -> TileableType:
         new_outs = new_op.new_tileables(
             self.op.inputs, kws=params, output_limit=len(params)
         )
+
+        sess = self._executed_sessions[-1] if self._executed_sessions else None
+        to_incref_keys = []
+        for _out in new_outs:
+            if sess:
+                _out._attach_session(sess)
+                to_incref_keys.append(_out.key)
+                if self.data in sess._tileable_to_fetch:
+                    sess._tileable_to_fetch[_out.data] = sess._tileable_to_fetch[
+                        self.data
+                    ]
+            if new_name:
+                _out.name = new_name
+
+        if to_incref_keys:
+            assert sess is not None
+            SyncSession.from_isolated_session(sess).incref(*to_incref_keys)
+
         pos = -1
         for i, out in enumerate(self.op.outputs):
             # create a ref to copied one
diff --git a/python/xorbits/_mars/dataframe/base/cartesian_chunk.py b/python/xorbits/_mars/dataframe/base/cartesian_chunk.py
index 6da9acd21..774f6747f 100644
--- a/python/xorbits/_mars/dataframe/base/cartesian_chunk.py
+++ b/python/xorbits/_mars/dataframe/base/cartesian_chunk.py
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import logging
+
 import numpy as np
 import pandas as pd

@@ -22,11 +24,13 @@
 from ...serialization.serializables import (
     DictField,
     FunctionField,
+    Int32Field,
     KeyField,
+    StringField,
     TupleField,
 )
 from ...utils import enter_current_session, has_unknown_shape, quiet_stdio
-from ..operands import DataFrameOperand, DataFrameOperandMixin, OutputType
+from ..operands import DataFrameOperand, OutputType
 from ..utils import (
     build_df,
     build_empty_df,
@@ -34,63 +38,31 @@
     parse_index,
     validate_output_types,
 )
+from .core import DataFrameAutoMergeMixin
+
+logger = logging.getLogger(__name__)


-class DataFrameCartesianChunk(DataFrameOperand, DataFrameOperandMixin):
+class DataFrameCartesianChunk(DataFrameOperand, DataFrameAutoMergeMixin):
     _op_type_ = opcodes.CARTESIAN_CHUNK

-    _left = KeyField("left")
-    _right = KeyField("right")
-    _func = FunctionField("func")
-    _args = TupleField("args")
-    _kwargs = DictField("kwargs")
+    left = KeyField("left")
+    right = KeyField("right")
+    func = FunctionField("func")
+    args = TupleField("args")
+    kwargs = DictField("kwargs")
+    auto_merge = StringField("auto_merge")
+    auto_merge_threshold = Int32Field("auto_merge_threshold")

-    def __init__(
-        self,
-        left=None,
-        right=None,
-        func=None,
-        args=None,
-        kwargs=None,
-        output_types=None,
-        **kw
-    ):
-        super().__init__(
-            _left=left,
-            _right=right,
-            _func=func,
-            _args=args,
-            _kwargs=kwargs,
-            _output_types=output_types,
-            **kw
-        )
+    def __init__(self, output_types=None, **kw):
+        super().__init__(_output_types=output_types, **kw)
         if self.memory_scale is None:
             self.memory_scale = 2.0

-    @property
-    def left(self):
-        return self._left
-
-    @property
-    def right(self):
-        return self._right
-
-    @property
-    def func(self):
-        return self._func
-
-    @property
-    def args(self):
-        return self._args
-
-    @property
-    def kwargs(self):
-        return self._kwargs
-
     def _set_inputs(self, inputs):
         super()._set_inputs(inputs)
-        self._left = self._inputs[0]
-        self._right = self._inputs[1]
+        self.left = self.inputs[0]
+        self.right = self.inputs[1]

     @staticmethod
     def _build_test_obj(obj):
@@ -103,7 +75,7 @@ def _build_test_obj(obj):
     def __call__(self, left, right, index=None, dtypes=None):
         test_left = self._build_test_obj(left)
         test_right = self._build_test_obj(right)
-        output_type = self._output_types[0] if self._output_types else None
+        output_type = self.output_types[0] if self.output_types else None

         if output_type == OutputType.df_or_series:
             return self.new_df_or_series([left, right])
@@ -111,7 +83,7 @@ def __call__(self, left, right, index=None, dtypes=None):
         # try run to infer meta
         try:
             with np.errstate(all="ignore"), quiet_stdio():
-                obj = self._func(test_left, test_right, *self._args, **self._kwargs)
+                obj = self.func(test_left, test_right, *self.args, **self.kwargs)
         except:  # noqa: E722  # nosec  # pylint: disable=bare-except
             if output_type == OutputType.series:
                 obj = pd.Series([], dtype=np.dtype(object))
@@ -126,11 +98,11 @@ def __call__(self, left, right, index=None, dtypes=None):
                 )

         if getattr(obj, "ndim", 0) == 1 or output_type == OutputType.series:
-            shape = self._kwargs.pop("shape", (np.nan,))
+            shape = self.kwargs.pop("shape", (np.nan,))
             if index is None:
                 index = obj.index
             index_value = parse_index(
-                index, left, right, self._func, self._args, self._kwargs
+                index, left, right, self.func, self.args, self.kwargs
             )
             return self.new_series(
                 [left, right],
@@ -147,7 +119,7 @@ def __call__(self, left, right, index=None, dtypes=None):
             if index is None:
                 index = obj.index
             index_value = parse_index(
-                index, left, right, self._func, self._args, self._kwargs
+                index, left, right, self.func, self.args, self.kwargs
             )
             return self.new_dataframe(
                 [left, right],
@@ -164,6 +136,14 @@ def tile(cls, op: "DataFrameCartesianChunk"):
         out = op.outputs[0]
         out_type = op.output_types[0]

+        auto_merge_threshold = op.auto_merge_threshold
+        auto_merge_before, auto_merge_after = cls._get_auto_merge_options(op.auto_merge)
+
+        merge_before_res = yield from cls._merge_before(
+            op, auto_merge_before, auto_merge_threshold, left, right, logger
+        )
+        left, right = merge_before_res[0], merge_before_res[1]
+
         if left.ndim == 2 and left.chunk_shape[1] > 1:
             if has_unknown_shape(left):
                 yield
@@ -240,7 +220,12 @@ def tile(cls, op: "DataFrameCartesianChunk"):
         params["nsplits"] = tuple(tuple(ns) for ns in nsplits) if nsplits else nsplits
         params["chunks"] = out_chunks
         new_op = op.copy()
-        return new_op.new_tileables(op.inputs, kws=[params])
+        ret = new_op.new_tileables(op.inputs, kws=[params])
+
+        ret = yield from cls._merge_after(
+            op, auto_merge_after, auto_merge_threshold, ret, logger
+        )
+        return ret

     @classmethod
     @redirect_custom_log
@@ -250,7 +235,16 @@ def execute(cls, ctx, op: "DataFrameCartesianChunk"):
         ctx[op.outputs[0].key] = op.func(left, right, *op.args, **(op.kwargs or dict()))


-def cartesian_chunk(left, right, func, skip_infer=False, args=(), **kwargs):
+def cartesian_chunk(
+    left,
+    right,
+    func,
+    skip_infer=False,
+    args=(),
+    auto_merge: str = "both",
+    auto_merge_threshold: int = 8,
+    **kwargs,
+):
     output_type = kwargs.pop("output_type", None)
     output_types = kwargs.pop("output_types", None)
     object_type = kwargs.pop("object_type", None)
@@ -265,6 +259,10 @@ def cartesian_chunk(left, right, func, skip_infer=False, args=(), **kwargs):
     index = kwargs.pop("index", None)
     dtypes = kwargs.pop("dtypes", None)
     memory_scale = kwargs.pop("memory_scale", None)
+    if auto_merge not in ["both", "none", "before", "after"]:  # pragma: no cover
+        raise ValueError(
+            f"auto_merge can only be `both`, `none`, `before` or `after`, got {auto_merge}"
+        )

     op = DataFrameCartesianChunk(
         left=left,
@@ -274,5 +272,7 @@ def cartesian_chunk(left, right, func, skip_infer=False, args=(), **kwargs):
         kwargs=kwargs,
         output_types=output_types,
         memory_scale=memory_scale,
+        auto_merge=auto_merge,
+        auto_merge_threshold=auto_merge_threshold,
     )
     return op(left, right, index=index, dtypes=dtypes)
diff --git a/python/xorbits/_mars/dataframe/base/core.py b/python/xorbits/_mars/dataframe/base/core.py
index b7f529dc1..57796babc 100644
--- a/python/xorbits/_mars/dataframe/base/core.py
+++ b/python/xorbits/_mars/dataframe/base/core.py
@@ -13,9 +13,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+from __future__ import annotations
+
+import logging
+
+from ...core import TileStatus
+from ...core.context import get_context
 from ...serialization.serializables import KeyField
+from ...typing import OperandType, TileableType
 from ..core import DATAFRAME_TYPE, SERIES_TYPE
 from ..operands import DataFrameOperand, DataFrameOperandMixin
+from ..utils import auto_merge_chunks


 class DataFrameDeviceConversionBase(DataFrameOperand, DataFrameOperandMixin):
@@ -63,3 +71,94 @@ def tile(cls, op):
         return new_op.new_tileables(
             op.inputs, chunks=out_chunks, nsplits=op.inputs[0].nsplits, **out.params
         )
+
+
+class DataFrameAutoMergeMixin(DataFrameOperandMixin):
+    @classmethod
+    def _get_auto_merge_options(cls, auto_merge: str) -> tuple[bool, bool]:
+        if auto_merge == "both":
+            return True, True
+        elif auto_merge == "none":
+            return False, False
+        elif auto_merge == "before":
+            return True, False
+        else:
+            assert auto_merge == "after"
+            return False, True
+
+    @classmethod
+    def _merge_before(
+        cls,
+        op: OperandType,
+        auto_merge_before: bool,
+        auto_merge_threshold: int,
+        left: TileableType,
+        right: TileableType,
+        logger: logging.Logger,
+    ):
+        ctx = get_context()
+
+        if (
+            auto_merge_before
+            and len(left.chunks) + len(right.chunks) > auto_merge_threshold
+        ):
+            yield TileStatus([left, right] + left.chunks + right.chunks, progress=0.2)
+            left_chunk_size = len(left.chunks)
+            right_chunk_size = len(right.chunks)
+            left = auto_merge_chunks(ctx, left)
+            right = auto_merge_chunks(ctx, right)
+            logger.info(
+                "Auto merge before %s, left data shape: %s, chunk count: %s -> %s, "
+                "right data shape: %s, chunk count: %s -> %s.",
+                op,
+                left.shape,
+                left_chunk_size,
+                len(left.chunks),
+                right.shape,
+                right_chunk_size,
+                len(right.chunks),
+            )
+        else:
+            logger.info(
+                "Skip auto merge before %s, left data shape: %s, chunk count: %d, "
+                "right data shape: %s, chunk count: %d.",
+                op,
+                left.shape,
+                len(left.chunks),
+                right.shape,
+                len(right.chunks),
+            )
+        return [left, right]
+
+    @classmethod
+    def _merge_after(
+        cls,
+        op: OperandType,
+        auto_merge_after: bool,
+        auto_merge_threshold: int,
+        ret: TileableType,
+        logger: logging.Logger,
+    ):
+        if auto_merge_after and len(ret[0].chunks) > auto_merge_threshold:
+            # if how=="inner", output data size will reduce greatly with high probability,
+            # use auto_merge_chunks to combine small chunks.
+            yield TileStatus(
+                ret[0].chunks, progress=0.8
+            )  # trigger execution for chunks
+            merged = auto_merge_chunks(get_context(), ret[0])
+            logger.info(
+                "Auto merge after %s, data shape: %s, chunk count: %s -> %s.",
+                op,
+                merged.shape,
+                len(ret[0].chunks),
+                len(merged.chunks),
+            )
+            return [merged]
+        else:
+            logger.info(
+                "Skip auto merge after %s, data shape: %s, chunk count: %d.",
+                op,
+                ret[0].shape,
+                len(ret[0].chunks),
+            )
+            return ret
diff --git a/python/xorbits/_mars/dataframe/base/tests/test_base_execution.py b/python/xorbits/_mars/dataframe/base/tests/test_base_execution.py
index 23c19e122..529812ea5 100644
--- a/python/xorbits/_mars/dataframe/base/tests/test_base_execution.py
+++ b/python/xorbits/_mars/dataframe/base/tests/test_base_execution.py
@@ -1729,12 +1729,10 @@ def test_value_counts_execution(setup):
     r = series.value_counts()
     pd.testing.assert_series_equal(r.execute().fetch(), s.value_counts())

-    # pandas issue: https://github.com/pandas-dev/pandas/issues/54857
-    if pd.__version__ != "2.1.0":
-        r = series.value_counts(bins=5, normalize=True)
-        pd.testing.assert_series_equal(
-            r.execute().fetch(), s.value_counts(bins=5, normalize=True)
-        )
+    r = series.value_counts(bins=5, normalize=True)
+    pd.testing.assert_series_equal(
+        r.execute().fetch(), s.value_counts(bins=5, normalize=True)
+    )

     # test multi chunks
     series = from_pandas_series(s, chunk_size=30)
@@ -1746,11 +1744,10 @@ def test_value_counts_execution(setup):
     pd.testing.assert_series_equal(r.execute().fetch(), s.value_counts(normalize=True))

     # test bins and normalize
-    if pd.__version__ != "2.1.0":
-        r = series.value_counts(method="tree", bins=5, normalize=True)
-        pd.testing.assert_series_equal(
-            r.execute().fetch(), s.value_counts(bins=5, normalize=True)
-        )
+    r = series.value_counts(method="tree", bins=5, normalize=True)
+    pd.testing.assert_series_equal(
+        r.execute().fetch(), s.value_counts(bins=5, normalize=True)
+    )


 def test_astype(setup):
@@ -3185,3 +3182,53 @@ def test_nunique(setup, method, chunked, axis):
         raw_df.nunique(axis=axis),
         mdf.nunique(axis=axis, method=method).execute().fetch(),
     )
+
+
+@pytest.mark.parametrize("chunk_size", [None, 10])
+def test_copy_deep(setup, chunk_size):
+    ns = np.random.RandomState(0)
+    df = pd.DataFrame(ns.rand(100, 10), columns=["a" + str(i) for i in range(10)])
+    mdf = from_pandas_df(df, chunk_size=chunk_size)
+
+    # test case that there is no other result between copy and origin data
+    res = mdf.copy()
+    res["a0"] = res["a0"] + 1
+    dfc = df.copy(deep=True)
+    dfc["a0"] = dfc["a0"] + 1
+    pd.testing.assert_frame_equal(res.execute().fetch(), dfc)
+    pd.testing.assert_frame_equal(mdf.execute().fetch(), df)
+
+    s = pd.Series(ns.randint(0, 100, size=(100,)))
+    ms = from_pandas_series(s, chunk_size=chunk_size)
+
+    res = ms.copy()
+    res.iloc[0] = 111.0
+    sc = s.copy(deep=True)
+    sc.iloc[0] = 111.0
+    pd.testing.assert_series_equal(res.execute().fetch(), sc)
+    pd.testing.assert_series_equal(ms.execute().fetch(), s)
+
+    index = pd.Index([i for i in range(100)], name="test")
+    m_index = from_pandas_index(index, chunk_size=chunk_size)
+
+    res = m_index.copy()
+    assert res is not m_index
+    pd.testing.assert_index_equal(res.execute().fetch(), index.copy())
+    pd.testing.assert_index_equal(m_index.execute().fetch(), index)
+
+    res = m_index.copy(name="abc")
+    pd.testing.assert_index_equal(res.execute().fetch(), index.copy(name="abc"))
+    pd.testing.assert_index_equal(m_index.execute().fetch(), index)
+
+    # test case that there is other ops between copy and origin data
+    xdf = (mdf + 1) * 2 / 7
+    expected = (df + 1) * 2 / 7
+    pd.testing.assert_frame_equal(xdf.execute().fetch(), expected)
+
+    xdf_c = xdf.copy()
+    expected_c = expected.copy(deep=True)
+    pd.testing.assert_frame_equal(xdf_c.execute().fetch(), expected)
+    xdf_c["a1"] = xdf_c["a1"] + 0.8
+    expected_c["a1"] = expected_c["a1"] + 0.8
+    pd.testing.assert_frame_equal(xdf_c.execute().fetch(), expected_c)
+    pd.testing.assert_frame_equal(xdf.execute().fetch(), expected)
diff --git a/python/xorbits/_mars/dataframe/base/value_counts.py b/python/xorbits/_mars/dataframe/base/value_counts.py
index 457250014..ae80f3db3 100644
--- a/python/xorbits/_mars/dataframe/base/value_counts.py
+++ b/python/xorbits/_mars/dataframe/base/value_counts.py
@@ -193,6 +193,9 @@ def execute(cls, ctx, op: "DataFrameValueCounts"):
             # convert CategoricalDtype which generated in `cut`
             # to IntervalDtype
             result.index = result.index.astype("interval")
+            # index name changed since pandas 2.1.1
+            if pd_release_version >= (2, 1, 1):
+                result.index.name = None
         if op.nrows:
             result = result.head(op.nrows)
         result.name = op.outputs[0].name
diff --git a/python/xorbits/_mars/dataframe/core.py b/python/xorbits/_mars/dataframe/core.py
index c4facdcd7..ff0b6d9fc 100644
--- a/python/xorbits/_mars/dataframe/core.py
+++ b/python/xorbits/_mars/dataframe/core.py
@@ -954,6 +954,16 @@ def __str__(self):
     def __repr__(self):
         return self._to_str(representation=True)

+    def _to_arr(self):
+        if len(self._executed_sessions) == 0:  # pragma: no cover
+            raise NotImplementedError
+
+        data = self.fetch(session=self._executed_sessions[-1])
+        return np.asarray(data)
+
+    def __array__(self):
+        return self._to_arr()
+
     def _to_mars_tensor(self, dtype=None, order="K", extract_multi_index=False):
         tensor = self.to_tensor(extract_multi_index=extract_multi_index)
         dtype = dtype if dtype is not None else tensor.dtype
@@ -1157,6 +1167,37 @@ def to_series(self, index=None, name=None):

         return series_from_index(self, index=index, name=name)

+    def copy(self, name=None, deep=False):
+        """
+        Make a copy of this object.
+
+        Name is set on the new object.
+
+        Parameters
+        ----------
+        name : Label, optional
+            Set name for new object.
+        deep : bool, default False
+
+        Returns
+        -------
+        Index
+            Index refers to new object which is a copy of this object.
+
+        Notes
+        -----
+        In most cases, there should be no functional difference from using
+        ``deep``, but if ``deep`` is passed it will attempt to deepcopy.
+
+        Examples
+        --------
+        >>> idx = pd.Index(['a', 'b', 'c'])
+        >>> new_idx = idx.copy()
+        >>> idx is new_idx
+        False
+        """
+        return super().copy(name=name)
+

 class RangeIndex(Index):
     __slots__ = ()
@@ -1414,6 +1455,16 @@ def __str__(self):
     def __repr__(self):
         return self._to_str(representation=False)

+    def _to_arr(self):
+        if len(self._executed_sessions) == 0:  # pragma: no cover
+            raise NotImplementedError
+
+        data = self.fetch(session=self._executed_sessions[-1])
+        return np.asarray(data)
+
+    def __array__(self):
+        return self._to_arr()
+
     @property
     def dtype(self):
         return getattr(self, "_dtype", None) or getattr(self.op, "dtype", None)
@@ -1571,10 +1622,9 @@ def copy(self, deep=True):  # pylint: disable=arguments-differ
         copy : Series or DataFrame
             Object type matches caller.
""" - if deep: - return super().copy() - else: - return super()._view() + if deep is False: + raise NotImplementedError("Not support `deep=False` for now") + return super().copy() def __len__(self): return len(self._data) @@ -2598,6 +2648,11 @@ def apply_if_callable(maybe_callable, obj, **kwargs): data[k] = apply_if_callable(v, data) return data + def copy(self, deep=True): + if deep is False: + raise NotImplementedError("Not support `deep=False` for now") + return super().copy() + class DataFrameGroupByChunkData(BaseDataFrameChunkData): type_name = "DataFrameGroupBy" diff --git a/python/xorbits/_mars/dataframe/datasource/core.py b/python/xorbits/_mars/dataframe/datasource/core.py index dad9f2049..2d9b0e828 100644 --- a/python/xorbits/_mars/dataframe/datasource/core.py +++ b/python/xorbits/_mars/dataframe/datasource/core.py @@ -134,7 +134,10 @@ def post_tile(cls, op: OperandType, results: List[TileableType]): if ( op.incremental_index and results is not None - and isinstance(results[0].index_value.value, IndexValue.RangeIndex) + and ( + results[0].index_value is None + or isinstance(results[0].index_value.value, IndexValue.RangeIndex) + ) ): result = results[0] chunks = [] @@ -159,7 +162,10 @@ def pre_execute(cls, ctx: Union[dict, Context], op: OperandType): out = op.outputs[0] if ( op.incremental_index - and isinstance(out.index_value.value, IndexValue.RangeIndex) + and ( + out.index_value is None + or isinstance(out.index_value.value, IndexValue.RangeIndex) + ) and getattr(op, "incremental_index_recorder_name", None) ): index = out.index[0] @@ -173,7 +179,10 @@ def post_execute(cls, ctx: Union[dict, Context], op: OperandType): result = ctx[out.key] if ( op.incremental_index - and isinstance(out.index_value.value, IndexValue.RangeIndex) + and ( + out.index_value is None + or isinstance(out.index_value.value, IndexValue.RangeIndex) + ) and getattr(op, "incremental_index_recorder_name", None) ): recorder_name = op.incremental_index_recorder_name diff --git a/python/xorbits/_mars/dataframe/datasource/read_csv.py b/python/xorbits/_mars/dataframe/datasource/read_csv.py index b0ea4666e..43a29c47e 100644 --- a/python/xorbits/_mars/dataframe/datasource/read_csv.py +++ b/python/xorbits/_mars/dataframe/datasource/read_csv.py @@ -103,6 +103,7 @@ class DataFrameReadCSV( sep = StringField("sep") header = AnyField("header") index_col = Int32Field("index_col") + index_names = ListField("index_names") skiprows = Int32Field("skiprows") compression = StringField("compression") usecols = AnyField("usecols") @@ -114,6 +115,7 @@ class DataFrameReadCSV( storage_options = DictField("storage_options") merge_small_files = BoolField("merge_small_files") merge_small_file_options = DictField("merge_small_file_options") + is_http_url = BoolField("is_http_url", None) def get_columns(self): return self.usecols @@ -150,8 +152,32 @@ def _tile_compressed(cls, op): nsplits=nsplits, ) + @classmethod + def _tile_http_url(cls, op: "DataFrameReadCSV"): + out_chunks = [] + out_df = op.outputs[0] + for i, url in enumerate(op.path): + chunk_op = op.copy().reset_key() + chunk_op.path = url + out_chunks.append( + chunk_op.new_chunk(None, index=(i, 0), shape=(np.nan, np.nan)) + ) + new_op = op.copy() + nsplits = ((np.nan,) * len(out_chunks), (np.nan,)) + return new_op.new_dataframes( + None, + out_df.shape, + dtypes=out_df.dtypes, + index_value=out_df.index_value, + columns_value=out_df.columns_value, + chunks=out_chunks, + nsplits=nsplits, + ) + @classmethod def _tile(cls, op: "DataFrameReadCSV"): + if op.is_http_url: + return 
cls._tile_http_url(op) if op.compression: return cls._tile_compressed(op) @@ -266,6 +292,7 @@ def _pandas_read_csv(cls, f, op): nrows=op.nrows, **csv_kwargs, ) + df.index.names = op.index_names if op.keep_usecols_order: df = df[op.usecols] return df @@ -300,8 +327,34 @@ def _cudf_read_csv(cls, op): # pragma: no cover df = df[op.usecols] return df + @classmethod + def _execute_http_url(cls, ctx, op): + xdf = cudf if op.gpu else pd + out_df = op.outputs[0] + csv_kwargs = op.extra_params.copy() + if xdf is pd and op.use_arrow_dtype: + csv_kwargs.update(arrow_dtype_kwargs()) + df = xdf.read_csv( + op.path, + sep=op.sep, + names=op.names, + header=op.header, + index_col=op.index_col, + usecols=op.usecols, + nrows=op.nrows, + compression=op.compression, + **csv_kwargs, + ) + if op.keep_usecols_order: + df = df[op.usecols] + ctx[out_df.key] = df + @classmethod def execute(cls, ctx, op): + if op.is_http_url: + cls._execute_http_url(ctx, op) + return + xdf = cudf if op.gpu else pd out_df = op.outputs[0] csv_kwargs = op.extra_params.copy() @@ -330,6 +383,8 @@ def execute(cls, ctx, op): ctx[out_df.key] = df def estimate_size(cls, ctx, op): + if op.is_http_url: + return super().estimate_size(ctx, op) phy_size = op.size * (op.memory_scale or 1) ctx[op.outputs[0].key] = (phy_size, phy_size * 2) @@ -337,7 +392,10 @@ def __call__( self, index_value=None, columns_value=None, dtypes=None, chunk_bytes=None ): self._output_types = [OutputType.dataframe] - shape = (np.nan, len(dtypes)) + if dtypes is not None: + shape = (np.nan, len(dtypes)) + else: + shape = (np.nan, np.nan) return self.new_dataframe( None, shape, @@ -352,7 +410,7 @@ def read_csv( path: str, names: Union[List, Tuple] = None, sep: str = ",", - index_col: int = None, + index_col: Union[int, str, List[int], List[str]] = None, compression: str = None, header: Union[str, List] = "infer", dtype: Union[str, Dict] = None, @@ -657,6 +715,33 @@ def read_csv( """ if use_arrow_dtype is None: use_arrow_dtype = options.dataframe.use_arrow_dtype + + single_path = path[0] if isinstance(path, (list, tuple)) else path + if isinstance(single_path, str) and ( + single_path.startswith("http://") or single_path.startswith("https://") + ): + urls = path if isinstance(path, (list, tuple)) else [path] + op = DataFrameReadCSV( + path=urls, + names=names, + sep=sep, + header=header, + index_col=index_col, + usecols=usecols, + skiprows=skiprows, + compression=compression, + gpu=gpu, + incremental_index=incremental_index, + use_arrow_dtype=use_arrow_dtype, + storage_options=storage_options, + memory_scale=memory_scale, + merge_small_files=merge_small_files, + merge_small_file_options=merge_small_file_options, + is_http_url=True, + **kwargs, + ) + return op() + # infer dtypes and columns if isinstance(path, (list, tuple)): file_path = path[0] @@ -709,8 +794,8 @@ def read_csv( else: index_value = parse_index(mini_df.index) columns_value = parse_index(mini_df.columns, store_data=True) - if index_col and not isinstance(index_col, int): - index_col = list(mini_df.columns).index(index_col) + # Set names and index_col may lose multiindex names, so we have to fix it. 
+ index_names = mini_df.index.names # convert path to abs_path abs_path = convert_to_abspath(path, storage_options) @@ -721,6 +806,7 @@ def read_csv( sep=sep, header=header, index_col=index_col, + index_names=index_names, usecols=usecols, skiprows=skiprows, compression=compression, diff --git a/python/xorbits/_mars/dataframe/datasource/read_parquet.py b/python/xorbits/_mars/dataframe/datasource/read_parquet.py index bd4e65b39..a7d317095 100644 --- a/python/xorbits/_mars/dataframe/datasource/read_parquet.py +++ b/python/xorbits/_mars/dataframe/datasource/read_parquet.py @@ -51,7 +51,7 @@ ) from ...utils import is_object_dtype, lazy_import from ..operands import OutputType -from ..utils import arrow_dtype_kwargs, parse_index +from ..utils import PD_VERSION_GREATER_THAN_2_10, arrow_dtype_kwargs, parse_index from .core import ( ColumnPruneSupportedDataSourceMixin, IncrementalIndexDatasource, @@ -391,8 +391,7 @@ def _tile_no_partitioned(cls, op: "DataFrameReadParquet"): paths = sorted(paths) if not isinstance(fs, fsspec.implementations.local.LocalFileSystem): parsed_path = urlparse(op.path) - path_prefix = f"{parsed_path.scheme}://{parsed_path.netloc}" - paths = [path_prefix + path for path in paths] + paths = [f"{parsed_path.scheme}://{path}" for path in paths] elif isinstance(op.path, str) and op.path.endswith(".zip"): file = fs.open(op.path, storage_options=op.storage_options) z = zipfile.ZipFile(file) @@ -406,8 +405,7 @@ def _tile_no_partitioned(cls, op: "DataFrameReadParquet"): paths = fs.glob(op.path, storage_options=op.storage_options) if not isinstance(fs, fsspec.implementations.local.LocalFileSystem): parsed_path = urlparse(op.path) - path_prefix = f"{parsed_path.scheme}://{parsed_path.netloc}" - paths = [path_prefix + path for path in paths] + paths = [f"{parsed_path.scheme}://{path}" for path in paths] first_chunk_row_num, first_chunk_raw_bytes = None, None for i, pth in enumerate(paths): if i == 0: @@ -416,7 +414,7 @@ def _tile_no_partitioned(cls, op: "DataFrameReadParquet"): first_chunk_row_num = get_engine(op.engine).get_row_num(f) first_chunk_raw_bytes = sys.getsizeof(f) else: - of = fsspec.open(pth, storage_options=op.storage_options) + of = fsspec.open(pth) with of as f: first_chunk_row_num = get_engine(op.engine).get_row_num(f) first_chunk_raw_bytes = fsspec.get_fs_token_paths( @@ -778,7 +776,7 @@ def read_parquet( If index_col not specified, ensure range index incremental, gain a slightly better performance if setting False. use_arrow_dtype: bool, default None - If True, use arrow dtype to store columns. + If True, use arrow dtype to store columns. Default enabled if pandas >= 2.1 storage_options: dict, optional Options for storage connection. 
memory_scale: int, optional @@ -798,6 +796,10 @@ def read_parquet( engine_type = check_engine(engine) engine = get_engine(engine_type) + # We enable arrow dtype by default if pandas >= 2.1 + if use_arrow_dtype is None and engine_type == "pyarrow": + use_arrow_dtype = PD_VERSION_GREATER_THAN_2_10 + single_path = path[0] if isinstance(path, list) else path is_partitioned = False if isinstance(single_path, str) and ( @@ -830,6 +832,10 @@ def read_parquet( raise ValueError( f"The 'use_arrow_dtype' argument is not supported for the {engine_type} engine" ) + # We enable arrow dtype by default if pandas >= 2.1 + if use_arrow_dtype is None: + use_arrow_dtype = PD_VERSION_GREATER_THAN_2_10 + types_mapper = pd.ArrowDtype if use_arrow_dtype else None if fs.isdir(single_path): @@ -849,11 +855,6 @@ def read_parquet( else: if not isinstance(path, list): file_path = fs.glob(path, storage_options=storage_options)[0] - if not isinstance(fs, fsspec.implementations.local.LocalFileSystem): - parsed_path = urlparse(path) - path_prefix = f"{parsed_path.scheme}://{parsed_path.netloc}" - file_path = path_prefix + file_path - else: file_path = path[0] with fs.open(file_path, storage_options=storage_options) as f: diff --git a/python/xorbits/_mars/dataframe/datasource/tests/test_datasource.py b/python/xorbits/_mars/dataframe/datasource/tests/test_datasource.py index 074f5f1cb..7bb9358ae 100644 --- a/python/xorbits/_mars/dataframe/datasource/tests/test_datasource.py +++ b/python/xorbits/_mars/dataframe/datasource/tests/test_datasource.py @@ -674,14 +674,14 @@ def test_read_parquet_estimate_size(): chunk = tiled.chunks[0] estimate_size(sizes, chunk.op) estimated_size = sizes[chunk.key][0] - assert estimated_size > test_df.memory_usage(deep=True).sum() * 1.5 + assert estimated_size >= test_df.memory_usage(deep=True).sum() * 1.5 df = read_parquet(file_path, columns=["a", "c"]) tiled = tile(df) sizes = dict() chunk = tiled.chunks[0] estimate_size(sizes, chunk.op) - assert sizes[chunk.key][0] < estimated_size * (2 / 3) + assert sizes[chunk.key][0] <= estimated_size * (2 / 3) df = read_parquet(file_path, use_arrow_dtype=True) tiled = tile(df) @@ -690,7 +690,7 @@ def test_read_parquet_estimate_size(): estimate_size(sizes, chunk.op) estimated_size_arrow = sizes[chunk.key][0] estimated_size_arrow < estimated_size - assert estimated_size_arrow > test_df.memory_usage(deep=True).sum() * 1.5 + assert estimated_size_arrow >= test_df.memory_usage(deep=True).sum() * 1.5 df = read_parquet(file_path, use_arrow_dtype=True, columns=["a", "c"]) tiled = tile(df) @@ -698,4 +698,4 @@ def test_read_parquet_estimate_size(): chunk = tiled.chunks[0] estimate_size(sizes, chunk.op) estimated_size_arrow = sizes[chunk.key][0] - assert sizes[chunk.key][0] < estimated_size * 2 / 3 + assert sizes[chunk.key][0] <= estimated_size * 2 / 3 diff --git a/python/xorbits/_mars/dataframe/datasource/tests/test_datasource_execution.py b/python/xorbits/_mars/dataframe/datasource/tests/test_datasource_execution.py index f7d6ff18d..1e7148934 100644 --- a/python/xorbits/_mars/dataframe/datasource/tests/test_datasource_execution.py +++ b/python/xorbits/_mars/dataframe/datasource/tests/test_datasource_execution.py @@ -48,7 +48,7 @@ from ....config import option_context from ....tests.core import require_cudf, require_cupy from ....utils import get_next_port, pd_release_version -from ...utils import is_pandas_2 +from ...utils import PD_VERSION_GREATER_THAN_2_10, is_pandas_2 from ..dataframe import from_pandas as from_pandas_df from ..from_records import from_records 
 from ..from_tensor import dataframe_from_1d_tileables, dataframe_from_tensor
@@ -606,6 +606,10 @@ def test_read_csv_execution(setup):
         mdf2 = md.read_csv(file_path, index_col=0, chunk_bytes=100).execute().fetch()
         pd.testing.assert_frame_equal(pdf, mdf2)

+        mdf3 = md.read_csv(file_path, index_col=[0, 1]).execute().fetch()
+        pdf3 = pd.read_csv(file_path, index_col=[0, 1])
+        pd.testing.assert_frame_equal(pdf3, mdf3)
+
     # test nan
     with tempfile.TemporaryDirectory() as tempdir:
         file_path = os.path.join(tempdir, "test.csv")
@@ -1295,6 +1299,8 @@ def test_read_parquet_arrow(setup, engine):
             "c": np.random.rand(10),
         }
     )
+    if PD_VERSION_GREATER_THAN_2_10 and engine != "fastparquet":
+        test_df = test_df.convert_dtypes(dtype_backend="pyarrow")

     with tempfile.TemporaryDirectory() as tempdir:
         file_path = os.path.join(tempdir, "test.parquet")
@@ -1352,6 +1358,9 @@ def test_read_parquet_arrow(setup, engine):
             }
         )

+        if PD_VERSION_GREATER_THAN_2_10 and engine != "fastparquet":
+            df = df.convert_dtypes(dtype_backend="pyarrow")
+
         file_paths = [os.path.join(tempdir, f"test{i}.parquet") for i in range(3)]
         df[:100].to_parquet(file_paths[0], row_group_size=50)
         df[100:200].to_parquet(file_paths[1], row_group_size=30)
@@ -1447,6 +1456,8 @@ def test_read_parquet_zip(setup, engine):
             "c": np.random.rand(300),
         }
     )
+    if PD_VERSION_GREATER_THAN_2_10 and engine != "fastparquet":
+        df = df.convert_dtypes(dtype_backend="pyarrow")

     file_paths = [os.path.join(tempdir, f"test{i}.parquet") for i in range(3)]
     df[:100].to_parquet(file_paths[0], row_group_size=50)
@@ -1545,7 +1556,9 @@ def test_read_parquet_fast_parquet(setup):
     # assert sum(s[0] for s in size_res) > test_df.memory_usage(deep=True).sum()


-def _start_tornado(port: int, file_path0: str, file_path1: str, zip_path: str):
+def _start_tornado(
+    port: int, file_path0: str, file_path1: str, csv_path: str, zip_path: str
+):
     import tornado.ioloop
     import tornado.web

@@ -1559,6 +1572,11 @@ def get(self):
             with open(file_path1, "rb") as f:
                 self.write(f.read())

+    class CSVHandler(tornado.web.RequestHandler):
+        def get(self):
+            with open(csv_path, "rb") as f:
+                self.write(f.read())
+
     class RangeZipFileHandler(tornado.web.RequestHandler):
         def get(self):
             file_path = zip_path
@@ -1596,6 +1614,7 @@ def parse_range_header(self, range_header):
             (r"/read-parquet0", Parquet0Handler),
             (r"/read-parquet1", Parquet1Handler),
             (r"/test.zip", RangeZipFileHandler),
+            (r"/read-csv", CSVHandler),
         ]
     )
     app.listen(port)
@@ -1607,6 +1626,7 @@ def start_http_server():
     with tempfile.TemporaryDirectory() as tempdir:
         file_path0 = os.path.join(tempdir, "test0.parquet")
         file_path1 = os.path.join(tempdir, "test1.parquet")
+        csv_path = os.path.join(tempdir, "test.csv")

         df = pd.DataFrame(
             {
@@ -1617,6 +1637,7 @@ def start_http_server():
         )
         df.iloc[:50].to_parquet(file_path0)
         df.iloc[50:].to_parquet(file_path1)
+        df.to_csv(csv_path)

         import zipfile

         zip_path = os.path.join(tempdir, "test.zip")
@@ -1627,7 +1648,8 @@ def start_http_server():

         port = get_next_port()
         proc = multiprocessing.Process(
-            target=_start_tornado, args=(port, file_path0, file_path1, zip_path)
+            target=_start_tornado,
+            args=(port, file_path0, file_path1, csv_path, zip_path),
         )
         proc.daemon = True
         proc.start()
@@ -1635,13 +1657,15 @@ def start_http_server():
         yield df, [
             f"http://127.0.0.1:{port}/read-parquet0",
             f"http://127.0.0.1:{port}/read-parquet1",
-        ], f"http://127.0.0.1:{port}/test.zip"
+        ], f"http://127.0.0.1:{port}/test.zip", f"http://127.0.0.1:{port}/read-csv"

         # Terminate the process
         proc.terminate()


 def test_read_parquet_with_http_url(setup, start_http_server):
-    df, urls, zip_url = start_http_server
+    df, urls, zip_url, _ = start_http_server
+    if PD_VERSION_GREATER_THAN_2_10:
+        df = df.convert_dtypes(dtype_backend="pyarrow")
     mdf = md.read_parquet(urls).execute().fetch()
     pd.testing.assert_frame_equal(df, mdf)
     if is_pandas_2():
@@ -1766,6 +1790,8 @@ def test_read_parquet_ftp(ftp_writable, setup):
     host, port, user, pw = ftp_writable
     data = {"Column1": [1, 2, 3], "Column2": ["A", "B", "C"]}
     df = pd.DataFrame(data)
+    if PD_VERSION_GREATER_THAN_2_10:
+        df = df.convert_dtypes(dtype_backend="pyarrow")
     with tempfile.TemporaryDirectory() as tempdir:
         local_file_path = os.path.join(tempdir, "test.parquet")
         df.to_parquet("ftp://{}:{}@{}:{}/test.parquet".format(user, pw, host, port))
@@ -1788,3 +1814,30 @@ def test_read_parquet_ftp(ftp_writable, setup):
         "ftp://{}:{}@{}:{}/test.zip".format(user, pw, host, port)
     )
     pd.testing.assert_frame_equal(df, mdf_zip.to_pandas())
+
+
+def test_read_csv_http_url(setup, start_http_server):
+    df, _, _, csv_url = start_http_server
+    mdf = md.read_csv(csv_url)
+    pd.testing.assert_frame_equal(pd.read_csv(csv_url), mdf.execute().fetch())
+
+    mdf = md.read_csv(csv_url, names=["col1", "col2", "col3"])
+    pd.testing.assert_frame_equal(
+        pd.read_csv(csv_url, names=["col1", "col2", "col3"]), mdf.execute().fetch()
+    )
+
+    mdf = md.read_csv(csv_url, header=0)
+    pd.testing.assert_frame_equal(pd.read_csv(csv_url, header=0), mdf.execute().fetch())
+
+    mdf = md.read_csv(csv_url, header=None)
+    pd.testing.assert_frame_equal(
+        pd.read_csv(csv_url, header=None), mdf.execute().fetch()
+    )
+
+    if is_pandas_2():
+        df = df.convert_dtypes(dtype_backend="pyarrow")
+        mdf = md.read_csv(csv_url, use_arrow_dtype=True).execute().fetch()
+        pd.testing.assert_frame_equal(
+            pd.read_csv(csv_url, dtype_backend="pyarrow"), mdf
+        )
+        assert isinstance(mdf.dtypes.iloc[1], pd.ArrowDtype)
diff --git a/python/xorbits/_mars/dataframe/datasource/tests/test_datasource_hdfs.py b/python/xorbits/_mars/dataframe/datasource/tests/test_datasource_hdfs.py
index 29afdc4be..8bbbf1478 100644
--- a/python/xorbits/_mars/dataframe/datasource/tests/test_datasource_hdfs.py
+++ b/python/xorbits/_mars/dataframe/datasource/tests/test_datasource_hdfs.py
@@ -21,6 +21,7 @@
 import pytest

 from .... import dataframe as md
+from ....dataframe.utils import PD_VERSION_GREATER_THAN_2_10
 from ....tests.core import require_hadoop

 TEST_DIR = "/tmp/test"
@@ -124,7 +125,9 @@ def test_read_parquet_execution(setup, setup_hdfs):

     df = md.read_parquet(f"hdfs://localhost:8020{TEST_DIR}/test.parquet")
     res = df.to_pandas()
-    pd.testing.assert_frame_equal(res, test_df)
+    if PD_VERSION_GREATER_THAN_2_10:
+        expected = test_df.convert_dtypes(dtype_backend="pyarrow")
+    pd.testing.assert_frame_equal(res, expected)

     hdfs.mkdir(f"{TEST_DIR}/test_partitioned")

@@ -139,4 +142,7 @@ def test_read_parquet_execution(setup, setup_hdfs):

     df = md.read_parquet(f"hdfs://localhost:8020{TEST_DIR}/test_partitioned")
     res = df.to_pandas()
+    if PD_VERSION_GREATER_THAN_2_10:
+        test_df = test_df.convert_dtypes(dtype_backend="pyarrow")
+        test_df2 = test_df2.convert_dtypes(dtype_backend="pyarrow")
     pd.testing.assert_frame_equal(res, pd.concat([test_df, test_df2]))
diff --git a/python/xorbits/_mars/dataframe/datastore/tests/test_datastore_execution.py b/python/xorbits/_mars/dataframe/datastore/tests/test_datastore_execution.py
index 1ffdeaaac..485fa252b 100644
--- a/python/xorbits/_mars/dataframe/datastore/tests/test_datastore_execution.py
+++ b/python/xorbits/_mars/dataframe/datastore/tests/test_datastore_execution.py
@@ -40,6 +40,7 @@
 from .... import dataframe as md
 from ....tests.core import flaky
 from ... import DataFrame
+from ...utils import PD_VERSION_GREATER_THAN_2_10


 def test_to_csv_execution(setup):
@@ -182,7 +183,12 @@ def test_to_parquet_arrow_execution(setup):
     read_df = md.read_parquet(path)
     result = read_df.execute().fetch()
     result = result.sort_index()
-    pd.testing.assert_frame_equal(result, raw)
+    if PD_VERSION_GREATER_THAN_2_10:
+        expected = raw.convert_dtypes(dtype_backend="pyarrow")
+    else:
+        expected = raw
+
+    pd.testing.assert_frame_equal(result, expected)

     # test read_parquet then to_parquet
     read_df = md.read_parquet(path)
@@ -195,9 +201,11 @@ def test_to_parquet_arrow_execution(setup):
     read_df = md.read_parquet(path)
     result = read_df.execute().fetch()
     result["col3"] = result["col3"].astype("object")
+    if PD_VERSION_GREATER_THAN_2_10:
+        expected["col3"] = expected["col3"].astype("object")
     pd.testing.assert_frame_equal(
         result.sort_values("col1").reset_index(drop=True),
-        raw.sort_values("col1").reset_index(drop=True),
+        expected.sort_values("col1").reset_index(drop=True),
     )
diff --git a/python/xorbits/_mars/dataframe/datastore/tests/test_datastore_hdfs.py b/python/xorbits/_mars/dataframe/datastore/tests/test_datastore_hdfs.py
index 0341aab3d..78c5b75ae 100644
--- a/python/xorbits/_mars/dataframe/datastore/tests/test_datastore_hdfs.py
+++ b/python/xorbits/_mars/dataframe/datastore/tests/test_datastore_hdfs.py
@@ -19,6 +19,7 @@
 from .... import dataframe as md
 from ....tests.core import require_hadoop
+from ...utils import PD_VERSION_GREATER_THAN_2_10

 TEST_DIR = "/tmp/test"

@@ -55,6 +56,9 @@ def test_to_parquet_execution(setup, setup_hdfs):
     hdfs.mkdir(dir_name)
     df.to_parquet(dir_name).execute()

+    if PD_VERSION_GREATER_THAN_2_10:
+        test_df = test_df.convert_dtypes(dtype_backend="pyarrow")
+
     result = md.read_parquet(dir_name).to_pandas()
     pd.testing.assert_frame_equal(result.reset_index(drop=True), test_df)
diff --git a/python/xorbits/_mars/dataframe/groupby/__init__.py b/python/xorbits/_mars/dataframe/groupby/__init__.py
index 4b76ca6c4..c4369be4e 100644
--- a/python/xorbits/_mars/dataframe/groupby/__init__.py
+++ b/python/xorbits/_mars/dataframe/groupby/__init__.py
@@ -26,6 +26,7 @@ def _install():
     from .fill import bfill, ffill, fillna
     from .getitem import df_groupby_getitem
     from .head import head
+    from .nth import nth

     # Just for enabling custom agg function registration.
     # Therefore, del this immediately after import.
@@ -73,6 +74,7 @@ def _install():
         setattr(cls, "cumsum", cumsum)

         setattr(cls, "head", head)
+        setattr(cls, "nth", nth)
         setattr(cls, "rolling", rolling)
diff --git a/python/xorbits/_mars/dataframe/groupby/aggregation.py b/python/xorbits/_mars/dataframe/groupby/aggregation.py
index 1d67e0305..1d038944b 100644
--- a/python/xorbits/_mars/dataframe/groupby/aggregation.py
+++ b/python/xorbits/_mars/dataframe/groupby/aggregation.py
@@ -27,10 +27,10 @@
 from ...core import ENTITY_TYPE, OutputType
 from ...core.context import get_context
 from ...core.custom_log import redirect_custom_log
+from ...core.entity.utils import recursive_tile
 from ...core.operand import OperandStage
 from ...serialization.serializables import (
     AnyField,
-    BoolField,
     DictField,
     Int32Field,
     Int64Field,
@@ -170,7 +170,6 @@ class DataFrameGroupByAgg(DataFrameOperand, DataFrameOperandMixin):
     groupby_params = DictField("groupby_params")

     method = StringField("method")
-    use_inf_as_na = BoolField("use_inf_as_na")

     # for chunk
     combine_size = Int32Field("combine_size")
@@ -482,6 +481,7 @@ def _gen_map_chunks(
             # force as_index=True for map phase
             map_op.output_types = op.output_types
             map_op.groupby_params = map_op.groupby_params.copy()
+            map_op.raw_groupby_params = map_op.raw_groupby_params.copy()
             map_op.groupby_params["as_index"] = True
             if isinstance(map_op.groupby_params["by"], list):
                 by = []
@@ -493,6 +493,7 @@ def _gen_map_chunks(
                     else:
                         by.append(v)
                 map_op.groupby_params["by"] = by
+                map_op.raw_groupby_params["by"] = by
             map_op.stage = OperandStage.map
             map_op.pre_funcs = func_infos.pre_funcs
             map_op.agg_funcs = func_infos.agg_funcs
@@ -928,6 +929,20 @@ def tile(cls, op: "DataFrameGroupByAgg"):
             in_df = build_concatenated_rows_frame(in_df)
         out_df = op.outputs[0]

+        by = op.groupby_params["by"]
+        in_df_nsplits_settled: bool = all([not np.isnan(v) for v in in_df.nsplits[0]])
+        if isinstance(by, list):
+            for i, _by in enumerate(by):
+                if (
+                    isinstance(_by, ENTITY_TYPE)
+                    and all([not np.isnan(v) for v in _by.nsplits[0]])
+                    and in_df_nsplits_settled
+                ):
+                    by[i] = yield from recursive_tile(
+                        _by.rechunk({0: in_df.nsplits[0]})
+                    )
+                    yield by[i].chunks
+
         func_infos = cls._compile_funcs(op, in_df)

         if op.method == "auto":
@@ -945,6 +960,10 @@ def tile(cls, op: "DataFrameGroupByAgg"):
         else:  # pragma: no cover
             raise NotImplementedError

+    @classmethod
+    def _get_new_by_data(cls, by: List, ctx: Dict):
+        return [ctx[v.key] if isinstance(v, ENTITY_TYPE) else v for v in by]
+
     @classmethod
     def _get_grouped(cls, op: "DataFrameGroupByAgg", df, ctx, copy=False, grouper=None):
         if copy:
@@ -958,13 +977,7 @@ def _get_grouped(cls, op: "DataFrameGroupByAgg", df, ctx, copy=False, grouper=None):
             params["by"] = grouper
             params.pop("level", None)
         elif isinstance(params.get("by"), list):
-            new_by = []
-            for v in params["by"]:
-                if isinstance(v, ENTITY_TYPE):
-                    new_by.append(ctx[v.key])
-                else:
-                    new_by.append(v)
-            params["by"] = new_by
+            params["by"] = cls._get_new_by_data(params["by"], ctx)

         grouped = df.groupby(**params)

@@ -986,10 +999,23 @@ def _pack_inputs(agg_funcs: List[ReductionAggStep], in_data):
             pos += step.output_limit
         return out_dict

-    @staticmethod
+    @classmethod
     def _do_custom_agg(
-        func_name: str, op: "DataFrameGroupByAgg", in_data: pd.DataFrame
+        cls, func_name: str, op: "DataFrameGroupByAgg", in_data: pd.DataFrame, ctx: Dict
     ) -> Union[pd.Series, pd.DataFrame]:
+        # The aggregation must be given in tuple form, e.g. x=('col', 'agg_func_name');
+        # see the `is_funcs_aggregate` function. Otherwise the code never reaches
+        # here, or execution switches to transform instead.
+        if op.raw_func is None:
+            func_name = list(op.raw_func_kw.values())[0][1]
+        if (
+            func_name == "nunique"
+            and "by" in op.groupby_params
+            and isinstance(op.groupby_params["by"], list)
+        ):
+            op.raw_groupby_params["by"] = cls._get_new_by_data(
+                op.groupby_params["by"], ctx
+            )
         if op.stage == OperandStage.map:
             return custom_agg_functions[func_name].execute_map(op, in_data)
         elif op.stage == OperandStage.combine:
@@ -1107,7 +1133,7 @@ def _wrapped_func(col):
         ) in op.agg_funcs:
             input_obj = ret_map_groupbys[input_key]
             if map_func_name == "custom_reduction":
-                agg_dfs.append(cls._do_custom_agg(raw_func_name, op, in_data))
+                agg_dfs.append(cls._do_custom_agg(raw_func_name, op, in_data, ctx))
             else:
                 single_func = map_func_name == op.raw_func
                 agg_dfs.append(
@@ -1155,7 +1181,7 @@ def _execute_combine(cls, ctx, op: "DataFrameGroupByAgg"):
         ) in zip(ctx[op.inputs[0].key], op.agg_funcs):
             input_obj = in_data_dict[output_key]
             if agg_func_name == "custom_reduction":
-                combines.append(cls._do_custom_agg(raw_func_name, op, raw_input))
+                combines.append(cls._do_custom_agg(raw_func_name, op, raw_input, ctx))
             else:
                 combines.append(
                     cls._do_predefined_agg(input_obj, agg_func_name, gpu=op.gpu, **kwds)
@@ -1196,7 +1222,7 @@ def _execute_agg(cls, ctx, op: "DataFrameGroupByAgg"):
         ) in op.agg_funcs:
             if agg_func_name == "custom_reduction":
                 in_data_dict[output_key] = cls._do_custom_agg(
-                    raw_func_name, op, in_data_dict[output_key]
+                    raw_func_name, op, in_data_dict[output_key], ctx
                 )
             else:
                 input_obj = cls._get_grouped(op, in_data_dict[output_key], ctx)
@@ -1286,18 +1312,14 @@ def _execute_agg(cls, ctx, op: "DataFrameGroupByAgg"):
     @redirect_custom_log
     @enter_current_session
     def execute(cls, ctx, op: "DataFrameGroupByAgg"):
-        try:
-            pd.set_option("mode.use_inf_as_na", op.use_inf_as_na)
-            if op.stage == OperandStage.map:
-                cls._execute_map(ctx, op)
-            elif op.stage == OperandStage.combine:
-                cls._execute_combine(ctx, op)
-            elif op.stage == OperandStage.agg:
-                cls._execute_agg(ctx, op)
-            else:  # pragma: no cover
-                raise ValueError("Aggregation operand not executable")
-        finally:
-            pd.reset_option("mode.use_inf_as_na")
+        if op.stage == OperandStage.map:
+            cls._execute_map(ctx, op)
+        elif op.stage == OperandStage.combine:
+            cls._execute_combine(ctx, op)
+        elif op.stage == OperandStage.agg:
+            cls._execute_agg(ctx, op)
+        else:  # pragma: no cover
+            raise ValueError("Aggregation operand not executable")


 def agg(groupby, func=None, method="auto", combine_size=None, *args, **kwargs):
@@ -1355,8 +1377,6 @@ def agg(groupby, func=None, method="auto", combine_size=None, *args, **kwargs):
             func, *args, _call_agg=True, index=index_value, **kwargs
         )

-    use_inf_as_na = kwargs.pop("_use_inf_as_na", options.dataframe.mode.use_inf_as_na)
-
     agg_op = DataFrameGroupByAgg(
         raw_func=func,
         raw_func_kw=kwargs,
@@ -1365,6 +1385,5 @@ def agg(groupby, func=None, method="auto", combine_size=None, *args, **kwargs):
         groupby_params=groupby.op.groupby_params,
         combine_size=combine_size or options.combine_size,
         chunk_store_limit=options.chunk_store_limit,
-        use_inf_as_na=use_inf_as_na,
     )
     return agg_op(groupby)
diff --git a/python/xorbits/_mars/dataframe/groupby/fill.py b/python/xorbits/_mars/dataframe/groupby/fill.py
index 7d71a89de..4baa08ea1 100644
--- a/python/xorbits/_mars/dataframe/groupby/fill.py
+++ b/python/xorbits/_mars/dataframe/groupby/fill.py
@@ -18,7 +18,7 @@

 from ... import opcodes
 from ...core import OutputType
-from ...serialization.serializables import AnyField, DictField, Int64Field, StringField
+from ...serialization.serializables import AnyField, Int64Field, StringField
 from ..operands import DataFrameOperand, DataFrameOperandMixin
 from ..utils import build_empty_df, build_empty_series, parse_index

@@ -29,7 +29,6 @@ class GroupByFillOperand(DataFrameOperand, DataFrameOperandMixin):
     value = AnyField("value", default=None)
     method = StringField("method", default=None)
     limit = Int64Field("limit", default=None)
-    downcast = DictField("downcast", default=None)

     def _calc_out_dtypes(self, in_groupby):
         mock_groupby = in_groupby.op.build_mock_groupby()
@@ -40,7 +39,6 @@ def _calc_out_dtypes(self, in_groupby):
                 value=self.value,
                 method=self.method,
                 limit=self.limit,
-                downcast=self.downcast,
             )
         else:
             result_df = getattr(mock_groupby, func_name)(limit=self.limit)
@@ -133,7 +131,6 @@ def execute(cls, ctx, op: "GroupByFillOperand"):
                 value=op.value,
                 method=op.method,
                 limit=op.limit,
-                downcast=op.downcast,
             )
         else:
             result = getattr(in_data, func_name)(limit=op.limit)
@@ -184,7 +181,7 @@ def bfill(groupby, limit=None):
     return op(groupby)


-def fillna(groupby, value=None, method=None, limit=None, downcast=None):
+def fillna(groupby, value=None, method=None, limit=None):
     """
     Fill NA/NaN values using the specified method

@@ -197,11 +194,8 @@ def fillna(groupby, value=None, method=None, limit=None, downcast=None):
     limit: int, default None
         If method is specified, this is the maximum number of consecutive
         NaN values to forward/backward fill
-    downcast: dict, default None
-        A dict of item->dtype of what to downcast if possible,
-        or the string ‘infer’ which will try to downcast to an appropriate equal type

     return: DataFrame or None
     """
-    op = GroupByFillNa(value=value, method=method, limit=limit, downcast=downcast)
+    op = GroupByFillNa(value=value, method=method, limit=limit)
     return op(groupby)
diff --git a/python/xorbits/_mars/dataframe/groupby/nth.py b/python/xorbits/_mars/dataframe/groupby/nth.py
new file mode 100644
index 000000000..da2cb14a0
--- /dev/null
+++ b/python/xorbits/_mars/dataframe/groupby/nth.py
@@ -0,0 +1,232 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import pandas as pd + +from ...core import OutputType, get_output_types, recursive_tile +from ...serialization.serializables import DictField, IndexField, StringField +from ..core import IndexValue +from ..operands import DataFrameOperand, DataFrameOperandMixin +from ..utils import build_concatenated_rows_frame, parse_index + + +class GroupByNthSelector(DataFrameOperand, DataFrameOperandMixin): + _op_module_ = "dataframe.groupby" + + groupby_params = DictField("groupby_params") + n = IndexField("n") + dropna = StringField("dropna", default=None) + + def __call__(self, groupby): + df = groupby + while df.op.output_types[0] not in (OutputType.dataframe, OutputType.series): + df = df.inputs[0] + selection = groupby.op.groupby_params.pop("selection", None) + if df.ndim > 1 and selection: + if isinstance(selection, tuple) and selection not in df.dtypes: + selection = list(selection) + + result_df = df[selection] + else: + result_df = df + + self._output_types = ( + [OutputType.dataframe] if result_df.ndim == 2 else [OutputType.series] + ) + params = result_df.params + params["shape"] = (np.nan,) + result_df.shape[1:] + if isinstance(df.index_value.value, IndexValue.RangeIndex): + params["index_value"] = parse_index(pd.RangeIndex(-1), df.key) + + return self.new_tileable([df], **params) + + @classmethod + def tile(cls, op: "GroupByNthSelector"): + in_df = op.inputs[0] + groupby_params = op.groupby_params.copy() + selection = groupby_params.pop("selection", None) + if len(in_df.shape) > 1: + in_df = build_concatenated_rows_frame(in_df) + out_df = op.outputs[0] + # if there is only one chunk, tile with a single chunk + if len(in_df.chunks) <= 1: + new_shape = (np.nan,) + new_nsplits = ((np.nan,),) + if out_df.ndim > 1: + new_shape += (out_df.shape[1],) + new_nsplits += ((out_df.shape[1],),) + c = in_df.chunks[0] + chunk_op = op.copy().reset_key() + params = out_df.params + params["shape"] = new_shape + params["index"] = (0,) * out_df.ndim + out_chunk = chunk_op.new_chunk([c], **params) + + tileable_op = op.copy().reset_key() + return tileable_op.new_tileables( + [in_df], nsplits=new_nsplits, chunks=[out_chunk], **params + ) + + if in_df.ndim > 1 and selection: + if isinstance(selection, tuple) and selection not in in_df.dtypes: + selection = list(selection) + + if not isinstance(selection, list): + pre_selection = [selection] + else: + pre_selection = list(selection) + + if isinstance(groupby_params.get("by"), list): + pre_selection += [ + el for el in groupby_params["by"] if el not in pre_selection + ] + + if len(pre_selection) != in_df.shape[1]: + in_df = yield from recursive_tile(in_df[pre_selection]) + + # pre chunks + pre_chunks = [] + for c in in_df.chunks: + pre_op = op.copy().reset_key() + pre_op._output_types = get_output_types(c) + pre_op.groupby_params = op.groupby_params.copy() + pre_op.groupby_params.pop("selection", None) + params = c.params + params["shape"] = (np.nan,) + c.shape[1:] + pre_chunks.append(pre_op.new_chunk([c], **params)) + + new_op = op.copy().reset_key() + new_op._output_types = get_output_types(in_df) + new_nsplits = ((np.nan,) * len(in_df.nsplits[0]),) + in_df.nsplits[1:] + pre_tiled = new_op.new_tileable( + [in_df], chunks=pre_chunks, nsplits=new_nsplits, **in_df.params + ) + # generate groupby + grouped = yield from recursive_tile(pre_tiled.groupby(**groupby_params)) + if selection: + grouped = yield from 
recursive_tile(grouped[selection])
+
+        # generate post chunks
+        post_chunks = []
+        for c in grouped.chunks:
+            post_op = op.copy().reset_key()
+            post_op.groupby_params = op.groupby_params.copy()
+            post_op.groupby_params.pop("selection", None)
+            if op.output_types[0] == OutputType.dataframe:
+                index = c.index
+            else:
+                index = (c.index[0],)
+            params = out_df.params
+            params["index"] = index
+            post_chunks.append(post_op.new_chunk([c], **params))
+
+        new_op = op.copy().reset_key()
+        new_nsplits = ((np.nan,) * len(in_df.nsplits[0]),)
+        if out_df.ndim > 1:
+            new_nsplits += ((out_df.shape[1],),)
+        return new_op.new_tileables(
+            [in_df], chunks=post_chunks, nsplits=new_nsplits, **out_df.params
+        )
+
+    @classmethod
+    def execute(cls, ctx, op: "GroupByNthSelector"):
+        in_data = ctx[op.inputs[0].key]
+        params = op.groupby_params.copy()
+        selection = params.pop("selection", None)
+
+        if hasattr(in_data, "groupby"):
+            grouped = in_data.groupby(**params)
+        else:
+            grouped = in_data
+        if selection:
+            grouped = grouped[selection]
+        result = grouped.nth(op.n, op.dropna)
+        ctx[op.outputs[0].key] = result
+
+
+def nth(groupby, n, dropna=None):
+    """
+    Take the nth row from each group if n is an int, or a subset of rows
+    if n is a list of ints.
+
+    If ``dropna`` is given, take the nth non-null row; ``dropna`` is either
+    truthy (for a Series) or one of 'any'/'all' (for a DataFrame).
+    This is equivalent to calling ``dropna(how=dropna)`` before the
+    groupby.
+
+    Parameters
+    ----------
+    n : int, slice or list of ints
+        A single nth value for the row, a slice, or a list of nth values.
+    dropna : None or str, optional
+        Apply the specified dropna operation before counting which row is
+        the nth row. Needs to be None, 'any' or 'all'.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> import mars.dataframe as md
+    >>> df = md.DataFrame({'A': [1, 1, 2, 1, 2],
+    ...                    'B': [np.nan, 2, 3, 4, 5]}, columns=['A', 'B'])
+    >>> g = df.groupby('A')
+    >>> g.nth(0).execute()
+         B
+    A
+    1  NaN
+    2  3.0
+    >>> g.nth(1).execute()
+         B
+    A
+    1  2.0
+    2  5.0
+    >>> g.nth(-1).execute()
+         B
+    A
+    1  4.0
+    2  5.0
+    >>> g.nth([0, 1]).execute()
+         B
+    A
+    1  NaN
+    1  2.0
+    2  3.0
+    2  5.0
+
+    Specifying ``dropna`` allows NaN values to be ignored when counting:
+
+    >>> g.nth(0, dropna='any').execute()
+         B
+    A
+    1  2.0
+    2  3.0
+
+    When using ``dropna``, NaN marks groups that have been exhausted:
+
+    >>> g.nth(3, dropna='any').execute()
+        B
+    A
+    1 NaN
+    2 NaN
+
+    Specifying ``as_index=False`` in ``groupby`` keeps the original index.
+
+    >>> df.groupby('A', as_index=False).nth(1).execute()
+       A    B
+    1  1  2.0
+    4  2  5.0
+    """
+    groupby_params = groupby.op.groupby_params.copy()
+    groupby_params.pop("as_index", None)
+    op = GroupByNthSelector(n=n, dropna=dropna, groupby_params=groupby_params)
+    return op(groupby)
diff --git a/python/xorbits/_mars/dataframe/groupby/nunique.py b/python/xorbits/_mars/dataframe/groupby/nunique.py
index 424b9797e..c07577ba9 100644
--- a/python/xorbits/_mars/dataframe/groupby/nunique.py
+++ b/python/xorbits/_mars/dataframe/groupby/nunique.py
@@ -15,8 +15,9 @@
 
 import pandas as pd
 
-from ...core import OutputType
+from ...core import ENTITY_TYPE, OutputType
 from ...utils import implements
+from ..utils import is_dataframe
 from .aggregation import DataFrameGroupByAgg
 from .custom_aggregation import (
     DataFrameCustomGroupByAggMixin,
@@ -79,6 +80,22 @@ def _get_selection_columns(cls, op: DataFrameGroupByAgg) -> Union[None, List]:
             selection = [selection]
         return selection
 
+    @classmethod
+    def _drop_duplicates_by_series(cls, in_data: pd.DataFrame, origin_cols: List):
+        if isinstance(in_data.index, pd.MultiIndex):
+            origin_index_name = in_data.index.names
+        else:
+            origin_index_name = in_data.index.name
+        res = in_data.reset_index()
+        new_cols = list(res.columns)
+        index_cols = [v for v in new_cols if v not in origin_cols]
+        res = res.drop_duplicates().set_index(index_cols)
+        if isinstance(res.index, pd.MultiIndex):
+            res.index.names = origin_index_name
+        else:
+            res.index.name = origin_index_name
+        return res
+
     @classmethod
     def _get_execute_map_result(
         cls, op: DataFrameGroupByAgg, in_data: pd.DataFrame
@@ -86,10 +103,23 @@ def _get_execute_map_result(
         selections = cls._get_selection_columns(op)
         by_cols = op.raw_groupby_params["by"]
         if by_cols is not None:
-            cols = (
-                [*selections, *by_cols] if selections is not None else in_data.columns
-            )
-            res = in_data[cols].drop_duplicates(subset=cols).set_index(by_cols)
+            # When grouping by series, the series determine the groups.
+            # We first set the index of the data to these series,
+            # then `reset_index` so that the series become ordinary data columns.
+            # These columns take part in `drop_duplicates` and are finally restored as the index.
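A minimal pandas sketch of the round-trip described in the comment above (an editor's illustration, not part of the patch; column and key names are made up). The branch just below implements this on the map side via `_drop_duplicates_by_series`:

    import pandas as pd

    df = pd.DataFrame({"a": [1, 1, 2, 2], "c": [3, 3, 4, 5]})
    by = pd.Series([0, 0, 1, 1], name="key")  # hypothetical grouping series

    tmp = df.set_index(by)       # the series now determines the groups
    tmp = tmp.reset_index()      # "key" becomes an ordinary data column
    tmp = tmp.drop_duplicates()  # dedup rows, grouping key included
    tmp = tmp.set_index("key")   # restore the grouping series as the index
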
+            if isinstance(by_cols, list) and any(
+                [isinstance(v, pd.Series) for v in by_cols]
+            ):
+                origin_cols = list(in_data.columns)
+                res = in_data.set_index(by_cols)
+                res = cls._drop_duplicates_by_series(res, origin_cols)
+            else:
+                cols = (
+                    [*selections, *by_cols]
+                    if selections is not None
+                    else in_data.columns
+                )
+                res = in_data[cols].drop_duplicates(subset=cols).set_index(by_cols)
         else:  # group by level
             selections = selections if selections is not None else in_data.columns
             level_indexes = cls._get_level_indexes(op, in_data)
@@ -111,9 +141,17 @@ def _get_execute_map_result(
     def _get_execute_combine_result(
         cls, op: DataFrameGroupByAgg, in_data: pd.DataFrame
     ) -> Union[pd.DataFrame, pd.Series]:
-        # in_data.index.names means MultiIndex (groupby on multi cols)
-        index_col = in_data.index.name or in_data.index.names
-        res = in_data.reset_index().drop_duplicates().set_index(index_col)
+        by = op.raw_groupby_params["by"]
+        if isinstance(by, list) and any([isinstance(v, ENTITY_TYPE) for v in by]):
+            # `in_data` may be a series when an indexing op follows the groupby
+            origin_cols = (
+                list(in_data.columns) if is_dataframe(in_data) else [in_data.name]
+            )
+            res = cls._drop_duplicates_by_series(in_data, origin_cols)
+        else:
+            # in_data.index.names means MultiIndex (groupby on multi cols)
+            index_col = in_data.index.name or in_data.index.names
+            res = in_data.reset_index().drop_duplicates().set_index(index_col)
         if op.output_types[0] == OutputType.series:
             res = res.squeeze()
         return res
@@ -127,7 +165,12 @@ def _get_execute_agg_result(
 
         by = op.raw_groupby_params["by"]
         if by is not None:
-            if op.output_types[0] == OutputType.dataframe:
+            if isinstance(by, list) and any(
+                [isinstance(_by, ENTITY_TYPE) for _by in by]
+            ):
+                # nothing to do here; grouping by level is already correct
+                pass
+            elif op.output_types[0] == OutputType.dataframe:
                 groupby_params.pop("level", None)
                 groupby_params["by"] = cols
                 in_data = in_data.reset_index()
@@ -136,6 +179,11 @@ def _get_execute_agg_result(
                 # since level field in op.groupby_params is not correct.
                 groupby_params["level"] = op.raw_groupby_params["level"]
 
+        # For named aggregation such as .agg(x=('a', 'nunique')), first set `as_index=True`.
+        # Otherwise, subsequent processing loses track of the grouped columns.
+        # TODO: This is due to `reduction` functions, but for now, let's keep it simple.
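Why forcing `as_index=True` matters for the branch that follows, shown on plain pandas (an editor's illustration, not part of the patch; column names are made up):

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2, 1, 3], "b": ["x", "x", "y", "y"]})

    kept = df.groupby("b", as_index=True).agg(e=("a", "nunique"))
    lost = df.groupby("b", as_index=False).agg(e=("a", "nunique"))
    print(kept.index.name)     # 'b' -- group keys stay in the index
    print(list(lost.columns))  # ['b', 'e'] -- keys mixed into the data columns
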
+ if op.raw_func is None: + groupby_params["as_index"] = True res = in_data.groupby(**groupby_params).nunique() return res diff --git a/python/xorbits/_mars/dataframe/groupby/tests/test_groupby_execution.py b/python/xorbits/_mars/dataframe/groupby/tests/test_groupby_execution.py index 003870aed..f3ebbc56d 100644 --- a/python/xorbits/_mars/dataframe/groupby/tests/test_groupby_execution.py +++ b/python/xorbits/_mars/dataframe/groupby/tests/test_groupby_execution.py @@ -1878,3 +1878,101 @@ def test_series_groupby_rolling_agg(setup, window, min_periods, center, closed, mresult = mresult.execute().fetch() pd.testing.assert_series_equal(presult, mresult.sort_index()) + + +@pytest.mark.skipif(pd.__version__ <= "1.5.3", reason="pandas version is too low") +@pytest.mark.parametrize( + "chunk_size, dropna", list(product([None, 3], [None, "any", "all"])) +) +def test_groupby_nth(setup, chunk_size, dropna): + df1 = pd.DataFrame( + { + "a": np.random.randint(0, 5, size=20), + "b": np.random.randint(0, 5, size=20), + "c": np.random.randint(0, 5, size=20), + "d": np.random.randint(0, 5, size=20), + } + ) + mdf = md.DataFrame(df1, chunk_size=chunk_size) + + r = mdf.groupby("b").nth(0) + pd.testing.assert_frame_equal( + r.execute().fetch().sort_index(), df1.groupby("b").nth(0) + ) + r = mdf.groupby("b").nth(-1) + pd.testing.assert_frame_equal( + r.execute().fetch().sort_index(), df1.groupby("b").nth(-1) + ) + r = mdf.groupby("b")[["a", "c"]].nth(0) + pd.testing.assert_frame_equal( + r.execute().fetch().sort_index(), df1.groupby("b")[["a", "c"]].nth(0) + ) + + # test nth with list index + r = mdf.groupby("b").nth([0, 1]) + pd.testing.assert_frame_equal( + r.execute().fetch().sort_index(), df1.groupby("b").nth([0, 1]) + ) + + # test nth with slice + r = mdf.groupby("b").nth(slice(None, 1)) + pd.testing.assert_frame_equal( + r.execute().fetch().sort_index(), df1.groupby("b").nth(slice(None, 1)) + ) + + # test nth with selection + r = mdf.groupby("b")[["a", "d"]].nth(0) + pd.testing.assert_frame_equal( + r.execute().fetch().sort_index(), df1.groupby("b")[["a", "d"]].nth(0) + ) + r = mdf.groupby("b")[["c", "a", "d"]].nth(0) + pd.testing.assert_frame_equal( + r.execute().fetch().sort_index(), df1.groupby("b")[["c", "a", "d"]].nth(0) + ) + r = mdf.groupby("b")["c"].nth(0) + pd.testing.assert_series_equal( + r.execute().fetch().sort_index(), df1.groupby("b")["c"].nth(0) + ) + + series1 = pd.Series([3, 4, 5, 3, 5, 4, 1, 2, 3]) + ms = md.Series(series1, chunk_size=chunk_size) + + r = ms.groupby(lambda x: x % 2).nth(0) + pd.testing.assert_series_equal( + r.execute().fetch().sort_index(), series1.groupby(lambda x: x % 2).nth(0) + ) + + # test with special index + series1 = pd.Series([3, 4, 5, 3, 5, 4, 1, 2, 3], index=[4, 1, 2, 3, 5, 8, 6, 7, 9]) + ms = md.Series(series1, chunk_size=chunk_size) + + r = ms.groupby(lambda x: x % 2).nth(0) + pd.testing.assert_series_equal( + r.execute().fetch().sort_index(), + series1.groupby(lambda x: x % 2).nth(0).sort_index(), + ) + + df2 = pd.DataFrame( + { + "a": [3, 5, 2, np.nan, 1, 2, 4, 6, 2, 4], + "b": [8, 3, 4, 1, 8, np.nan, 2, 2, 2, 3], + "c": [1, 8, 8, np.nan, 3, 5, 0, 0, 5, 4], + "d": [np.nan, 7, 6, 3, 6, 3, 2, 1, 5, 8], + } + ) + + mdf = md.DataFrame(df2) + + r = mdf.groupby("b").nth(0, dropna=dropna) + pd.testing.assert_frame_equal( + r.execute().fetch().sort_index(), df2.groupby("b").nth(0, dropna=dropna) + ) + r = mdf.groupby("b").nth(-1, dropna=dropna) + pd.testing.assert_frame_equal( + r.execute().fetch().sort_index(), df2.groupby("b").nth(-1, dropna=dropna) + ) + r = 
mdf.groupby("b")[["a", "c"]].nth(0, dropna=dropna) + pd.testing.assert_frame_equal( + r.execute().fetch().sort_index(), + df2.groupby("b")[["a", "c"]].nth(0, dropna=dropna), + ) diff --git a/python/xorbits/_mars/dataframe/groupby/tests/test_groupby_nunique_execution.py b/python/xorbits/_mars/dataframe/groupby/tests/test_groupby_nunique_execution.py index 08bb49706..68e678230 100644 --- a/python/xorbits/_mars/dataframe/groupby/tests/test_groupby_nunique_execution.py +++ b/python/xorbits/_mars/dataframe/groupby/tests/test_groupby_nunique_execution.py @@ -11,6 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import itertools + import numpy as np import pandas as pd import pytest @@ -332,3 +334,151 @@ def test_groupby_agg_nunique(setup, gen_data1): ) expected = df.groupby("b", sort=sort).agg(["sum", "nunique"]) pd.testing.assert_frame_equal(r.sort_index(), expected.sort_index()) + + +@pytest.mark.parametrize( + "chunk_size, as_index, sort", + itertools.product([None, 13], [True, False], [True, False]), +) +def test_groupby_agg_nunique_with_tuple_kwargs( + setup, gen_data2, chunk_size, as_index, sort +): + df = gen_data2 + mdf = md.DataFrame(df, chunk_size=chunk_size) + + res = mdf.groupby("b", as_index=as_index, sort=sort).agg(e=("a", "nunique")) + expected = df.groupby("b", as_index=as_index, sort=sort).agg(e=("a", "nunique")) + pd.testing.assert_frame_equal(res.execute().fetch(), expected) + + res = mdf.groupby("b", as_index=as_index, sort=sort).agg( + e=("a", "nunique"), f=("c", "nunique") + ) + expected = df.groupby("b", as_index=as_index, sort=sort).agg( + e=("a", "nunique"), f=("c", "nunique") + ) + pd.testing.assert_frame_equal(res.execute().fetch(), expected) + + res = mdf.groupby("b", as_index=as_index, sort=sort).agg( + e=("a", "nunique"), f=("d", "sum") + ) + expected = df.groupby("b", as_index=as_index, sort=sort).agg( + e=("a", "nunique"), f=("d", "sum") + ) + pd.testing.assert_frame_equal(res.execute().fetch(), expected) + + res = mdf.groupby("b", as_index=as_index, sort=sort).agg( + e=("a", "nunique"), f=("d", "sum"), g=("c", "nunique") + ) + expected = df.groupby("b", as_index=as_index, sort=sort).agg( + e=("a", "nunique"), f=("d", "sum"), g=("c", "nunique") + ) + pd.testing.assert_frame_equal(res.execute().fetch(), expected) + + # by multi columns + res = mdf.groupby(["b", "c"], as_index=as_index, sort=sort).agg(e=("a", "nunique")) + expected = df.groupby(["b", "c"], as_index=as_index, sort=sort).agg( + e=("a", "nunique") + ) + pd.testing.assert_frame_equal(res.execute().fetch(), expected) + + res = mdf.groupby(["b", "c"], as_index=as_index, sort=sort).agg( + e=("a", "nunique"), f=("d", "mean") + ) + expected = df.groupby(["b", "c"], as_index=as_index, sort=sort).agg( + e=("a", "nunique"), f=("d", "mean") + ) + pd.testing.assert_frame_equal(res.execute().fetch(), expected) + + # index after groupby + res = mdf.groupby(["b"], as_index=as_index, sort=sort)[["a"]].agg( + e=("a", "nunique") + ) + expected = df.groupby(["b"], as_index=as_index, sort=sort)[["a"]].agg( + e=("a", "nunique") + ) + pd.testing.assert_frame_equal(res.execute().fetch(), expected) + + res = mdf.groupby(["b"], as_index=as_index, sort=sort)[["a", "c"]].agg( + e=("a", "nunique"), f=("c", "nunique") + ) + expected = df.groupby(["b"], as_index=as_index, sort=sort)[["a", "c"]].agg( + e=("a", "nunique"), f=("c", "nunique") + ) + 
pd.testing.assert_frame_equal(res.execute().fetch(), expected) + + +@pytest.mark.parametrize( + "chunk_size, as_index, sort", + itertools.product([None, 13], [True, False], [True, False]), +) +def test_groupby_nunique_by_series(setup, gen_data2, chunk_size, as_index, sort): + df = gen_data2 + mdf = md.DataFrame(df, chunk_size=chunk_size) + + by1 = pd.Series([i + 100 for i in range(100)]) + mby1 = md.Series(by1) + + by2 = pd.Series([i + 200 for i in range(100)]) + mby2 = md.Series(by2) + + res = mdf.groupby(mby1, as_index=as_index, sort=sort).nunique() + expected = df.groupby(by1, as_index=as_index, sort=sort).nunique() + pd.testing.assert_frame_equal(res.execute().fetch(), expected) + + res = mdf.groupby([mby1, mby2], as_index=as_index, sort=sort).nunique() + expected = df.groupby([by1, by2], as_index=as_index, sort=sort).nunique() + pd.testing.assert_frame_equal(res.execute().fetch(), expected) + + res = mdf.groupby([mby1, mby2], as_index=as_index, sort=sort).agg( + e=("a", "nunique"), f=("c", "nunique") + ) + expected = df.groupby([by1, by2], as_index=as_index, sort=sort).agg( + e=("a", "nunique"), f=("c", "nunique") + ) + pd.testing.assert_frame_equal(res.execute().fetch(), expected) + + # test by with duplicates + rs = np.random.RandomState(0) + by3 = pd.Series(rs.choice([i for i in range(1, 6)], size=(100,))) + mby3 = md.Series(by3) + + res = mdf.groupby(mby3, as_index=as_index, sort=sort).nunique() + expected = df.groupby(by3, as_index=as_index, sort=sort).nunique() + pd.testing.assert_frame_equal(res.execute().fetch(), expected) + + # test by other chunk size + by4 = pd.Series(rs.choice([i for i in range(10)], size=(100,))) + mby4 = md.Series(by4, chunk_size=21) + + res = mdf.groupby(mby4, as_index=as_index, sort=sort).nunique() + expected = df.groupby(by4, as_index=as_index, sort=sort).nunique() + pd.testing.assert_frame_equal(res.execute().fetch(), expected) + + # test index after groupby + res = mdf.groupby(mby3, as_index=as_index, sort=sort)[["a", "b"]].nunique() + expected = df.groupby(by3, as_index=as_index, sort=sort)[["a", "b"]].nunique() + pd.testing.assert_frame_equal(res.execute().fetch(), expected) + + res = mdf.groupby(mby3, as_index=as_index, sort=sort)[["a"]].nunique() + expected = df.groupby(by3, as_index=as_index, sort=sort)[["a"]].nunique() + pd.testing.assert_frame_equal(res.execute().fetch(), expected) + + res = mdf.groupby(mby3, as_index=as_index, sort=sort)["a"].nunique() + expected = df.groupby(by3, as_index=as_index, sort=sort)["a"].nunique() + if as_index: + pd.testing.assert_series_equal(res.execute().fetch(), expected) + else: + pd.testing.assert_frame_equal(res.execute().fetch(), expected) + + # test different methods + for method in ["auto", "tree", "shuffle"]: + res = mdf.groupby(mby3, as_index=as_index, sort=sort).nunique(method=method) + expected = df.groupby(by3, as_index=as_index, sort=sort).nunique() + real = res.execute().fetch() + if method == "shuffle": + pd.testing.assert_frame_equal( + real.sort_values(["a", "b", "c", "d"]).reset_index(drop=True), + expected.sort_values(["a", "b", "c", "d"]).reset_index(drop=True), + ) + else: + pd.testing.assert_frame_equal(real, expected) diff --git a/python/xorbits/_mars/dataframe/hash_utils.py b/python/xorbits/_mars/dataframe/hash_utils.py index 348c4b16e..0d4211e2d 100644 --- a/python/xorbits/_mars/dataframe/hash_utils.py +++ b/python/xorbits/_mars/dataframe/hash_utils.py @@ -9,10 +9,11 @@ from typing import TYPE_CHECKING, Hashable, Iterable, Iterator, cast import numpy as np +import pandas as pd from 
pandas._libs import lib from pandas._libs.hashing import hash_object_array from pandas._typing import ArrayLike, npt -from pandas.core.dtypes.common import is_categorical_dtype, is_list_like +from pandas.core.dtypes.common import is_list_like from pandas.core.dtypes.generic import ( ABCDataFrame, ABCExtensionArray, @@ -272,7 +273,7 @@ def hash_array( # For categoricals, we hash the categories, then remap the codes to the # hash values. (This check is above the complex check so that we don't ask # numpy if categorical is a subdtype of complex, as it will choke). - if is_categorical_dtype(dtype): + if isinstance(dtype, pd.CategoricalDtype): vals = cast("Categorical", vals) return _hash_categorical(vals, encoding, hash_key) diff --git a/python/xorbits/_mars/dataframe/indexing/__init__.py b/python/xorbits/_mars/dataframe/indexing/__init__.py index 45aabf4b2..7d30d7aaf 100644 --- a/python/xorbits/_mars/dataframe/indexing/__init__.py +++ b/python/xorbits/_mars/dataframe/indexing/__init__.py @@ -38,7 +38,7 @@ def _install(): from .sample import sample from .set_axis import df_set_axis, series_set_axis from .set_index import set_index - from .setitem import dataframe_setitem + from .setitem import dataframe_setitem, series_setitem from .where import mask, where for cls in DATAFRAME_TYPE + SERIES_TYPE: @@ -69,6 +69,7 @@ def _install(): for cls in SERIES_TYPE: setattr(cls, "__getitem__", series_getitem) + setattr(cls, "__setitem__", series_setitem) setattr(cls, "reset_index", series_reset_index) setattr(cls, "rename", series_rename) setattr(cls, "set_axis", series_set_axis) diff --git a/python/xorbits/_mars/dataframe/indexing/setitem.py b/python/xorbits/_mars/dataframe/indexing/setitem.py index 41190475d..103165137 100644 --- a/python/xorbits/_mars/dataframe/indexing/setitem.py +++ b/python/xorbits/_mars/dataframe/indexing/setitem.py @@ -14,6 +14,7 @@ # limitations under the License. 
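The setitem changes below route `Series.__setitem__` through `DataFrameSetitem`, and at execution time each chunk assigns only when it actually holds the label. A per-chunk sketch of that probe-then-assign logic (an editor's illustration, not part of the patch; the real operand copies only when in-place assignment raises ValueError, while this sketch copies defensively):

    import pandas as pd

    def setitem_chunk(chunk: pd.Series, label, value) -> pd.Series:
        try:
            chunk[label]  # probe: does this chunk's index contain the label?
        except KeyError:
            return chunk  # not here -- pass the chunk through unchanged
        chunk = chunk.copy()  # defensive copy for the sketch
        chunk[label] = value
        return chunk

    part1 = pd.Series([1, 2], index=[0, 1])
    part2 = pd.Series([3, 4], index=[2, 3])
    setitem_chunk(part1, 2, 99)  # unchanged: label 2 lives in the other chunk
    setitem_chunk(part2, 2, 99)  # index 2 now holds 99
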
import collections +from typing import Union import numpy as np import pandas as pd @@ -24,7 +25,7 @@ from ...serialization.serializables import AnyField, KeyField from ...tensor.core import TENSOR_TYPE from ...utils import pd_release_version -from ..core import DATAFRAME_TYPE, SERIES_TYPE, DataFrame +from ..core import DATAFRAME_TYPE, SERIES_TYPE, DataFrame, Series from ..initializer import DataFrame as asframe from ..initializer import Series as asseries from ..operands import DataFrameOperand, DataFrameOperandMixin @@ -49,8 +50,6 @@ def __init__(self, target=None, indexes=None, value=None, output_types=None, **k _output_types=output_types, **kw, ) - if self.output_types is None: - self.output_types = [OutputType.dataframe] @property def target(self): @@ -74,7 +73,7 @@ def _set_inputs(self, inputs): def _is_scalar_tensor(t): return isinstance(t, TENSOR_TYPE) and t.ndim == 0 - def __call__(self, target: DataFrame, value): + def _call_dataframe(self, target: DataFrame, value): raw_target = target inputs = [target] @@ -145,8 +144,54 @@ def __call__(self, target: DataFrame, value): ) raw_target.data = ret.data + def _call_series(self, target: Series, value): + inputs = [target] + dtype = target.dtype + shape = target.shape + index_value = target.index_value + + target.data = self.new_series( + inputs, shape=shape, dtype=dtype, index_value=index_value, name=target.name + ).data + + def __call__(self, target: Union[DataFrame, Series], value): + if target.ndim == 2: + self._call_dataframe(target, value) + else: + self._call_series(target, value) + @classmethod def tile(cls, op: "DataFrameSetitem"): + if op.target.ndim == 2: + res = yield from cls._tile_dataframe(op) + return res + else: + return cls._tile_series(op) + + @classmethod + def _tile_series(cls, op: "DataFrameSetitem"): + in_df = op.inputs[0] + result_chunks = [] + + for chk in in_df.chunks: + new_op = op.copy().reset_key() + new_op.output_types = [OutputType.series] + params = dict( + shape=chk.shape, + index=chk.index, + dtype=chk.dtype, + index_value=chk.index_value, + ) + result_chunks.append(new_op.new_chunk([chk], **params)) + + _new_op = op.copy() + params = op.outputs[0].params.copy() + params["nsplits"] = in_df.nsplits + params["chunks"] = result_chunks + return _new_op.new_seriess(op.inputs, **params) + + @classmethod + def _tile_dataframe(cls, op: "DataFrameSetitem"): from ..merge.concat import DataFrameConcat out = op.outputs[0] @@ -307,6 +352,36 @@ def estimate_size(cls, ctx: dict, op: "DataFrameSetitem"): @classmethod def execute(cls, ctx, op: "DataFrameSetitem"): + target = ctx[op.target.key] + if target.ndim == 2: + cls._execute_dataframe(ctx, op) + else: + cls._execute_series(ctx, op) + + @classmethod + def _execute_series(cls, ctx, op: "DataFrameSetitem"): + target = ctx[op.target.key] + + indexes = op.indexes + value = op.value + + try: + _ = target[indexes] + indexed = True + except KeyError: + indexed = False + + if indexed: + try: + target[indexes] = value + except ValueError: + target = target.copy(deep=True) + target[indexes] = value + + ctx[op.outputs[0].key] = target + + @classmethod + def _execute_dataframe(cls, ctx, op: "DataFrameSetitem"): target = ctx[op.target.key] # only deep copy when updating indexes = ( @@ -336,3 +411,11 @@ def execute(cls, ctx, op: "DataFrameSetitem"): def dataframe_setitem(df, col, value): op = DataFrameSetitem(target=df, indexes=col, value=value) return op(df, value) + + +def series_setitem(series, index, value): + """ + Currently only supports series whose indexes contain 
`index` + """ + op = DataFrameSetitem(target=series, indexes=index, value=value) + return op(series, value) diff --git a/python/xorbits/_mars/dataframe/indexing/tests/test_indexing_execution.py b/python/xorbits/_mars/dataframe/indexing/tests/test_indexing_execution.py index 3367ee77c..c63ba124b 100644 --- a/python/xorbits/_mars/dataframe/indexing/tests/test_indexing_execution.py +++ b/python/xorbits/_mars/dataframe/indexing/tests/test_indexing_execution.py @@ -38,6 +38,7 @@ from ...datasource.read_csv import DataFrameReadCSV from ...datasource.read_parquet import DataFrameReadParquet from ...datasource.read_sql import DataFrameReadSQL +from ...utils import PD_VERSION_GREATER_THAN_2_10 _allow_set_missing_list = pd_release_version[:2] >= (1, 1) @@ -193,6 +194,36 @@ def test_iloc_getitem(setup_gpu, gpu): pd.testing.assert_index_equal(index.execute().fetch(), data[selection]) +def test_series_setitem(setup): + data1 = pd.Series(np.arange(10)) + series = md.Series(data1, chunk_size=3) + series[2] = 777 + real = series.execute().fetch() + data1[2] = 777 + pd.testing.assert_series_equal(real, data1) + + arrays = [ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + tuples = list(zip(*arrays)) + index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"]) + data2 = pd.Series(np.random.randn(8), index=index) + series = md.Series(data2, chunk_size=3) + series["bar", "two"] = 0.888888 + real = series.execute().fetch() + data2["bar", "two"] = 0.888888 + pd.testing.assert_series_equal(real, data2) + + data3 = [9, 99, 999, 9999] + series = md.Series(data3, chunk_size=1) + series[1] = 88 + real = series.execute().fetch() + expected = pd.Series(data3) + expected[1] = 88 + pd.testing.assert_series_equal(real, expected) + + def test_iloc_setitem(setup): df1 = pd.DataFrame( [[1, 3, 3], [4, 2, 6], [7, 8, 9]], @@ -1211,6 +1242,10 @@ def test_optimization(setup): extra_config={"operand_executors": operand_executors} ).fetch() expected = pd_df.head(3) + if PD_VERSION_GREATER_THAN_2_10: + result = result.convert_dtypes(dtype_backend="pyarrow") + expected = expected.convert_dtypes(dtype_backend="pyarrow") + pd.testing.assert_frame_equal(result, expected) dirname = os.path.join(tempdir, "test_parquet2") @@ -1228,6 +1263,10 @@ def test_optimization(setup): extra_config={"operand_executors": operand_executors} ).fetch() expected = pd_df.head(3) + if PD_VERSION_GREATER_THAN_2_10: + result = result.convert_dtypes(dtype_backend="pyarrow") + expected = expected.convert_dtypes(dtype_backend="pyarrow") + pd.testing.assert_frame_equal(result, expected) @@ -1639,6 +1678,9 @@ def test_sample_execution(setup): df = md.read_parquet(file_path) r1 = df.sample(frac=0.05, random_state=0) r2 = pd.read_parquet(file_path).sample(frac=0.05, random_state=0) + if PD_VERSION_GREATER_THAN_2_10: + r2 = r2.convert_dtypes(dtype_backend="pyarrow") + pd.testing.assert_frame_equal(r1.execute().fetch(), r2) # test series diff --git a/python/xorbits/_mars/dataframe/merge/merge.py b/python/xorbits/_mars/dataframe/merge/merge.py index 6882be58c..4db6993aa 100644 --- a/python/xorbits/_mars/dataframe/merge/merge.py +++ b/python/xorbits/_mars/dataframe/merge/merge.py @@ -39,6 +39,7 @@ from ...typing import TileableType from ...utils import has_unknown_shape, lazy_import from ..base.bloom_filter import filter_by_bloom_filter +from ..base.core import DataFrameAutoMergeMixin from ..core import DataFrame, DataFrameChunk, Series from ..operands import DataFrameOperand, 
DataFrameOperandMixin, DataFrameShuffleProxy from ..utils import ( @@ -173,7 +174,7 @@ class MergeMethod(Enum): shuffle = 2 -class DataFrameMerge(DataFrameOperand, DataFrameOperandMixin): +class DataFrameMerge(DataFrameOperand, DataFrameAutoMergeMixin): _op_type_ = OperandDef.DATAFRAME_MERGE how = StringField("how") @@ -668,18 +669,6 @@ def _can_merge_with_broadcast( ) -> bool: return how in [big_side, "inner"] and np.log2(big_chunk_size) > small_chunk_size - @classmethod - def _get_auto_merge_options(cls, auto_merge: str) -> Tuple[bool, bool]: - if auto_merge == "both": - return True, True - elif auto_merge == "none": - return False, False - elif auto_merge == "before": - return True, False - else: - assert auto_merge == "after" - return False, True - @classmethod def _choose_merge_method( cls, op: "DataFrameMerge", left: TileableType, right: TileableType @@ -755,36 +744,10 @@ def tile(cls, op: "DataFrameMerge"): auto_merge_threshold = op.auto_merge_threshold auto_merge_before, auto_merge_after = cls._get_auto_merge_options(op.auto_merge) - if ( - auto_merge_before - and len(left.chunks) + len(right.chunks) > auto_merge_threshold - ): - yield TileStatus([left, right] + left.chunks + right.chunks, progress=0.2) - left_chunk_size = len(left.chunks) - right_chunk_size = len(right.chunks) - left = auto_merge_chunks(ctx, left) - right = auto_merge_chunks(ctx, right) - logger.info( - "Auto merge before %s, left data shape: %s, chunk count: %s -> %s, " - "right data shape: %s, chunk count: %s -> %s.", - op, - left.shape, - left_chunk_size, - len(left.chunks), - right.shape, - right_chunk_size, - len(right.chunks), - ) - else: - logger.info( - "Skip auto merge before %s, left data shape: %s, chunk count: %d, " - "right data shape: %s, chunk count: %d.", - op, - left.shape, - len(left.chunks), - right.shape, - len(right.chunks), - ) + merge_before_res = yield from cls._merge_before( + op, auto_merge_before, auto_merge_threshold, left, right, logger + ) + left, right = merge_before_res[0], merge_before_res[1] method = cls._choose_merge_method(op, left, right) if cls._if_apply_bloom_filter(method, op, left, right): @@ -818,33 +781,14 @@ def tile(cls, op: "DataFrameMerge"): assert method == MergeMethod.shuffle ret = cls._tile_shuffle(op, left, right) - if ( - op.how == "inner" - and auto_merge_after - and len(ret[0].chunks) > auto_merge_threshold - ): + if op.how == "inner": # if how=="inner", output data size will reduce greatly with high probability, # use auto_merge_chunks to combine small chunks. - yield TileStatus( - ret[0].chunks, progress=0.8 - ) # trigger execution for chunks - merged = auto_merge_chunks(get_context(), ret[0]) - logger.info( - "Auto merge after %s, data shape: %s, chunk count: %s -> %s.", - op, - merged.shape, - len(ret[0].chunks), - len(merged.chunks), + ret = yield from cls._merge_after( + op, auto_merge_after, auto_merge_threshold, ret, logger ) - return [merged] - else: - logger.info( - "Skip auto merge after %s, data shape: %s, chunk count: %d.", - op, - ret[0].shape, - len(ret[0].chunks), - ) - return ret + + return ret @classmethod def execute(cls, ctx, op): diff --git a/python/xorbits/_mars/dataframe/missing/checkna.py b/python/xorbits/_mars/dataframe/missing/checkna.py index 530c042d8..3ccc70545 100644 --- a/python/xorbits/_mars/dataframe/missing/checkna.py +++ b/python/xorbits/_mars/dataframe/missing/checkna.py @@ -21,7 +21,6 @@ from ... import dataframe as md from ... import opcodes from ... 
import tensor as mt -from ...config import options from ...core import OutputType from ...serialization.serializables import BoolField from ..operands import ( @@ -39,14 +38,10 @@ class DataFrameCheckNA(DataFrameOperand, DataFrameOperandMixin): _op_type_ = opcodes.CHECK_NA _positive = BoolField("positive") - _use_inf_as_na = BoolField("use_inf_as_na") - def __init__( - self, positive=None, use_inf_as_na=None, sparse=None, output_types=None, **kw - ): + def __init__(self, positive=None, sparse=None, output_types=None, **kw): super().__init__( _positive=positive, - _use_inf_as_na=use_inf_as_na, _output_types=output_types, sparse=sparse, **kw, @@ -56,10 +51,6 @@ def __init__( def positive(self) -> bool: return self._positive - @property - def use_inf_as_na(self) -> bool: - return self._use_inf_as_na - def __call__(self, df): if isinstance(df, DATAFRAME_TYPE): self.output_types = [OutputType.dataframe] @@ -107,15 +98,10 @@ def tile(cls, op: "DataFrameCheckNA"): @classmethod def execute(cls, ctx, op: "DataFrameCheckNA"): in_data = ctx[op.inputs[0].key] - old_use_inf_as_na = pd.get_option("mode.use_inf_as_na") - try: - pd.set_option("mode.use_inf_as_na", op.use_inf_as_na) - if op.positive: - ctx[op.outputs[0].key] = in_data.isna() - else: - ctx[op.outputs[0].key] = in_data.notna() - finally: - pd.set_option("mode.use_inf_as_na", old_use_inf_as_na) + if op.positive: + ctx[op.outputs[0].key] = in_data.isna() + else: + ctx[op.outputs[0].key] = in_data.notna() def _from_pandas(obj: Any): @@ -200,14 +186,9 @@ def isna(obj): raise NotImplementedError("isna is not defined for MultiIndex") elif isinstance(obj, ENTITY_TYPE): if isinstance(obj, TENSOR_TYPE): - if options.dataframe.mode.use_inf_as_na: - return ~mt.isfinite(obj) - else: - return mt.isnan(obj) + return mt.isnan(obj) else: - op = DataFrameCheckNA( - positive=True, use_inf_as_na=options.dataframe.mode.use_inf_as_na - ) + op = DataFrameCheckNA(positive=True) return op(obj) else: return _from_pandas(pd.isna(obj)) @@ -279,14 +260,9 @@ def notna(obj): raise NotImplementedError("isna is not defined for MultiIndex") elif isinstance(obj, ENTITY_TYPE): if isinstance(obj, TENSOR_TYPE): - if options.dataframe.mode.use_inf_as_na: - return mt.isfinite(obj) - else: - return ~mt.isnan(obj) + return ~mt.isnan(obj) else: - op = DataFrameCheckNA( - positive=False, use_inf_as_na=options.dataframe.mode.use_inf_as_na - ) + op = DataFrameCheckNA(positive=False) return op(obj) else: return _from_pandas(pd.notna(obj)) diff --git a/python/xorbits/_mars/dataframe/missing/dropna.py b/python/xorbits/_mars/dataframe/missing/dropna.py index af71e3cd0..a3d50f466 100644 --- a/python/xorbits/_mars/dataframe/missing/dropna.py +++ b/python/xorbits/_mars/dataframe/missing/dropna.py @@ -19,7 +19,6 @@ import pandas as pd from ... 
import opcodes -from ...config import options from ...core import OutputType, recursive_tile from ...serialization.serializables import AnyField, BoolField, Int32Field, StringField from ...utils import no_default, pd_release_version @@ -37,7 +36,6 @@ class DataFrameDropNA(DataFrameOperand, DataFrameOperandMixin): _how = StringField("how") _thresh = Int32Field("thresh") _subset = AnyField("subset") - _use_inf_as_na = BoolField("use_inf_as_na") # when True, dropna will be called on the input, # otherwise non-nan counts will be used @@ -51,7 +49,6 @@ def __init__( how=None, thresh=None, subset=None, - use_inf_as_na=None, drop_directly=None, subset_size=None, sparse=None, @@ -63,7 +60,6 @@ def __init__( _how=how, _thresh=thresh, _subset=subset, - _use_inf_as_na=use_inf_as_na, _drop_directly=drop_directly, _subset_size=subset_size, _output_types=output_types, @@ -87,10 +83,6 @@ def thresh(self) -> int: def subset(self) -> list: return self._subset - @property - def use_inf_as_na(self) -> bool: - return self._use_inf_as_na - @property def drop_directly(self) -> bool: return self._drop_directly @@ -150,9 +142,7 @@ def tile(cls, op: "DataFrameDropNA"): subset_df = in_df if op.subset: subset_df = in_df[op.subset] - count_series = yield from recursive_tile( - subset_df.agg("count", axis=1, _use_inf_as_na=op.use_inf_as_na) - ) + count_series = yield from recursive_tile(subset_df.agg("count", axis=1)) nsplits, out_shape, left_chunks, right_chunks = align_dataframe_series( in_df, count_series, axis=0 @@ -185,35 +175,30 @@ def tile(cls, op: "DataFrameDropNA"): @classmethod def execute(cls, ctx, op: "DataFrameDropNA"): - try: - pd.set_option("mode.use_inf_as_na", op.use_inf_as_na) - - in_data = ctx[op.inputs[0].key] - if op.drop_directly: - if isinstance(in_data, pd.DataFrame): - result = in_data.dropna( - axis=op.axis, how=op.how, thresh=op.thresh, subset=op.subset - ) - elif isinstance(in_data, pd.Series): - result = in_data.dropna(axis=op.axis, how=op.how) - else: - result = in_data.dropna(how=op.how) - ctx[op.outputs[0].key] = result - return - - in_counts = ctx[op.inputs[1].key] - if op.how == "all": - in_counts = in_counts[in_counts > 0] + in_data = ctx[op.inputs[0].key] + if op.drop_directly: + if isinstance(in_data, pd.DataFrame): + result = in_data.dropna( + axis=op.axis, how=op.how, thresh=op.thresh, subset=op.subset + ) + elif isinstance(in_data, pd.Series): + result = in_data.dropna(axis=op.axis, how=op.how) else: - if op.thresh is None or op.thresh is no_default: - thresh = op.subset_size - else: # pragma: no cover - thresh = op.thresh - in_counts = in_counts[in_counts >= thresh] + result = in_data.dropna(how=op.how) + ctx[op.outputs[0].key] = result + return - ctx[op.outputs[0].key] = in_data.reindex(in_counts.index) - finally: - pd.reset_option("mode.use_inf_as_na") + in_counts = ctx[op.inputs[1].key] + if op.how == "all": + in_counts = in_counts[in_counts > 0] + else: + if op.thresh is None or op.thresh is no_default: + thresh = op.subset_size + else: # pragma: no cover + thresh = op.thresh + in_counts = in_counts[in_counts >= thresh] + + ctx[op.outputs[0].key] = in_data.reindex(in_counts.index) def df_dropna( @@ -328,14 +313,12 @@ def df_dropna( if thresh is no_default and how is no_default: how = "any" - use_inf_as_na = options.dataframe.mode.use_inf_as_na op = DataFrameDropNA( axis=axis, how=how, thresh=thresh, subset=subset, output_types=[OutputType.dataframe], - use_inf_as_na=use_inf_as_na, ) out_df = op(df) if inplace: @@ -417,12 +400,10 @@ def series_dropna(series, axis=0, 
inplace=False, how=None): dtype: object """ axis = validate_axis(axis, series) - use_inf_as_na = options.dataframe.mode.use_inf_as_na op = DataFrameDropNA( axis=axis, how=how, output_types=[OutputType.series], - use_inf_as_na=use_inf_as_na, ) out_series = op(series) if inplace: @@ -445,8 +426,5 @@ def index_dropna(index, how="any"): ------- Index """ - use_inf_as_na = options.dataframe.mode.use_inf_as_na - op = DataFrameDropNA( - axis=0, how=how, output_types=[OutputType.index], use_inf_as_na=use_inf_as_na - ) + op = DataFrameDropNA(axis=0, how=how, output_types=[OutputType.index]) return op(index) diff --git a/python/xorbits/_mars/dataframe/missing/fillna.py b/python/xorbits/_mars/dataframe/missing/fillna.py index 15c33226a..65bca253b 100644 --- a/python/xorbits/_mars/dataframe/missing/fillna.py +++ b/python/xorbits/_mars/dataframe/missing/fillna.py @@ -19,10 +19,9 @@ import pandas as pd from ... import opcodes -from ...config import options from ...core import ENTITY_TYPE, Entity, OutputType, get_output_types from ...core.operand import OperandStage -from ...serialization.serializables import AnyField, BoolField, Int64Field, StringField +from ...serialization.serializables import AnyField, Int64Field, StringField from ..align import ( align_dataframe_dataframe, align_dataframe_series, @@ -42,8 +41,6 @@ class FillNA(DataFrameOperand, DataFrameOperandMixin): _method = StringField("method") _axis = AnyField("axis") _limit = Int64Field("limit") - _downcast = AnyField("downcast") - _use_inf_as_na = BoolField("use_inf_as_na") _output_limit = Int64Field("output_limit") @@ -53,8 +50,6 @@ def __init__( method=None, axis=None, limit=None, - downcast=None, - use_inf_as_na=None, output_types=None, output_limit=None, **kw @@ -64,8 +59,6 @@ def __init__( _method=method, _axis=axis, _limit=limit, - _downcast=downcast, - _use_inf_as_na=use_inf_as_na, _output_types=output_types, _output_limit=output_limit, **kw @@ -87,14 +80,6 @@ def axis(self): def limit(self): return self._limit - @property - def downcast(self): - return self._downcast - - @property - def use_inf_as_na(self): - return self._use_inf_as_na - def _set_inputs(self, inputs): super()._set_inputs(inputs) if self._method is None and len(inputs) > 1: @@ -131,7 +116,9 @@ def _execute_map(cls, ctx, op): method = op.method filled = input_data.fillna( - method=method, axis=axis, limit=limit, downcast=op.downcast + method=method, + axis=axis, + limit=limit, ) ctx[op.outputs[0].key] = cls._get_first_slice(op, filled, 1) del filled @@ -151,7 +138,9 @@ def _execute_combine(cls, ctx, op): if not summaries: ctx[op.outputs[0].key] = input_data.fillna( - method=method, axis=axis, limit=limit, downcast=op.downcast + method=method, + axis=axis, + limit=limit, ) return @@ -166,7 +155,9 @@ def _execute_combine(cls, ctx, op): if is_pandas_2(): concat_df = concat_df.fillna( - method=method, axis=axis, limit=limit, downcast=op.downcast + method=method, + axis=axis, + limit=limit, ) else: concat_df.fillna( @@ -174,37 +165,29 @@ def _execute_combine(cls, ctx, op): axis=axis, inplace=True, limit=limit, - downcast=op.downcast, ) ctx[op.outputs[0].key] = cls._get_first_slice(op, concat_df, -1) @classmethod def execute(cls, ctx, op): - try: - pd.set_option("mode.use_inf_as_na", op.use_inf_as_na) - if op.stage == OperandStage.map: - cls._execute_map(ctx, op) - elif op.stage == OperandStage.combine: - cls._execute_combine(ctx, op) + if op.stage == OperandStage.map: + cls._execute_map(ctx, op) + elif op.stage == OperandStage.combine: + cls._execute_combine(ctx, op) + 
else: + input_data = ctx[op.inputs[0].key] + value = getattr(op, "value", None) + if isinstance(op.value, ENTITY_TYPE): + value = ctx[op.value.key] + if not isinstance(input_data, pd.Index): + ctx[op.outputs[0].key] = input_data.fillna( + value=value, + method=op.method, + axis=op.axis, + limit=op.limit, + ) else: - input_data = ctx[op.inputs[0].key] - value = getattr(op, "value", None) - if isinstance(op.value, ENTITY_TYPE): - value = ctx[op.value.key] - if not isinstance(input_data, pd.Index): - ctx[op.outputs[0].key] = input_data.fillna( - value=value, - method=op.method, - axis=op.axis, - limit=op.limit, - downcast=op.downcast, - ) - else: - ctx[op.outputs[0].key] = input_data.fillna( - value=value, downcast=op.downcast - ) - finally: - pd.reset_option("mode.use_inf_as_na") + ctx[op.outputs[0].key] = input_data.fillna(value=value) @classmethod def _tile_one_by_one(cls, op): @@ -490,9 +473,7 @@ def __call__(self, a, value_df=None): ) -def fillna( - df, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None -): +def fillna(df, value=None, method=None, axis=None, inplace=False, limit=None): """ Fill NA/NaN values using the specified method. @@ -521,10 +502,6 @@ def fillna( be partially filled. If method is not specified, this is the maximum number of entries along the entire axis where NaNs will be filled. Must be greater than 0 if not None. - downcast : dict, default is None - A dict of item->dtype of what to downcast if possible, - or the string 'infer' which will try to downcast to an appropriate - equal type (e.g. float64 to int64 if possible). Returns ------- @@ -595,10 +572,6 @@ def fillna( % type(value).__name__ ) - if downcast is not None: - raise NotImplementedError( - 'Currently argument "downcast" is not implemented yet' - ) if limit is not None: raise NotImplementedError('Currently argument "limit" is not implemented yet') @@ -607,14 +580,11 @@ def fillna( else: value_df = None - use_inf_as_na = options.dataframe.mode.use_inf_as_na op = FillNA( value=value, method=method, axis=axis, limit=limit, - downcast=downcast, - use_inf_as_na=use_inf_as_na, output_types=get_output_types(df), ) out_df = op(df, value_df=value_df) @@ -624,7 +594,7 @@ def fillna( return out_df -def ffill(df, axis=None, inplace=False, limit=None, downcast=None): +def ffill(df, axis=None, inplace=False, limit=None): """ Synonym for :meth:`DataFrame.fillna` with ``method='ffill'``. @@ -633,12 +603,10 @@ def ffill(df, axis=None, inplace=False, limit=None, downcast=None): {klass} or None Object with missing values filled or None if ``inplace=True``. """ - return fillna( - df, method="ffill", axis=axis, inplace=inplace, limit=limit, downcast=downcast - ) + return fillna(df, method="ffill", axis=axis, inplace=inplace, limit=limit) -def bfill(df, axis=None, inplace=False, limit=None, downcast=None): +def bfill(df, axis=None, inplace=False, limit=None): """ Synonym for :meth:`DataFrame.fillna` with ``method='bfill'``. @@ -647,12 +615,10 @@ def bfill(df, axis=None, inplace=False, limit=None, downcast=None): {klass} or None Object with missing values filled or None if ``inplace=True``. """ - return fillna( - df, method="bfill", axis=axis, inplace=inplace, limit=limit, downcast=downcast - ) + return fillna(df, method="bfill", axis=axis, inplace=inplace, limit=limit) -def index_fillna(index, value=None, downcast=None): +def index_fillna(index, value=None): """ Fill NA/NaN values with the specified value. 
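Across this file the `downcast` keyword (deprecated in recent pandas) is dropped from fillna/ffill/bfill. Callers that relied on `downcast="infer"` can cast explicitly instead; a brief sketch (an editor's illustration, not part of the patch):

    import pandas as pd

    s = pd.Series([1.0, None, 3.0])
    filled = s.fillna(0)              # result stays float64
    as_int = filled.astype("int64")   # explicit cast replaces downcast="infer"
    print(filled.dtype, as_int.dtype) # float64 int64
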
@@ -661,10 +627,6 @@ def index_fillna(index, value=None, downcast=None): value : scalar Scalar value to use to fill holes (e.g. 0). This value cannot be a list-likes. - downcast : dict, default is None - A dict of item->dtype of what to downcast if possible, - or the string 'infer' which will try to downcast to an appropriate - equal type (e.g. float64 to int64 if possible). Returns ------- @@ -678,11 +640,8 @@ def index_fillna(index, value=None, downcast=None): if isinstance(value, (list, pd.Series, SERIES_TYPE)): raise ValueError("'value' must be a scalar, passed: %s" % type(value)) - use_inf_as_na = options.dataframe.mode.use_inf_as_na op = FillNA( value=value, - downcast=downcast, - use_inf_as_na=use_inf_as_na, output_types=get_output_types(index), ) return op(index) diff --git a/python/xorbits/_mars/dataframe/missing/tests/test_missing.py b/python/xorbits/_mars/dataframe/missing/tests/test_missing.py index cab62c383..50a7a7578 100644 --- a/python/xorbits/_mars/dataframe/missing/tests/test_missing.py +++ b/python/xorbits/_mars/dataframe/missing/tests/test_missing.py @@ -56,8 +56,6 @@ def test_fill_na(): series.fillna(value=df) with pytest.raises(ValueError): series.fillna(value=df_raw) - with pytest.raises(NotImplementedError): - series.fillna(value=series_raw, downcast="infer") with pytest.raises(NotImplementedError): series.ffill(limit=1) @@ -226,19 +224,10 @@ def test_replace(): assert r.chunks[0].op.limit is None -@pytest.mark.parametrize("inf_as_na", [True, False]) -def test_isna(setup, inf_as_na): - from ....config import options +def test_isna(setup): from ..checkna import isna - old_mars_inf_as_na = options.dataframe.mode.use_inf_as_na - options.dataframe.mode.use_inf_as_na = inf_as_na - # this option could be changed by mars execution. 
- old_pd_inf_as_na = pd.get_option("mode.use_inf_as_na") - pd.options.mode.use_inf_as_na = inf_as_na - # scalars - assert pd.get_option("mode.use_inf_as_na") == inf_as_na assert isna("dog") == pd.isna("dog") assert isna(None) == pd.isna(None) assert isna(md.NA) == pd.isna(pd.NA) @@ -247,47 +236,39 @@ def test_isna(setup, inf_as_na): assert isna(type) == pd.isna(type) # multi index - assert pd.get_option("mode.use_inf_as_na") == inf_as_na with pytest.raises(NotImplementedError): midx = md.MultiIndex() isna(midx) # list - assert pd.get_option("mode.use_inf_as_na") == inf_as_na l = [1, 2, 3, np.Inf, np.NaN, pd.NA, pd.NaT] - assert pd.get_option("mode.use_inf_as_na") == inf_as_na actual = isna(l).execute().fetch() expected = pd.isna(l) np.testing.assert_array_equal(expected, actual) # tuple - assert pd.get_option("mode.use_inf_as_na") == inf_as_na t = (1, 2, 3, np.Inf, np.NaN, pd.NA, pd.NaT) assert not isna(t) # numpy ndarray - assert pd.get_option("mode.use_inf_as_na") == inf_as_na narr = np.array((1, 2, 3, np.Inf, np.NaN)) actual = isna(narr).execute().fetch() expected = pd.isna(narr) np.testing.assert_array_equal(expected, actual) # pandas index - assert pd.get_option("mode.use_inf_as_na") == inf_as_na pi = pd.Index((1, 2, 3, np.Inf, np.NaN, pd.NA, pd.NaT)) actual = isna(pi).execute().fetch() expected = pd.isna(pi) np.testing.assert_array_equal(expected, actual) # pandas series - assert pd.get_option("mode.use_inf_as_na") == inf_as_na ps = pd.Series((1, 2, 3, np.Inf, np.NaN, pd.NA, pd.NaT)) actual = isna(ps).execute().fetch() expected = pd.isna(ps) pd.testing.assert_series_equal(expected, actual) # pandas dataframe - assert pd.get_option("mode.use_inf_as_na") == inf_as_na pdf = pd.DataFrame( {"foo": (1, 2, 3, np.Inf, pd.NA), "bar": (4, 5, 6, np.NaN, pd.NaT)} ) @@ -296,7 +277,6 @@ def test_isna(setup, inf_as_na): pd.testing.assert_frame_equal(expected, actual) # mars tensor - assert pd.get_option("mode.use_inf_as_na") == inf_as_na marr = mt.tensor(narr) actual = isna(marr).execute().fetch() expected = pd.isna(narr) @@ -305,7 +285,6 @@ def test_isna(setup, inf_as_na): # mars index from ...datasource.index import from_pandas as from_pandas_index - assert pd.get_option("mode.use_inf_as_na") == inf_as_na mi = from_pandas_index(pi) actual = isna(mi).execute().fetch() expected = pd.isna(pi) @@ -314,7 +293,6 @@ def test_isna(setup, inf_as_na): # mars series from ...datasource.series import from_pandas as from_pandas_series - assert pd.get_option("mode.use_inf_as_na") == inf_as_na ms = from_pandas_series(ps) actual = isna(ms).execute().fetch() expected = pd.isna(ps) @@ -323,29 +301,16 @@ def test_isna(setup, inf_as_na): # mars dataframe from ...datasource.dataframe import from_pandas as from_pandas_df - assert pd.get_option("mode.use_inf_as_na") == inf_as_na mdf = from_pandas_df(pdf) actual = isna(mdf).execute().fetch() expected = pd.isna(pdf) pd.testing.assert_frame_equal(expected, actual) - options.dataframe.mode.use_inf_as_na = old_mars_inf_as_na - pd.options.mode.use_inf_as_na = old_pd_inf_as_na - -@pytest.mark.parametrize("inf_as_na", [True, False]) -def test_notna(setup, inf_as_na): - from ....config import options +def test_notna(setup): from ..checkna import notna - old_mars_inf_as_na = options.dataframe.mode.use_inf_as_na - options.dataframe.mode.use_inf_as_na = inf_as_na - # this option could be changed by mars execution. 
- old_pd_inf_as_na = pd.get_option("mode.use_inf_as_na") - pd.options.mode.use_inf_as_na = inf_as_na - # scalars - assert pd.get_option("mode.use_inf_as_na") == inf_as_na assert notna("dog") == pd.notna("dog") assert notna(None) == pd.notna(None) assert notna(md.NA) == pd.notna(pd.NA) @@ -354,46 +319,39 @@ def test_notna(setup, inf_as_na): assert notna(type) == pd.notna(type) # multi index - assert pd.get_option("mode.use_inf_as_na") == inf_as_na with pytest.raises(NotImplementedError): midx = md.MultiIndex() notna(midx) # list - assert pd.get_option("mode.use_inf_as_na") == inf_as_na l = [1, 2, 3, np.Inf, np.NaN, pd.NA, pd.NaT] actual = notna(l).execute().fetch() expected = pd.notna(l) np.testing.assert_array_equal(expected, actual) # tuple - assert pd.get_option("mode.use_inf_as_na") == inf_as_na t = (1, 2, 3, np.Inf, np.NaN, pd.NA, pd.NaT) assert notna(t) # numpy ndarray - assert pd.get_option("mode.use_inf_as_na") == inf_as_na narr = np.array((1, 2, 3, np.Inf, np.NaN)) actual = notna(narr).execute().fetch() expected = pd.notna(narr) np.testing.assert_array_equal(expected, actual) # pandas index - assert pd.get_option("mode.use_inf_as_na") == inf_as_na pi = pd.Index((1, 2, 3, np.Inf, np.NaN, pd.NA, pd.NaT)) actual = notna(pi).execute().fetch() expected = pd.notna(pi) np.testing.assert_array_equal(expected, actual) # pandas series - assert pd.get_option("mode.use_inf_as_na") == inf_as_na ps = pd.Series((1, 2, 3, np.Inf, np.NaN, pd.NA, pd.NaT)) actual = notna(ps).execute().fetch() expected = pd.notna(ps) pd.testing.assert_series_equal(expected, actual) # pandas dataframe - assert pd.get_option("mode.use_inf_as_na") == inf_as_na pdf = pd.DataFrame( {"foo": (1, 2, 3, np.Inf, pd.NA), "bar": (4, 5, 6, np.NaN, pd.NaT)} ) @@ -402,7 +360,6 @@ def test_notna(setup, inf_as_na): pd.testing.assert_frame_equal(expected, actual) # mars tensor - assert pd.get_option("mode.use_inf_as_na") == inf_as_na marr = mt.tensor(narr) actual = notna(marr).execute().fetch() expected = pd.notna(narr) @@ -411,7 +368,6 @@ def test_notna(setup, inf_as_na): # mars index from ...datasource.index import from_pandas as from_pandas_index - assert pd.get_option("mode.use_inf_as_na") == inf_as_na mi = from_pandas_index(pi) actual = notna(mi).execute().fetch() expected = pd.notna(pi) @@ -420,7 +376,6 @@ def test_notna(setup, inf_as_na): # mars series from ...datasource.series import from_pandas as from_pandas_series - assert pd.get_option("mode.use_inf_as_na") == inf_as_na ms = from_pandas_series(ps) actual = notna(ms).execute().fetch() expected = pd.notna(ps) @@ -429,11 +384,7 @@ def test_notna(setup, inf_as_na): # mars dataframe from ...datasource.dataframe import from_pandas as from_pandas_df - assert pd.get_option("mode.use_inf_as_na") == inf_as_na mdf = from_pandas_df(pdf) actual = notna(mdf).execute().fetch() expected = pd.notna(pdf) pd.testing.assert_frame_equal(expected, actual) - - options.dataframe.mode.use_inf_as_na = old_mars_inf_as_na - pd.options.mode.use_inf_as_na = old_pd_inf_as_na diff --git a/python/xorbits/_mars/dataframe/reduction/aggregation.py b/python/xorbits/_mars/dataframe/reduction/aggregation.py index dc748dfb3..d9aa5768f 100644 --- a/python/xorbits/_mars/dataframe/reduction/aggregation.py +++ b/python/xorbits/_mars/dataframe/reduction/aggregation.py @@ -98,7 +98,6 @@ class DataFrameAggregate(DataFrameOperand, DataFrameOperandMixin): axis = AnyField("axis") numeric_only = BoolField("numeric_only") bool_only = BoolField("bool_only") - use_inf_as_na = BoolField("use_inf_as_na") combine_size = 
Int32Field("combine_size") pre_funcs = ListField("pre_funcs") @@ -925,45 +924,45 @@ def _cudf_agg(cls, op: "DataFrameAggregate", in_data): @redirect_custom_log @enter_current_session def execute(cls, ctx, op: "DataFrameAggregate"): - try: - pd.set_option("mode.use_inf_as_na", op.use_inf_as_na) - if op.stage == OperandStage.map: - cls._execute_map(ctx, op) - elif op.stage == OperandStage.combine: - cls._execute_combine(ctx, op) - elif op.stage == OperandStage.agg: - cls._execute_agg(ctx, op) - elif not _agg_size_as_series and op.raw_func == "size": - xp = cp if op.gpu else np - ctx[op.outputs[0].key] = xp.array( - ctx[op.inputs[0].key].agg(op.raw_func, axis=op.axis) - ).reshape(op.outputs[0].shape) + if op.stage == OperandStage.map: + cls._execute_map(ctx, op) + elif op.stage == OperandStage.combine: + cls._execute_combine(ctx, op) + elif op.stage == OperandStage.agg: + cls._execute_agg(ctx, op) + elif not _agg_size_as_series and op.raw_func == "size": + xp = cp if op.gpu else np + ctx[op.outputs[0].key] = xp.array( + ctx[op.inputs[0].key].agg(op.raw_func, axis=op.axis) + ).reshape(op.outputs[0].shape) + else: + xp = cp if op.gpu else np + in_obj = op.inputs[0] + in_data = ctx[in_obj.key] + in_data = cls._select_dtypes(in_data, op) + if isinstance(in_obj, INDEX_CHUNK_TYPE): + result = op.func[0](in_data) + elif ( + op.output_types[0] == OutputType.scalar + and in_data.shape == (0,) + and callable(op.func[0]) + ): + result = op.func[0](in_data) else: - xp = cp if op.gpu else np - in_obj = op.inputs[0] - in_data = ctx[in_obj.key] - in_data = cls._select_dtypes(in_data, op) - if isinstance(in_obj, INDEX_CHUNK_TYPE): - result = op.func[0](in_data) - elif ( - op.output_types[0] == OutputType.scalar - and in_data.shape == (0,) - and callable(op.func[0]) - ): - result = op.func[0](in_data) + if is_cudf(in_data): + result = cls._cudf_agg(op, in_data) else: - if is_cudf(in_data): - result = cls._cudf_agg(op, in_data) - else: - result = in_data.agg(op.raw_func, axis=op.axis) - if op.outputs[0].ndim == 1: - result = result.astype(op.outputs[0].dtype, copy=False) + result = ( + in_data.agg(op.raw_func, axis=op.axis) + if op.raw_func is not None + else in_data.agg(**op.raw_func_kw, axis=op.axis) + ) + if op.outputs[0].ndim == 1: + result = result.astype(op.outputs[0].dtype, copy=False) - if op.output_types[0] == OutputType.tensor: - result = xp.array(result) - ctx[op.outputs[0].key] = result - finally: - pd.reset_option("mode.use_inf_as_na") + if op.output_types[0] == OutputType.tensor: + result = xp.array(result) + ctx[op.outputs[0].key] = result def is_funcs_aggregate(func, func_kw=None, ndim=2): @@ -1068,7 +1067,6 @@ def normalize_reduction_funcs(op, ndim=None): def aggregate(df, func=None, axis=0, **kw): axis = validate_axis(axis, df) - use_inf_as_na = kw.pop("_use_inf_as_na", options.dataframe.mode.use_inf_as_na) if ( df.ndim == 2 and isinstance(func, dict) @@ -1107,7 +1105,6 @@ def aggregate(df, func=None, axis=0, **kw): combine_size=combine_size, numeric_only=numeric_only, bool_only=bool_only, - use_inf_as_na=use_inf_as_na, ) return op(df, output_type=output_type, dtypes=dtypes, index=index) diff --git a/python/xorbits/_mars/dataframe/reduction/all.py b/python/xorbits/_mars/dataframe/reduction/all.py index e7a3e2569..f8a5d1409 100644 --- a/python/xorbits/_mars/dataframe/reduction/all.py +++ b/python/xorbits/_mars/dataframe/reduction/all.py @@ -17,7 +17,6 @@ import pandas as pd from ... 
import opcodes as OperandDef -from ...config import options from ...core import OutputType from .core import ( DATAFRAME_TYPE, @@ -86,7 +85,6 @@ def all_series( combine_size=None, method=None, ): - use_inf_as_na = options.dataframe.mode.use_inf_as_na op = DataFrameAll( axis=axis, skipna=skipna, @@ -94,7 +92,6 @@ def all_series( bool_only=bool_only, combine_size=combine_size, output_types=[OutputType.scalar], - use_inf_as_na=use_inf_as_na, method=method, ) return op(series) @@ -109,7 +106,6 @@ def all_dataframe( combine_size=None, method=None, ): - use_inf_as_na = options.dataframe.mode.use_inf_as_na output_types = [OutputType.series] if axis is not None else [OutputType.scalar] op = DataFrameAll( axis=axis, @@ -118,13 +114,11 @@ def all_dataframe( bool_only=bool_only, combine_size=combine_size, output_types=output_types, - use_inf_as_na=use_inf_as_na, method=method, ) return op(df) def all_index(idx): - use_inf_as_na = options.dataframe.mode.use_inf_as_na - op = DataFrameAll(output_types=[OutputType.scalar], use_inf_as_na=use_inf_as_na) + op = DataFrameAll(output_types=[OutputType.scalar]) return op(idx) diff --git a/python/xorbits/_mars/dataframe/reduction/any.py b/python/xorbits/_mars/dataframe/reduction/any.py index d0bc85b30..b653a461e 100644 --- a/python/xorbits/_mars/dataframe/reduction/any.py +++ b/python/xorbits/_mars/dataframe/reduction/any.py @@ -17,7 +17,6 @@ import pandas as pd from ... import opcodes as OperandDef -from ...config import options from ...core import OutputType from .core import ( DATAFRAME_TYPE, @@ -86,7 +85,6 @@ def any_series( combine_size=None, method=None, ): - use_inf_as_na = options.dataframe.mode.use_inf_as_na op = DataFrameAny( axis=axis, skipna=skipna, @@ -94,7 +92,6 @@ def any_series( bool_only=bool_only, combine_size=combine_size, output_types=[OutputType.scalar], - use_inf_as_na=use_inf_as_na, method=method, ) return op(series) @@ -109,7 +106,6 @@ def any_dataframe( combine_size=None, method=None, ): - use_inf_as_na = options.dataframe.mode.use_inf_as_na output_types = [OutputType.series] if axis is not None else [OutputType.scalar] op = DataFrameAny( axis=axis, @@ -118,13 +114,11 @@ def any_dataframe( bool_only=bool_only, combine_size=combine_size, output_types=output_types, - use_inf_as_na=use_inf_as_na, method=method, ) return op(df) def any_index(index): - use_inf_as_na = options.dataframe.mode.use_inf_as_na - op = DataFrameAny(output_types=[OutputType.scalar], use_inf_as_na=use_inf_as_na) + op = DataFrameAny(output_types=[OutputType.scalar]) return op(index) diff --git a/python/xorbits/_mars/dataframe/reduction/core.py b/python/xorbits/_mars/dataframe/reduction/core.py index 21e33d662..c82b652cd 100644 --- a/python/xorbits/_mars/dataframe/reduction/core.py +++ b/python/xorbits/_mars/dataframe/reduction/core.py @@ -65,7 +65,6 @@ class DataFrameReductionOperand(DataFrameOperand): _numeric_only = BoolField("numeric_only") _bool_only = BoolField("bool_only") _min_count = Int32Field("min_count") - _use_inf_as_na = BoolField("use_inf_as_na") _method = StringField("method") _dtype = DataTypeField("dtype") @@ -84,7 +83,6 @@ def __init__( gpu=None, sparse=None, output_types=None, - use_inf_as_na=None, method=None, **kw, ): @@ -100,7 +98,6 @@ def __init__( gpu=gpu, sparse=sparse, _output_types=output_types, - _use_inf_as_na=use_inf_as_na, _method=method, **kw, ) @@ -137,10 +134,6 @@ def dtype(self): def combine_size(self): return self._combine_size - @property - def use_inf_as_na(self): - return self._use_inf_as_na - @property def is_atomic(self): return 
False @@ -163,7 +156,6 @@ def get_reduction_args(self, axis=None): class DataFrameCumReductionOperand(DataFrameOperand): _axis = AnyField("axis") _skipna = BoolField("skipna") - _use_inf_as_na = BoolField("use_inf_as_na") _dtype = DataTypeField("dtype") @@ -175,7 +167,6 @@ def __init__( gpu=None, sparse=None, output_types=None, - use_inf_as_na=None, **kw, ): super().__init__( @@ -185,7 +176,6 @@ def __init__( gpu=gpu, sparse=sparse, _output_types=output_types, - _use_inf_as_na=use_inf_as_na, **kw, ) @@ -201,10 +191,6 @@ def skipna(self): def dtype(self): return self._dtype - @property - def use_inf_as_na(self): - return self._use_inf_as_na - def _default_agg_fun(value, func_name=None, **kw): if value.ndim == 1: @@ -612,14 +598,10 @@ def _execute_combine(cls, ctx, op): @classmethod def execute(cls, ctx, op): - try: - pd.set_option("mode.use_inf_as_na", op.use_inf_as_na) - if op.stage == OperandStage.map: - return cls._execute_map(ctx, op) - else: - return cls._execute_combine(ctx, op) - finally: - pd.reset_option("mode.use_inf_as_na") + if op.stage == OperandStage.map: + return cls._execute_map(ctx, op) + else: + return cls._execute_combine(ctx, op) def _call_dataframe(self, df): axis = getattr(self, "axis", None) or 0 diff --git a/python/xorbits/_mars/dataframe/reduction/count.py b/python/xorbits/_mars/dataframe/reduction/count.py index 41b4047b4..71012db21 100644 --- a/python/xorbits/_mars/dataframe/reduction/count.py +++ b/python/xorbits/_mars/dataframe/reduction/count.py @@ -14,7 +14,6 @@ # limitations under the License. from ... import opcodes as OperandDef -from ...config import options from ...core import OutputType from .core import DataFrameReductionMixin, DataFrameReductionOperand @@ -40,13 +39,11 @@ def count(value): def count_series(series, level=None, combine_size=None, **kw): - use_inf_as_na = kw.pop("_use_inf_as_na", options.dataframe.mode.use_inf_as_na) method = kw.pop("method", None) op = DataFrameCount( level=level, combine_size=combine_size, output_types=[OutputType.scalar], - use_inf_as_na=use_inf_as_na, method=method, ) return op(series) @@ -55,7 +52,6 @@ def count_series(series, level=None, combine_size=None, **kw): def count_dataframe( df, axis=0, level=None, numeric_only=False, combine_size=None, **kw ): - use_inf_as_na = kw.pop("_use_inf_as_na", options.dataframe.mode.use_inf_as_na) method = kw.pop("method", None) op = DataFrameCount( axis=axis, @@ -63,7 +59,6 @@ def count_dataframe( numeric_only=numeric_only, combine_size=combine_size, output_types=[OutputType.series], - use_inf_as_na=use_inf_as_na, method=method, ) return op(df) diff --git a/python/xorbits/_mars/dataframe/reduction/cummax.py b/python/xorbits/_mars/dataframe/reduction/cummax.py index 238d76196..18e248572 100644 --- a/python/xorbits/_mars/dataframe/reduction/cummax.py +++ b/python/xorbits/_mars/dataframe/reduction/cummax.py @@ -14,7 +14,6 @@ # limitations under the License. from ... 
import opcodes as OperandDef -from ...config import options from .core import DataFrameCumReductionMixin, DataFrameCumReductionOperand @@ -24,11 +23,9 @@ class DataFrameCummax(DataFrameCumReductionOperand, DataFrameCumReductionMixin): def cummax(df, axis=None, skipna=True): - use_inf_as_na = options.dataframe.mode.use_inf_as_na op = DataFrameCummax( axis=axis, skipna=skipna, output_types=df.op.output_types, - use_inf_as_na=use_inf_as_na, ) return op(df) diff --git a/python/xorbits/_mars/dataframe/reduction/cummin.py b/python/xorbits/_mars/dataframe/reduction/cummin.py index 2ee203770..559b258be 100644 --- a/python/xorbits/_mars/dataframe/reduction/cummin.py +++ b/python/xorbits/_mars/dataframe/reduction/cummin.py @@ -14,7 +14,6 @@ # limitations under the License. from ... import opcodes as OperandDef -from ...config import options from .core import DataFrameCumReductionMixin, DataFrameCumReductionOperand @@ -24,11 +23,9 @@ class DataFrameCummin(DataFrameCumReductionOperand, DataFrameCumReductionMixin): def cummin(df, axis=None, skipna=True): - use_inf_as_na = options.dataframe.mode.use_inf_as_na op = DataFrameCummin( axis=axis, skipna=skipna, output_types=df.op.output_types, - use_inf_as_na=use_inf_as_na, ) return op(df) diff --git a/python/xorbits/_mars/dataframe/reduction/cumprod.py b/python/xorbits/_mars/dataframe/reduction/cumprod.py index d251198ad..9e52f33e4 100644 --- a/python/xorbits/_mars/dataframe/reduction/cumprod.py +++ b/python/xorbits/_mars/dataframe/reduction/cumprod.py @@ -14,7 +14,6 @@ # limitations under the License. from ... import opcodes as OperandDef -from ...config import options from .core import DataFrameCumReductionMixin, DataFrameCumReductionOperand @@ -24,11 +23,9 @@ class DataFrameCumprod(DataFrameCumReductionOperand, DataFrameCumReductionMixin) def cumprod(df, axis=None, skipna=True): - use_inf_as_na = options.dataframe.mode.use_inf_as_na op = DataFrameCumprod( axis=axis, skipna=skipna, output_types=df.op.output_types, - use_inf_as_na=use_inf_as_na, ) return op(df) diff --git a/python/xorbits/_mars/dataframe/reduction/cumsum.py b/python/xorbits/_mars/dataframe/reduction/cumsum.py index 591c85302..f6e74ef4b 100644 --- a/python/xorbits/_mars/dataframe/reduction/cumsum.py +++ b/python/xorbits/_mars/dataframe/reduction/cumsum.py @@ -14,7 +14,6 @@ # limitations under the License. from ... import opcodes as OperandDef -from ...config import options from .core import DataFrameCumReductionMixin, DataFrameCumReductionOperand @@ -24,11 +23,9 @@ class DataFrameCumsum(DataFrameCumReductionOperand, DataFrameCumReductionMixin): def cumsum(df, axis=None, skipna=True): - use_inf_as_na = options.dataframe.mode.use_inf_as_na op = DataFrameCumsum( axis=axis, skipna=skipna, output_types=df.op.output_types, - use_inf_as_na=use_inf_as_na, ) return op(df) diff --git a/python/xorbits/_mars/dataframe/reduction/custom_reduction.py b/python/xorbits/_mars/dataframe/reduction/custom_reduction.py index 59ee88830..a89361df6 100644 --- a/python/xorbits/_mars/dataframe/reduction/custom_reduction.py +++ b/python/xorbits/_mars/dataframe/reduction/custom_reduction.py @@ -14,7 +14,6 @@ # limitations under the License. from ... 
import opcodes as OperandDef -from ...config import options from ...core import OutputType from ...serialization.serializables import AnyField from .core import DataFrameReductionMixin, DataFrameReductionOperand @@ -35,12 +34,10 @@ def get_reduction_args(self, axis=None): def build_custom_reduction_result(df, custom_reduction_obj, method=None): - use_inf_as_na = options.dataframe.mode.use_inf_as_na output_type = OutputType.series if df.ndim == 2 else OutputType.scalar op = DataFrameCustomReduction( custom_reduction=custom_reduction_obj, output_types=[output_type], - use_inf_as_na=use_inf_as_na, method=method, ) return op(df) diff --git a/python/xorbits/_mars/dataframe/reduction/kurtosis.py b/python/xorbits/_mars/dataframe/reduction/kurtosis.py index 5bb2702e9..b32055953 100644 --- a/python/xorbits/_mars/dataframe/reduction/kurtosis.py +++ b/python/xorbits/_mars/dataframe/reduction/kurtosis.py @@ -16,7 +16,6 @@ import numpy as np from ... import opcodes -from ...config import options from ...core import ENTITY_TYPE, OutputType from ...serialization.serializables import BoolField from .core import DataFrameReductionMixin, DataFrameReductionOperand @@ -83,7 +82,6 @@ def kurt_series( fisher=True, method=None, ): - use_inf_as_na = options.dataframe.mode.use_inf_as_na op = DataFrameKurtosis( axis=axis, skipna=skipna, @@ -92,7 +90,6 @@ def kurt_series( bias=bias, fisher=fisher, output_types=[OutputType.scalar], - use_inf_as_na=use_inf_as_na, method=method, ) return op(df) @@ -109,7 +106,6 @@ def kurt_dataframe( fisher=True, method=None, ): - use_inf_as_na = options.dataframe.mode.use_inf_as_na op = DataFrameKurtosis( axis=axis, skipna=skipna, @@ -119,7 +115,6 @@ def kurt_dataframe( fisher=fisher, combine_size=combine_size, output_types=[OutputType.series], - use_inf_as_na=use_inf_as_na, method=method, ) return op(df) diff --git a/python/xorbits/_mars/dataframe/reduction/max.py b/python/xorbits/_mars/dataframe/reduction/max.py index e04fa12cc..5e8d3dd34 100644 --- a/python/xorbits/_mars/dataframe/reduction/max.py +++ b/python/xorbits/_mars/dataframe/reduction/max.py @@ -14,7 +14,6 @@ # limitations under the License. from ... 
import opcodes as OperandDef -from ...config import options from ...core import OutputType from .core import DataFrameReductionMixin, DataFrameReductionOperand @@ -28,15 +27,21 @@ def is_atomic(self): return True -def max_series(df, axis=None, skipna=True, level=None, combine_size=None, method=None): - use_inf_as_na = options.dataframe.mode.use_inf_as_na +def max_series( + df, + axis=None, + skipna=True, + level=None, + combine_size=None, + method=None, + **kwargs, # kwargs for compatibility with numpy reductions +): op = DataFrameMax( axis=axis, skipna=skipna, level=level, combine_size=combine_size, output_types=[OutputType.scalar], - use_inf_as_na=use_inf_as_na, method=method, ) return op(df) @@ -50,8 +55,8 @@ def max_dataframe( numeric_only=None, combine_size=None, method=None, + **kwargs, # kwargs for compatibility with numpy reductions ): - use_inf_as_na = options.dataframe.mode.use_inf_as_na op = DataFrameMax( axis=axis, skipna=skipna, @@ -59,18 +64,15 @@ def max_dataframe( numeric_only=numeric_only, combine_size=combine_size, output_types=[OutputType.series], - use_inf_as_na=use_inf_as_na, method=method, ) return op(df) def max_index(df, axis=None, skipna=True): - use_inf_as_na = options.dataframe.mode.use_inf_as_na op = DataFrameMax( axis=axis, skipna=skipna, output_types=[OutputType.scalar], - use_inf_as_na=use_inf_as_na, ) return op(df) diff --git a/python/xorbits/_mars/dataframe/reduction/mean.py b/python/xorbits/_mars/dataframe/reduction/mean.py index e471e66ae..4d22d608a 100644 --- a/python/xorbits/_mars/dataframe/reduction/mean.py +++ b/python/xorbits/_mars/dataframe/reduction/mean.py @@ -14,7 +14,6 @@ # limitations under the License. from ... import opcodes as OperandDef -from ...config import options from ...core import OutputType from .core import DataFrameReductionMixin, DataFrameReductionOperand @@ -33,15 +32,21 @@ def mean(x): return mean -def mean_series(df, axis=None, skipna=True, level=None, combine_size=None, method=None): - use_inf_as_na = options.dataframe.mode.use_inf_as_na +def mean_series( + df, + axis=None, + skipna=True, + level=None, + combine_size=None, + method=None, + **kwargs, # kwargs for compatibility with numpy reductions +): op = DataFrameMean( axis=axis, skipna=skipna, level=level, combine_size=combine_size, output_types=[OutputType.scalar], - use_inf_as_na=use_inf_as_na, method=method, ) return op(df) @@ -55,8 +60,8 @@ def mean_dataframe( numeric_only=None, combine_size=None, method=None, + **kwargs, # kwargs for compatibility with numpy reductions ): - use_inf_as_na = options.dataframe.mode.use_inf_as_na op = DataFrameMean( axis=axis, skipna=skipna, @@ -64,7 +69,6 @@ def mean_dataframe( numeric_only=numeric_only, combine_size=combine_size, output_types=[OutputType.series], - use_inf_as_na=use_inf_as_na, method=method, ) return op(df) diff --git a/python/xorbits/_mars/dataframe/reduction/min.py b/python/xorbits/_mars/dataframe/reduction/min.py index d514c43b7..0bcb91634 100644 --- a/python/xorbits/_mars/dataframe/reduction/min.py +++ b/python/xorbits/_mars/dataframe/reduction/min.py @@ -14,7 +14,6 @@ # limitations under the License. from ...
import opcodes as OperandDef -from ...config import options from ...core import OutputType from .core import DataFrameReductionMixin, DataFrameReductionOperand @@ -28,15 +27,21 @@ def is_atomic(self): return True -def min_series(df, axis=None, skipna=True, level=None, combine_size=None, method=None): - use_inf_as_na = options.dataframe.mode.use_inf_as_na +def min_series( + df, + axis=None, + skipna=True, + level=None, + combine_size=None, + method=None, + **kwargs, # kwargs for compatibility with numpy reductions +): op = DataFrameMin( axis=axis, skipna=skipna, level=level, combine_size=combine_size, output_types=[OutputType.scalar], - use_inf_as_na=use_inf_as_na, method=method, ) return op(df) @@ -50,8 +55,8 @@ def min_dataframe( numeric_only=None, combine_size=None, method=None, + **kwargs, # kwargs for compatibility with numpy reductions ): - use_inf_as_na = options.dataframe.mode.use_inf_as_na op = DataFrameMin( axis=axis, skipna=skipna, @@ -59,18 +64,15 @@ def min_dataframe( numeric_only=numeric_only, combine_size=combine_size, output_types=[OutputType.series], - use_inf_as_na=use_inf_as_na, method=method, ) return op(df) def min_index(df, axis=None, skipna=True): - use_inf_as_na = options.dataframe.mode.use_inf_as_na op = DataFrameMin( axis=axis, skipna=skipna, output_types=[OutputType.scalar], - use_inf_as_na=use_inf_as_na, ) return op(df) diff --git a/python/xorbits/_mars/dataframe/reduction/prod.py b/python/xorbits/_mars/dataframe/reduction/prod.py index c84b1ce3b..217c34994 100644 --- a/python/xorbits/_mars/dataframe/reduction/prod.py +++ b/python/xorbits/_mars/dataframe/reduction/prod.py @@ -16,7 +16,6 @@ import numpy as np from ... import opcodes -from ...config import options from ...core import OutputType from .aggregation import where_function from .core import DataFrameReductionMixin, DataFrameReductionOperand @@ -48,7 +47,6 @@ def prod(value): def prod_series( df, axis=None, skipna=True, level=None, min_count=0, combine_size=None, method=None ): - use_inf_as_na = options.dataframe.mode.use_inf_as_na op = DataFrameProd( axis=axis, skipna=skipna, @@ -56,7 +54,6 @@ def prod_series( min_count=min_count, combine_size=combine_size, output_types=[OutputType.scalar], - use_inf_as_na=use_inf_as_na, method=method, ) return op(df) @@ -72,7 +69,6 @@ def prod_dataframe( combine_size=None, method=None, ): - use_inf_as_na = options.dataframe.mode.use_inf_as_na op = DataFrameProd( axis=axis, skipna=skipna, @@ -81,7 +77,6 @@ def prod_dataframe( numeric_only=numeric_only, combine_size=combine_size, output_types=[OutputType.series], - use_inf_as_na=use_inf_as_na, method=method, ) return op(df) diff --git a/python/xorbits/_mars/dataframe/reduction/sem.py b/python/xorbits/_mars/dataframe/reduction/sem.py index 5c6d274ed..f434c4e9b 100644 --- a/python/xorbits/_mars/dataframe/reduction/sem.py +++ b/python/xorbits/_mars/dataframe/reduction/sem.py @@ -14,7 +14,6 @@ # limitations under the License. from ...
import opcodes as OperandDef -from ...config import options from ...core import OutputType from ...serialization.serializables import Int32Field from .core import DataFrameReductionMixin, DataFrameReductionOperand @@ -48,7 +47,6 @@ def sem(x): def sem_series( series, axis=None, skipna=True, level=None, ddof=1, combine_size=None, method=None ): - use_inf_as_na = options.dataframe.mode.use_inf_as_na op = DataFrameSem( axis=axis, skipna=skipna, @@ -56,7 +54,6 @@ def sem_series( ddof=ddof, combine_size=combine_size, output_types=[OutputType.scalar], - use_inf_as_na=use_inf_as_na, method=method, ) return op(series) @@ -72,7 +69,6 @@ def sem_dataframe( combine_size=None, method=None, ): - use_inf_as_na = options.dataframe.mode.use_inf_as_na op = DataFrameSem( axis=axis, skipna=skipna, @@ -81,7 +77,6 @@ def sem_dataframe( numeric_only=numeric_only, combine_size=combine_size, output_types=[OutputType.series], - use_inf_as_na=use_inf_as_na, method=method, ) return op(df) diff --git a/python/xorbits/_mars/dataframe/reduction/skew.py b/python/xorbits/_mars/dataframe/reduction/skew.py index c7e16a5a1..7b9ec6eae 100644 --- a/python/xorbits/_mars/dataframe/reduction/skew.py +++ b/python/xorbits/_mars/dataframe/reduction/skew.py @@ -16,7 +16,6 @@ import numpy as np from ... import opcodes -from ...config import options from ...core import ENTITY_TYPE, OutputType from ...serialization.serializables import BoolField from .core import DataFrameReductionMixin, DataFrameReductionOperand @@ -68,7 +67,6 @@ def skew(x): def skew_series( df, axis=None, skipna=True, level=None, combine_size=None, bias=False, method=None ): - use_inf_as_na = options.dataframe.mode.use_inf_as_na op = DataFrameSkew( axis=axis, skipna=skipna, @@ -76,7 +74,6 @@ def skew_series( combine_size=combine_size, bias=bias, output_types=[OutputType.scalar], - use_inf_as_na=use_inf_as_na, method=method, ) return op(df) @@ -92,7 +89,6 @@ def skew_dataframe( bias=False, method=None, ): - use_inf_as_na = options.dataframe.mode.use_inf_as_na op = DataFrameSkew( axis=axis, skipna=skipna, @@ -101,7 +97,6 @@ def skew_dataframe( bias=bias, combine_size=combine_size, output_types=[OutputType.series], - use_inf_as_na=use_inf_as_na, method=method, ) return op(df) diff --git a/python/xorbits/_mars/dataframe/reduction/sum.py b/python/xorbits/_mars/dataframe/reduction/sum.py index 1b3a4dd7b..ff8c81b6c 100644 --- a/python/xorbits/_mars/dataframe/reduction/sum.py +++ b/python/xorbits/_mars/dataframe/reduction/sum.py @@ -16,7 +16,6 @@ import numpy as np from ... 
import opcodes -from ...config import options from ...core import OutputType from .core import DataFrameReductionMixin, DataFrameReductionOperand @@ -47,9 +46,15 @@ def sum_(value): def sum_series( - df, axis=None, skipna=True, level=None, min_count=0, combine_size=None, method=None + df, + axis=None, + skipna=True, + level=None, + min_count=0, + combine_size=None, + method=None, + **kwargs, # kwargs for compatibility with numpy reductions ): - use_inf_as_na = options.dataframe.mode.use_inf_as_na op = DataFrameSum( axis=axis, skipna=skipna, @@ -57,7 +62,6 @@ def sum_series( min_count=min_count, combine_size=combine_size, output_types=[OutputType.scalar], - use_inf_as_na=use_inf_as_na, method=method, ) return op(df) @@ -72,8 +76,8 @@ def sum_dataframe( numeric_only=None, combine_size=None, method=None, + **kwargs, # kwargs for compatibility with numpy reductions ): - use_inf_as_na = options.dataframe.mode.use_inf_as_na op = DataFrameSum( axis=axis, skipna=skipna, @@ -82,7 +86,6 @@ def sum_dataframe( numeric_only=numeric_only, combine_size=combine_size, output_types=[OutputType.series], - use_inf_as_na=use_inf_as_na, method=method, ) return op(df) diff --git a/python/xorbits/_mars/dataframe/reduction/tests/test_reduction_execution.py b/python/xorbits/_mars/dataframe/reduction/tests/test_reduction_execution.py index cfcd70192..53b121805 100644 --- a/python/xorbits/_mars/dataframe/reduction/tests/test_reduction_execution.py +++ b/python/xorbits/_mars/dataframe/reduction/tests/test_reduction_execution.py @@ -771,6 +771,37 @@ def test_dataframe_aggregate(setup, check_ref_counts): ] data = pd.DataFrame(np.random.rand(20, 20)) + def realized_volatility(series): + print(series) + return np.sqrt(np.sum(series**2)) + + df = md.DataFrame(data) + result = df.agg(realized_volatility) + pd.testing.assert_series_equal( + result.execute().fetch(), data.agg(realized_volatility) + ) + + def trip_type(x): + return np.min(x) + + df = md.DataFrame(data) + result = df.agg(trip_type) + pd.testing.assert_series_equal(result.execute().fetch(), data.agg(trip_type)) + + def trip_type_max(x): + return np.max(x) + + df = md.DataFrame(data) + result = df.agg(trip_type_max) + pd.testing.assert_series_equal(result.execute().fetch(), data.agg(trip_type_max)) + + def trip_type_mean(x): + return np.mean(x) + + df = md.DataFrame(data) + result = df.agg(trip_type_mean) + pd.testing.assert_series_equal(result.execute().fetch(), data.agg(trip_type_mean)) + df = md.DataFrame(data) result = df.agg(all_aggs) pd.testing.assert_frame_equal(result.execute().fetch(), data.agg(all_aggs)) @@ -1138,3 +1169,34 @@ def g3(x): s.agg((g1, g2, g3)), ms.agg((g1, g2, g3)).execute().fetch() ) pd.testing.assert_series_equal(s.agg((g1, g1)), ms.agg((g1, g1)).execute().fetch()) + + +@pytest.mark.parametrize("chunk_size", [None, 1, 5, 10]) +def test_agg_with_kwargs(setup, chunk_size): + rs = np.random.RandomState(0) + df = pd.DataFrame( + { + "a": rs.choice([1, 3, 8], size=100), + "b": rs.choice([201.8, 155.7, 95.7], size=100), + "c": rs.choice([1, np.nan, 3], size=100), + }, + ) + mdf = md.DataFrame(df, chunk_size=chunk_size) + res = mdf.agg(a=("a", "sum")) + pd.testing.assert_frame_equal(res.execute().fetch(), df.agg(a=("a", "sum"))) + + res = mdf.agg(x=("a", "sum"), y=("b", "mean")) + pd.testing.assert_frame_equal( + res.execute().fetch(), df.agg(x=("a", "sum"), y=("b", "mean")) + ) + + res = mdf.agg(x=("a", "mean"), y=("c", sum)) + pd.testing.assert_frame_equal( + res.execute().fetch(), df.agg(x=("a", "mean"), y=("c", sum)) + ) + + def g(x): + return
x.sum() - (x * 3).sum() + + res = mdf.agg(g=("b", g)) + pd.testing.assert_frame_equal(res.execute().fetch(), df.agg(g=("b", g))) diff --git a/python/xorbits/_mars/dataframe/reduction/var.py b/python/xorbits/_mars/dataframe/reduction/var.py index e59d5fa23..f7e3acbf4 100644 --- a/python/xorbits/_mars/dataframe/reduction/var.py +++ b/python/xorbits/_mars/dataframe/reduction/var.py @@ -14,7 +14,6 @@ # limitations under the License. from ... import opcodes as OperandDef -from ...config import options from ...core import OutputType from ...serialization.serializables import Int32Field from .core import DataFrameReductionMixin, DataFrameReductionOperand @@ -51,7 +50,6 @@ def var(x): def var_series( series, axis=None, skipna=True, level=None, ddof=1, combine_size=None, method=None ): - use_inf_as_na = options.dataframe.mode.use_inf_as_na op = DataFrameVar( axis=axis, skipna=skipna, @@ -59,7 +57,6 @@ def var_series( ddof=ddof, combine_size=combine_size, output_types=[OutputType.scalar], - use_inf_as_na=use_inf_as_na, method=method, ) return op(series) @@ -75,7 +72,6 @@ def var_dataframe( combine_size=None, method=None, ): - use_inf_as_na = options.dataframe.mode.use_inf_as_na op = DataFrameVar( axis=axis, skipna=skipna, @@ -84,7 +80,6 @@ def var_dataframe( numeric_only=numeric_only, combine_size=combine_size, output_types=[OutputType.series], - use_inf_as_na=use_inf_as_na, method=method, ) return op(df) diff --git a/python/xorbits/_mars/dataframe/tests/test_core.py b/python/xorbits/_mars/dataframe/tests/test_core.py index af061ff4f..2ca4b0777 100644 --- a/python/xorbits/_mars/dataframe/tests/test_core.py +++ b/python/xorbits/_mars/dataframe/tests/test_core.py @@ -442,3 +442,27 @@ def test_mars_tensor_magic(setup): np.testing.assert_array_equal(expected, actual) with pytest.raises(ValueError, match="could not convert string to float"): DataFrame(expected).__mars_tensor__(dtype="float64").execute() + + +def test_series_and_index_array(setup): + data = np.random.rand(10) + series = Series(data).execute() + + array = np.array(series) + np.testing.assert_array_equal(array, data) + + df = pd.DataFrame({"a": [1, 2], "b": ["foo", "bar"]}) + xdf = DataFrame(df) + index = xdf.index.execute() + np.testing.assert_array_equal(np.array(df.index), np.array(index)) + + arrays = [ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + tuples = list(zip(*arrays)) + index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"]) + s = pd.Series(np.random.randn(8), index=index) + xs = Series(s).index.execute() + + np.testing.assert_array_equal(np.array(s.index), np.array(xs)) diff --git a/python/xorbits/_mars/deploy/oscar/base_config.yml b/python/xorbits/_mars/deploy/oscar/base_config.yml index 99754f1bf..51305f766 100644 --- a/python/xorbits/_mars/deploy/oscar/base_config.yml +++ b/python/xorbits/_mars/deploy/oscar/base_config.yml @@ -56,6 +56,15 @@ scheduling: # Max number of concurrent speculative run for a subtask. 
max_concurrent_run: 3 subtask_cancel_timeout: 5 + stage_monitor: + enable_check: false + refresh_time: 3 + prepare_data_timeout: 300 + request_quota_timeout: 300 + acquire_slot_timeout: 300 + execution_timeout: null + release_slot_timeout: 300 + finish_timeout: 300 metrics: backend: console # If backend is prometheus, then we can add prometheus config as follows: diff --git a/python/xorbits/_mars/deploy/oscar/session.py b/python/xorbits/_mars/deploy/oscar/session.py index 2fe2d10dd..8e1ac5b09 100644 --- a/python/xorbits/_mars/deploy/oscar/session.py +++ b/python/xorbits/_mars/deploy/oscar/session.py @@ -502,6 +502,17 @@ def decref(self, *tileables_keys): Tileables' keys """ + @abstractmethod + def incref(self, *tileables_keys): + """ + Incref tileables. + + Parameters + ---------- + tileables_keys : list + Tileables' keys + """ + @abstractmethod def _get_ref_counts(self) -> Dict[str, int]: """ @@ -960,10 +971,19 @@ async def execute(self, *tileables, **kwargs) -> ExecutionInfo: def _get_to_fetch_tileable( self, tileable: TileableType ) -> Tuple[TileableType, List[Union[slice, Integral]]]: - from ...dataframe.indexing.iloc import DataFrameIlocGetItem, SeriesIlocGetItem + from ...dataframe.indexing.iloc import ( + DataFrameIlocGetItem, + IndexIlocGetItem, + SeriesIlocGetItem, + ) from ...tensor.indexing import TensorIndex - slice_op_types = TensorIndex, DataFrameIlocGetItem, SeriesIlocGetItem + slice_op_types = ( + TensorIndex, + DataFrameIlocGetItem, + SeriesIlocGetItem, + IndexIlocGetItem, + ) if hasattr(tileable, "data"): tileable = tileable.data @@ -1200,6 +1220,10 @@ async def decref(self, *tileable_keys): logger.debug("Decref tileables on client: %s", tileable_keys) return await self._lifecycle_api.decref_tileables(list(tileable_keys)) + async def incref(self, *tileable_keys): + logger.debug("Incref tileables on client: %s", tileable_keys) + return await self._lifecycle_api.incref_tileables(list(tileable_keys)) + async def _get_ref_counts(self) -> Dict[str, int]: return await self._lifecycle_api.get_all_chunk_ref_counts() @@ -1623,6 +1647,11 @@ def fetch_infos(self, *tileables, fields, **kwargs) -> list: def decref(self, *tileables_keys): pass # pragma: no cover + @implements(AbstractSyncSession.incref) + @_delegate_to_isolated_session + def incref(self, *tileables_keys): + pass # pragma: no cover + @implements(AbstractSyncSession._get_ref_counts) @_delegate_to_isolated_session def _get_ref_counts(self) -> Dict[str, int]: diff --git a/python/xorbits/_mars/learn/contrib/lightgbm/tests/test_classifier.py b/python/xorbits/_mars/learn/contrib/lightgbm/tests/test_classifier.py index 7e226b786..d060e5dd7 100644 --- a/python/xorbits/_mars/learn/contrib/lightgbm/tests/test_classifier.py +++ b/python/xorbits/_mars/learn/contrib/lightgbm/tests/test_classifier.py @@ -156,7 +156,7 @@ def test_local_classifier_from_to_parquet(setup): df.iloc[:500].to_parquet(os.path.join(d, "data", "data1.parquet")) df.iloc[500:].to_parquet(os.path.join(d, "data", "data2.parquet")) - df = md.read_parquet(data_dir) + df = md.read_parquet(data_dir, use_arrow_dtype=False) model = LGBMClassifier() model.load_model(classifier) result = model.predict(df, run=False) @@ -164,7 +164,12 @@ def test_local_classifier_from_to_parquet(setup): r.execute() - ret = md.read_parquet(result_dir).to_pandas().iloc[:, 0].to_numpy() + ret = ( + md.read_parquet(result_dir, use_arrow_dtype=False) + .to_pandas() + .iloc[:, 0] + .to_numpy() + ) expected = classifier.predict(X) expected = np.stack([1 - expected, expected]).argmax(axis=0) 
np.testing.assert_array_equal(ret, expected) diff --git a/python/xorbits/_mars/learn/contrib/xgboost/tests/test_classifier.py b/python/xorbits/_mars/learn/contrib/xgboost/tests/test_classifier.py index e386ee106..98f16ef2a 100644 --- a/python/xorbits/_mars/learn/contrib/xgboost/tests/test_classifier.py +++ b/python/xorbits/_mars/learn/contrib/xgboost/tests/test_classifier.py @@ -151,7 +151,7 @@ def test_local_classifier_from_to_parquet(setup): df.iloc[:500].to_parquet(os.path.join(d, "data", "data1.parquet")) df.iloc[500:].to_parquet(os.path.join(d, "data", "data2.parquet")) - df = md.read_parquet(data_dir).set_index("id") + df = md.read_parquet(data_dir, use_arrow_dtype=False).set_index("id") model = XGBClassifier() model.load_model(m_name) result = model.predict(df, run=False) @@ -160,7 +160,12 @@ def test_local_classifier_from_to_parquet(setup): # tiles to ensure no iterative tiling exists r.execute() - ret = md.read_parquet(result_dir).to_pandas().iloc[:, 0].to_numpy() + ret = ( + md.read_parquet(result_dir, use_arrow_dtype=False) + .to_pandas() + .iloc[:, 0] + .to_numpy() + ) model2 = xgboost.XGBClassifier() model2.load_model(m_name) expected = model2.predict(X) diff --git a/python/xorbits/_mars/learn/linear_model/_base.py b/python/xorbits/_mars/learn/linear_model/_base.py index bf1e27b82..0b6b9510e 100644 --- a/python/xorbits/_mars/learn/linear_model/_base.py +++ b/python/xorbits/_mars/learn/linear_model/_base.py @@ -302,7 +302,7 @@ def fit(self, X, y, sample_weight=None): self.coef_.execute() except LinAlgError: # TODO: implement linalg.lstsq first - raise NotImplementedError("Does not support sigular matrix!") + raise NotImplementedError("Does not support singular matrix!") if y.ndim == 1: self.coef_ = mt.ravel(self.coef_) diff --git a/python/xorbits/_mars/learn/linear_model/tests/test_base.py b/python/xorbits/_mars/learn/linear_model/tests/test_base.py index eaf32f0ef..942a5c342 100644 --- a/python/xorbits/_mars/learn/linear_model/tests/test_base.py +++ b/python/xorbits/_mars/learn/linear_model/tests/test_base.py @@ -52,7 +52,7 @@ def test_linear_regression(setup): assert_array_almost_equal(reg.predict(X), model.predict(X)) # Regular model fitting, #samples <= 2, # features < 2 - error_msg = re.escape("Does not support sigular matrix!") + error_msg = re.escape("Does not support singular matrix!") X = [[1], [2]] Y = [1, 2] @@ -68,7 +68,7 @@ def test_linear_regression(setup): assert_array_almost_equal(reg.predict(X), model.predict(X)) # Extra case #1: singular matrix, degenerate input - error_msg = re.escape("Does not support sigular matrix!") + error_msg = re.escape("Does not support singular matrix!") X = [[1]] Y = [0] diff --git a/python/xorbits/_mars/learn/metrics/pairwise/pairwise.py b/python/xorbits/_mars/learn/metrics/pairwise/pairwise.py index 8353cff39..f1e650f70 100644 --- a/python/xorbits/_mars/learn/metrics/pairwise/pairwise.py +++ b/python/xorbits/_mars/learn/metrics/pairwise/pairwise.py @@ -72,7 +72,7 @@ "precomputed": None, # HACK: precomputed is always allowed, never called } -# These distances recquire boolean tensors, when using mars.tensor.spatial.distance +# These distances require boolean tensors, when using mars.tensor.spatial.distance PAIRWISE_BOOLEAN_FUNCTIONS = [ "dice", "jaccard", diff --git a/python/xorbits/_mars/learn/neighbors/base.py b/python/xorbits/_mars/learn/neighbors/base.py index b1f1b21f4..408dd9c8e 100644 --- a/python/xorbits/_mars/learn/neighbors/base.py +++ b/python/xorbits/_mars/learn/neighbors/base.py @@ -24,7 +24,6 @@ from ..metrics import 
pairwise_distances_topk from ..metrics.pairwise import PAIRWISE_DISTANCE_FUNCTIONS from ..utils import check_array -from ..utils.core import sklearn_version from ..utils.validation import check_is_fitted from ._ball_tree import SklearnBallTree, ball_tree_query, create_ball_tree from ._faiss import METRIC_TO_FAISS_METRIC_TYPE, build_faiss_index, faiss_query @@ -33,12 +32,8 @@ from ._proxima import METRIC_TO_PROXIMA_METRIC_TYPE, build_proxima_index, proxima_query VALID_METRICS = dict( - ball_tree=SklearnBallTree.valid_metrics() - if sklearn_version() >= "1.3.0" - else SklearnBallTree.valid_metrics, - kd_tree=SklearnKDTree.valid_metrics() - if sklearn_version() >= "1.3.0" - else SklearnKDTree.valid_metrics, + ball_tree=SklearnBallTree.valid_metrics, + kd_tree=SklearnKDTree.valid_metrics, # The following list comes from the # sklearn.metrics.pairwise doc string brute=( diff --git a/python/xorbits/_mars/optimization/logical/tileable/column_pruning/column_pruning_rule.py b/python/xorbits/_mars/optimization/logical/tileable/column_pruning/column_pruning_rule.py index a90544a61..9b7c77e8e 100644 --- a/python/xorbits/_mars/optimization/logical/tileable/column_pruning/column_pruning_rule.py +++ b/python/xorbits/_mars/optimization/logical/tileable/column_pruning/column_pruning_rule.py @@ -55,9 +55,17 @@ def _get_successor_required_columns(self, data: TileableData) -> Set[Any]: """ successors = self._get_successors(data) if successors: - return set().union( + res = set().union( *[self._context[successor][data] for successor in successors] ) + # When getting the required columns of a DataFrameIndex node, we also need to consider the node's own columns. + if ( + isinstance(data, BaseDataFrameData) + and isinstance(data.op, DataFrameIndex) + and len(data.dtypes) > 0 + ): + res = res.union(set(data.dtypes.index)) + return res else: return self._get_all_columns(data) diff --git a/python/xorbits/_mars/optimization/logical/tileable/column_pruning/input_column_selector.py b/python/xorbits/_mars/optimization/logical/tileable/column_pruning/input_column_selector.py index dda97f7f0..97e866e95 100644 --- a/python/xorbits/_mars/optimization/logical/tileable/column_pruning/input_column_selector.py +++ b/python/xorbits/_mars/optimization/logical/tileable/column_pruning/input_column_selector.py @@ -164,9 +164,10 @@ def df_groupby_agg_select_function( ret = {} # group by a series groupby_series = False - if isinstance(by, list) and len(by) == 1 and isinstance(by[0], BaseSeriesData): + if isinstance(by, list) and all(isinstance(_by, BaseSeriesData) for _by in by): groupby_series = True - ret[by[0]] = {by[0].name} + for _by in by: + ret[_by] = {_by.name} if isinstance(inp, BaseSeriesData): ret[inp] = {inp.name} diff --git a/python/xorbits/_mars/optimization/logical/tileable/column_pruning/tests/test_column_pruning.py b/python/xorbits/_mars/optimization/logical/tileable/column_pruning/tests/test_column_pruning.py index 7158ad6f4..9e869f48b 100644 --- a/python/xorbits/_mars/optimization/logical/tileable/column_pruning/tests/test_column_pruning.py +++ b/python/xorbits/_mars/optimization/logical/tileable/column_pruning/tests/test_column_pruning.py @@ -15,6 +15,7 @@ import os import tempfile +import numpy as np import pandas as pd import pytest @@ -32,6 +33,7 @@ from ......dataframe.indexing.getitem import DataFrameIndex from ......dataframe.indexing.setitem import DataFrameSetitem from ......dataframe.merge import DataFrameMerge +from ......dataframe.utils import PD_VERSION_GREATER_THAN_2_10 from ......optimization.logical.tileable import
optimize from ......tensor.core import TensorData from ......tensor.datasource import ArrayDataSource @@ -331,6 +333,9 @@ def test_merge_then_groupby_apply(setup, gen_data2): raw1 = pd.read_parquet(file_path) raw2 = pd.read_parquet(file_path2) + if PD_VERSION_GREATER_THAN_2_10: + raw1 = raw1.convert_dtypes(dtype_backend="pyarrow") + raw2 = raw2.convert_dtypes(dtype_backend="pyarrow") expected = ( ( ((raw1 + 1) * 2).merge(raw2, left_on=["c1", "c3"], right_on=["cc2", "cc4"])[ @@ -387,6 +392,8 @@ def test_two_merges(setup, gen_data2): ] .merge(raw2, left_on=["cc1"], right_on=["cc3"]) ) + if PD_VERSION_GREATER_THAN_2_10: + expected = expected.convert_dtypes(dtype_backend="pyarrow") pd.testing.assert_frame_equal(r, expected) parquet_nodes = [n for n in graph._nodes if type(n.op) is DataFrameReadParquet] @@ -426,6 +433,8 @@ def test_two_groupby_aggs_with_multi_index(setup, gen_data2): r = c.execute().fetch() raw = pd.read_parquet(file_path) + if PD_VERSION_GREATER_THAN_2_10: + raw = raw.convert_dtypes(dtype_backend="pyarrow") expected = ( (raw * 2) .groupby(["c2", "c3"]) @@ -590,3 +599,42 @@ def test_setitem(setup, gen_data1): raw1["c5"] = raw2["c1"] expected = raw1.groupby(by="c1", as_index=False).sum()["c2"] pd.testing.assert_series_equal(r.execute().fetch(), expected) + + +def test_merge_index_groupby_agg(setup, gen_data1): + file_path, file_path2 = gen_data1 + left = md.read_csv(file_path) + right = md.read_csv(file_path2) + r = left.merge(right, on="c1") + data = r[["c1", "c2_x", "c2_y", "c4_x", "c4_y"]] + + def udf(x): + return np.sum(x) + + res = data.groupby("c1").agg({"c2_x": udf}) + + graph = res.build_graph() + optimize(graph) + + agg_node = graph.result_tileables[0] + assert isinstance(agg_node.op, DataFrameGroupByAgg) + + assert len(graph.predecessors(agg_node)) == 1 + index_node = graph.predecessors(agg_node)[0] + assert type(index_node.op) is DataFrameIndex + assert set(index_node.op.col_names) == {"c1", "c2_x"} + + index_node2 = graph.predecessors(index_node)[0] + assert type(index_node2.op) is DataFrameIndex + assert set(index_node2.op.col_names) == {"c1", "c2_x", "c2_y", "c4_x", "c4_y"} + + merge_node = graph.predecessors(index_node2)[0] + assert type(merge_node.op) is DataFrameMerge + + read_csv_node_left, read_csv_node_right = graph.predecessors(merge_node) + assert type(read_csv_node_left.op) is DataFrameReadCSV + assert type(read_csv_node_right.op) is DataFrameReadCSV + assert len(read_csv_node_left.op.usecols) == 3 + assert len(read_csv_node_right.op.usecols) == 3 + assert set(read_csv_node_left.op.usecols) == {"c1", "c2", "c4"} + assert set(read_csv_node_right.op.usecols) == {"c1", "c2", "c4"} diff --git a/python/xorbits/_mars/optimization/logical/tileable/column_pruning/tests/test_input_column_selector.py b/python/xorbits/_mars/optimization/logical/tileable/column_pruning/tests/test_input_column_selector.py index e351e339e..82cf899e8 100644 --- a/python/xorbits/_mars/optimization/logical/tileable/column_pruning/tests/test_input_column_selector.py +++ b/python/xorbits/_mars/optimization/logical/tileable/column_pruning/tests/test_input_column_selector.py @@ -116,6 +116,18 @@ def test_df_groupby_agg(): assert labels.data in input_columns assert input_columns[labels.data] == {"label"} + label1 = Series([1, 1, 1, 1], name="label1") + label2 = Series([2, 2, 3, 3], name="label2") + s = df.groupby(by=[label1, label2]).sum() + input_columns = InputColumnSelector.select(s.data, {"foo"}) + assert len(input_columns) == 3 + assert df.data in input_columns + assert 
input_columns[df.data] == {"foo"} + assert label1.data in input_columns + assert input_columns[label1.data] == {"label1"} + assert label2.data in input_columns + assert input_columns[label2.data] == {"label2"} + @pytest.mark.skip(reason="group by index is not supported yet") def test_df_groupby_index_agg(): diff --git a/python/xorbits/_mars/optimization/logical/tileable/tests/test_head.py b/python/xorbits/_mars/optimization/logical/tileable/tests/test_head.py index 0e0897c24..f43932ac9 100644 --- a/python/xorbits/_mars/optimization/logical/tileable/tests/test_head.py +++ b/python/xorbits/_mars/optimization/logical/tileable/tests/test_head.py @@ -23,6 +23,7 @@ from ..... import dataframe as md from .....core import TileableGraph, TileableGraphBuilder, enter_mode from .....dataframe.indexing.iloc import DataFrameIlocGetItem, SeriesIlocGetItem +from .....dataframe.utils import PD_VERSION_GREATER_THAN_2_10 from .. import optimize @@ -129,6 +130,8 @@ def test_read_parquet_head(prepare_data, setup): extra_config={"operand_executors": _iloc_operand_executors} ).fetch() expected = pdf.head(5) + if PD_VERSION_GREATER_THAN_2_10: + expected = expected.convert_dtypes(dtype_backend="pyarrow") pd.testing.assert_frame_equal(result, expected) diff --git a/python/xorbits/_mars/services/scheduling/api/oscar.py b/python/xorbits/_mars/services/scheduling/api/oscar.py index cb9541e35..9e2e7cac1 100644 --- a/python/xorbits/_mars/services/scheduling/api/oscar.py +++ b/python/xorbits/_mars/services/scheduling/api/oscar.py @@ -174,11 +174,18 @@ async def create(cls: Type[APIType], session_id: str, address: str) -> APIType: from .... import resource as mars_resource from ..worker import ( + StageMonitorActor, SubtaskExecutionActor, WorkerQuotaManagerActor, WorkerSlotManagerActor, ) + await mo.create_actor( + StageMonitorActor, + uid=StageMonitorActor.default_uid(), + address=address, + ) + await mo.create_actor( SubtaskExecutionActor, subtask_max_retries=0, diff --git a/python/xorbits/_mars/services/scheduling/worker/__init__.py b/python/xorbits/_mars/services/scheduling/worker/__init__.py index f43167c94..ea0785d2e 100644 --- a/python/xorbits/_mars/services/scheduling/worker/__init__.py +++ b/python/xorbits/_mars/services/scheduling/worker/__init__.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from .execution import SubtaskExecutionActor +from .execution import StageMonitorActor, SubtaskExecutionActor from .quota import MemQuotaActor, QuotaActor, WorkerQuotaManagerActor from .service import SchedulingWorkerService from .workerslot import ( diff --git a/python/xorbits/_mars/services/scheduling/worker/execution.py b/python/xorbits/_mars/services/scheduling/worker/execution.py index dfeceb80b..b90da08fc 100644 --- a/python/xorbits/_mars/services/scheduling/worker/execution.py +++ b/python/xorbits/_mars/services/scheduling/worker/execution.py @@ -19,9 +19,10 @@ import operator import pprint import sys +import time from collections import defaultdict from dataclasses import dataclass, field -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Tuple import xoscar as mo from xoscar.errors import ServerClosed, XoscarError @@ -37,13 +38,93 @@ from ...cluster import ClusterAPI from ...meta import MetaAPI from ...storage import StorageAPI -from ...subtask import Subtask, SubtaskAPI, SubtaskResult, SubtaskStatus +from ...subtask import Subtask, SubtaskAPI, SubtaskResult, SubtaskStage, SubtaskStatus from ...task.task_info_collector import TaskInfoCollector from .quota import QuotaActor from .workerslot import BandSlotManagerActor logger = logging.getLogger(__name__) + +class StageMonitorActor(mo.Actor): + def __init__( + self, + monitoring_config: Dict = {}, + ): + self._records = dict() + + self._enable_check = monitoring_config.get("enable_check", False) + self._refresh_time = monitoring_config.get("refresh_time", 3) + self._kill_timeout = { + SubtaskStage.PREPARE_DATA: monitoring_config.get("prepare_data_timeout"), + SubtaskStage.REQUEST_QUOTA: monitoring_config.get("request_quota_timeout"), + SubtaskStage.ACQUIRE_SLOT: monitoring_config.get("acquire_slot_timeout"), + SubtaskStage.EXECUTE: monitoring_config.get("execution_timeout"), + SubtaskStage.RELEASE_SLOT: monitoring_config.get("release_slot_timeout"), + SubtaskStage.FINISH: monitoring_config.get("finish_timeout"), + } + self._check_task = None + + async def __post_create__(self): + await super().__post_create__() + if self._enable_check: + self._check_task = self.ref().check_subtasks.tell_delay( + delay=self._refresh_time + ) + + async def __pre_destroy__(self): + if self._enable_check: + self._check_task.cancel() + await super().__pre_destroy__() + + async def check_subtasks(self): + stale_tasks = await self.get_all_stale_tasks() + for task_key, stage in stale_tasks: + session_id, subtask_id = task_key + try: + logger.warning( + "Subtask[session_id: %s, subtask_id: %s] is timeout at stage %s", + session_id, + subtask_id, + stage, + ) + except Exception as e: + logger.error(e) + + self._check_task = self.ref().check_subtasks.tell_delay( + delay=self._refresh_time + ) + + async def get_all_stale_tasks(self): + cur_timestamp = time.time() + stale_tasks = [] + for k, v in self._records.items(): + pre_timestamp, cur_stage = v["history"][-1][0], v["history"][-1][1] + if ( + self._kill_timeout[cur_stage] is not None + and cur_timestamp - pre_timestamp >= self._kill_timeout[cur_stage] + ): + stale_tasks.append((k, cur_stage)) + return stale_tasks + + async def register_subtask(self, subtask: Subtask, supervisor_address: str): + keys = (subtask.session_id, subtask.subtask_id) + self._records[keys] = { + "subtask": subtask, + "history": [], + "supervisor_address": supervisor_address, + } + + async def report_stage(self, keys: Tuple[str, str], stage: SubtaskStage): + if stage == SubtaskStage.FINISH: + 
self._records.pop(keys) + return + self._records[keys]["history"].append((time.time(), stage)) + + async def get_records(self): + return self._records + + # the default times to run subtask. DEFAULT_SUBTASK_MAX_RETRIES = 0 @@ -168,9 +249,16 @@ def __init__( "The count of finished subtasks of the current band.", ("band",), ) + self._stat_monitor_ref = None async def __post_create__(self): self._cluster_api = await ClusterAPI.create(self.address) + self._stat_monitor_ref = await mo.actor_ref( + uid=StageMonitorActor.default_uid(), address=self.address + ) + + async def _get_stat_monitor_ref(self) -> mo.ActorRefType[StageMonitorActor]: + return await mo.actor_ref(StageMonitorActor.default_uid(), address=self.address) @alru_cache(cache_exceptions=False) async def _get_slot_manager_ref( @@ -366,6 +454,9 @@ async def internal_run_subtask(self, subtask: Subtask, band_name: str): ) try: logger.debug("Preparing data for subtask %s", subtask.subtask_id) + await self._stat_monitor_ref.report_stage( + (subtask.session_id, subtask.subtask_id), SubtaskStage.PREPARE_DATA + ) with Timer() as timer: prepare_data_task = asyncio.create_task( _retry_run( @@ -376,6 +467,7 @@ async def internal_run_subtask(self, subtask: Subtask, band_name: str): band_name, ) ) + await asyncio.wait_for( prepare_data_task, timeout=self._data_prepare_timeout ) @@ -429,19 +521,28 @@ async def _run_subtask_once(): aiotask = None slot_id = None try: + await self._stat_monitor_ref.report_stage( + (subtask.session_id, subtask.subtask_id), SubtaskStage.REQUEST_QUOTA + ) await quota_ref.request_batch_quota(batch_quota_req) self._check_cancelling(subtask_info) - + await self._stat_monitor_ref.report_stage( + (subtask.session_id, subtask.subtask_id), SubtaskStage.ACQUIRE_SLOT + ) slot_id = await slot_manager_ref.acquire_free_slot( (subtask.session_id, subtask.subtask_id) ) subtask_info.slot_id = slot_id self._check_cancelling(subtask_info) + await self._stat_monitor_ref.report_stage( + (subtask.session_id, subtask.subtask_id), SubtaskStage.EXECUTE + ) subtask_info.result.status = SubtaskStatus.running aiotask = asyncio.create_task( subtask_api.run_subtask_in_slot(band_name, slot_id, subtask) ) + return await asyncio.shield(aiotask) except asyncio.CancelledError as ex: try: @@ -502,6 +603,10 @@ async def _run_subtask_once(): await slot_manager_ref.release_free_slot( slot_id, (subtask.session_id, subtask.subtask_id) ) + await self._stat_monitor_ref.report_stage( + (subtask.session_id, subtask.subtask_id), + SubtaskStage.RELEASE_SLOT, + ) logger.debug( "Released slot %d for subtask %s", slot_id, subtask.subtask_id ) @@ -541,6 +646,9 @@ async def run_subtask( logger.debug( "Start to schedule subtask %s on %s.", subtask.subtask_id, self.address ) + + await self._stat_monitor_ref.register_subtask(subtask, supervisor_address) + self._submitted_subtask_count.record(1, {"band": self.address}) with mo.debug.no_message_trace(): task = asyncio.create_task( @@ -564,6 +672,9 @@ async def run_subtask( self._subtask_info.pop(subtask.subtask_id, None) self._finished_subtask_count.record(1, {"band": self.address}) logger.debug("Subtask %s finished with result %s", subtask.subtask_id, result) + await self._stat_monitor_ref.report_stage( + (subtask.session_id, subtask.subtask_id), SubtaskStage.FINISH + ) return result async def cancel_subtask(self, subtask_id: str, kill_timeout: Optional[int] = 5): diff --git a/python/xorbits/_mars/services/scheduling/worker/quota.py b/python/xorbits/_mars/services/scheduling/worker/quota.py index 430afe133..c6ab7853a 100644 
--- a/python/xorbits/_mars/services/scheduling/worker/quota.py +++ b/python/xorbits/_mars/services/scheduling/worker/quota.py @@ -307,9 +307,15 @@ def __init__( self._stat_refresh_task = None self._slot_manager_ref = None + self._stat_monitor_ref = None async def __post_create__(self): await super().__post_create__() + from .execution import StageMonitorActor + + self._stat_monitor_ref = await mo.actor_ref( + uid=StageMonitorActor.default_uid(), address=self.address + ) self._stat_refresh_task = self.ref().update_mem_stats.tell_delay( delay=self._refresh_time ) @@ -332,7 +338,7 @@ async def update_mem_stats(self): """ cur_mem_available = mars_resource.virtual_memory().available if cur_mem_available > self._last_memory_available: - # memory usage reduced: try reallocate existing requests + # memory usage reduced: try to reallocate existing requests await self._process_requests() self._last_memory_available = cur_mem_available self._report_quota_info() diff --git a/python/xorbits/_mars/services/scheduling/worker/service.py b/python/xorbits/_mars/services/scheduling/worker/service.py index a5fad5cc1..0d12fab05 100644 --- a/python/xorbits/_mars/services/scheduling/worker/service.py +++ b/python/xorbits/_mars/services/scheduling/worker/service.py @@ -17,7 +17,11 @@ from ....utils import calc_size_by_str from ...core import AbstractService -from .execution import DEFAULT_SUBTASK_MAX_RETRIES, SubtaskExecutionActor +from .execution import ( + DEFAULT_SUBTASK_MAX_RETRIES, + StageMonitorActor, + SubtaskExecutionActor, +) from .quota import WorkerQuotaManagerActor from .workerslot import WorkerSlotManagerActor @@ -58,6 +62,12 @@ async def start(self): ) data_prepare_timeout = scheduling_config.get("data_prepare_timeout", 600) + await mo.create_actor( + StageMonitorActor, + monitoring_config=scheduling_config.get("stage_monitor", {}), + uid=StageMonitorActor.default_uid(), + address=address, + ) await mo.create_actor( WorkerSlotManagerActor, uid=WorkerSlotManagerActor.default_uid(), @@ -100,3 +110,6 @@ async def stop(self): uid=WorkerSlotManagerActor.default_uid(), address=address ) ) + await mo.destroy_actor( + mo.create_actor_ref(uid=StageMonitorActor.default_uid(), address=address) + ) diff --git a/python/xorbits/_mars/services/scheduling/worker/tests/test_execution.py b/python/xorbits/_mars/services/scheduling/worker/tests/test_execution.py index 53f05f2ce..e174598c7 100644 --- a/python/xorbits/_mars/services/scheduling/worker/tests/test_execution.py +++ b/python/xorbits/_mars/services/scheduling/worker/tests/test_execution.py @@ -54,7 +54,12 @@ from ....task.supervisor.manager import TaskManagerActor from ....task.task_info_collector import TaskInfoCollectorActor from ...supervisor import GlobalResourceManagerActor -from ...worker import BandSlotManagerActor, QuotaActor, SubtaskExecutionActor +from ...worker import ( + BandSlotManagerActor, + QuotaActor, + StageMonitorActor, + SubtaskExecutionActor, +) class CancelDetectActorMixin: @@ -158,7 +163,7 @@ def collect_task_info_enabled(self): @pytest.fixture async def actor_pool(request): - n_slots, enable_kill = request.param + n_slots, enable_kill, enable_stage_check = request.param pool = await create_actor_pool( "127.0.0.1", labels=[None] + ["numa-0"] * n_slots, n_process=n_slots ) @@ -181,7 +186,18 @@ async def actor_pool(request): pool.external_address, storage_handler_cls=MockStorageHandlerActor, ) - + # create monitor actor + monitor_ref = await mo.create_actor( + StageMonitorActor, + monitoring_config={ + "enable_check": True, + 
"execution_timeout": 5, + } + if enable_stage_check + else {}, + uid=StageMonitorActor.default_uid(), + address=pool.external_address, + ) # create assigner actor execution_ref = await mo.create_actor( SubtaskExecutionActor, @@ -230,6 +246,7 @@ async def actor_pool(request): try: yield pool, session_id, meta_api, worker_meta_api, storage_api, execution_ref finally: + await mo.destroy_actor(monitor_ref) await mo.destroy_actor(task_manager_ref) await mo.destroy_actor(band_slot_ref) await mo.destroy_actor(global_resource_ref) @@ -242,7 +259,7 @@ async def actor_pool(request): @pytest.mark.asyncio -@pytest.mark.parametrize("actor_pool", [(1, True)], indirect=True) +@pytest.mark.parametrize("actor_pool", [(1, True, False)], indirect=True) async def test_execute_tensor(actor_pool): pool, session_id, meta_api, worker_meta_api, storage_api, execution_ref = actor_pool @@ -323,7 +340,7 @@ async def test_execute_tensor(actor_pool): @pytest.mark.asyncio @pytest.mark.parametrize( "actor_pool,cancel_phase", - [((1, True), phase) for phase in _cancel_phases], + [((1, True, False), phase) for phase in _cancel_phases], indirect=["actor_pool"], ) async def test_execute_with_cancel(actor_pool, cancel_phase): @@ -427,7 +444,7 @@ def delay_fun(delay, _inp1): @pytest.mark.asyncio -@pytest.mark.parametrize("actor_pool", [(1, True)], indirect=True) +@pytest.mark.parametrize("actor_pool", [(1, True, False)], indirect=True) async def test_execute_with_pure_deps(actor_pool): pool, session_id, meta_api, worker_meta_api, storage_api, execution_ref = actor_pool @@ -508,7 +525,7 @@ def test_estimate_size(): @pytest.mark.asyncio -@pytest.mark.parametrize("actor_pool", [(1, False)], indirect=True) +@pytest.mark.parametrize("actor_pool", [(1, False, False)], indirect=True) async def test_cancel_without_kill(actor_pool): pool, session_id, meta_api, worker_meta_api, storage_api, execution_ref = actor_pool executed_file = os.path.join( @@ -611,3 +628,68 @@ def test_fetch_data_from_both_cpu_and_gpu(data_type, chunked, setup_gpu): pd.testing.assert_frame_equal(expected, actual.execute().fetch(to_cpu=True)) else: pd.testing.assert_series_equal(expected, actual.execute().fetch(to_cpu=True)) + + +@pytest.mark.asyncio +@pytest.mark.parametrize("actor_pool", [(1, True, False)], indirect=True) +async def test_stage_monitor_actor(actor_pool): + pool, session_id, meta_api, worker_meta_api, storage_api, execution_ref = actor_pool + subtask_id = f"test_subtask_{uuid.uuid4()}" + subtask = Subtask( + subtask_id=subtask_id, + session_id=session_id, + task_id=f"test_task_{uuid.uuid4()}", + # chunk_graph=chunk_graph, + ) + + monitor_ref = await mo.actor_ref( + StageMonitorActor.default_uid(), address=pool.external_address + ) + await asyncio.wait_for( + execution_ref.run_subtask(subtask, "numa-0", pool.external_address), timeout=30 + ) + + stale_tasks = await monitor_ref.get_all_stale_tasks() + assert len(stale_tasks) == 0 + + # task has been finished + records = await monitor_ref.get_records() + assert len(records) == 0 + + +@pytest.mark.asyncio +@pytest.mark.parametrize("actor_pool", [(1, True, True)], indirect=True) +async def test_terminate_stale_tasks(actor_pool, caplog): + pool, session_id, meta_api, worker_meta_api, storage_api, execution_ref = actor_pool + + def delay_fun(delay): + time.sleep(delay) + return delay + + remote_result = RemoteFunction( + function=delay_fun, function_args=[10], function_kwargs={} + ).new_chunk([]) + chunk_graph = ChunkGraph([remote_result]) + chunk_graph.add_node(remote_result) + + subtask = Subtask( + 
f"test_subtask_{uuid.uuid4()}", + session_id=session_id, + task_id=f"test_task_{uuid.uuid4()}", + chunk_graph=chunk_graph, + ) + + with Timer() as timer: + aiotask = asyncio.create_task( + execution_ref.run_subtask(subtask, "numa-0", pool.external_address) + ) + + r = await asyncio.wait_for(aiotask, timeout=20) + assert r.status == SubtaskStatus.succeeded + + assert 5 < timer.duration < 20 + + import re + + match = re.search(r"Subtask\[.*?\].*stage.*", caplog.text) + assert match is not None diff --git a/python/xorbits/_mars/services/scheduling/worker/tests/test_quota.py b/python/xorbits/_mars/services/scheduling/worker/tests/test_quota.py index 99a406f91..bd8b6c5e7 100644 --- a/python/xorbits/_mars/services/scheduling/worker/tests/test_quota.py +++ b/python/xorbits/_mars/services/scheduling/worker/tests/test_quota.py @@ -25,6 +25,7 @@ from .....tests.core import mock from .....utils import get_next_port from ...worker import BandSlotManagerActor, MemQuotaActor, QuotaActor +from .. import StageMonitorActor class MockBandSlotManagerActor(mo.Actor): @@ -40,11 +41,17 @@ async def actor_pool(): start_method = ( os.environ.get("POOL_START_METHOD", "fork") if sys.platform != "win32" else None ) + # create monitor actor pool = await create_actor_pool( f"127.0.0.1:{get_next_port()}", n_process=0, subprocess_start_method=start_method, ) + await mo.create_actor( + StageMonitorActor, + uid=StageMonitorActor.default_uid(), + address=pool.external_address, + ) await pool.start() try: yield pool diff --git a/python/xorbits/_mars/services/subtask/__init__.py b/python/xorbits/_mars/services/subtask/__init__.py index 0b6fda518..0f81469c1 100644 --- a/python/xorbits/_mars/services/subtask/__init__.py +++ b/python/xorbits/_mars/services/subtask/__init__.py @@ -14,5 +14,5 @@ # limitations under the License. from .api import MockSubtaskAPI, SubtaskAPI -from .core import Subtask, SubtaskGraph, SubtaskResult, SubtaskStatus +from .core import Subtask, SubtaskGraph, SubtaskResult, SubtaskStage, SubtaskStatus from .errors import SlotOccupiedAlready, SubtaskNotExist diff --git a/python/xorbits/_mars/services/subtask/core.py b/python/xorbits/_mars/services/subtask/core.py index ac562cfbc..9823d39c0 100644 --- a/python/xorbits/_mars/services/subtask/core.py +++ b/python/xorbits/_mars/services/subtask/core.py @@ -36,6 +36,15 @@ from ...typing import BandType, ChunkType +class SubtaskStage(Enum): + PREPARE_DATA = 0 + REQUEST_QUOTA = 1 + ACQUIRE_SLOT = 2 + EXECUTE = 3 + RELEASE_SLOT = 4 + FINISH = 5 + + class SubtaskStatus(Enum): pending = 0 running = 1 diff --git a/python/xorbits/_mars/tensor/base/tile.py b/python/xorbits/_mars/tensor/base/tile.py index ee654ee80..326434b0a 100644 --- a/python/xorbits/_mars/tensor/base/tile.py +++ b/python/xorbits/_mars/tensor/base/tile.py @@ -29,7 +29,7 @@ def tile(A, reps): behavior, promote `A` to d-dimensions manually before calling this function. - If ``A.ndim > d``, `reps` is promoted to `A`.ndim by pre-pending 1's to it. + If ``A.ndim > d``, `reps` is promoted to `A`.ndim by prepending 1's to it. Thus for an `A` of shape (2, 3, 4, 5), a `reps` of (2, 2) is treated as (1, 1, 2, 2). 
diff --git a/python/xorbits/_mars/tensor/core.py b/python/xorbits/_mars/tensor/core.py
index b16e3c03e..c92fe6f62 100644
--- a/python/xorbits/_mars/tensor/core.py
+++ b/python/xorbits/_mars/tensor/core.py
@@ -415,6 +415,9 @@ def imag(self, new_imag):
     def __array__(self, dtype=None):
         return np.asarray(self.to_numpy(), dtype=dtype)
 
+    def tolist(self):
+        return self.to_numpy().tolist()
+
     def __array_function__(self, func, types, args, kwargs):
         from .. import tensor as module
 
diff --git a/python/xorbits/_mars/tensor/statistics/bincount.py b/python/xorbits/_mars/tensor/statistics/bincount.py
index bb25fe03e..fcec8603a 100644
--- a/python/xorbits/_mars/tensor/statistics/bincount.py
+++ b/python/xorbits/_mars/tensor/statistics/bincount.py
@@ -286,8 +286,10 @@ def bincount(x, weights=None, minlength=0, chunk_size_limit=None):
     x = astensor(x)
     weights = astensor(weights) if weights is not None else None
 
-    if not np.issubdtype(x.dtype, np.int_):
-        raise TypeError(f"Cannot cast array data from {x.dtype} to {np.dtype(np.int_)}")
+    if not np.issubdtype(x.dtype, np.int64):
+        raise TypeError(
+            f"Cannot cast array data from {x.dtype} to {np.dtype(np.int64)}"
+        )
     if x.ndim != 1:
         raise ValueError("'x' must be 1 dimension")
     if minlength < 0:
diff --git a/python/xorbits/_mars/tensor/tests/test_core_execution.py b/python/xorbits/_mars/tensor/tests/test_core_execution.py
index 99d803925..0c59a4e81 100644
--- a/python/xorbits/_mars/tensor/tests/test_core_execution.py
+++ b/python/xorbits/_mars/tensor/tests/test_core_execution.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 
 import numpy as np
+import pytest
 
 from .. import (
     add,
@@ -281,3 +282,10 @@ def test_flat(setup):
 
     np.testing.assert_array_equal(b.execute(), npb)
     np.testing.assert_array_equal(a.execute(), npa)
+
+
+@pytest.mark.parametrize("chunk_size", [None, 1, 4])
+def test_tolist(setup, chunk_size):
+    data = np.random.rand(10, 20)
+    a = tensor(data, chunk_size=chunk_size)
+    assert a.tolist() == data.tolist()
diff --git a/python/xorbits/_mars/tensor/utils.py b/python/xorbits/_mars/tensor/utils.py
index ba36abb5b..86bcd483d 100644
--- a/python/xorbits/_mars/tensor/utils.py
+++ b/python/xorbits/_mars/tensor/utils.py
@@ -774,7 +774,7 @@ def fetch_corner_data(tensor, session=None):
     # the tensor must have been executed,
     # thus the size could not be NaN
     if tensor.size > threshold:
-        # two edges for each exis
+        # two edges for each axis
         indices_iter = list(itertools.product(*(range(2) for _ in range(tensor.ndim))))
         corners = np.empty(shape=(2,) * tensor.ndim, dtype=object)
         shape = [0 for _ in range(tensor.ndim)]
diff --git a/python/xorbits/_mars/tests/core.py b/python/xorbits/_mars/tests/core.py
index 4e3c5ed4e..8e2decab9 100644
--- a/python/xorbits/_mars/tests/core.py
+++ b/python/xorbits/_mars/tests/core.py
@@ -320,6 +320,10 @@ def assert_dtype_consistent(expected_dtype, real_dtype):
         expected_dtype, cate_dtypes
     ):
         return
+    if isinstance(real_dtype, pd.ArrowDtype) or isinstance(
+        expected_dtype, pd.ArrowDtype
+    ):
+        return
     if not np.can_cast(real_dtype, expected_dtype) and not np.can_cast(
         expected_dtype, real_dtype
     ):
diff --git a/python/xorbits/_mars/utils.py b/python/xorbits/_mars/utils.py
index 7c54ddd3d..91f00ede4 100644
--- a/python/xorbits/_mars/utils.py
+++ b/python/xorbits/_mars/utils.py
@@ -489,13 +489,16 @@ def calc_data_size(dt: Any, shape: Tuple[int] = None) -> int:
         return 0
 
     if isinstance(dt, tuple):
-        return sum(calc_data_size(c) for c in dt)
+        # int() for Windows CI; otherwise `sum` may return numpy.int32
+        return int(sum(calc_data_size(c) for c in dt))
 
     shape = getattr(dt, "shape", None) or shape
     if isinstance(dt, (pd.DataFrame, pd.Series)):
         return estimate_pandas_size(dt)
     if hasattr(dt, "estimate_size"):
-        return dt.estimate_size()
+        # int() for Windows CI; otherwise numpy.int32 may be returned.
+        # Call estimate_size() only once, as it may be expensive.
+        size = dt.estimate_size()
+        return int(size) if size is not None else None
     if hasattr(dt, "nbytes"):
         return max(sys.getsizeof(dt), dt.nbytes)
     if hasattr(dt, "shape") and len(dt.shape) == 0:
diff --git a/python/xorbits/core/adapter.py b/python/xorbits/core/adapter.py
index bd3d93317..92e1dbdc5 100644
--- a/python/xorbits/core/adapter.py
+++ b/python/xorbits/core/adapter.py
@@ -495,6 +495,12 @@ def collect_cls_members(
 ) -> Dict[str, Any]:
     cls_members: Dict[str, Any] = {}
     for name, cls_member in inspect.getmembers(cls):
+        # A Tileable and its TileableData counterpart may define methods with the
+        # same name but completely different semantics, e.g. Index.copy vs.
+        # IndexData.copy. Once Index's `copy` has been collected, the method of
+        # the same name on IndexData must not be collected again.
+        if cls.__name__.endswith("Data") and name in DATA_MEMBERS[data_type]:  # type: ignore
+            continue
         if inspect.isfunction(cls_member) and not name.startswith("_"):
             cls_members[name] = wrap_mars_callable(
                 cls_member,
diff --git a/python/xorbits/deploy/docker/Dockerfile.base b/python/xorbits/deploy/docker/Dockerfile.base
index b2ba05971..adb1f6008 100644
--- a/python/xorbits/deploy/docker/Dockerfile.base
+++ b/python/xorbits/deploy/docker/Dockerfile.base
@@ -48,9 +48,11 @@ RUN /opt/conda/bin/conda install \
     jaxlib \
     uvloop \
     libnuma \
+  && pip install -U pip \
   && pip install -U \
     xoscar \
     cloudpickle \
+    "azure-storage-blob>=12.18.1" \
     adlfs \
     fsspec>=2022.7.1,!=2022.8.0 \
     s3fs \
diff --git a/python/xorbits/lightgbm/tests/test_classifier.py b/python/xorbits/lightgbm/tests/test_classifier.py
index 2424665f6..47bd17b5c 100644
--- a/python/xorbits/lightgbm/tests/test_classifier.py
+++ b/python/xorbits/lightgbm/tests/test_classifier.py
@@ -156,7 +156,7 @@ def test_local_classifier_from_to_parquet(setup):
         df.iloc[:500].to_parquet(os.path.join(d, "data", "data1.parquet"))
         df.iloc[500:].to_parquet(os.path.join(d, "data", "data2.parquet"))
 
-        df = xpd.read_parquet(data_dir)
+        df = xpd.read_parquet(data_dir, use_arrow_dtype=False)
         model = lgb.LGBMClassifier()
         model.load_model(classifier)
         result = model.predict(df, run=False)
@@ -164,7 +164,12 @@ def test_local_classifier_from_to_parquet(setup):
 
         r.execute()
 
-        ret = xpd.read_parquet(result_dir).to_pandas().iloc[:, 0].to_numpy()
+        ret = (
+            xpd.read_parquet(result_dir, use_arrow_dtype=False)
+            .to_pandas()
+            .iloc[:, 0]
+            .to_numpy()
+        )
         expected = classifier.predict(X)
         expected = np.stack([1 - expected, expected]).argmax(axis=0)
         np.testing.assert_array_equal(ret, expected)
diff --git a/python/xorbits/numpy/numpy_adapters/tests/test_numpy_adapters.py b/python/xorbits/numpy/numpy_adapters/tests/test_numpy_adapters.py
index c03f4ded0..5b62ec55c 100644
--- a/python/xorbits/numpy/numpy_adapters/tests/test_numpy_adapters.py
+++ b/python/xorbits/numpy/numpy_adapters/tests/test_numpy_adapters.py
@@ -13,9 +13,11 @@
 # limitations under the License.
 
 import numpy as np
+import pandas as pd
 import pytest
 
 from .... import numpy as xnp
+from ....
import pandas as xpd @pytest.mark.parametrize( @@ -145,20 +147,6 @@ def test_tensorinv_fallback(setup): assert np.equal(xnp_output.all(), np_output.all()) -def test_ndarray_fallback(setup): - with pytest.warns(Warning) as w: - a = np.array([1, 2, 3]) - b = xnp.array([1, 2, 3]) - xnp_output = b.tolist().fetch() - np_output = a.tolist() - - assert f"Tensor.tolist will fallback to Numpy" == str(w[0].message) - assert isinstance(xnp_output, list) - for i in range(0, len(b)): - assert np_output[i] == xnp_output[i] - assert xnp_output[i] == i + 1 - - def test_busday_offset(setup): with pytest.warns(Warning) as w: xnp_output = xnp.busday_offset("2011-10", 0, roll="forward").execute().fetch() @@ -216,3 +204,17 @@ def test_docstring(): assert docstring is not None and docstring.endswith( "This docstring was copied from numpy.ndarray." ) + + +def test_tensor_tolist(setup): + data = np.random.rand(15, 25) + tensor = xnp.array(data) + assert data.tolist() == tensor.tolist() + + expected = pd.unique(pd.Series([i for i in range(100)])).tolist() + result = xpd.unique(xpd.Series([i for i in range(100)])).tolist() + assert expected == result + + data = np.array([1, 2, 3, 4]) + tensor = xnp.array([1, 2, 3, 4]) + assert data.tolist() == tensor.tolist() diff --git a/python/xorbits/pandas/pandas_adapters/tests/test_pandas_adapters.py b/python/xorbits/pandas/pandas_adapters/tests/test_pandas_adapters.py index cd2ce21f8..14f2f5595 100644 --- a/python/xorbits/pandas/pandas_adapters/tests/test_pandas_adapters.py +++ b/python/xorbits/pandas/pandas_adapters/tests/test_pandas_adapters.py @@ -22,6 +22,7 @@ from .... import pandas as xpd from ....core.data import DataRef +from ....core.execution import need_to_execute def test_pandas_dataframe_methods(setup): @@ -499,3 +500,36 @@ def test_read_pickle(setup): assert (x == y).all() finally: shutil.rmtree(tempdir) + + +def test_copy(setup): + index = xpd.Index([i for i in range(100)], name="test") + index_iloc = index[:20] + assert need_to_execute(index_iloc) is True + repr(index_iloc) + + index_copy = index_iloc.copy() + assert need_to_execute(index_copy) is False + pd.testing.assert_index_equal(index_copy.to_pandas(), index_iloc.to_pandas()) + + index_copy = index_iloc.copy(name="abc") + assert need_to_execute(index_copy) is True + pd.testing.assert_index_equal( + index_copy.to_pandas(), index_iloc.to_pandas().copy(name="abc") + ) + + series = xpd.Series([1, 2, 3, 4, np.nan, 6]) + series = series + 1 + assert need_to_execute(series) is True + repr(series) + + sc = series.copy() + assert need_to_execute(sc) is False + expected = series.to_pandas() + pd.testing.assert_series_equal(sc.to_pandas(), expected) + + sc[0] = np.nan + assert need_to_execute(sc) is True + ec = expected.copy() + ec[0] = np.nan + pd.testing.assert_series_equal(sc.to_pandas(), ec) diff --git a/python/xorbits/sklearn/__init__.py b/python/xorbits/sklearn/__init__.py new file mode 100644 index 000000000..9c9f2b596 --- /dev/null +++ b/python/xorbits/sklearn/__init__.py @@ -0,0 +1,31 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + + +def _install(): + """Nothing required for installing sklearn.""" + + +__all__ = [ + "cluster", + "datasets", + "decomposition", + "ensemble", + "linear_model", + "metrics", + "model_selection", + "neighbors", + "preprocessing", + "semi_supervised", +] diff --git a/python/xorbits/sklearn/cluster/__init__.py b/python/xorbits/sklearn/cluster/__init__.py new file mode 100644 index 000000000..da13e6baa --- /dev/null +++ b/python/xorbits/sklearn/cluster/__init__.py @@ -0,0 +1,49 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from ...core.utils.fallback import unimplemented_func + + +def _install(): + """Nothing required for installing sklearn.""" + + +def __dir__(): # pragma: no cover + try: + import sklearn + except ImportError: + raise AttributeError("sklearn is required but not installed.") + from .mars_adapters import MARS_SKLEARN_CLUSTER_CALLABLES + + return list(MARS_SKLEARN_CLUSTER_CALLABLES.keys()) + + +def __getattr__(name: str): # pragma: no cover + import inspect + + try: + import sklearn.cluster as sk_cluster + except ImportError: + raise AttributeError("sklearn is required but not installed.") + from .mars_adapters import MARS_SKLEARN_CLUSTER_CALLABLES + + if name in MARS_SKLEARN_CLUSTER_CALLABLES: + return MARS_SKLEARN_CLUSTER_CALLABLES[name] + else: + if not hasattr(sk_cluster, name): + raise AttributeError(name) + else: + if inspect.ismethod(getattr(sk_cluster, name)): + return unimplemented_func() + else: + raise AttributeError diff --git a/python/xorbits/sklearn/cluster/mars_adapters/__init__.py b/python/xorbits/sklearn/cluster/mars_adapters/__init__.py new file mode 100644 index 000000000..e9aabb1cc --- /dev/null +++ b/python/xorbits/sklearn/cluster/mars_adapters/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .core import MARS_SKLEARN_CLUSTER_CALLABLES diff --git a/python/xorbits/sklearn/cluster/mars_adapters/core.py b/python/xorbits/sklearn/cluster/mars_adapters/core.py new file mode 100644 index 000000000..cdd3f302e --- /dev/null +++ b/python/xorbits/sklearn/cluster/mars_adapters/core.py @@ -0,0 +1,35 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sklearn.cluster as sk_cluster + +from ...._mars.learn import cluster as mars_cluster +from ...._mars.learn.cluster import KMeans as MarsKMeans +from ....core.utils.docstring import attach_module_callable_docstring +from ...utils import SKLearnBase, _collect_module_callables, _install_cls_members + + +class KMeans(SKLearnBase): + _marscls = MarsKMeans + + +SKLEARN_CLUSTER_CLS_MAP = {KMeans: MarsKMeans} + +MARS_SKLEARN_CLUSTER_CALLABLES = _collect_module_callables( + mars_cluster, sk_cluster, skip_members=["register_op"] +) +_install_cls_members( + SKLEARN_CLUSTER_CLS_MAP, MARS_SKLEARN_CLUSTER_CALLABLES, sk_cluster +) +attach_module_callable_docstring(KMeans, sk_cluster, sk_cluster.KMeans) diff --git a/python/xorbits/sklearn/cluster/tests/__init__.py b/python/xorbits/sklearn/cluster/tests/__init__.py new file mode 100644 index 000000000..37f6558d9 --- /dev/null +++ b/python/xorbits/sklearn/cluster/tests/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/python/xorbits/sklearn/cluster/tests/test_core.py b/python/xorbits/sklearn/cluster/tests/test_core.py new file mode 100644 index 000000000..c2a10ec61 --- /dev/null +++ b/python/xorbits/sklearn/cluster/tests/test_core.py @@ -0,0 +1,57 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +try: + import sklearn +except ImportError: # pragma: no cover + sklearn = None + +import numpy as np +import pytest + +from .... import numpy as xnp +from .. import KMeans + +n_rows = 1000 +n_clusters = 8 +n_columns = 10 +chunk_size = 200 +rs = xnp.random.RandomState(0) +X = rs.rand(n_rows, n_columns, chunk_size=chunk_size) +X_new = rs.rand(n_rows, n_columns, chunk_size=chunk_size) + + +@pytest.mark.skipif(sklearn is None, reason="scikit-learn not installed") +def test_doc(): + docstring = KMeans.__doc__ + assert docstring is not None and docstring.endswith( + "This docstring was copied from sklearn.cluster." 
+    )
+
+    docstring = KMeans.fit.__doc__
+    assert docstring is not None and docstring.endswith(
+        "This docstring was copied from sklearn.cluster._kmeans.KMeans."
+    )
+
+
+@pytest.mark.skipif(sklearn is None, reason="scikit-learn not installed")
+def test_kmeans_cluster():
+    kms = KMeans(n_clusters=n_clusters, random_state=0)
+    kms.fit(X)
+    predict = kms.predict(X_new).fetch()
+
+    assert kms.n_clusters == n_clusters
+    assert np.shape(kms.labels_.fetch()) == (n_rows,)
+    assert np.shape(kms.cluster_centers_.fetch()) == (n_clusters, n_columns)
+    assert np.shape(predict) == (n_rows,)
diff --git a/python/xorbits/sklearn/datasets/__init__.py b/python/xorbits/sklearn/datasets/__init__.py
new file mode 100644
index 000000000..accf8fbcb
--- /dev/null
+++ b/python/xorbits/sklearn/datasets/__init__.py
@@ -0,0 +1,48 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def _install():
+    """Nothing required for installing sklearn."""
+
+
+def __dir__():  # pragma: no cover
+    try:
+        import sklearn
+    except ImportError:
+        raise AttributeError("sklearn is required but not installed.")
+    from .mars_adapters import MARS_SKLEARN_DATASETS_CALLABLES
+
+    return list(MARS_SKLEARN_DATASETS_CALLABLES.keys())
+
+
+def __getattr__(name: str):  # pragma: no cover
+    import inspect
+
+    try:
+        import sklearn.datasets as sk_datasets
+    except ImportError:
+        raise AttributeError("sklearn is required but not installed.")
+    from .mars_adapters import MARS_SKLEARN_DATASETS_CALLABLES
+
+    if name in MARS_SKLEARN_DATASETS_CALLABLES:
+        return MARS_SKLEARN_DATASETS_CALLABLES[name]
+    else:
+        if not hasattr(sk_datasets, name):
+            raise AttributeError(name)
+        else:
+            if inspect.ismethod(getattr(sk_datasets, name)):
+                raise NotImplementedError("This function is not implemented yet.")
+            else:
+                raise AttributeError
diff --git a/python/xorbits/sklearn/datasets/mars_adapters/__init__.py b/python/xorbits/sklearn/datasets/mars_adapters/__init__.py
new file mode 100644
index 000000000..050a5f86b
--- /dev/null
+++ b/python/xorbits/sklearn/datasets/mars_adapters/__init__.py
@@ -0,0 +1,14 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .core import MARS_SKLEARN_DATASETS_CALLABLES
diff --git a/python/xorbits/sklearn/datasets/mars_adapters/core.py b/python/xorbits/sklearn/datasets/mars_adapters/core.py
new file mode 100644
index 000000000..a312031b5
--- /dev/null
+++ b/python/xorbits/sklearn/datasets/mars_adapters/core.py
@@ -0,0 +1,22 @@
+# Copyright 2022-2023 XProbe Inc.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sklearn.datasets as sk_datasets + +from ...._mars.learn import datasets as mars_datasets +from ...utils import _collect_module_callables + +MARS_SKLEARN_DATASETS_CALLABLES = _collect_module_callables( + mars_datasets, sk_datasets, skip_members=["register_op"] +) diff --git a/python/xorbits/sklearn/datasets/tests/__init__.py b/python/xorbits/sklearn/datasets/tests/__init__.py new file mode 100644 index 000000000..37f6558d9 --- /dev/null +++ b/python/xorbits/sklearn/datasets/tests/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/python/xorbits/sklearn/datasets/tests/test_core.py b/python/xorbits/sklearn/datasets/tests/test_core.py new file mode 100644 index 000000000..a7a06f71b --- /dev/null +++ b/python/xorbits/sklearn/datasets/tests/test_core.py @@ -0,0 +1,131 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +try: + import sklearn +except ImportError: # pragma: no cover + sklearn = None + +import pytest + +import xorbits.numpy as np + +from ... import datasets +from ...datasets import ( + make_blobs, + make_classification, + make_low_rank_matrix, + make_regression, +) + + +@pytest.mark.skipif(sklearn is None, reason="scikit-learn not installed") +def test_doc(): + docstring = datasets.make_blobs.__doc__ + assert docstring is not None and docstring.endswith( + "This docstring was copied from sklearn.datasets." + ) + + docstring = datasets.make_classification.__doc__ + assert docstring is not None and docstring.endswith( + "This docstring was copied from sklearn.datasets." + ) + + docstring = datasets.make_low_rank_matrix.__doc__ + assert docstring is not None and docstring.endswith( + "This docstring was copied from sklearn.datasets." + ) + + docstring = datasets.make_regression.__doc__ + assert docstring is not None and docstring.endswith( + "This docstring was copied from sklearn.datasets." 
+    )
+
+
+@pytest.mark.skipif(sklearn is None, reason="scikit-learn not installed")
+def test_make_classification():
+    weights = [0.1, 0.25]
+    X, y = make_classification(
+        n_samples=100,
+        n_features=20,
+        n_informative=5,
+        n_redundant=1,
+        n_repeated=1,
+        n_classes=3,
+        n_clusters_per_class=1,
+        hypercube=False,
+        shift=None,
+        scale=None,
+        weights=weights,
+        random_state=0,
+        flip_y=-1,
+    )
+    X, y = X.execute().fetch(), y.execute().fetch()
+    assert X.shape == (100, 20)
+    assert y.shape == (100,)
+    assert np.unique(y).shape == (3,)
+    assert (y == 0).sum() == 10
+    assert (y == 1).sum() == 25
+    assert (y == 2).sum() == 65
+
+
+@pytest.mark.skipif(sklearn is None, reason="scikit-learn not installed")
+def test_make_regression():
+    X, y, c = make_regression(
+        n_samples=100,
+        n_features=10,
+        n_informative=3,
+        effective_rank=5,
+        coef=True,
+        bias=0.0,
+        noise=1.0,
+        random_state=0,
+    )
+    X, y, c = X.execute().fetch(), y.execute().fetch(), c.execute().fetch()
+    assert X.shape == (100, 10), "X shape mismatch"
+    assert y.shape == (100,), "y shape mismatch"
+    assert c.shape == (10,), "coef shape mismatch"
+    assert sum(c != 0.0) == 3, "Unexpected number of informative features"
+
+
+@pytest.mark.skipif(sklearn is None, reason="scikit-learn not installed")
+def test_make_blobs():
+    cluster_stds = np.array([0.05, 0.2, 0.4])
+    cluster_centers = np.array([[0.0, 0.0], [1.0, 1.0], [0.0, 1.0]])
+    X, y = make_blobs(
+        random_state=0,
+        n_samples=50,
+        n_features=2,
+        centers=cluster_centers,
+        cluster_std=cluster_stds,
+    )
+    X, y = X.execute().fetch(), y.execute().fetch()
+    assert X.shape == (50, 2)
+    assert y.shape == (50,)
+    assert np.unique(y).shape == (3,)
+
+
+@pytest.mark.skipif(sklearn is None, reason="scikit-learn not installed")
+def test_make_low_rank_matrix():
+    X = make_low_rank_matrix(
+        n_samples=50,
+        n_features=25,
+        effective_rank=5,
+        tail_strength=0.01,
+        random_state=0,
+    )
+    X = X.execute().fetch()
+    assert X.shape == (50, 25)
+    _, s, _ = np.linalg.svd(X)
+    s = s.execute().fetch()
+    assert (s.sum() - 5) < 0.1
diff --git a/python/xorbits/sklearn/decomposition/__init__.py b/python/xorbits/sklearn/decomposition/__init__.py
new file mode 100644
index 000000000..a54d6b392
--- /dev/null
+++ b/python/xorbits/sklearn/decomposition/__init__.py
@@ -0,0 +1,49 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
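+
+# Attribute access on this module is resolved lazily via the PEP 562
+# `__getattr__`/`__dir__` hooks defined below: names with Mars-backed
+# implementations are served from MARS_SKLEARN_DECOMP_CALLABLES, while any
+# other public name is looked up on plain sklearn.decomposition and either
+# reported as unimplemented or rejected with an AttributeError.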
+from ...core.utils.fallback import unimplemented_func + + +def _install(): + """Nothing required for installing sklearn.""" + + +def __dir__(): # pragma: no cover + try: + import sklearn + except ImportError: + raise AttributeError("sklearn is required but not installed.") + from .mars_adapters import MARS_SKLEARN_DECOMP_CALLABLES + + return list(MARS_SKLEARN_DECOMP_CALLABLES.keys()) + + +def __getattr__(name: str): # pragma: no cover + import inspect + + try: + import sklearn.decomposition as sk_decomp + except ImportError: + raise AttributeError("sklearn is required but not installed.") + from .mars_adapters import MARS_SKLEARN_DECOMP_CALLABLES + + if name in MARS_SKLEARN_DECOMP_CALLABLES: + return MARS_SKLEARN_DECOMP_CALLABLES[name] + else: + if not hasattr(sk_decomp, name): + raise AttributeError(name) + else: + if inspect.ismethod(getattr(sk_decomp, name)): + return unimplemented_func() + else: + raise AttributeError diff --git a/python/xorbits/sklearn/decomposition/mars_adapters/__init__.py b/python/xorbits/sklearn/decomposition/mars_adapters/__init__.py new file mode 100644 index 000000000..8c9727dc3 --- /dev/null +++ b/python/xorbits/sklearn/decomposition/mars_adapters/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .core import MARS_SKLEARN_DECOMP_CALLABLES diff --git a/python/xorbits/sklearn/decomposition/mars_adapters/core.py b/python/xorbits/sklearn/decomposition/mars_adapters/core.py new file mode 100644 index 000000000..49f3242ed --- /dev/null +++ b/python/xorbits/sklearn/decomposition/mars_adapters/core.py @@ -0,0 +1,43 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
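+
+# A minimal usage sketch for the wrappers assembled below (illustrative only,
+# mirroring tests/test_core.py; assumes scikit-learn is installed):
+#
+#     from xorbits.sklearn.decomposition import PCA
+#     pca = PCA(n_components=2, svd_solver="full")
+#     pca.fit(X)                            # X: array-like or xorbits tensor
+#     X_reduced = pca.transform(X).fetch()  # results are lazy until fetched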
+ +import sklearn.decomposition as sk_decomposition + +from ...._mars.learn import decomposition as mars_decomposition +from ...._mars.learn.decomposition import PCA as MarsPCA +from ...._mars.learn.decomposition import TruncatedSVD as MarsTruncatedSVD +from ....core.utils.docstring import attach_module_callable_docstring +from ...utils import SKLearnBase, _collect_module_callables, _install_cls_members + + +class PCA(SKLearnBase): + _marscls = MarsPCA + + +class TruncatedSVD(SKLearnBase): + _marscls = MarsTruncatedSVD + + +SKLEARN_DECOMP_CLS_MAP = {PCA: MarsPCA, TruncatedSVD: MarsTruncatedSVD} + +MARS_SKLEARN_DECOMP_CALLABLES = _collect_module_callables( + mars_decomposition, sk_decomposition, skip_members=["register_op"] +) +_install_cls_members( + SKLEARN_DECOMP_CLS_MAP, MARS_SKLEARN_DECOMP_CALLABLES, sk_decomposition +) +attach_module_callable_docstring(PCA, sk_decomposition, sk_decomposition.PCA) +attach_module_callable_docstring( + TruncatedSVD, sk_decomposition, sk_decomposition.TruncatedSVD +) diff --git a/python/xorbits/sklearn/decomposition/tests/__init__.py b/python/xorbits/sklearn/decomposition/tests/__init__.py new file mode 100644 index 000000000..37f6558d9 --- /dev/null +++ b/python/xorbits/sklearn/decomposition/tests/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/python/xorbits/sklearn/decomposition/tests/test_core.py b/python/xorbits/sklearn/decomposition/tests/test_core.py new file mode 100644 index 000000000..361b7cd3d --- /dev/null +++ b/python/xorbits/sklearn/decomposition/tests/test_core.py @@ -0,0 +1,87 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +try: + import sklearn +except ImportError: # pragma: no cover + sklearn = None + +import numpy as np +import pytest +import scipy.sparse as sp +from numpy.testing import assert_array_almost_equal, assert_equal +from sklearn import datasets +from sklearn.utils import check_random_state + +from .. import PCA, TruncatedSVD + +iris = np.asarray(datasets.load_iris().data) + + +@pytest.mark.skipif(sklearn is None, reason="scikit-learn not installed") +def test_doc(): + docstring = PCA.__doc__ + assert docstring is not None and docstring.endswith( + "This docstring was copied from sklearn.decomposition." + ) + + docstring = PCA.fit.__doc__ + assert docstring is not None and docstring.endswith( + "This docstring was copied from sklearn.decomposition._pca.PCA." 
+    )
+
+    docstring = TruncatedSVD.__doc__
+    assert docstring is not None and docstring.endswith(
+        "This docstring was copied from sklearn.decomposition."
+    )
+
+    docstring = TruncatedSVD.fit.__doc__
+    assert docstring is not None and docstring.endswith(
+        "This docstring was copied from sklearn.decomposition._truncated_svd.TruncatedSVD."
+    )
+
+
+@pytest.mark.skipif(sklearn is None, reason="scikit-learn not installed")
+def test_pca():
+    X = iris
+
+    for n_comp in np.arange(X.shape[1]):
+        pca = PCA(n_components=n_comp, svd_solver="full")
+        pca.fit(X)
+        X_r = pca.transform(X).fetch()
+        assert_equal(X_r.shape[1], n_comp)
+
+        X_r2 = pca.fit_transform(X).fetch()
+        assert_array_almost_equal(X_r, X_r2)
+
+        X_r = pca.transform(X).fetch()
+        X_r2 = pca.fit_transform(X).fetch()
+        assert_array_almost_equal(X_r, X_r2)
+
+        # Test get_covariance and get_precision
+        cov = pca.get_covariance()
+        precision = pca.get_precision()
+        assert_array_almost_equal(np.dot(cov, precision), np.eye(X.shape[1]), 12)
+
+
+@pytest.mark.skipif(sklearn is None, reason="scikit-learn not installed")
+def test_truncated_svd():
+    shape = 60, 55
+    n_samples, n_features = shape
+    rng = check_random_state(42)
+    X = rng.randint(-100, 20, np.prod(shape)).reshape(shape)
+    X = sp.csr_matrix(np.maximum(X, 0), dtype=np.float64)
+    for n_components in (10, 25, 41):
+        tsvd = TruncatedSVD(n_components).fit(X)
+        assert tsvd.n_components == n_components
+        assert tsvd.components_.shape == (n_components, n_features)
diff --git a/python/xorbits/sklearn/ensemble/__init__.py b/python/xorbits/sklearn/ensemble/__init__.py
new file mode 100644
index 000000000..92ad66397
--- /dev/null
+++ b/python/xorbits/sklearn/ensemble/__init__.py
@@ -0,0 +1,49 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from ...core.utils.fallback import unimplemented_func
+
+
+def _install():
+    """Nothing required for installing sklearn."""
+
+
+def __dir__():  # pragma: no cover
+    try:
+        import sklearn
+    except ImportError:
+        raise AttributeError("sklearn is required but not installed.")
+    from .mars_adapters import MARS_SKLEARN_EN_CALLABLES
+
+    return list(MARS_SKLEARN_EN_CALLABLES.keys())
+
+
+def __getattr__(name: str):  # pragma: no cover
+    import inspect
+
+    try:
+        import sklearn.ensemble as sk_en
+    except ImportError:
+        raise AttributeError("sklearn is required but not installed.")
+    from .mars_adapters import MARS_SKLEARN_EN_CALLABLES
+
+    if name in MARS_SKLEARN_EN_CALLABLES:
+        return MARS_SKLEARN_EN_CALLABLES[name]
+    else:
+        if not hasattr(sk_en, name):
+            raise AttributeError(name)
+        else:
+            if inspect.ismethod(getattr(sk_en, name)):
+                return unimplemented_func()
+            else:
+                raise AttributeError
diff --git a/python/xorbits/sklearn/ensemble/mars_adapters/__init__.py b/python/xorbits/sklearn/ensemble/mars_adapters/__init__.py
new file mode 100644
index 000000000..8b02cbb39
--- /dev/null
+++ b/python/xorbits/sklearn/ensemble/mars_adapters/__init__.py
@@ -0,0 +1,14 @@
+# Copyright 2022-2023 XProbe Inc.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .core import MARS_SKLEARN_EN_CALLABLES diff --git a/python/xorbits/sklearn/ensemble/mars_adapters/core.py b/python/xorbits/sklearn/ensemble/mars_adapters/core.py new file mode 100644 index 000000000..d7025e5a3 --- /dev/null +++ b/python/xorbits/sklearn/ensemble/mars_adapters/core.py @@ -0,0 +1,49 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sklearn.ensemble as sk_en + +from ...._mars.learn import ensemble as mars_en +from ...._mars.learn.ensemble import BaggingClassifier as MarsBaggingClassifier +from ...._mars.learn.ensemble import BaggingRegressor as MarsBaggingRegressor +from ...._mars.learn.ensemble import IsolationForest as MarsIsolationForest +from ....core.utils.docstring import attach_module_callable_docstring +from ...utils import SKLearnBase, _collect_module_callables, _install_cls_members + + +class BaggingClassifier(SKLearnBase): + _marscls = MarsBaggingClassifier + + +class BaggingRegressor(SKLearnBase): + _marscls = MarsBaggingRegressor + + +class IsolationForest(SKLearnBase): + _marscls = MarsIsolationForest + + +SKLEARN_EN_CLS_MAP = { + BaggingClassifier: MarsBaggingClassifier, + IsolationForest: MarsIsolationForest, + BaggingRegressor: MarsBaggingRegressor, +} + +MARS_SKLEARN_EN_CALLABLES = _collect_module_callables( + mars_en, sk_en, skip_members=["register_op"] +) +_install_cls_members(SKLEARN_EN_CLS_MAP, MARS_SKLEARN_EN_CALLABLES, sk_en) +attach_module_callable_docstring(BaggingClassifier, sk_en, sk_en.BaggingClassifier) +attach_module_callable_docstring(BaggingRegressor, sk_en, sk_en.BaggingRegressor) +attach_module_callable_docstring(IsolationForest, sk_en, sk_en.IsolationForest) diff --git a/python/xorbits/sklearn/ensemble/tests/__init__.py b/python/xorbits/sklearn/ensemble/tests/__init__.py new file mode 100644 index 000000000..37f6558d9 --- /dev/null +++ b/python/xorbits/sklearn/ensemble/tests/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/python/xorbits/sklearn/ensemble/tests/test_core.py b/python/xorbits/sklearn/ensemble/tests/test_core.py new file mode 100644 index 000000000..07cdfc537 --- /dev/null +++ b/python/xorbits/sklearn/ensemble/tests/test_core.py @@ -0,0 +1,124 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +try: + import sklearn +except ImportError: # pragma: no cover + sklearn = None + +import numpy as np +import pytest +from sklearn.linear_model import LinearRegression +from sklearn.svm import SVC + +from ...datasets import make_classification, make_regression +from ...ensemble import BaggingClassifier, BaggingRegressor, IsolationForest + + +@pytest.mark.skipif(sklearn is None, reason="scikit-learn not installed") +def test_doc(): + docstring = BaggingClassifier.__doc__ + assert docstring is not None and docstring.endswith( + "This docstring was copied from sklearn.ensemble." + ) + + docstring = BaggingRegressor.__doc__ + assert docstring is not None and docstring.endswith( + "This docstring was copied from sklearn.ensemble." + ) + + docstring = IsolationForest.__doc__ + assert docstring is not None and docstring.endswith( + "This docstring was copied from sklearn.ensemble." + ) + + docstring = BaggingClassifier.fit.__doc__ + assert docstring is not None and docstring.endswith( + "This docstring was copied from sklearn.ensemble._bagging.BaggingClassifier." + ) + + docstring = BaggingRegressor.fit.__doc__ + assert docstring is not None and docstring.endswith( + "This docstring was copied from sklearn.ensemble._bagging.BaggingRegressor." + ) + + docstring = IsolationForest.fit.__doc__ + assert docstring is not None and docstring.endswith( + "This docstring was copied from sklearn.ensemble._iforest.IsolationForest." 
+    )
+
+
+@pytest.mark.skipif(sklearn is None, reason="scikit-learn not installed")
+def test_baggingclassifier():
+    rs = np.random.RandomState(0)
+
+    raw_x, raw_y = make_classification(
+        n_samples=100,
+        n_features=4,
+        n_informative=2,
+        n_redundant=0,
+        random_state=rs,
+        shuffle=False,
+    )
+
+    clf = BaggingClassifier(
+        base_estimator=SVC(),
+        n_estimators=10,
+        max_samples=10,
+        max_features=1,
+        random_state=rs,
+        warm_start=True,
+    )
+
+    clf.fit(raw_x, raw_y)
+    log_proba = clf.predict_log_proba(raw_x)
+    log_proba = log_proba.fetch()
+    exp_log_proba_array = np.exp(log_proba)
+    assert clf.n_estimators == 10
+    assert np.all((exp_log_proba_array >= 0) & (exp_log_proba_array <= 1))
+    assert np.allclose(np.sum(exp_log_proba_array, axis=1), 1.0)
+
+
+@pytest.mark.skipif(sklearn is None, reason="scikit-learn not installed")
+def test_bagging_regression():
+    rs = np.random.RandomState(0)
+
+    raw_x, raw_y = make_regression(
+        n_samples=100, n_features=4, n_informative=2, random_state=rs, shuffle=False
+    )
+    clf = BaggingRegressor(
+        base_estimator=LinearRegression(),
+        n_estimators=10,
+        max_samples=10,
+        max_features=0.5,
+        random_state=rs,
+        warm_start=True,
+    )
+    clf.fit(raw_x, raw_y)
+
+    predict_y = clf.predict(raw_x)
+    predict_y_array = predict_y.fetch()
+    assert predict_y_array.shape == raw_y.shape
+
+
+@pytest.mark.skipif(sklearn is None, reason="scikit-learn not installed")
+def test_iforest():
+    rs = np.random.RandomState(0)
+    raw_train = rs.poisson(size=(100, 10))
+    raw_test = rs.poisson(size=(200, 10))
+
+    clf = IsolationForest(random_state=rs, n_estimators=10, max_samples=1)
+    pred = clf.fit(raw_train).predict(raw_test).fetch()
+    score = clf.score_samples(raw_test).fetch()
+
+    assert clf.n_estimators == 10
+    assert pred.shape == (200,)
+    assert score.shape == (200,)
diff --git a/python/xorbits/sklearn/linear_model/__init__.py b/python/xorbits/sklearn/linear_model/__init__.py
new file mode 100644
index 000000000..1011a91b5
--- /dev/null
+++ b/python/xorbits/sklearn/linear_model/__init__.py
@@ -0,0 +1,49 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
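+
+# A minimal usage sketch for the estimators exposed by this package
+# (illustrative only, mirroring tests/test_core.py; assumes scikit-learn is
+# installed):
+#
+#     from xorbits.sklearn.linear_model import LinearRegression
+#     lr = LinearRegression()
+#     lr.fit(X, y)                # X: (n_rows, n_columns), y: (n_rows,)
+#     preds = lr.predict(X_new)   # lazy; materialize with preds.fetch()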
+from ...core.utils.fallback import unimplemented_func + + +def _install(): + """Nothing required for installing sklearn.""" + + +def __dir__(): # pragma: no cover + try: + import sklearn + except ImportError: + raise AttributeError("sklearn is required but not installed.") + from .mars_adapters import MARS_SKLEARN_LM_CALLABLES + + return list(MARS_SKLEARN_LM_CALLABLES.keys()) + + +def __getattr__(name: str): # pragma: no cover + import inspect + + try: + import sklearn.linear_model as sk_lm + except ImportError: + raise AttributeError("sklearn is required but not installed.") + from .mars_adapters import MARS_SKLEARN_LM_CALLABLES + + if name in MARS_SKLEARN_LM_CALLABLES: + return MARS_SKLEARN_LM_CALLABLES[name] + else: + if not hasattr(sk_lm, name): + raise AttributeError(name) + else: + if inspect.ismethod(getattr(sk_lm, name)): + return unimplemented_func() + else: + raise AttributeError diff --git a/python/xorbits/sklearn/linear_model/mars_adapters/__init__.py b/python/xorbits/sklearn/linear_model/mars_adapters/__init__.py new file mode 100644 index 000000000..dc3dfcca7 --- /dev/null +++ b/python/xorbits/sklearn/linear_model/mars_adapters/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .core import MARS_SKLEARN_LM_CALLABLES diff --git a/python/xorbits/sklearn/linear_model/mars_adapters/core.py b/python/xorbits/sklearn/linear_model/mars_adapters/core.py new file mode 100644 index 000000000..7be6ff8e3 --- /dev/null +++ b/python/xorbits/sklearn/linear_model/mars_adapters/core.py @@ -0,0 +1,42 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
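+
+# The classes below are thin shims: each SKLearnBase subclass only pins
+# `_marscls` to its Mars counterpart; `_install_cls_members` then attaches the
+# Mars-backed methods to the shim, and `attach_module_callable_docstring`
+# borrows the corresponding docstrings from sklearn.linear_model.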
+ +import sklearn.linear_model as sk_lm + +from ...._mars.learn import linear_model as mars_lm +from ...._mars.learn.glm import LogisticRegression as MarsLogisticRegression +from ...._mars.learn.linear_model import LinearRegression as MarsLinearRegression +from ....core.utils.docstring import attach_module_callable_docstring +from ...utils import SKLearnBase, _collect_module_callables, _install_cls_members + + +class LinearRegression(SKLearnBase): + _marscls = MarsLinearRegression + + +class LogisticRegression(SKLearnBase): + _marscls = MarsLogisticRegression + + +SKLEARN_LM_CLS_MAP = { + LinearRegression: MarsLinearRegression, + LogisticRegression: MarsLogisticRegression, +} + +MARS_SKLEARN_LM_CALLABLES = _collect_module_callables( + mars_lm, sk_lm, skip_members=["register_op"] +) +_install_cls_members(SKLEARN_LM_CLS_MAP, MARS_SKLEARN_LM_CALLABLES, sk_lm) +attach_module_callable_docstring(LinearRegression, sk_lm, sk_lm.LinearRegression) +attach_module_callable_docstring(LogisticRegression, sk_lm, sk_lm.LogisticRegression) diff --git a/python/xorbits/sklearn/linear_model/tests/__init__.py b/python/xorbits/sklearn/linear_model/tests/__init__.py new file mode 100644 index 000000000..37f6558d9 --- /dev/null +++ b/python/xorbits/sklearn/linear_model/tests/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/python/xorbits/sklearn/linear_model/tests/test_core.py b/python/xorbits/sklearn/linear_model/tests/test_core.py new file mode 100644 index 000000000..4e002dc89 --- /dev/null +++ b/python/xorbits/sklearn/linear_model/tests/test_core.py @@ -0,0 +1,73 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +try: + import sklearn +except ImportError: # pragma: no cover + sklearn = None + +import numpy as np +import pytest + +from .. import LinearRegression, LogisticRegression + +n_rows = 100 +n_columns = 5 +X = np.random.rand(n_rows, n_columns) +y = np.random.rand(n_rows) +y_cat = np.random.randint(0, 2, n_rows) +X_new = np.random.rand(n_rows, n_columns) + + +@pytest.mark.skipif(sklearn is None, reason="scikit-learn not installed") +def test_doc(): + docstring = LogisticRegression.__doc__ + assert docstring is not None and docstring.endswith( + "This docstring was copied from sklearn.linear_model." + ) + + docstring = LogisticRegression.fit.__doc__ + assert docstring is not None and docstring.endswith( + "This docstring was copied from sklearn.linear_model._logistic.LogisticRegression." 
+ ) + + docstring = LinearRegression.__doc__ + assert docstring is not None and docstring.endswith( + "This docstring was copied from sklearn.linear_model." + ) + + docstring = LinearRegression.fit.__doc__ + assert docstring is not None and docstring.endswith( + "This docstring was copied from sklearn.linear_model._base.LinearRegression." + ) + + +@pytest.mark.skipif(sklearn is None, reason="scikit-learn not installed") +def test_linear_regression(): + lr = LinearRegression() + lr.fit(X, y) + predict = lr.predict(X_new) + + assert np.shape(lr.coef_.fetch()) == (n_columns,) + assert np.shape(lr.intercept_.fetch()) == () + assert np.shape(predict) == (n_rows,) + + +@pytest.mark.skipif(sklearn is None, reason="scikit-learn not installed") +def test_logistic_regression(): + lr = LogisticRegression(max_iter=1) + lr.fit(X, y_cat) + predict = lr.predict(X_new).fetch() + + assert np.shape(predict) == (n_rows,) diff --git a/python/xorbits/sklearn/metrics/__init__.py b/python/xorbits/sklearn/metrics/__init__.py new file mode 100644 index 000000000..c0365ddd5 --- /dev/null +++ b/python/xorbits/sklearn/metrics/__init__.py @@ -0,0 +1,49 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from ...core.utils.fallback import unimplemented_func + + +def _install(): + """Nothing required for installing sklearn.""" + + +def __dir__(): # pragma: no cover + try: + import sklearn + except ImportError: + raise AttributeError("sklearn is required but not installed.") + from .mars_adapters import MARS_SKLEARN_METRICS_CALLABLES + + return list(MARS_SKLEARN_METRICS_CALLABLES.keys()) + + +def __getattr__(name: str): # pragma: no cover + import inspect + + try: + import sklearn.metrics as sk_metrics + except ImportError: + raise AttributeError("sklearn is required but not installed.") + from .mars_adapters import MARS_SKLEARN_METRICS_CALLABLES + + if name in MARS_SKLEARN_METRICS_CALLABLES: + return MARS_SKLEARN_METRICS_CALLABLES[name] + else: + if not hasattr(sk_metrics, name): + raise AttributeError(name) + else: + if inspect.ismethod(getattr(sk_metrics, name)): + return unimplemented_func() + else: + raise AttributeError diff --git a/python/xorbits/sklearn/metrics/mars_adapters/__init__.py b/python/xorbits/sklearn/metrics/mars_adapters/__init__.py new file mode 100644 index 000000000..d1e23cf5e --- /dev/null +++ b/python/xorbits/sklearn/metrics/mars_adapters/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
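+# Re-export the callable map so that `xorbits.sklearn.metrics` can resolve
+# Mars-backed metric functions lazily through its module-level `__getattr__`.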
+from .core import MARS_SKLEARN_METRICS_CALLABLES diff --git a/python/xorbits/sklearn/metrics/mars_adapters/core.py b/python/xorbits/sklearn/metrics/mars_adapters/core.py new file mode 100644 index 000000000..b0fa6a862 --- /dev/null +++ b/python/xorbits/sklearn/metrics/mars_adapters/core.py @@ -0,0 +1,22 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sklearn.metrics as sk_metrics + +from ...._mars.learn import metrics as mars_metrics +from ...utils import _collect_module_callables + +MARS_SKLEARN_METRICS_CALLABLES = _collect_module_callables( + mars_metrics, sk_metrics, skip_members=["register_op"] +) diff --git a/python/xorbits/sklearn/metrics/tests/__init__.py b/python/xorbits/sklearn/metrics/tests/__init__.py new file mode 100644 index 000000000..37f6558d9 --- /dev/null +++ b/python/xorbits/sklearn/metrics/tests/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/python/xorbits/sklearn/metrics/tests/test_core.py b/python/xorbits/sklearn/metrics/tests/test_core.py new file mode 100644 index 000000000..347b05aee --- /dev/null +++ b/python/xorbits/sklearn/metrics/tests/test_core.py @@ -0,0 +1,142 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +try: + import sklearn +except ImportError: # pragma: no cover + sklearn = None + +import inspect + +import numpy as np +import pytest + +from ... 
import metrics + + +@pytest.mark.skipif(sklearn is None, reason="scikit-learn not installed") +def test_doc(): + for name, f in inspect.getmembers(metrics, inspect.isfunction): + if name.startswith("_"): + continue + docstring = f.__doc__ + assert docstring is not None + + +@pytest.mark.skipif(sklearn is None, reason="scikit-learn not installed") +def test_classification(): + from sklearn.metrics import f1_score as sklearn_f1_score + from sklearn.metrics import fbeta_score as sklearn_fbeta_score + from sklearn.metrics import ( + multilabel_confusion_matrix as sklearn_multilabel_confusion_matrix, + ) + from sklearn.metrics import ( + precision_recall_fscore_support as sklearn_precision_recall_fscore_support, + ) + from sklearn.metrics import precision_score as sklearn_precision_score + from sklearn.metrics import recall_score as sklearn_recall_score + + from ...metrics import ( + f1_score, + fbeta_score, + multilabel_confusion_matrix, + precision_recall_fscore_support, + precision_score, + recall_score, + ) + + y_true = np.array([0, 1, 2, 0, 1, 2], dtype=np.int64) + y_pred = np.array([0, 2, 1, 0, 0, 1], dtype=np.int64) + + np.testing.assert_array_almost_equal( + f1_score(y_true, y_pred, average="macro").execute().fetch(), + sklearn_f1_score(y_true, y_pred, average="macro"), + ) + np.testing.assert_array_almost_equal( + fbeta_score(y_true, y_pred, beta=0.5, average="macro").execute().fetch(), + sklearn_fbeta_score(y_true, y_pred, beta=0.5, average="macro"), + ) + + np.testing.assert_array_almost_equal( + precision_score(y_true, y_pred, average="macro").execute().fetch(), + sklearn_precision_score(y_true, y_pred, average="macro"), + ) + + np.testing.assert_array_almost_equal( + recall_score(y_true, y_pred, average="macro").execute().fetch(), + sklearn_recall_score(y_true, y_pred, average="macro"), + ) + + np.testing.assert_array_almost_equal( + multilabel_confusion_matrix(y_true, y_pred).execute().fetch(), + sklearn_multilabel_confusion_matrix(y_true, y_pred), + ) + + np.testing.assert_array_almost_equal( + precision_recall_fscore_support(y_true, y_pred)[0].execute().fetch(), + sklearn_precision_recall_fscore_support(y_true, y_pred)[0], + ) + + +@pytest.mark.skipif(sklearn is None, reason="scikit-learn not installed") +def test_scorer(): + from sklearn.metrics import r2_score + + from ...metrics import get_scorer + + assert get_scorer("r2") is not None + assert get_scorer(r2_score) is not None + + +@pytest.mark.skipif(sklearn is None, reason="scikit-learn not installed") +def test_r2_score(): + from ...metrics import r2_score + + y_true = np.array([[1, 0, 0, 1], [0, 1, 1, 1], [1, 1, 0, 1]]) + y_pred = np.array([[0, 0, 0, 1], [1, 0, 1, 1], [0, 0, 0, 1]]) + + error = r2_score(y_true, y_pred, multioutput="variance_weighted") + np.testing.assert_almost_equal(error.fetch(), 1.0 - 5.0 / 2) + + +@pytest.mark.skipif(sklearn is None, reason="scikit-learn not installed") +def test_ranking(): + from sklearn.metrics import accuracy_score as sklearn_accuracy_score + from sklearn.metrics import auc as sklearn_auc + from sklearn.metrics import roc_curve as sklearn_roc_curve + from sklearn.metrics.tests.test_ranking import make_prediction + + from ...metrics import accuracy_score, auc, roc_auc_score, roc_curve + + y_true, y_score, _ = make_prediction(binary=True) + + np.testing.assert_almost_equal( + accuracy_score(y_true, y_score).fetch(), + sklearn_accuracy_score(y_true, y_score), + ) + rs = np.random.RandomState(0) + y = rs.randint(0, 10, (10,)) + pred = rs.rand(10) + fpr, tpr, thresholds = roc_curve(y, 
pred, pos_label=2)
+    m = auc(fpr, tpr)
+
+    sk_fpr, sk_tpr, sk_threshold = sklearn_roc_curve(
+        y,
+        pred,
+        pos_label=2,
+    )
+    expect_m = sklearn_auc(sk_fpr, sk_tpr)
+    assert pytest.approx(m.fetch()) == expect_m
+    y_true = np.array([0, 0, 1, 1], dtype=np.int64)
+    assert roc_auc_score(y_true, y_true, max_fpr=1) == 1
diff --git a/python/xorbits/sklearn/model_selection/__init__.py b/python/xorbits/sklearn/model_selection/__init__.py
new file mode 100644
index 000000000..3b18a2c03
--- /dev/null
+++ b/python/xorbits/sklearn/model_selection/__init__.py
@@ -0,0 +1,49 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from ...core.utils.fallback import unimplemented_func
+
+
+def _install():
+    """Nothing required for installing sklearn."""
+
+
+def __dir__():  # pragma: no cover
+    try:
+        import sklearn
+    except ImportError:
+        raise AttributeError("sklearn is required but not installed.")
+    from .mars_adapters import MARS_SKLEARN_ML_CALLABLES
+
+    return list(MARS_SKLEARN_ML_CALLABLES.keys())
+
+
+def __getattr__(name: str):  # pragma: no cover
+    import inspect
+
+    try:
+        import sklearn.model_selection as sk_ml
+    except ImportError:
+        raise AttributeError("sklearn is required but not installed.")
+    from .mars_adapters import MARS_SKLEARN_ML_CALLABLES
+
+    if name in MARS_SKLEARN_ML_CALLABLES:
+        return MARS_SKLEARN_ML_CALLABLES[name]
+    else:
+        if not hasattr(sk_ml, name):
+            raise AttributeError(name)
+        else:
+            if inspect.ismethod(getattr(sk_ml, name)):
+                return unimplemented_func()
+            else:
+                raise AttributeError
diff --git a/python/xorbits/sklearn/model_selection/mars_adapters/__init__.py b/python/xorbits/sklearn/model_selection/mars_adapters/__init__.py
new file mode 100644
index 000000000..9a8b3c370
--- /dev/null
+++ b/python/xorbits/sklearn/model_selection/mars_adapters/__init__.py
@@ -0,0 +1,14 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .core import MARS_SKLEARN_ML_CALLABLES
diff --git a/python/xorbits/sklearn/model_selection/mars_adapters/core.py b/python/xorbits/sklearn/model_selection/mars_adapters/core.py
new file mode 100644
index 000000000..7f7869fbc
--- /dev/null
+++ b/python/xorbits/sklearn/model_selection/mars_adapters/core.py
@@ -0,0 +1,51 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sklearn.model_selection as sk_ml + +from ...._mars.learn import model_selection as mars_ml +from ...._mars.learn.model_selection import KFold as MarsKFold +from ...._mars.learn.model_selection import ParameterGrid as MarsParameterGrid +from ....core.utils.docstring import attach_module_callable_docstring +from ...utils import SKLearnBase, _collect_module_callables, _install_cls_members + + +class KFold(SKLearnBase): + _marscls = MarsKFold + + +class ParameterGrid(SKLearnBase): + _marscls = MarsParameterGrid + + def __len__(self): + return len(self.mars_instance) + + def __iter__(self): + return iter(self.mars_instance) + + def __getitem__(self, index): + return self.mars_instance[index] + + +SKLEARN_ML_CLS_MAP = { + KFold: MarsKFold, + ParameterGrid: MarsParameterGrid, +} + +MARS_SKLEARN_ML_CALLABLES = _collect_module_callables( + mars_ml, sk_ml, skip_members=["register_op"] +) +_install_cls_members(SKLEARN_ML_CLS_MAP, MARS_SKLEARN_ML_CALLABLES, sk_ml) +attach_module_callable_docstring(KFold, sk_ml, sk_ml.KFold) +attach_module_callable_docstring(ParameterGrid, sk_ml, sk_ml.ParameterGrid) diff --git a/python/xorbits/sklearn/model_selection/tests/__init__.py b/python/xorbits/sklearn/model_selection/tests/__init__.py new file mode 100644 index 000000000..37f6558d9 --- /dev/null +++ b/python/xorbits/sklearn/model_selection/tests/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/python/xorbits/sklearn/model_selection/tests/test_core.py b/python/xorbits/sklearn/model_selection/tests/test_core.py new file mode 100644 index 000000000..e9cd89328 --- /dev/null +++ b/python/xorbits/sklearn/model_selection/tests/test_core.py @@ -0,0 +1,66 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
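+# The tests below exercise the wrapped classes end to end and are skipped
+# when scikit-learn is absent, mirroring the optional-dependency handling
+# of the package itself.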
+try: + import sklearn +except ImportError: # pragma: no cover + sklearn = None + +from typing import Iterable, Sized + +import numpy as np +import pytest + +from ...model_selection import KFold, ParameterGrid, train_test_split + + +@pytest.mark.skipif(sklearn is None, reason="scikit-learn not installed") +def test_doc(): + docstring = KFold.__doc__ + assert docstring is not None and docstring.endswith( + "This docstring was copied from sklearn.model_selection." + ) + + docstring = ParameterGrid.__doc__ + assert docstring is not None and docstring.endswith( + "This docstring was copied from sklearn.model_selection." + ) + + +@pytest.mark.skipif(sklearn is None, reason="scikit-learn not installed") +def test_parameter_grid(): + arr1 = [1, 2, 3] + params1 = {"foo": arr1} + grid1 = ParameterGrid(params1) + assert isinstance(grid1, Iterable) + assert isinstance(grid1, Sized) + assert len(grid1) == 3 + + +@pytest.mark.skipif(sklearn is None, reason="scikit-learn not installed") +def test_kfold(): + X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]]) + kf = KFold(n_splits=2) + splits = kf.get_n_splits(X) + assert splits == 2 + + +@pytest.mark.skipif(sklearn is None, reason="scikit-learn not installed") +def test_train_test_split(): + X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]]) + y = np.array([1, 2, 3, 4]) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33) + assert X_train.shape == (2, 2) + assert X_test.shape == (2, 2) + assert y_train.shape == (2,) + assert y_test.shape == (2,) diff --git a/python/xorbits/sklearn/neighbors/__init__.py b/python/xorbits/sklearn/neighbors/__init__.py new file mode 100644 index 000000000..07bd6edea --- /dev/null +++ b/python/xorbits/sklearn/neighbors/__init__.py @@ -0,0 +1,49 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
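+# As in the sibling sub-packages, public names resolve lazily via
+# module-level __getattr__/__dir__ (PEP 562): names backed by a Mars
+# implementation come from MARS_SKLEARN_NEIGHBORS_CALLABLES, unknown names
+# raise AttributeError, and names that exist in sklearn but have no Mars
+# counterpart either map to unimplemented_func or raise AttributeError.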
+from ...core.utils.fallback import unimplemented_func + + +def _install(): + """Nothing required for installing sklearn.""" + + +def __dir__(): # pragma: no cover + try: + import sklearn + except ImportError: + raise AttributeError("sklearn is required but not installed.") + from .mars_adapters import MARS_SKLEARN_NEIGHBORS_CALLABLES + + return list(MARS_SKLEARN_NEIGHBORS_CALLABLES.keys()) + + +def __getattr__(name: str): # pragma: no cover + import inspect + + try: + import sklearn.neighbors as sk_neigh + except ImportError: + raise AttributeError("sklearn is required but not installed.") + from .mars_adapters import MARS_SKLEARN_NEIGHBORS_CALLABLES + + if name in MARS_SKLEARN_NEIGHBORS_CALLABLES: + return MARS_SKLEARN_NEIGHBORS_CALLABLES[name] + else: + if not hasattr(sk_neigh, name): + raise AttributeError(name) + else: + if inspect.ismethod(getattr(sk_neigh, name)): + return unimplemented_func() + else: + raise AttributeError diff --git a/python/xorbits/sklearn/neighbors/mars_adapters/__init__.py b/python/xorbits/sklearn/neighbors/mars_adapters/__init__.py new file mode 100644 index 000000000..4c58c1f1f --- /dev/null +++ b/python/xorbits/sklearn/neighbors/mars_adapters/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .core import MARS_SKLEARN_NEIGHBORS_CALLABLES diff --git a/python/xorbits/sklearn/neighbors/mars_adapters/core.py b/python/xorbits/sklearn/neighbors/mars_adapters/core.py new file mode 100644 index 000000000..bfa906478 --- /dev/null +++ b/python/xorbits/sklearn/neighbors/mars_adapters/core.py @@ -0,0 +1,39 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
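+# Adapter construction: NearestNeighbors is a thin SKLearnBase wrapper
+# whose public methods are bound from the Mars estimator by
+# _install_cls_members, with docstrings copied from scikit-learn via
+# attach_module_callable_docstring.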
+ +import sklearn.neighbors as sk_neighbors + +from ...._mars.learn import neighbors as mars_neighbors +from ...._mars.learn.neighbors import NearestNeighbors as MarsNearestNeighbors +from ....core.utils.docstring import attach_module_callable_docstring +from ...utils import SKLearnBase, _collect_module_callables, _install_cls_members + + +class NearestNeighbors(SKLearnBase): + _marscls = MarsNearestNeighbors + + +SKLEARN_NEIGHBORS_CLS_MAP = { + NearestNeighbors: MarsNearestNeighbors, +} + +MARS_SKLEARN_NEIGHBORS_CALLABLES = _collect_module_callables( + mars_neighbors, sk_neighbors, skip_members=["register_op"] +) +_install_cls_members( + SKLEARN_NEIGHBORS_CLS_MAP, MARS_SKLEARN_NEIGHBORS_CALLABLES, sk_neighbors +) +attach_module_callable_docstring( + NearestNeighbors, sk_neighbors, sk_neighbors.NearestNeighbors +) diff --git a/python/xorbits/sklearn/neighbors/tests/__init__.py b/python/xorbits/sklearn/neighbors/tests/__init__.py new file mode 100644 index 000000000..37f6558d9 --- /dev/null +++ b/python/xorbits/sklearn/neighbors/tests/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/python/xorbits/sklearn/neighbors/tests/test_core.py b/python/xorbits/sklearn/neighbors/tests/test_core.py new file mode 100644 index 000000000..990931fff --- /dev/null +++ b/python/xorbits/sklearn/neighbors/tests/test_core.py @@ -0,0 +1,34 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +try: + import sklearn +except ImportError: # pragma: no cover + sklearn = None + +import pytest + +from ...neighbors import NearestNeighbors + + +@pytest.mark.skipif(sklearn is None, reason="scikit-learn not installed") +def test_doc(): + docstring = NearestNeighbors.__doc__ + assert docstring is not None and docstring.endswith( + "This docstring was copied from sklearn.neighbors." + ) + + docstring = NearestNeighbors.fit.__doc__ + assert docstring is not None and docstring.endswith( + "This docstring was copied from sklearn.neighbors._unsupervised.NearestNeighbors." + ) diff --git a/python/xorbits/sklearn/preprocessing/__init__.py b/python/xorbits/sklearn/preprocessing/__init__.py new file mode 100644 index 000000000..bf05574d2 --- /dev/null +++ b/python/xorbits/sklearn/preprocessing/__init__.py @@ -0,0 +1,49 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from ...core.utils.fallback import unimplemented_func + + +def _install(): + """Nothing required for installing sklearn.""" + + +def __dir__(): # pragma: no cover + try: + import sklearn + except ImportError: + raise AttributeError("sklearn is required but not installed.") + from .mars_adapters import MARS_SKLEARN_PREPROC_CALLABLES + + return list(MARS_SKLEARN_PREPROC_CALLABLES.keys()) + + +def __getattr__(name: str): # pragma: no cover + import inspect + + try: + import sklearn.preprocessing as sk_preproc + except ImportError: + raise AttributeError("sklearn is required but not installed.") + from .mars_adapters import MARS_SKLEARN_PREPROC_CALLABLES + + if name in MARS_SKLEARN_PREPROC_CALLABLES: + return MARS_SKLEARN_PREPROC_CALLABLES[name] + else: + if not hasattr(sk_preproc, name): + raise AttributeError(name) + else: + if inspect.ismethod(getattr(sk_preproc, name)): + return unimplemented_func() + else: + raise AttributeError diff --git a/python/xorbits/sklearn/preprocessing/mars_adapters/__init__.py b/python/xorbits/sklearn/preprocessing/mars_adapters/__init__.py new file mode 100644 index 000000000..ecfb158b7 --- /dev/null +++ b/python/xorbits/sklearn/preprocessing/mars_adapters/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .core import MARS_SKLEARN_PREPROC_CALLABLES diff --git a/python/xorbits/sklearn/preprocessing/mars_adapters/core.py b/python/xorbits/sklearn/preprocessing/mars_adapters/core.py new file mode 100644 index 000000000..b71799039 --- /dev/null +++ b/python/xorbits/sklearn/preprocessing/mars_adapters/core.py @@ -0,0 +1,51 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
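+# Same wrapping recipe as the neighbors adapter, applied to three
+# estimators: MinMaxScaler, LabelBinarizer and LabelEncoder each delegate
+# to their Mars counterpart and carry the matching scikit-learn docstring.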
+ +import sklearn.preprocessing as sk_preproc + +from ...._mars.learn import preprocessing as mars_preproc +from ...._mars.learn.preprocessing import LabelBinarizer as MarsLabelBinarizer +from ...._mars.learn.preprocessing import LabelEncoder as MarsLabelEncoder +from ...._mars.learn.preprocessing import MinMaxScaler as MarsMinMaxScaler +from ....core.utils.docstring import attach_module_callable_docstring +from ...utils import SKLearnBase, _collect_module_callables, _install_cls_members + + +class MinMaxScaler(SKLearnBase): + _marscls = MarsMinMaxScaler + + +class LabelBinarizer(SKLearnBase): + _marscls = MarsLabelBinarizer + + +class LabelEncoder(SKLearnBase): + _marscls = MarsLabelEncoder + + +SKLEARN_PREPROC_CLS_MAP = { + MinMaxScaler: MarsMinMaxScaler, + LabelEncoder: MarsLabelEncoder, + LabelBinarizer: MarsLabelBinarizer, +} + +MARS_SKLEARN_PREPROC_CALLABLES = _collect_module_callables( + mars_preproc, sk_preproc, skip_members=["register_op"] +) +_install_cls_members( + SKLEARN_PREPROC_CLS_MAP, MARS_SKLEARN_PREPROC_CALLABLES, sk_preproc +) +attach_module_callable_docstring(MinMaxScaler, sk_preproc, sk_preproc.MinMaxScaler) +attach_module_callable_docstring(LabelBinarizer, sk_preproc, sk_preproc.LabelBinarizer) +attach_module_callable_docstring(LabelEncoder, sk_preproc, sk_preproc.LabelEncoder) diff --git a/python/xorbits/sklearn/preprocessing/tests/__init__.py b/python/xorbits/sklearn/preprocessing/tests/__init__.py new file mode 100644 index 000000000..37f6558d9 --- /dev/null +++ b/python/xorbits/sklearn/preprocessing/tests/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/python/xorbits/sklearn/preprocessing/tests/test_core.py b/python/xorbits/sklearn/preprocessing/tests/test_core.py new file mode 100644 index 000000000..68f4cf3e3 --- /dev/null +++ b/python/xorbits/sklearn/preprocessing/tests/test_core.py @@ -0,0 +1,82 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +try: + import sklearn +except ImportError: # pragma: no cover + sklearn = None + +import numpy as np +import pytest + +from ...preprocessing import LabelBinarizer, LabelEncoder, MinMaxScaler + + +@pytest.mark.skipif(sklearn is None, reason="scikit-learn not installed") +def test_doc(): + docstring = MinMaxScaler.__doc__ + assert docstring is not None and docstring.endswith( + "This docstring was copied from sklearn.preprocessing." 
+ ) + + docstring = LabelBinarizer.__doc__ + assert docstring is not None and docstring.endswith( + "This docstring was copied from sklearn.preprocessing." + ) + + docstring = LabelEncoder.__doc__ + assert docstring is not None and docstring.endswith( + "This docstring was copied from sklearn.preprocessing." + ) + + docstring = MinMaxScaler.fit.__doc__ + assert docstring is not None and docstring.endswith( + "This docstring was copied from sklearn.preprocessing._data.MinMaxScaler." + ) + + docstring = LabelBinarizer.fit.__doc__ + assert docstring is not None and docstring.endswith( + "This docstring was copied from sklearn.preprocessing._label.LabelBinarizer." + ) + + docstring = LabelEncoder.fit.__doc__ + assert docstring is not None and docstring.endswith( + "This docstring was copied from sklearn.preprocessing._label.LabelEncoder." + ) + + +@pytest.mark.skipif(sklearn is None, reason="scikit-learn not installed") +def test_min_max_scaler(): + X = np.array([[1, 2], [2, 4], [4, 8], [8, 16]], dtype=np.float64) + scaler = MinMaxScaler() + scaler.fit(X) + np.testing.assert_array_equal(scaler.data_min_, [1.0, 2.0]) + np.testing.assert_array_equal(scaler.data_max_, [8.0, 16.0]) + np.testing.assert_array_equal(scaler.data_range_, [7.0, 14.0]) + + X_transformed = scaler.transform(X).fetch() + assert X_transformed.shape == (4, 2) + + +@pytest.mark.skipif(sklearn is None, reason="scikit-learn not installed") +def test_label_binarizer(): + lb = LabelBinarizer() + lb.fit([1, 2, 6, 4, 2]) + assert lb.classes_.tolist() == [1, 2, 4, 6] + + +@pytest.mark.skipif(sklearn is None, reason="scikit-learn not installed") +def test_label_encoder(): + le = LabelEncoder() + le.fit([1, 2, 2, 6]) + assert le.classes_.tolist() == [1, 2, 6] diff --git a/python/xorbits/sklearn/semi_supervised/__init__.py b/python/xorbits/sklearn/semi_supervised/__init__.py new file mode 100644 index 000000000..e4d2a1aca --- /dev/null +++ b/python/xorbits/sklearn/semi_supervised/__init__.py @@ -0,0 +1,49 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
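+# Same lazy-dispatch boilerplate as the metrics, model_selection, neighbors
+# and preprocessing packages, here backed by sklearn.semi_supervised.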
+from ...core.utils.fallback import unimplemented_func + + +def _install(): + """Nothing required for installing sklearn.""" + + +def __dir__(): # pragma: no cover + try: + import sklearn + except ImportError: + raise AttributeError("sklearn is required but not installed.") + from .mars_adapters import MARS_SKLEARN_SS_CALLABLES + + return list(MARS_SKLEARN_SS_CALLABLES.keys()) + + +def __getattr__(name: str): # pragma: no cover + import inspect + + try: + import sklearn.semi_supervised as sk_ss + except ImportError: + raise AttributeError("sklearn is required but not installed.") + from .mars_adapters import MARS_SKLEARN_SS_CALLABLES + + if name in MARS_SKLEARN_SS_CALLABLES: + return MARS_SKLEARN_SS_CALLABLES[name] + else: + if not hasattr(sk_ss, name): + raise AttributeError(name) + else: + if inspect.ismethod(getattr(sk_ss, name)): + return unimplemented_func() + else: + raise AttributeError diff --git a/python/xorbits/sklearn/semi_supervised/mars_adapters/__init__.py b/python/xorbits/sklearn/semi_supervised/mars_adapters/__init__.py new file mode 100644 index 000000000..b53765590 --- /dev/null +++ b/python/xorbits/sklearn/semi_supervised/mars_adapters/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .core import MARS_SKLEARN_SS_CALLABLES diff --git a/python/xorbits/sklearn/semi_supervised/mars_adapters/core.py b/python/xorbits/sklearn/semi_supervised/mars_adapters/core.py new file mode 100644 index 000000000..9f144f49b --- /dev/null +++ b/python/xorbits/sklearn/semi_supervised/mars_adapters/core.py @@ -0,0 +1,35 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
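+# LabelPropagation is the only semi-supervised estimator wrapped so far;
+# module-level functions are still collected via _collect_module_callables
+# as in the other adapters.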
+ +import sklearn.semi_supervised as sk_ss + +from ...._mars.learn import semi_supervised as mars_ss +from ...._mars.learn.semi_supervised import LabelPropagation as MarsLabelPropagation +from ....core.utils.docstring import attach_module_callable_docstring +from ...utils import SKLearnBase, _collect_module_callables, _install_cls_members + + +class LabelPropagation(SKLearnBase): + _marscls = MarsLabelPropagation + + +SKLEARN_SS_CLS_MAP = { + LabelPropagation: MarsLabelPropagation, +} + +MARS_SKLEARN_SS_CALLABLES = _collect_module_callables( + mars_ss, sk_ss, skip_members=["register_op"] +) +_install_cls_members(SKLEARN_SS_CLS_MAP, MARS_SKLEARN_SS_CALLABLES, sk_ss) +attach_module_callable_docstring(LabelPropagation, sk_ss, sk_ss.LabelPropagation) diff --git a/python/xorbits/sklearn/semi_supervised/tests/__init__.py b/python/xorbits/sklearn/semi_supervised/tests/__init__.py new file mode 100644 index 000000000..37f6558d9 --- /dev/null +++ b/python/xorbits/sklearn/semi_supervised/tests/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/python/xorbits/sklearn/semi_supervised/tests/test_core.py b/python/xorbits/sklearn/semi_supervised/tests/test_core.py new file mode 100644 index 000000000..3739664cc --- /dev/null +++ b/python/xorbits/sklearn/semi_supervised/tests/test_core.py @@ -0,0 +1,48 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +try: + import sklearn +except ImportError: # pragma: no cover + sklearn = None + +import numpy as np +import pytest + +from ...semi_supervised import LabelPropagation + + +@pytest.mark.skipif(sklearn is None, reason="scikit-learn not installed") +def test_doc(): + docstring = LabelPropagation.__doc__ + assert docstring is not None and docstring.endswith( + "This docstring was copied from sklearn.semi_supervised." + ) + + docstring = LabelPropagation.fit.__doc__ + assert docstring is not None and docstring.endswith( + "This docstring was copied from sklearn.semi_supervised._label_propagation.LabelPropagation." 
+ ) + + +@pytest.mark.skipif(sklearn is None, reason="scikit-learn not installed") +def test_label_propagation(): + rng = np.random.RandomState(0) + X = rng.rand(10, 5) + y = np.array([0, 0, 0, 1, 1, -1, -1, -1, -1, -1]) + lp = LabelPropagation() + lp.fit(X, y) + assert lp.classes_.tolist() == [0, 1] + assert lp.transduction_.tolist() == [0, 0, 0, 1, 1, 0, 0, 0, 0, 0] + assert lp.predict(X).tolist() == [0, 0, 0, 1, 1, 0, 0, 0, 0, 0] + assert lp.score(X, y) == 0.5 diff --git a/python/xorbits/sklearn/utils.py b/python/xorbits/sklearn/utils.py new file mode 100644 index 000000000..ac2834e86 --- /dev/null +++ b/python/xorbits/sklearn/utils.py @@ -0,0 +1,72 @@ +# Copyright 2022-2023 XProbe Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import functools +import inspect +from typing import Callable, Dict, List, Optional + +from ..core.adapter import to_mars, wrap_mars_callable + + +class SKLearnBase: + def __init__(self, *args, **kwargs): + self.mars_instance = self._marscls(*to_mars(args), **to_mars(kwargs)) + + def __getattr__(self, name): + return getattr(self.mars_instance, name) + + +def wrap_cls_func(marscls: Callable, name: str, submodule): + @functools.wraps(getattr(marscls, name)) + def wrapped(self, *args, **kwargs): + return getattr(self.mars_instance, name)(*args, **kwargs) + + return wrap_mars_callable( + wrapped, + member_name=name, + attach_docstring=True, + is_cls_member=True, + docstring_src_module=submodule, + docstring_src_cls=getattr(submodule, marscls.__name__, None), + ) + + +def _collect_module_callables( + mars_module, + orig_module, + skip_members: Optional[List[str]] = None, +) -> Dict[str, Callable]: + module_callables: Dict[str, Callable] = dict() + + for name, func in inspect.getmembers(mars_module, inspect.isfunction): + if skip_members is not None and name in skip_members: + continue + module_callables[name] = wrap_mars_callable( + func, + attach_docstring=True, + is_cls_member=False, + docstring_src_module=orig_module, + docstring_src=getattr(orig_module, name, None), + ) + return module_callables + + +def _install_cls_members( + module_cls_map, module_callables: Dict[str, Callable], orig_submodule +): + for x_cls, mars_cls in module_cls_map.items(): + module_callables[x_cls.__name__] = x_cls + for name, _ in inspect.getmembers(mars_cls, inspect.isfunction): + if not name.startswith("_"): + setattr(x_cls, name, wrap_cls_func(mars_cls, name, orig_submodule))
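
Taken together, these files expose a subset of the scikit-learn API under
xorbits.sklearn, with estimators executing deferred on Mars and results
materialized through fetch(). A minimal usage sketch in the spirit of the
tests above (illustrative only; it assumes a default local session and an
installed scikit-learn):

    import numpy as np

    from xorbits.sklearn.model_selection import train_test_split
    from xorbits.sklearn.preprocessing import MinMaxScaler

    X = np.arange(20, dtype=np.float64).reshape(10, 2)
    y = np.arange(10)

    # Wrapped callables accept NumPy inputs; arguments are converted with
    # to_mars() before being handed to the Mars implementation.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    scaler = MinMaxScaler()  # delegates to the Mars MinMaxScaler
    scaler.fit(X_train)
    scaled = scaler.transform(X_train)
    print(scaled.fetch())  # triggers execution and pulls the result locally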