diff --git a/python/xorbits/_mars/dataframe/datastore/tests/test_datastore.py b/python/xorbits/_mars/dataframe/datastore/tests/test_datastore.py index 3d392e3f7..c3db910f7 100644 --- a/python/xorbits/_mars/dataframe/datastore/tests/test_datastore.py +++ b/python/xorbits/_mars/dataframe/datastore/tests/test_datastore.py @@ -17,12 +17,14 @@ import pandas as pd from ....core import tile +from ....tests.core import support_cuda from ... import DataFrame -def test_to_csv(): +@support_cuda +def test_to_csv(setup_gpu, gpu): raw = pd.DataFrame(np.random.rand(10, 5)) - df = DataFrame(raw, chunk_size=4) + df = DataFrame(raw, gpu=gpu, chunk_size=4) r = df.to_csv("*.csv") r = tile(r) diff --git a/python/xorbits/_mars/dataframe/datastore/tests/test_datastore_execution.py b/python/xorbits/_mars/dataframe/datastore/tests/test_datastore_execution.py index 485fa252b..725203bbc 100644 --- a/python/xorbits/_mars/dataframe/datastore/tests/test_datastore_execution.py +++ b/python/xorbits/_mars/dataframe/datastore/tests/test_datastore_execution.py @@ -38,12 +38,13 @@ fastparquet = None from .... import dataframe as md -from ....tests.core import flaky +from ....tests.core import flaky, support_cuda from ... import DataFrame from ...utils import PD_VERSION_GREATER_THAN_2_10 -def test_to_csv_execution(setup): +@support_cuda +def test_to_csv_execution(setup, setup_gpu, gpu): index = pd.RangeIndex(100, 0, -1, name="index") raw = pd.DataFrame( { @@ -53,7 +54,7 @@ def test_to_csv_execution(setup): }, index=index, ) - df = DataFrame(raw, chunk_size=33) + df = DataFrame(raw, gpu=gpu, chunk_size=33) with tempfile.TemporaryDirectory() as base_path: # DATAFRAME TESTS @@ -82,7 +83,7 @@ def test_to_csv_execution(setup): pd.testing.assert_frame_equal(dfs[1].set_index("index"), raw.iloc[33:66]) # test df with unknown shape - df2 = DataFrame(raw, chunk_size=(50, 2)) + df2 = DataFrame(raw, gpu=gpu, chunk_size=(50, 2)) df2 = df2[df2["col1"] < 1] path2 = os.path.join(base_path, "out2.csv") df2.to_csv(path2).execute() @@ -92,32 +93,34 @@ def test_to_csv_execution(setup): pd.testing.assert_frame_equal(result, raw) # SERIES TESTS - series = md.Series(raw.col1, chunk_size=33) - - # test one file with series - path = os.path.join(base_path, "out.csv") - series.to_csv(path).execute() - - result = pd.read_csv(path, dtype=raw.dtypes.to_dict()) - result.set_index("index", inplace=True) - pd.testing.assert_frame_equal(result, raw.col1.to_frame()) - - # test multi files with series - path = os.path.join(base_path, "out-*.csv") - series.to_csv(path).execute() - - dfs = [ - pd.read_csv( - os.path.join(base_path, f"out-{i}.csv"), dtype=raw.dtypes.to_dict() + # cudf series not support to_csv + if gpu == False: + series = md.Series(raw.col1, chunk_size=33) + + # test one file with series + path = os.path.join(base_path, "out.csv") + series.to_csv(path).execute() + + result = pd.read_csv(path, dtype=raw.dtypes.to_dict()) + result.set_index("index", inplace=True) + pd.testing.assert_frame_equal(result, raw.col1.to_frame()) + + # test multi files with series + path = os.path.join(base_path, "out-*.csv") + series.to_csv(path).execute() + + dfs = [ + pd.read_csv( + os.path.join(base_path, f"out-{i}.csv"), dtype=raw.dtypes.to_dict() + ) + for i in range(4) + ] + result = pd.concat(dfs, axis=0) + result.set_index("index", inplace=True) + pd.testing.assert_frame_equal(result, raw.col1.to_frame()) + pd.testing.assert_frame_equal( + dfs[1].set_index("index"), raw.col1.to_frame().iloc[33:66] ) - for i in range(4) - ] - result = pd.concat(dfs, axis=0) - result.set_index("index", inplace=True) - pd.testing.assert_frame_equal(result, raw.col1.to_frame()) - pd.testing.assert_frame_equal( - dfs[1].set_index("index"), raw.col1.to_frame().iloc[33:66] - ) @pytest.mark.skipif(sqlalchemy is None, reason="sqlalchemy not installed") diff --git a/python/xorbits/_mars/dataframe/datastore/to_csv.py b/python/xorbits/_mars/dataframe/datastore/to_csv.py index 330db3405..caf96d6c5 100644 --- a/python/xorbits/_mars/dataframe/datastore/to_csv.py +++ b/python/xorbits/_mars/dataframe/datastore/to_csv.py @@ -35,7 +35,7 @@ from ...tensor.core import TensorOrder from ...tensor.operands import TensorOperand, TensorOperandMixin from ..operands import DataFrameOperand, DataFrameOperandMixin -from ..utils import is_pandas_2, parse_index +from ..utils import is_cudf, is_pandas_2, parse_index class DataFrameToCSV(DataFrameOperand, DataFrameOperandMixin): @@ -374,6 +374,19 @@ def _to_csv(cls, op, df, path, header=None): kwargs["line_terminator"] = op.lineterminator kwargs.pop("lineterminator") + # cudf not support following parameters + if is_cudf(df): + kwargs.pop("float_format") + kwargs.pop("index_label") + kwargs.pop("mode") + kwargs.pop("quoting") + kwargs.pop("quotechar") + kwargs.pop("date_format") + kwargs.pop("doublequote") + kwargs.pop("escapechar") + kwargs.pop("decimal") + kwargs["compression"] = None + df.to_csv(path, **kwargs) @classmethod