BUG: GPU cudf read_parquet has no attribute 'build_categorical_column' #828

luweizheng · 2024-11-17T15:38:34Z

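The GPU `read_parquet` test below fails with a cudf error (no attribute 'build_categorical_column'). Failing test and traceback: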

   @require_cudf
    def test_read_parquet_gpu_execution(setup_gpu):
        with tempfile.TemporaryDirectory() as tempdir:
            file_path = os.path.join(tempdir, "test.parquet")

            df = pd.DataFrame(
                {
                    "col1": np.random.rand(100),
                    "col2": np.random.choice(["a", "b", "c"], (100,)),
                    "col3": np.arange(100),
                }
            )
            df.to_parquet(file_path, index=False)

            pdf = pd.read_parquet(file_path)
            mdf = md.read_parquet(file_path, gpu=True).execute().fetch(to_cpu=False)
            pd.testing.assert_frame_equal(
                pdf.reset_index(drop=True), mdf.to_pandas().reset_index(drop=True)
            )

            mdf2 = md.read_parquet(file_path, gpu=True).execute().fetch(to_cpu=False)
            pd.testing.assert_frame_equal(
                pdf.reset_index(drop=True), mdf2.to_pandas().reset_index(drop=True)
            )

            mdf3 = (
                md.read_parquet(file_path, gpu=True).head(3).execute().fetch(to_cpu=False)
            )
            pd.testing.assert_frame_equal(
                pdf.reset_index(drop=True).head(3), mdf3.to_pandas().reset_index(drop=True)
            )

        with tempfile.TemporaryDirectory() as tempdir:
            file_path = os.path.join(tempdir, "test.parquet")
            test_df = pd.DataFrame(
                {
                    "a": np.arange(10).astype(np.int64, copy=False),
                    "b": [f"s{i}" for i in range(10)],
                    "c": np.random.rand(10),
                }
            )
            test_df.to_parquet(file_path, row_group_size=3)

            df = md.read_parquet(
                file_path, groups_as_chunks=True, columns=["a", "b"], gpu=True
            )
            result = df.execute().fetch(to_cpu=False).to_pandas()
            pd.testing.assert_frame_equal(
                result.reset_index(drop=True), test_df[["a", "b"]]
            )

        # test partitioned
        with tempfile.TemporaryDirectory() as tempdir:
            df = pd.DataFrame(
                {
                    "a": np.random.rand(300),
                    "b": [f"s{i}" for i in range(300)],
                    "c": np.random.choice(["a", "b", "c"], (300,)),
                }
            )
            df.to_parquet(tempdir, partition_cols=["c"])
            mdf = md.read_parquet(tempdir, gpu=True)
>           r = mdf.execute().fetch(to_cpu=False).to_pandas().astype(df.dtypes)

xorbits/_mars/dataframe/datasource/tests/test_datasource_execution.py:1798:
xorbits/_mars/core/entity/tileables.py:430: in execute
    result = self.data.execute(session=session, **kw)
xorbits/_mars/core/entity/executable.py:152: in execute
    return execute(self, session=session, **kw)
xorbits/_mars/deploy/oscar/session.py:1789: in execute
    return session.execute(
xorbits/_mars/deploy/oscar/session.py:1600: in execute
    execution_info: ExecutionInfo = fut.result(
/opt/conda/lib/python3.12/concurrent/futures/_base.py:456: in result
    return self.__get_result()
/opt/conda/lib/python3.12/concurrent/futures/_base.py:401: in __get_result
    raise self._exception

The text was updated successfully, but these errors were encountered:

luweizheng · 2024-12-05T07:02:17Z

Fixed by #832.

XprobeBot added bug Something isn't working gpu labels Nov 17, 2024

XprobeBot added this to the v0.7.4 milestone Nov 17, 2024

hucorz mentioned this issue Dec 4, 2024

BUG: Fix no build_categorical_column error of GPU read_parquet #832

Merged

2 tasks

luweizheng closed this as completed Dec 5, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

BUG: GPU cudf read_parquet has no attribute 'build_categorical_column' #828

BUG: GPU cudf read_parquet has no attribute 'build_categorical_column' #828

luweizheng commented Nov 17, 2024

luweizheng commented Dec 5, 2024

BUG: GPU cudf read_parquet has no attribute 'build_categorical_column' #828

BUG: GPU cudf read_parquet has no attribute 'build_categorical_column' #828

Comments

luweizheng commented Nov 17, 2024

luweizheng commented Dec 5, 2024