Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: GPU cudf read_parquet has no attribute 'build_categorical_column' #828

Closed
luweizheng opened this issue Nov 17, 2024 · 1 comment
Closed
Labels
bug Something isn't working gpu

Comments

@luweizheng
Copy link
Collaborator

cuDF error: the partitioned-dataset case of the GPU `read_parquet` test below fails (pytest output follows).

   @require_cudf
    def test_read_parquet_gpu_execution(setup_gpu):
        with tempfile.TemporaryDirectory() as tempdir:
            file_path = os.path.join(tempdir, "test.parquet")

            df = pd.DataFrame(
                {
                    "col1": np.random.rand(100),
                    "col2": np.random.choice(["a", "b", "c"], (100,)),
                    "col3": np.arange(100),
                }
            )
            df.to_parquet(file_path, index=False)

            pdf = pd.read_parquet(file_path)
            mdf = md.read_parquet(file_path, gpu=True).execute().fetch(to_cpu=False)
            pd.testing.assert_frame_equal(
                pdf.reset_index(drop=True), mdf.to_pandas().reset_index(drop=True)
            )

            mdf2 = md.read_parquet(file_path, gpu=True).execute().fetch(to_cpu=False)
            pd.testing.assert_frame_equal(
                pdf.reset_index(drop=True), mdf2.to_pandas().reset_index(drop=True)
            )

            mdf3 = (
                md.read_parquet(file_path, gpu=True).head(3).execute().fetch(to_cpu=False)
            )
            pd.testing.assert_frame_equal(
                pdf.reset_index(drop=True).head(3), mdf3.to_pandas().reset_index(drop=True)
            )

        with tempfile.TemporaryDirectory() as tempdir:
            file_path = os.path.join(tempdir, "test.parquet")
            test_df = pd.DataFrame(
                {
                    "a": np.arange(10).astype(np.int64, copy=False),
                    "b": [f"s{i}" for i in range(10)],
                    "c": np.random.rand(10),
                }
            )
            test_df.to_parquet(file_path, row_group_size=3)

            df = md.read_parquet(
                file_path, groups_as_chunks=True, columns=["a", "b"], gpu=True
            )
            result = df.execute().fetch(to_cpu=False).to_pandas()
            pd.testing.assert_frame_equal(
                result.reset_index(drop=True), test_df[["a", "b"]]
            )

        # test partitioned
        with tempfile.TemporaryDirectory() as tempdir:
            df = pd.DataFrame(
                {
                    "a": np.random.rand(300),
                    "b": [f"s{i}" for i in range(300)],
                    "c": np.random.choice(["a", "b", "c"], (300,)),
                }
            )
            df.to_parquet(tempdir, partition_cols=["c"])
            mdf = md.read_parquet(tempdir, gpu=True)
>           r = mdf.execute().fetch(to_cpu=False).to_pandas().astype(df.dtypes)

xorbits/_mars/dataframe/datasource/tests/test_datasource_execution.py:1798:
xorbits/_mars/core/entity/tileables.py:430: in execute
    result = self.data.execute(session=session, **kw)
xorbits/_mars/core/entity/executable.py:152: in execute
    return execute(self, session=session, **kw)
xorbits/_mars/deploy/oscar/session.py:1789: in execute
    return session.execute(
xorbits/_mars/deploy/oscar/session.py:1600: in execute
    execution_info: ExecutionInfo = fut.result(
/opt/conda/lib/python3.12/concurrent/futures/_base.py:456: in result
    return self.__get_result()
/opt/conda/lib/python3.12/concurrent/futures/_base.py:401: in __get_result
    raise self._exception
@XprobeBot XprobeBot added bug Something isn't working gpu labels Nov 17, 2024
@XprobeBot XprobeBot added this to the v0.7.4 milestone Nov 17, 2024
@luweizheng
Copy link
Collaborator Author

Fixed by #832.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Something isn't working gpu
Projects
None yet
Development

No branches or pull requests

2 participants