diff --git a/dev/requirements.txt b/dev/requirements.txt index 04cab4cbfcc3c..33300cc28d3ca 100644 --- a/dev/requirements.txt +++ b/dev/requirements.txt @@ -3,7 +3,7 @@ py4j>=0.10.9.7 # PySpark dependencies (optional) numpy>=1.21 -pyarrow>=10.0.0 +pyarrow>=11.0.0 six==1.16.0 pandas>=2.0.0 scipy diff --git a/python/docs/source/getting_started/install.rst b/python/docs/source/getting_started/install.rst index 2b9f28135bb17..b35588a618acf 100644 --- a/python/docs/source/getting_started/install.rst +++ b/python/docs/source/getting_started/install.rst @@ -207,7 +207,7 @@ Installable with ``pip install "pyspark[connect]"``. Package Supported version Note ========================== ================= ========================== `pandas` >=2.0.0 Required for Spark Connect -`pyarrow` >=10.0.0 Required for Spark Connect +`pyarrow` >=11.0.0 Required for Spark Connect `grpcio` >=1.67.0 Required for Spark Connect `grpcio-status` >=1.67.0 Required for Spark Connect `googleapis-common-protos` >=1.65.0 Required for Spark Connect @@ -223,7 +223,7 @@ Installable with ``pip install "pyspark[sql]"``. Package Supported version Note ========= ================= ====================== `pandas` >=2.0.0 Required for Spark SQL -`pyarrow` >=10.0.0 Required for Spark SQL +`pyarrow` >=11.0.0 Required for Spark SQL ========= ================= ====================== Additional libraries that enhance functionality but are not included in the installation packages: @@ -240,7 +240,7 @@ Installable with ``pip install "pyspark[pandas_on_spark]"``. Package Supported version Note ========= ================= ================================ `pandas` >=2.0.0 Required for Pandas API on Spark -`pyarrow` >=10.0.0 Required for Pandas API on Spark +`pyarrow` >=11.0.0 Required for Pandas API on Spark ========= ================= ================================ Additional libraries that enhance functionality but are not included in the installation packages: diff --git a/python/docs/source/migration_guide/pyspark_upgrade.rst b/python/docs/source/migration_guide/pyspark_upgrade.rst index 5292530420025..55d067eb5fa2d 100644 --- a/python/docs/source/migration_guide/pyspark_upgrade.rst +++ b/python/docs/source/migration_guide/pyspark_upgrade.rst @@ -25,7 +25,7 @@ Upgrading from PySpark 3.5 to 4.0 * In Spark 4.0, Python 3.8 support was dropped in PySpark. * In Spark 4.0, the minimum supported version for Pandas has been raised from 1.0.5 to 2.0.0 in PySpark. * In Spark 4.0, the minimum supported version for Numpy has been raised from 1.15 to 1.21 in PySpark. -* In Spark 4.0, the minimum supported version for PyArrow has been raised from 4.0.0 to 10.0.0 in PySpark. +* In Spark 4.0, the minimum supported version for PyArrow has been raised from 4.0.0 to 11.0.0 in PySpark. * In Spark 4.0, ``Int64Index`` and ``Float64Index`` have been removed from pandas API on Spark, ``Index`` should be used directly. * In Spark 4.0, ``DataFrame.iteritems`` has been removed from pandas API on Spark, use ``DataFrame.items`` instead. * In Spark 4.0, ``Series.iteritems`` has been removed from pandas API on Spark, use ``Series.items`` instead. diff --git a/python/packaging/classic/setup.py b/python/packaging/classic/setup.py index 09f194278cdcc..f595b26450e39 100755 --- a/python/packaging/classic/setup.py +++ b/python/packaging/classic/setup.py @@ -152,7 +152,7 @@ def _supports_symlinks(): # python/packaging/connect/setup.py _minimum_pandas_version = "2.0.0" _minimum_numpy_version = "1.21" -_minimum_pyarrow_version = "10.0.0" +_minimum_pyarrow_version = "11.0.0" _minimum_grpc_version = "1.67.0" _minimum_googleapis_common_protos_version = "1.65.0" diff --git a/python/packaging/connect/setup.py b/python/packaging/connect/setup.py index 5f67e5306b3ff..51d0a4c9e3601 100755 --- a/python/packaging/connect/setup.py +++ b/python/packaging/connect/setup.py @@ -132,7 +132,7 @@ # python/packaging/classic/setup.py _minimum_pandas_version = "2.0.0" _minimum_numpy_version = "1.21" - _minimum_pyarrow_version = "10.0.0" + _minimum_pyarrow_version = "11.0.0" _minimum_grpc_version = "1.59.3" _minimum_googleapis_common_protos_version = "1.56.4" diff --git a/python/pyspark/sql/pandas/utils.py b/python/pyspark/sql/pandas/utils.py index 5849ae0edd6d9..a351c13ff0a08 100644 --- a/python/pyspark/sql/pandas/utils.py +++ b/python/pyspark/sql/pandas/utils.py @@ -61,7 +61,7 @@ def require_minimum_pandas_version() -> None: def require_minimum_pyarrow_version() -> None: """Raise ImportError if minimum version of pyarrow is not installed""" # TODO(HyukjinKwon): Relocate and deduplicate the version specification. - minimum_pyarrow_version = "10.0.0" + minimum_pyarrow_version = "11.0.0" import os