From 61d0f7ab351d5a4e3be5ed5ffda6a68d136ba124 Mon Sep 17 00:00:00 2001
From: Wenlei Xie
Date: Wed, 1 Dec 2021 10:38:21 -0800
Subject: [PATCH] Support Python 3.7 (#93)

Summary:
Google Colab runs Python 3.7

Pull Request resolved: https://github.com/facebookresearch/torcharrow/pull/93

Reviewed By: ejguan

Differential Revision: D32734745

Pulled By: wenleix

fbshipit-source-id: 7d187c8ccdca7a59668c925c0712d59f7d27d79d
---
 .github/workflows/ubuntu.yml                |  5 +++++
 README.md                                   |  6 +++---
 setup.py                                    |  5 +++--
 torcharrow/dtypes.py                        | 22 +++++++++++++--------
 torcharrow/icolumn.py                       |  1 -
 torcharrow/idataframe.py                    |  6 ++----
 torcharrow/test/test_expression.py          |  2 +-
 torcharrow/velox_rt/dataframe_cpu.py        | 20 +++++++++----------
 torcharrow/velox_rt/numerical_column_cpu.py |  8 ++++----
 9 files changed, 41 insertions(+), 34 deletions(-)

diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml
index 4ee9cd9ea..3c7d372ab 100644
--- a/.github/workflows/ubuntu.yml
+++ b/.github/workflows/ubuntu.yml
@@ -16,6 +16,11 @@ jobs:
     - name: Print CPU info
       run: cat /proc/cpuinfo
 
+    - name: Setup Python environment
+      uses: actions/setup-python@v2
+      with:
+        python-version: 3.7
+
     - name: Check out source repository
       uses: actions/checkout@v2
 
diff --git a/README.md b/README.md
index 63677a243..018f42df4 100644
--- a/README.md
+++ b/README.md
@@ -16,11 +16,11 @@ It plans to provide:
 
 ## Installation
 
-You will need Python 3.8 or later. Also, we highly recommend installing an [Miniconda](https://docs.conda.io/en/latest/miniconda.html#latest-miniconda-installer-links) environment.
+You will need Python 3.7 or later. Also, we highly recommend installing an [Miniconda](https://docs.conda.io/en/latest/miniconda.html#latest-miniconda-installer-links) environment.
 
 First, set up an environment. If you are using conda, create a conda environment:
 ```
-conda create --name torcharrow python=3.8
+conda create --name torcharrow python=3.7
 conda activate torcharrow
 ```
 
@@ -38,7 +38,7 @@ Coming soon!
 
 ### From Source
 
-If you are installing from source, you will need Python 3.8 or later and a C++17 compiler.
+If you are installing from source, you will need Python 3.7 or later and a C++17 compiler.
 
 #### Get the TorchArrow Source
 ```bash
diff --git a/setup.py b/setup.py
index 5bfe4f7ac..79aa178bb 100644
--- a/setup.py
+++ b/setup.py
@@ -135,20 +135,21 @@ def build_extension(self, ext):
     license="BSD",
     install_requires=[
         "arrow",
-        "numpy",
+        "numpy==1.21.4",
         "pandas",
         "typing",
         "tabulate",
         "typing-inspect",
         "pyarrow",
     ],
-    python_requires=">=3.8",
+    python_requires=">=3.7",
     classifiers=[
         "Intended Audience :: Developers",
         "Intended Audience :: Science/Research",
         "License :: OSI Approved :: BSD License",
         "Operating System :: POSIX :: Linux",
         "Programming Language :: C++",
+        "Programming Language :: Python :: 3.7",
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: Implementation :: CPython",
diff --git a/torcharrow/dtypes.py b/torcharrow/dtypes.py
index b71ad7419..fd02f27cc 100644
--- a/torcharrow/dtypes.py
+++ b/torcharrow/dtypes.py
@@ -811,7 +811,7 @@ def np_typeof_dtype(t: DType): # -> np.dtype[]:
     )
 
 
-def typeof_np_ndarray(t: np.ndarray) -> ty.Union[DType, ty.Literal["object"]]:
+def typeof_np_ndarray(t: np.ndarray) -> DType:
     return typeof_np_dtype(t.dtype)
 
 
@@ -909,6 +909,12 @@ def get_underlying_dtype(dtype: DType) -> DType:
 
 def get_nullable_dtype(dtype: DType) -> DType:
     return replace(dtype, nullable=True)
 
+# Based on https://github.com/pytorch/pytorch/blob/c48e6f014a0cca0adc18e1a39a8fd724fe7ab83a/torch/_jit_internal.py#L1113-L1118
+def get_origin(target_type):
+    return getattr(target_type, "__origin__", None)
+
+def get_args(target_type):
+    return getattr(target_type, "__args__", None)
 
 def dtype_of_type(typ: ty.Union[ty.Type, DType]) -> DType:
     assert typ is not None
@@ -930,19 +936,19 @@ def dtype_of_type(typ: ty.Union[ty.Type, DType]) -> DType:
         return Struct(
             [Field(f.name, dtype_of_type(f.type)) for f in dataclasses.fields(typ)]
         )
-    if ty.get_origin(typ) in (List, list):
-        args = ty.get_args(typ)
+    if get_origin(typ) in (List, list):
+        args = get_args(typ)
         assert len(args) == 1
         elem_type = dtype_of_type(args[0])
         return List(elem_type)
-    if ty.get_origin(typ) in (ty.Dict, dict):
-        args = ty.get_args(typ)
+    if get_origin(typ) in (ty.Dict, dict):
+        args = get_args(typ)
         assert len(args) == 2
         key = dtype_of_type(args[0])
         value = dtype_of_type(args[1])
         return Map(key, value)
     if typing_inspect.is_optional_type(typ):
-        args = ty.get_args(typ)
+        args = get_args(typ)
         assert len(args) == 2
         if issubclass(args[1], type(None)):
             contained = args[0]
@@ -974,8 +980,8 @@ def dtype_from_batch_pytype(typ: ty.Type) -> DType:
         # TODO: we need a type annotation for Columns with statically accessible dtype
         raise TypeError("Cannot infer dtype from IColumn")
 
-    if ty.get_origin(typ) in (List, list):
-        args = ty.get_args(typ)
+    if get_origin(typ) in (List, list):
+        args = get_args(typ)
         assert len(args) == 1
         return dtype_of_type(args[0])
 
diff --git a/torcharrow/icolumn.py b/torcharrow/icolumn.py
index d19313ac1..8e89cdab6 100644
--- a/torcharrow/icolumn.py
+++ b/torcharrow/icolumn.py
@@ -614,7 +614,6 @@ def map(
     def transform(
         self,
         func: ty.Callable,
-        /,
         dtype: ty.Optional[dt.DType] = None,
         format: str = "column",
         columns: ty.Optional[ty.List[str]] = None,
diff --git a/torcharrow/idataframe.py b/torcharrow/idataframe.py
index 71dca8e40..320c6b1cb 100644
--- a/torcharrow/idataframe.py
+++ b/torcharrow/idataframe.py
@@ -8,7 +8,6 @@
     Callable,
     Iterable,
     List,
-    Literal,
     Mapping,
     Optional,
     Sequence,
@@ -36,7 +35,7 @@
 
 
 def DataFrame(
-    data: Union[Iterable, dt.DType, Literal[None]] = None,
+    data: Optional[Union[Iterable, dt.DType]] = None,
     dtype: Optional[dt.DType] = None,
     columns: Optional[List[str]] = None,
     device: Device = "",
@@ -151,7 +150,7 @@ def DataFrame(
 # -----------------------------------------------------------------------------
 # DataFrames aka (StructColumns, can be nested as StructColumns:-)
 
-DataOrDTypeOrNone = Union[Mapping, Sequence, dt.DType, Literal[None]]
+DataOrDTypeOrNone = Optional[Union[Mapping, Sequence, dt.DType]]
 
 
 class IDataFrame(IColumn):
@@ -213,7 +212,6 @@ def copy(self):
     def transform(
         self,
         func: Callable,
-        /,
         dtype: Optional[dt.DType] = None,
         format: str = "column",
         columns: Optional[List[str]] = None,
diff --git a/torcharrow/test/test_expression.py b/torcharrow/test/test_expression.py
index 6fec4dcd3..7dbce4927 100644
--- a/torcharrow/test/test_expression.py
+++ b/torcharrow/test/test_expression.py
@@ -38,7 +38,7 @@ def get5(self, n=100):
         return self.val + n
 
     # kwargs
-    def get6(self, /, n=100, m=200):
+    def get6(self, n=100, m=200):
         return self.val + n + m
 
     @staticmethod
diff --git a/torcharrow/velox_rt/dataframe_cpu.py b/torcharrow/velox_rt/dataframe_cpu.py
index 85d84e8d8..8ea746540 100644
--- a/torcharrow/velox_rt/dataframe_cpu.py
+++ b/torcharrow/velox_rt/dataframe_cpu.py
@@ -12,7 +12,6 @@
     Dict,
     Iterable,
     List,
-    Literal,
     Mapping,
     Optional,
     Sequence,
@@ -53,7 +52,7 @@
 # -----------------------------------------------------------------------------
 # DataFrames aka (StructColumns, can be nested as StructColumns:-)
 
-DataOrDTypeOrNone = Union[Mapping, Sequence, dt.DType, Literal[None]]
+DataOrDTypeOrNone = Optional[Union[Mapping, Sequence, dt.DType]]
 
 
 class DataFrameCpu(ColumnFromVelox, IDataFrame):
@@ -339,8 +338,7 @@ def slice_columns(self, start, stop):
     def map(
         self,
         arg: Union[Dict, Callable],
-        /,
-        na_action: Literal["ignore", None] = None,
+        na_action=None,
         dtype: Optional[dt.DType] = None,
         columns: Optional[List[str]] = None,
     ):
@@ -396,7 +394,7 @@ def func(*x):
     def flatmap(
         self,
         arg: Union[Dict, Callable],
-        na_action: Literal["ignore", None] = None,
+        na_action=None,
         dtype: Optional[dt.DType] = None,
         columns: Optional[List[str]] = None,
     ):
@@ -503,7 +501,7 @@ def sort(
         self,
         by: Optional[List[str]] = None,
         ascending=True,
-        na_position: Literal["last", "first"] = "last",
+        na_position="last",
     ):
         """Sort a column/a dataframe in ascending or descending order"""
         # Not allowing None in comparison might be too harsh...
@@ -535,7 +533,7 @@ def _nlargest(
         self,
         n=5,
         columns: Optional[List[str]] = None,
-        keep: Literal["last", "first"] = "first",
+        keep="first",
     ):
         """Returns a new dataframe of the *n* largest elements."""
         # Todo add keep arg
@@ -547,7 +545,7 @@ def _nsmallest(
         self,
         n=5,
         columns: Optional[List[str]] = None,
-        keep: Literal["last", "first"] = "first",
+        keep="first",
     ):
         """Returns a new dataframe of the *n* smallest elements."""
         return self.sort(by=columns, ascending=True).head(n)
@@ -1267,7 +1265,7 @@ def isin(self, values: Union[list, dict, IColumn]):
 
     @trace
     @expression
-    def fill_null(self, fill_value: Union[dt.ScalarTypes, Dict, Literal[None]]):
+    def fill_null(self, fill_value: Optional[Union[dt.ScalarTypes, Dict]]):
         if fill_value is None:
             return self
         if isinstance(fill_value, IColumn._scalar_types):
@@ -1290,7 +1288,7 @@ def fill_null(self, fill_value: Union[dt.ScalarTypes, Dict, Literal[None]]):
 
     @trace
     @expression
-    def drop_null(self, how: Literal["any", "all"] = "any"):
+    def drop_null(self, how="any"):
         """Return a dataframe with rows removed where the row has any or all nulls."""
         self._prototype_support_warning("drop_null")
 
@@ -1312,7 +1310,7 @@ def drop_null(self, how: Literal["any", "all"] = "any"):
     def drop_duplicates(
         self,
         subset: Optional[List[str]] = None,
-        keep: Literal["first", "last", False] = "first",
+        keep="first",
     ):
         """Remove duplicate values from data but keep the first, last, none (keep=False)"""
         self._prototype_support_warning("drop_duplicates")
diff --git a/torcharrow/velox_rt/numerical_column_cpu.py b/torcharrow/velox_rt/numerical_column_cpu.py
index db1e4e28f..eeef24093 100644
--- a/torcharrow/velox_rt/numerical_column_cpu.py
+++ b/torcharrow/velox_rt/numerical_column_cpu.py
@@ -3,7 +3,7 @@
 import math
 import operator
 import statistics
-from typing import Dict, List, Literal, Optional, Union, Callable
+from typing import Dict, List, Optional, Union, Callable
 
 import numpy as np
 import torcharrow as ta
@@ -130,7 +130,7 @@ def sort(
         self,
         columns: Optional[List[str]] = None,
         ascending=True,
-        na_position: Literal["last", "first"] = "last",
+        na_position="last",
     ):
         """Sort a column/a dataframe in ascending or descending order"""
         self._prototype_support_warning("sort")
@@ -164,7 +164,7 @@ def _nlargest(
         self,
         n=5,
         columns: Optional[List[str]] = None,
-        keep: Literal["last", "first"] = "first",
+        keep="first",
     ):
         """Returns a new data of the *n* largest element."""
         if columns is not None:
@@ -669,7 +669,7 @@ def fill_null(self, fill_value: Union[dt.ScalarTypes, Dict]):
 
     @trace
     @expression
-    def drop_null(self, how: Literal["any", "all"] = "any"):
+    def drop_null(self, how="any"):
         """Return a column with rows removed where a row has any or all nulls."""
         self._prototype_support_warning("drop_null")
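
The typing changes above share one cause: `typing.get_origin`, `typing.get_args`, `typing.Literal`, and the positional-only `/` parameter syntax all first appeared in Python 3.8, so none of them are available on 3.7. Below is a minimal sketch (not part of the patch) checking that the `getattr`-based fallbacks added to `torcharrow/dtypes.py` report the same origins and type arguments that `dtype_of_type` relies on; the assertions are illustrative and assume Python 3.7 or newer.

```python
from typing import Dict, List, Optional, Union

# Fallbacks equivalent to the helpers added in torcharrow/dtypes.py:
# on Python 3.7 parameterized generics already carry __origin__/__args__,
# so typing.get_origin/get_args (3.8+) are not strictly required.
def get_origin(target_type):
    return getattr(target_type, "__origin__", None)

def get_args(target_type):
    return getattr(target_type, "__args__", None)

# Parameterized generics report their runtime origin and arguments.
assert get_origin(List[int]) is list and get_args(List[int]) == (int,)
assert get_origin(Dict[str, int]) is dict and get_args(Dict[str, int]) == (str, int)

# Optional[T] is Union[T, None], so the last argument is NoneType.
assert get_origin(Optional[str]) is Union
assert get_args(Optional[str]) == (str, type(None))

# Plain (non-generic) classes have neither attribute, so both return None.
assert get_origin(int) is None and get_args(int) is None
```

Using `getattr` with a `None` default keeps the helpers safe to call on plain classes such as `int` or `str`, which is exactly how `dtype_of_type` probes an arbitrary annotation before falling through to the scalar-type branches.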