From 61d0f7ab351d5a4e3be5ed5ffda6a68d136ba124 Mon Sep 17 00:00:00 2001
From: Wenlei Xie
Date: Wed, 1 Dec 2021 10:38:21 -0800
Subject: [PATCH] Support Python 3.7 (#93)

Summary:
Google Colab runs Python 3.7

Pull Request resolved: https://github.com/facebookresearch/torcharrow/pull/93

Reviewed By: ejguan

Differential Revision: D32734745

Pulled By: wenleix

fbshipit-source-id: 7d187c8ccdca7a59668c925c0712d59f7d27d79d
---
 .github/workflows/ubuntu.yml                |  5 +++++
 README.md                                   |  6 +++---
 setup.py                                    |  5 +++--
 torcharrow/dtypes.py                        | 22 +++++++++++++--------
 torcharrow/icolumn.py                       |  1 -
 torcharrow/idataframe.py                    |  6 ++----
 torcharrow/test/test_expression.py          |  2 +-
 torcharrow/velox_rt/dataframe_cpu.py        | 20 +++++++++----------
 torcharrow/velox_rt/numerical_column_cpu.py |  8 ++++----
 9 files changed, 41 insertions(+), 34 deletions(-)

diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml
index 4ee9cd9ea..3c7d372ab 100644
--- a/.github/workflows/ubuntu.yml
+++ b/.github/workflows/ubuntu.yml
@@ -16,6 +16,11 @@ jobs:
     - name: Print CPU info
       run: cat /proc/cpuinfo
 
+    - name: Setup Python environment
+      uses: actions/setup-python@v2
+      with:
+        python-version: 3.7
+
     - name: Check out source repository
       uses: actions/checkout@v2
 
diff --git a/README.md b/README.md
index 63677a243..018f42df4 100644
--- a/README.md
+++ b/README.md
@@ -16,11 +16,11 @@ It plans to provide:
 
 ## Installation
 
-You will need Python 3.8 or later. Also, we highly recommend installing an [Miniconda](https://docs.conda.io/en/latest/miniconda.html#latest-miniconda-installer-links) environment.
+You will need Python 3.7 or later. Also, we highly recommend installing an [Miniconda](https://docs.conda.io/en/latest/miniconda.html#latest-miniconda-installer-links) environment.
 
 First, set up an environment. If you are using conda, create a conda environment:
 ```
-conda create --name torcharrow python=3.8
+conda create --name torcharrow python=3.7
 conda activate torcharrow
 ```
 
@@ -38,7 +38,7 @@ Coming soon!
 
 ### From Source
 
-If you are installing from source, you will need Python 3.8 or later and a C++17 compiler.
+If you are installing from source, you will need Python 3.7 or later and a C++17 compiler.
 
 #### Get the TorchArrow Source
 ```bash
diff --git a/setup.py b/setup.py
index 5bfe4f7ac..79aa178bb 100644
--- a/setup.py
+++ b/setup.py
@@ -135,20 +135,21 @@ def build_extension(self, ext):
     license="BSD",
     install_requires=[
         "arrow",
-        "numpy",
+        "numpy==1.21.4",
         "pandas",
         "typing",
         "tabulate",
         "typing-inspect",
         "pyarrow",
     ],
-    python_requires=">=3.8",
+    python_requires=">=3.7",
     classifiers=[
         "Intended Audience :: Developers",
         "Intended Audience :: Science/Research",
         "License :: OSI Approved :: BSD License",
         "Operating System :: POSIX :: Linux",
         "Programming Language :: C++",
+        "Programming Language :: Python :: 3.7",
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: Implementation :: CPython",
diff --git a/torcharrow/dtypes.py b/torcharrow/dtypes.py
index b71ad7419..fd02f27cc 100644
--- a/torcharrow/dtypes.py
+++ b/torcharrow/dtypes.py
@@ -811,7 +811,7 @@ def np_typeof_dtype(t: DType): # -> np.dtype[]:
     )
 
 
-def typeof_np_ndarray(t: np.ndarray) -> ty.Union[DType, ty.Literal["object"]]:
+def typeof_np_ndarray(t: np.ndarray) -> DType:
     return typeof_np_dtype(t.dtype)
 
 
@@ -909,6 +909,12 @@ def get_underlying_dtype(dtype: DType) -> DType:
 
 def get_nullable_dtype(dtype: DType) -> DType:
     return replace(dtype, nullable=True)
 
+# Based on https://github.com/pytorch/pytorch/blob/c48e6f014a0cca0adc18e1a39a8fd724fe7ab83a/torch/_jit_internal.py#L1113-L1118
+def get_origin(target_type):
+    return getattr(target_type, "__origin__", None)
+
+def get_args(target_type):
+    return getattr(target_type, "__args__", None)
 
 def dtype_of_type(typ: ty.Union[ty.Type, DType]) -> DType:
     assert typ is not None
@@ -930,19 +936,19 @@ def dtype_of_type(typ: ty.Union[ty.Type, DType]) -> DType:
         return Struct(
             [Field(f.name, dtype_of_type(f.type)) for f in dataclasses.fields(typ)]
         )
-    if ty.get_origin(typ) in (List, list):
-        args = ty.get_args(typ)
+    if get_origin(typ) in (List, list):
+        args = get_args(typ)
         assert len(args) == 1
         elem_type = dtype_of_type(args[0])
         return List(elem_type)
-    if ty.get_origin(typ) in (ty.Dict, dict):
-        args = ty.get_args(typ)
+    if get_origin(typ) in (ty.Dict, dict):
+        args = get_args(typ)
         assert len(args) == 2
         key = dtype_of_type(args[0])
         value = dtype_of_type(args[1])
         return Map(key, value)
     if typing_inspect.is_optional_type(typ):
-        args = ty.get_args(typ)
+        args = get_args(typ)
         assert len(args) == 2
         if issubclass(args[1], type(None)):
             contained = args[0]
@@ -974,8 +980,8 @@ def dtype_from_batch_pytype(typ: ty.Type) -> DType:
         # TODO: we need a type annotation for Columns with statically accessible dtype
         raise TypeError("Cannot infer dtype from IColumn")
 
-    if ty.get_origin(typ) in (List, list):
-        args = ty.get_args(typ)
+    if get_origin(typ) in (List, list):
+        args = get_args(typ)
         assert len(args) == 1
         return dtype_of_type(args[0])
 
diff --git a/torcharrow/icolumn.py b/torcharrow/icolumn.py
index d19313ac1..8e89cdab6 100644
--- a/torcharrow/icolumn.py
+++ b/torcharrow/icolumn.py
@@ -614,7 +614,6 @@ def map(
     def transform(
         self,
         func: ty.Callable,
-        /,
         dtype: ty.Optional[dt.DType] = None,
         format: str = "column",
         columns: ty.Optional[ty.List[str]] = None,
diff --git a/torcharrow/idataframe.py b/torcharrow/idataframe.py
index 71dca8e40..320c6b1cb 100644
--- a/torcharrow/idataframe.py
+++ b/torcharrow/idataframe.py
@@ -8,7 +8,6 @@
     Callable,
     Iterable,
     List,
-    Literal,
     Mapping,
     Optional,
     Sequence,
@@ -36,7 +35,7 @@
 
 
 def DataFrame(
-    data: Union[Iterable, dt.DType, Literal[None]] = None,
+    data: Optional[Union[Iterable, dt.DType]] = None,
     dtype: Optional[dt.DType] = None,
     columns: Optional[List[str]] = None,
     device: Device = "",
@@ -151,7 +150,7 @@ def DataFrame(
 # -----------------------------------------------------------------------------
 # DataFrames aka (StructColumns, can be nested as StructColumns:-)
 
-DataOrDTypeOrNone = Union[Mapping, Sequence, dt.DType, Literal[None]]
+DataOrDTypeOrNone = Optional[Union[Mapping, Sequence, dt.DType]]
 
 
 class IDataFrame(IColumn):
@@ -213,7 +212,6 @@ def copy(self):
     def transform(
         self,
         func: Callable,
-        /,
         dtype: Optional[dt.DType] = None,
         format: str = "column",
         columns: Optional[List[str]] = None,
diff --git a/torcharrow/test/test_expression.py b/torcharrow/test/test_expression.py
index 6fec4dcd3..7dbce4927 100644
--- a/torcharrow/test/test_expression.py
+++ b/torcharrow/test/test_expression.py
@@ -38,7 +38,7 @@ def get5(self, n=100):
         return self.val + n
 
     # kwargs
-    def get6(self, /, n=100, m=200):
+    def get6(self, n=100, m=200):
         return self.val + n + m
 
     @staticmethod
diff --git a/torcharrow/velox_rt/dataframe_cpu.py b/torcharrow/velox_rt/dataframe_cpu.py
index 85d84e8d8..8ea746540 100644
--- a/torcharrow/velox_rt/dataframe_cpu.py
+++ b/torcharrow/velox_rt/dataframe_cpu.py
@@ -12,7 +12,6 @@
     Dict,
     Iterable,
     List,
-    Literal,
     Mapping,
     Optional,
     Sequence,
@@ -53,7 +52,7 @@
 # -----------------------------------------------------------------------------
 # DataFrames aka (StructColumns, can be nested as StructColumns:-)
 
-DataOrDTypeOrNone = Union[Mapping, Sequence, dt.DType, Literal[None]]
+DataOrDTypeOrNone = Optional[Union[Mapping, Sequence, dt.DType]]
 
 
 class DataFrameCpu(ColumnFromVelox, IDataFrame):
@@ -339,8 +338,7 @@ def slice_columns(self, start, stop):
     def map(
         self,
         arg: Union[Dict, Callable],
-        /,
-        na_action: Literal["ignore", None] = None,
+        na_action=None,
         dtype: Optional[dt.DType] = None,
         columns: Optional[List[str]] = None,
     ):
@@ -396,7 +394,7 @@ def func(*x):
     def flatmap(
         self,
         arg: Union[Dict, Callable],
-        na_action: Literal["ignore", None] = None,
+        na_action=None,
         dtype: Optional[dt.DType] = None,
         columns: Optional[List[str]] = None,
     ):
@@ -503,7 +501,7 @@ def sort(
         self,
         by: Optional[List[str]] = None,
         ascending=True,
-        na_position: Literal["last", "first"] = "last",
+        na_position="last",
     ):
         """Sort a column/a dataframe in ascending or descending order"""
         # Not allowing None in comparison might be too harsh...
@@ -535,7 +533,7 @@ def _nlargest(
         self,
         n=5,
         columns: Optional[List[str]] = None,
-        keep: Literal["last", "first"] = "first",
+        keep="first",
     ):
         """Returns a new dataframe of the *n* largest elements."""
         # Todo add keep arg
@@ -547,7 +545,7 @@ def _nsmallest(
         self,
         n=5,
         columns: Optional[List[str]] = None,
-        keep: Literal["last", "first"] = "first",
+        keep="first",
     ):
         """Returns a new dataframe of the *n* smallest elements."""
         return self.sort(by=columns, ascending=True).head(n)
@@ -1267,7 +1265,7 @@ def isin(self, values: Union[list, dict, IColumn]):
 
     @trace
     @expression
-    def fill_null(self, fill_value: Union[dt.ScalarTypes, Dict, Literal[None]]):
+    def fill_null(self, fill_value: Optional[Union[dt.ScalarTypes, Dict]]):
         if fill_value is None:
             return self
         if isinstance(fill_value, IColumn._scalar_types):
@@ -1290,7 +1288,7 @@ def fill_null(self, fill_value: Union[dt.ScalarTypes, Dict, Literal[None]]):
 
     @trace
     @expression
-    def drop_null(self, how: Literal["any", "all"] = "any"):
+    def drop_null(self, how="any"):
         """Return a dataframe with rows removed where the row has any or all nulls."""
         self._prototype_support_warning("drop_null")
 
@@ -1312,7 +1310,7 @@ def drop_null(self, how: Literal["any", "all"] = "any"):
     def drop_duplicates(
         self,
         subset: Optional[List[str]] = None,
-        keep: Literal["first", "last", False] = "first",
+        keep="first",
     ):
         """Remove duplicate values from data but keep the first, last, none (keep=False)"""
         self._prototype_support_warning("drop_duplicates")
diff --git a/torcharrow/velox_rt/numerical_column_cpu.py b/torcharrow/velox_rt/numerical_column_cpu.py
index db1e4e28f..eeef24093 100644
--- a/torcharrow/velox_rt/numerical_column_cpu.py
+++ b/torcharrow/velox_rt/numerical_column_cpu.py
@@ -3,7 +3,7 @@
 import math
 import operator
 import statistics
-from typing import Dict, List, Literal, Optional, Union, Callable
+from typing import Dict, List, Optional, Union, Callable
 
 import numpy as np
 import torcharrow as ta
@@ -130,7 +130,7 @@ def sort(
         self,
         columns: Optional[List[str]] = None,
         ascending=True,
-        na_position: Literal["last", "first"] = "last",
+        na_position="last",
     ):
         """Sort a column/a dataframe in ascending or descending order"""
         self._prototype_support_warning("sort")
@@ -164,7 +164,7 @@ def _nlargest(
         self,
         n=5,
         columns: Optional[List[str]] = None,
-        keep: Literal["last", "first"] = "first",
+        keep="first",
     ):
         """Returns a new data of the *n* largest element."""
         if columns is not None:
@@ -669,7 +669,7 @@ def fill_null(self, fill_value: Union[dt.ScalarTypes, Dict]):
 
     @trace
     @expression
-    def drop_null(self, how: Literal["any", "all"] = "any"):
+    def drop_null(self, how="any"):
         """Return a column with rows removed where a row has any or all nulls."""
         self._prototype_support_warning("drop_null")
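
The typing changes above share one cause: `typing.get_origin`, `typing.get_args`, `typing.Literal`, and the positional-only `/` parameter syntax all first appeared in Python 3.8, so none of them are available on 3.7. Below is a minimal sketch (not part of the patch) checking that the `getattr`-based fallbacks added to `torcharrow/dtypes.py` report the same origins and type arguments that `dtype_of_type` relies on; the assertions are illustrative and assume Python 3.7 or newer.

```python
from typing import Dict, List, Optional, Union

# Fallbacks equivalent to the helpers added in torcharrow/dtypes.py:
# on Python 3.7 parameterized generics already carry __origin__/__args__,
# so typing.get_origin/get_args (3.8+) are not strictly required.
def get_origin(target_type):
    return getattr(target_type, "__origin__", None)

def get_args(target_type):
    return getattr(target_type, "__args__", None)

# Parameterized generics report their runtime origin and arguments.
assert get_origin(List[int]) is list and get_args(List[int]) == (int,)
assert get_origin(Dict[str, int]) is dict and get_args(Dict[str, int]) == (str, int)

# Optional[T] is Union[T, None], so the last argument is NoneType.
assert get_origin(Optional[str]) is Union
assert get_args(Optional[str]) == (str, type(None))

# Plain (non-generic) classes have neither attribute, so both return None.
assert get_origin(int) is None and get_args(int) is None
```

Using `getattr` with a `None` default keeps the helpers safe to call on plain classes such as `int` or `str`, which is exactly how `dtype_of_type` probes an arbitrary annotation before falling through to the scalar-type branches.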