Skip to content
This repository was archived by the owner on Nov 1, 2024. It is now read-only.

Commit

Permalink
Support Python 3.7 (#93)
Browse files Browse the repository at this point in the history
Summary:
Google Colab runs Python 3.7

Pull Request resolved: #93

Reviewed By: ejguan

Differential Revision: D32734745

Pulled By: wenleix

fbshipit-source-id: 7d187c8ccdca7a59668c925c0712d59f7d27d79d
  • Loading branch information
wenleix authored and facebook-github-bot committed Dec 1, 2021
1 parent 251779b commit 61d0f7a
Show file tree
Hide file tree
Showing 9 changed files with 41 additions and 34 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/ubuntu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ jobs:
- name: Print CPU info
run: cat /proc/cpuinfo

- name: Setup Python environment
uses: actions/setup-python@v2
with:
python-version: 3.7

- name: Check out source repository
uses: actions/checkout@v2

Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ It plans to provide:

## Installation

You will need Python 3.8 or later. Also, we highly recommend installing a [Miniconda](https://docs.conda.io/en/latest/miniconda.html#latest-miniconda-installer-links) environment.
You will need Python 3.7 or later. Also, we highly recommend installing a [Miniconda](https://docs.conda.io/en/latest/miniconda.html#latest-miniconda-installer-links) environment.

First, set up an environment. If you are using conda, create a conda environment:
```
conda create --name torcharrow python=3.8
conda create --name torcharrow python=3.7
conda activate torcharrow
```

Expand All @@ -38,7 +38,7 @@ Coming soon!

### From Source

If you are installing from source, you will need Python 3.8 or later and a C++17 compiler.
If you are installing from source, you will need Python 3.7 or later and a C++17 compiler.

#### Get the TorchArrow Source
```bash
Expand Down
5 changes: 3 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,20 +135,21 @@ def build_extension(self, ext):
license="BSD",
install_requires=[
"arrow",
"numpy",
"numpy==1.21.4",
"pandas",
"typing",
"tabulate",
"typing-inspect",
"pyarrow",
],
python_requires=">=3.8",
python_requires=">=3.7",
classifiers=[
"Intended Audience :: Developers",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: BSD License",
"Operating System :: POSIX :: Linux",
"Programming Language :: C++",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: Implementation :: CPython",
Expand Down
22 changes: 14 additions & 8 deletions torcharrow/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -811,7 +811,7 @@ def np_typeof_dtype(t: DType): # -> np.dtype[]:
)


def typeof_np_ndarray(t: np.ndarray) -> ty.Union[DType, ty.Literal["object"]]:
def typeof_np_ndarray(t: np.ndarray) -> DType:
    """Infer the torcharrow DType for a NumPy ndarray from its element dtype."""
    element_dtype = t.dtype
    return typeof_np_dtype(element_dtype)


Expand Down Expand Up @@ -909,6 +909,12 @@ def get_underlying_dtype(dtype: DType) -> DType:
def get_nullable_dtype(dtype: DType) -> DType:
    """Return a copy of ``dtype`` with its ``nullable`` flag set to True."""
    nullable_variant = replace(dtype, nullable=True)
    return nullable_variant

# Based on https://github.com/pytorch/pytorch/blob/c48e6f014a0cca0adc18e1a39a8fd724fe7ab83a/torch/_jit_internal.py#L1113-L1118
def get_origin(target_type):
    """Python 3.7-compatible stand-in for ``typing.get_origin`` (added in 3.8)."""
    if hasattr(target_type, "__origin__"):
        return target_type.__origin__
    return None

def get_args(target_type):
    """Python 3.7-compatible stand-in for ``typing.get_args`` (added in 3.8)."""
    if hasattr(target_type, "__args__"):
        return target_type.__args__
    return None

def dtype_of_type(typ: ty.Union[ty.Type, DType]) -> DType:
assert typ is not None
Expand All @@ -930,19 +936,19 @@ def dtype_of_type(typ: ty.Union[ty.Type, DType]) -> DType:
return Struct(
[Field(f.name, dtype_of_type(f.type)) for f in dataclasses.fields(typ)]
)
if ty.get_origin(typ) in (List, list):
args = ty.get_args(typ)
if get_origin(typ) in (List, list):
args = get_args(typ)
assert len(args) == 1
elem_type = dtype_of_type(args[0])
return List(elem_type)
if ty.get_origin(typ) in (ty.Dict, dict):
args = ty.get_args(typ)
if get_origin(typ) in (ty.Dict, dict):
args = get_args(typ)
assert len(args) == 2
key = dtype_of_type(args[0])
value = dtype_of_type(args[1])
return Map(key, value)
if typing_inspect.is_optional_type(typ):
args = ty.get_args(typ)
args = get_args(typ)
assert len(args) == 2
if issubclass(args[1], type(None)):
contained = args[0]
Expand Down Expand Up @@ -974,8 +980,8 @@ def dtype_from_batch_pytype(typ: ty.Type) -> DType:
# TODO: we need a type annotation for Columns with statically accessible dtype
raise TypeError("Cannot infer dtype from IColumn")

if ty.get_origin(typ) in (List, list):
args = ty.get_args(typ)
if get_origin(typ) in (List, list):
args = get_args(typ)
assert len(args) == 1
return dtype_of_type(args[0])

Expand Down
1 change: 0 additions & 1 deletion torcharrow/icolumn.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,7 +614,6 @@ def map(
def transform(
self,
func: ty.Callable,
/,
dtype: ty.Optional[dt.DType] = None,
format: str = "column",
columns: ty.Optional[ty.List[str]] = None,
Expand Down
6 changes: 2 additions & 4 deletions torcharrow/idataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
Callable,
Iterable,
List,
Literal,
Mapping,
Optional,
Sequence,
Expand Down Expand Up @@ -36,7 +35,7 @@


def DataFrame(
data: Union[Iterable, dt.DType, Literal[None]] = None,
data: Optional[Union[Iterable, dt.DType]] = None,
dtype: Optional[dt.DType] = None,
columns: Optional[List[str]] = None,
device: Device = "",
Expand Down Expand Up @@ -151,7 +150,7 @@ def DataFrame(
# -----------------------------------------------------------------------------
# DataFrames aka (StructColumns, can be nested as StructColumns:-)

DataOrDTypeOrNone = Union[Mapping, Sequence, dt.DType, Literal[None]]
DataOrDTypeOrNone = Optional[Union[Mapping, Sequence, dt.DType]]


class IDataFrame(IColumn):
Expand Down Expand Up @@ -213,7 +212,6 @@ def copy(self):
def transform(
self,
func: Callable,
/,
dtype: Optional[dt.DType] = None,
format: str = "column",
columns: Optional[List[str]] = None,
Expand Down
2 changes: 1 addition & 1 deletion torcharrow/test/test_expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def get5(self, n=100):
return self.val + n

# kwargs
def get6(self, /, n=100, m=200):
def get6(self, n=100, m=200):
    """Return ``self.val`` plus the two keyword offsets ``n`` and ``m``."""
    offsets = n + m
    return self.val + offsets

@staticmethod
Expand Down
20 changes: 9 additions & 11 deletions torcharrow/velox_rt/dataframe_cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
Dict,
Iterable,
List,
Literal,
Mapping,
Optional,
Sequence,
Expand Down Expand Up @@ -53,7 +52,7 @@
# -----------------------------------------------------------------------------
# DataFrames aka (StructColumns, can be nested as StructColumns:-)

DataOrDTypeOrNone = Union[Mapping, Sequence, dt.DType, Literal[None]]
DataOrDTypeOrNone = Optional[Union[Mapping, Sequence, dt.DType]]


class DataFrameCpu(ColumnFromVelox, IDataFrame):
Expand Down Expand Up @@ -339,8 +338,7 @@ def slice_columns(self, start, stop):
def map(
self,
arg: Union[Dict, Callable],
/,
na_action: Literal["ignore", None] = None,
na_action=None,
dtype: Optional[dt.DType] = None,
columns: Optional[List[str]] = None,
):
Expand Down Expand Up @@ -396,7 +394,7 @@ def func(*x):
def flatmap(
self,
arg: Union[Dict, Callable],
na_action: Literal["ignore", None] = None,
na_action=None,
dtype: Optional[dt.DType] = None,
columns: Optional[List[str]] = None,
):
Expand Down Expand Up @@ -503,7 +501,7 @@ def sort(
self,
by: Optional[List[str]] = None,
ascending=True,
na_position: Literal["last", "first"] = "last",
na_position="last",
):
"""Sort a column/a dataframe in ascending or descending order"""
# Not allowing None in comparison might be too harsh...
Expand Down Expand Up @@ -535,7 +533,7 @@ def _nlargest(
self,
n=5,
columns: Optional[List[str]] = None,
keep: Literal["last", "first"] = "first",
keep="first",
):
"""Returns a new dataframe of the *n* largest elements."""
# Todo add keep arg
Expand All @@ -547,7 +545,7 @@ def _nsmallest(
self,
n=5,
columns: Optional[List[str]] = None,
keep: Literal["last", "first"] = "first",
keep="first",
):
"""Returns a new dataframe of the *n* smallest elements."""
return self.sort(by=columns, ascending=True).head(n)
Expand Down Expand Up @@ -1267,7 +1265,7 @@ def isin(self, values: Union[list, dict, IColumn]):

@trace
@expression
def fill_null(self, fill_value: Union[dt.ScalarTypes, Dict, Literal[None]]):
def fill_null(self, fill_value: Optional[Union[dt.ScalarTypes, Dict]]):
if fill_value is None:
return self
if isinstance(fill_value, IColumn._scalar_types):
Expand All @@ -1290,7 +1288,7 @@ def fill_null(self, fill_value: Union[dt.ScalarTypes, Dict, Literal[None]]):

@trace
@expression
def drop_null(self, how: Literal["any", "all"] = "any"):
def drop_null(self, how="any"):
"""Return a dataframe with rows removed where the row has any or all nulls."""
self._prototype_support_warning("drop_null")

Expand All @@ -1312,7 +1310,7 @@ def drop_null(self, how: Literal["any", "all"] = "any"):
def drop_duplicates(
self,
subset: Optional[List[str]] = None,
keep: Literal["first", "last", False] = "first",
keep="first",
):
"""Remove duplicate values from data but keep the first, last, none (keep=False)"""
self._prototype_support_warning("drop_duplicates")
Expand Down
8 changes: 4 additions & 4 deletions torcharrow/velox_rt/numerical_column_cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import math
import operator
import statistics
from typing import Dict, List, Literal, Optional, Union, Callable
from typing import Dict, List, Optional, Union, Callable

import numpy as np
import torcharrow as ta
Expand Down Expand Up @@ -130,7 +130,7 @@ def sort(
self,
columns: Optional[List[str]] = None,
ascending=True,
na_position: Literal["last", "first"] = "last",
na_position="last",
):
"""Sort a column/a dataframe in ascending or descending order"""
self._prototype_support_warning("sort")
Expand Down Expand Up @@ -164,7 +164,7 @@ def _nlargest(
self,
n=5,
columns: Optional[List[str]] = None,
keep: Literal["last", "first"] = "first",
keep="first",
):
"""Returns a new data of the *n* largest element."""
if columns is not None:
Expand Down Expand Up @@ -669,7 +669,7 @@ def fill_null(self, fill_value: Union[dt.ScalarTypes, Dict]):

@trace
@expression
def drop_null(self, how: Literal["any", "all"] = "any"):
def drop_null(self, how="any"):
"""Return a column with rows removed where a row has any or all nulls."""
self._prototype_support_warning("drop_null")

Expand Down

0 comments on commit 61d0f7a

Please sign in to comment.