Skip to content

Commit

Permalink
Python 3.11 support (#26)
Browse files Browse the repository at this point in the history
* Add Python 3.11 support

* Update Python 3.10 version
  • Loading branch information
n3011 authored Mar 22, 2024
1 parent 1a0f6c8 commit 910e4d3
Show file tree
Hide file tree
Showing 27 changed files with 132 additions and 142 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: datum_py38
name: datum_py310

on: [push]

Expand All @@ -9,10 +9,10 @@ jobs:

steps:
- uses: actions/checkout@v1
- name: Set up Python 3.8
- name: Set up Python 3.10
uses: actions/setup-python@v1
with:
python-version: 3.8
python-version: 3.10.13
- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: datum_py37
name: datum_py311

on: [push]

Expand All @@ -9,10 +9,10 @@ jobs:

steps:
- uses: actions/checkout@v1
- name: Set up Python 3.7
- name: Set up Python 3.11
uses: actions/setup-python@v1
with:
python-version: 3.7
python-version: 3.11
- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand Down
1 change: 0 additions & 1 deletion configs/image_clf_configs.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2020 The OpenAGI Datum Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand Down
1 change: 0 additions & 1 deletion configs/image_det_configs.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2020 The OpenAGI Datum Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand Down
1 change: 0 additions & 1 deletion configs/image_seg_configs.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2020 The OpenAGI Datum Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand Down
1 change: 0 additions & 1 deletion configs/text_json_configs.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2020 The OpenAGI Datum Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand Down
31 changes: 14 additions & 17 deletions datum/cache/bucket.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2020 The OpenAGI Datum Authors.
# Copyright 2020 The TensorFlow Datasets Authors.
#
Expand All @@ -21,9 +20,9 @@
import os
import struct
import uuid
from typing import Generator, List, Optional, Tuple
from collections.abc import Generator
from typing import Optional

import six
import tensorflow.compat.v2 as tf

from datum.utils.hashing import Hasher
Expand Down Expand Up @@ -51,7 +50,7 @@
class DuplicatedKeysError(Exception):

def __init__(self, item1: Optional[bytes] = None, item2: Optional[bytes] = None):
super(DuplicatedKeysError, self).__init__()
super().__init__()
self.item1 = item1
self.item2 = item2

Expand All @@ -75,7 +74,7 @@ def get_bucket_number(hkey: int, shards_number: int) -> int:
return math.trunc((hkey * shards_number) >> HKEY_SIZE)


class _Bucket(object):
class _Bucket:
"""Holds (key, binary value) tuples to disk, fast.
Bucket instances are designed to be used either:
Expand Down Expand Up @@ -142,7 +141,7 @@ def flush(self) -> None:
self._fobj.flush()
self._fobj.close()

def read_values(self) -> Generator[Tuple[int, bytes], None, None]:
def read_values(self) -> Generator[tuple[int, bytes], None, None]:
"""Yields (hkey, data) tuples stored in bucket."""
self.flush()
path = self._path
Expand All @@ -167,7 +166,7 @@ def del_file(self) -> None:
tf.io.gfile.remove(self._path)


class Shuffler(object):
class Shuffler:
"""Stores data in temp buckets, restitute it shuffled."""

def __init__(self, dirpath: str, hash_salt: str):
Expand All @@ -179,23 +178,23 @@ def __init__(self, dirpath: str, hash_salt: str):
"""
grp_name = uuid.uuid4()
self._hasher = Hasher(hash_salt)
self._buckets: List[_Bucket] = []
self._buckets: list[_Bucket] = []
for i in range(BUCKETS_NUMBER):
path = os.path.join(dirpath, 'bucket_%s_%03d.tmp' % (grp_name, i))
self._buckets.append(_Bucket(path))
self._read_only = False
self._total_bytes = 0
# To keep data in memory until enough data has been gathered.
self._in_memory = True
self._mem_buffer: List[Tuple[int, bytes]] = []
self._mem_buffer: list[tuple[int, bytes]] = []

@property
def size(self) -> int:
"""Return total size in bytes of records (not keys)."""
return self._total_bytes

@property
def bucket_lengths(self) -> List[int]:
def bucket_lengths(self) -> list[int]:
if self._in_memory:
return [len(self._mem_buffer)]
return [len(b) for b in self._buckets]
Expand All @@ -216,7 +215,7 @@ def add(self, key: int, data: bytes) -> None:
"""Add (key, data) to shuffler."""
if self._read_only:
raise AssertionError('add() cannot be called after __iter__.')
if not isinstance(data, six.binary_type):
if not isinstance(data, bytes):
raise AssertionError('Only bytes (not %s) can be stored in Shuffler!' % (type(data)))
hkey = self._hasher.hash_key(key)
self._total_bytes += len(data)
Expand All @@ -237,13 +236,11 @@ def __iter__(self) -> Generator[bytes, None, None]:
yield data
previous_data = data

def _iter_mem(self) -> Generator[Tuple[int, bytes], None, None]:
for hkey, data in sorted(self._mem_buffer):
yield hkey, data
def _iter_mem(self) -> Generator[tuple[int, bytes], None, None]:
yield from sorted(self._mem_buffer)

def _iter_buckets(self) -> Generator[Tuple[int, bytes], None, None]:
def _iter_buckets(self) -> Generator[tuple[int, bytes], None, None]:
for bucket in self._buckets:
bucket_data = sorted(bucket.read_values())
bucket.del_file()
for hkey, data in bucket_data:
yield hkey, data
yield from bucket_data
7 changes: 4 additions & 3 deletions datum/configs/config_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from typing import Any, Callable


class ConfigBase(object):
class ConfigBase:
"""Base class for representing a set of tf.data config.
Attributes:
Expand Down Expand Up @@ -48,7 +48,7 @@ def __setattr__(self, name: str, value: Any) -> None:
if hasattr(self, name):
object.__setattr__(self, name, value)
else:
raise AttributeError("Cannot set the property %s on %s." % (name, type(self).__name__))
raise AttributeError("Cannot set the property {} on {}.".format(name, type(self).__name__))

def merge(self, configs: ConfigBase) -> ConfigBase:
return merge_configs(self, configs)
Expand Down Expand Up @@ -89,6 +89,7 @@ def set_fn(config: ConfigBase, value: Any) -> None:

def merge_configs(*configs_list: ConfigBase) -> ConfigBase:
"""Merges the given configs, returning the result as a new configs object.
The input arguments are expected to have a matching type that derives from
`ConfigBase` (and thus each represent a set of configs). The method outputs
an object of the same type created by merging the sets of configs represented
Expand All @@ -112,7 +113,7 @@ def merge_configs(*configs_list: ConfigBase) -> ConfigBase:

for configs in configs_list:
if not isinstance(configs, result_type):
raise TypeError("Incompatible configs type: %r vs %r" % (type(configs), result_type))
raise TypeError("Incompatible configs type: {!r} vs {!r}".format(type(configs), result_type))

if not isinstance(configs_list[0], ConfigBase):
raise TypeError("The inputs should inherit from `ConfigBase`")
Expand Down
3 changes: 1 addition & 2 deletions datum/configs/tfr_configs.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2020 The OpenAGI Datum Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -13,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from collections import Callable
from collections.abc import Callable

import tensorflow as tf

Expand Down
4 changes: 2 additions & 2 deletions datum/encoder/encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# limitations under the License.

import abc
from typing import Any, Dict
from typing import Any

import tensorflow as tf

Expand Down Expand Up @@ -126,7 +126,7 @@ def encode(self, inputs: ValueType) -> ValueType:
return inputs


def datum_name_to_encoder(datum: Dict, problem_type: str) -> Dict[str, Encoder]:
def datum_name_to_encoder(datum: dict, problem_type: str) -> dict[str, Encoder]:
"""Automatically identify encoder based on data values and problem type.
Args:
Expand Down
14 changes: 7 additions & 7 deletions datum/encoder/tokenizer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2020 The OpenAGI Datum Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -20,10 +19,11 @@
from datum.utils.common_utils import add_metaclass

# This set contains all letter and number characters.
_ALPHANUMERIC_CHAR_SET = set(
chr(i) for i in range(sys.maxunicode)
if (unicodedata.category(chr(i)).startswith("L") or unicodedata.category(chr(i)).startswith("N")
))
_ALPHANUMERIC_CHAR_SET = {
chr(i)
for i in range(sys.maxunicode)
if (unicodedata.category(chr(i)).startswith("L") or unicodedata.category(chr(i)).startswith("N"))
}


@add_metaclass(abc.ABCMeta)
Expand Down Expand Up @@ -57,7 +57,7 @@ def encode(self, text):
for pos in range(1, len(text)):
if is_alnum[pos] != is_alnum[pos - 1]:
token = text[token_start:pos]
if token != u" " or token_start == 0:
if token != " " or token_start == 0:
ret.append(token)
token_start = pos
final_token = text[token_start:]
Expand All @@ -76,6 +76,6 @@ def decode(self, tokens):
ret = []
for i, token in enumerate(tokens):
if i > 0 and token_is_alnum[i - 1] and token_is_alnum[i]:
ret.append(u" ")
ret.append(" ")
ret.append(token)
return "".join(ret)
33 changes: 16 additions & 17 deletions datum/generator/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import os
import xml.etree.ElementTree
from ast import literal_eval
from typing import Any, Dict, List, Tuple, no_type_check
from typing import Any, no_type_check

import tensorflow as tf

Expand Down Expand Up @@ -88,7 +88,7 @@ def generate_datum(self, **kwargs: Any) -> GeneratorReturnType:
sub_dir = kwargs.get('image_dir', split)
csv_path = kwargs.get('csv_path', split + '.csv')
data_path = os.path.join(self.path, sub_dir)
data: List[Dict] = []
data: list[dict] = []
with tf.io.gfile.GFile(os.path.join(self.path, csv_path)) as csv_f:
reader = csv.DictReader(csv_f)
for row in reader:
Expand All @@ -100,8 +100,7 @@ def generate_datum(self, **kwargs: Any) -> GeneratorReturnType:
feature_value = os.path.join(data_path, feature_value + extension)
feature_dict['image'] = feature_value
data.append(feature_dict)
for idx, datum in enumerate(data):
yield idx, datum
yield from enumerate(data)


class DetDatumGenerator(DatumGenerator):
Expand Down Expand Up @@ -171,7 +170,7 @@ def generate_datum(self, **kwargs: Any) -> GeneratorReturnType:
yield image_id, example

def _generate_example(self, data_path: str, image_dir: str, annon_dir: str, image_id: str,
extension: str, load_annotations: bool) -> Dict:
extension: str, load_annotations: bool) -> dict:
"""Generate a single example of the dataset.
Args:
Expand Down Expand Up @@ -214,32 +213,32 @@ def _generate_example(self, data_path: str, image_dir: str, annon_dir: str, imag
}

@no_type_check
def _get_example_objects(self, annon_filepath: str) -> Tuple:
def _get_example_objects(self, annon_filepath: str) -> tuple:
"""Function to get all the objects from the annotation XML file."""
with tf.io.gfile.GFile(annon_filepath, "r") as f:
root = xml.etree.ElementTree.parse(f).getroot()
size = root.find("size")
width = float(size.find("width").text)
height = float(size.find("height").text)

xmin: List[float] = []
xmax: List[float] = []
ymin: List[float] = []
ymax: List[float] = []
area: List[float] = []
label: List[int] = []
pose: List[str] = []
is_truncated: List[bool] = []
is_difficult: List[bool] = []
xmin: list[float] = []
xmax: list[float] = []
ymin: list[float] = []
ymax: list[float] = []
area: list[float] = []
label: list[int] = []
pose: list[str] = []
is_truncated: list[bool] = []
is_difficult: list[bool] = []
for obj in root.findall("object"):
class_id = obj.find("name").text.lower()
if isinstance(class_id, str):
label.append(self.gen_config.class_map[class_id])
else:
label.append(class_id)
pose.append(obj.find("pose").text.lower())
is_truncated.append((obj.find("truncated").text == "1"))
is_difficult.append((obj.find("difficult").text == "1"))
is_truncated.append(obj.find("truncated").text == "1")
is_difficult.append(obj.find("difficult").text == "1")
bndbox = obj.find("bndbox")
xmax.append(float(bndbox.find("xmax").text) / width)
xmin.append(float(bndbox.find("xmin").text) / width)
Expand Down
1 change: 0 additions & 1 deletion datum/generator/text.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# coding=utf-8
# Copyright 2020 The OpenAGI Datum Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand Down
10 changes: 5 additions & 5 deletions datum/reader/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

import json
import os
from typing import Callable, Dict, List, Optional
from typing import Callable, Optional

import tensorflow as tf
from absl import logging
Expand Down Expand Up @@ -60,8 +60,8 @@ def _read(self,
deterministic: bool = False,
use_tf_padding: bool = False,
use_datum_padding: bool = False,
pre_batching_callback: Optional[Callable[[Dict], Dict]] = None,
post_batching_callback: Optional[Callable[[Dict], Dict]] = None) -> DatasetType:
pre_batching_callback: Optional[Callable[[dict], dict]] = None,
post_batching_callback: Optional[Callable[[dict], dict]] = None) -> DatasetType:
"""Read and process data from tfrecord files.
Args:
Expand Down Expand Up @@ -142,9 +142,9 @@ def _read(self,
return dataset

@memoized_property
def padded_shapes(self) -> Dict[str, List]:
def padded_shapes(self) -> dict[str, list]:
"""Returns padded shapes from dataset metadata."""
with open(os.path.join(self._path, 'datum_to_type_and_shape_mapping.json'), 'r') as json_f:
with open(os.path.join(self._path, 'datum_to_type_and_shape_mapping.json')) as json_f:
mapping = json.load(json_f)
padded_shapes = {}
for key, value in mapping.items():
Expand Down
Loading

0 comments on commit 910e4d3

Please sign in to comment.