Skip to content

Commit

Permalink
`from_file` and indexation no longer return `None`
Browse files Browse the repository at this point in the history
  • Loading branch information
xoolive committed Oct 31, 2024
1 parent 0bee890 commit d9db024
Show file tree
Hide file tree
Showing 8 changed files with 119 additions and 137 deletions.
45 changes: 13 additions & 32 deletions src/traffic/core/flight.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,7 @@
Optional,
Set,
Tuple,
Type,
TypedDict,
TypeVar,
Union,
cast,
overload,
Expand All @@ -35,6 +33,7 @@
from impunity import impunity
from pitot import geodesy as geo
from rich.console import Console, ConsoleOptions, RenderResult
from typing_extensions import Self

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -84,30 +83,15 @@ class Entry(TypedDict, total=False):
name: str


T = TypeVar("T", bound="Flight")
def _tz_interpolate(
    data: DatetimeTZBlock, *args: Any, **kwargs: Any
) -> DatetimeTZBlock:
    """Interpolate a tz-aware datetime block through an int64 view.

    Since pandas 1.3, ``Block.interpolate`` returns a list, and Windows
    needs an explicit "int64" target ("int" may resolve to "int32"),
    hence the coercion and the unpacking of the first element.
    """
    # NOTE(review): relies on pandas-internal Block API — confirm on
    # any pandas upgrade.
    as_int64 = data.coerce_to_target_dtype("int64")
    result, *_rest = as_int64.interpolate(*args, **kwargs)
    return result

if str(pd.__version__) < "1.3":

def _tz_interpolate(
data: DatetimeTZBlock, *args: Any, **kwargs: Any
) -> DatetimeTZBlock:
return data.astype(int).interpolate(*args, **kwargs).astype(data.dtype)

DatetimeTZBlock.interpolate = _tz_interpolate

else:
# - with version 1.3.0, interpolate returns a list
# - Windows require "int64" as "int" may be interpreted as "int32" and raise
# an error (was not raised before 1.3.0)

def _tz_interpolate(
data: DatetimeTZBlock, *args: Any, **kwargs: Any
) -> DatetimeTZBlock:
coerced = data.coerce_to_target_dtype("int64")
interpolated, *_ = coerced.interpolate(*args, **kwargs)
return interpolated

DatetimeTZBlock.interpolate = _tz_interpolate
DatetimeTZBlock.interpolate = _tz_interpolate


def _split(
Expand Down Expand Up @@ -813,7 +797,7 @@ def final(
segment = None
for segment in fun(self):
continue
return segment # type: ignore
return segment

# --- Iterators ---

Expand Down Expand Up @@ -1042,6 +1026,9 @@ def _get_unique(
if field not in self.data.columns:
return None
tmp = self.data[field].unique()
tmp = list(elt for elt in tmp if elt == elt)
if len(tmp) == 0:
return None
if len(tmp) == 1:
return tmp[0] # type: ignore
if warn:
Expand All @@ -1057,8 +1044,6 @@ def callsign(self) -> Union[str, Set[str], None]:
with a route for a commercial aircraft.
"""
callsign = self._get_unique("callsign")
if callsign != callsign:
raise ValueError("NaN appearing in callsign field")
return callsign

@property
Expand Down Expand Up @@ -3100,9 +3085,7 @@ def from_fr24(cls, filename: Union[Path, str]) -> Flight:
return FlightRadar24.from_file(filename)

@classmethod
def from_file(
cls: Type[T], filename: Union[Path, str], **kwargs: Any
) -> Optional[T]:
def from_file(cls, filename: Union[Path, str], **kwargs: Any) -> Self:
"""Read data from various formats.
This class method dispatches the loading of data in various format to
Expand All @@ -3126,8 +3109,6 @@ def from_file(
"""

tentative = super().from_file(filename, **kwargs)
if tentative is None:
return None

# Special treatment for flights to download from flightradar24
cols_fr24 = {
Expand Down
91 changes: 41 additions & 50 deletions src/traffic/core/mixins.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
# ruff: noqa: E501
from __future__ import annotations

import gzip
import json
import logging
import re
from functools import lru_cache
Expand All @@ -14,14 +12,14 @@
ClassVar,
Mapping,
Sequence,
Type,
TypedDict,
TypeVar,
)

from py7zr import SevenZipFile
from rich.box import SIMPLE_HEAVY
from rich.console import Console, ConsoleOptions, RenderResult
from rich.table import Table
from typing_extensions import Self

import numpy as np
import pandas as pd
Expand All @@ -40,8 +38,8 @@
from matplotlib.artist import Artist


T = TypeVar("T", bound="DataFrameMixin")
G = TypeVar("G", bound="GeoDBMixin")
# T = TypeVar("T", bound="DataFrameMixin")
# G = TypeVar("G", bound="GeoDBMixin")


_log = logging.getLogger(__name__)
Expand Down Expand Up @@ -74,9 +72,7 @@ def __sizeof__(self) -> int:
return int(self.data.memory_usage().sum())

@classmethod
def from_file(
cls: Type[T], filename: str | Path, **kwargs: Any
) -> None | T:
def from_file(cls, filename: str | Path, **kwargs: Any) -> Self:
"""Read data from various formats.
This class method dispatches the loading of data in various format to
Expand All @@ -101,6 +97,16 @@ def from_file(
>>> t = Traffic.from_file(filename)
"""
path = Path(filename)

if path.suffix == (".7z"):
with SevenZipFile(path) as archive:
if (files := archive.readall()) is None:
raise FileNotFoundError(f"Empty archive {path}")
for name, io in files.items():
if name.endswith(".jsonl"):
return cls(pd.read_json(io, lines=True, **kwargs))
raise FileNotFoundError(f"Empty archive {path}")

if ".pkl" in path.suffixes or ".pickle" in path.suffixes:
return cls(pd.read_pickle(path, **kwargs))
if ".parquet" in path.suffixes:
Expand All @@ -109,29 +115,14 @@ def from_file(
return cls(pd.read_feather(path, **kwargs))
if ".json" in path.suffixes:
return cls(pd.read_json(path, **kwargs))
if path.suffix == ".jsonl":
df = pd.json_normalize(
json.loads(elt) for elt in path.read_text().split("\n")[:-1]
)
df = df.assign(
timestamp=pd.to_datetime(df.timestamp, unit="s", utc=True)
)
return cls(df)
if ".jsonl" in path.suffixes and ".gz" in path.suffixes:
with gzip.open(path) as fh:
df = pd.json_normalize(
json.loads(elt) for elt in fh.readlines()
)
df = df.assign(
timestamp=pd.to_datetime(df.timestamp, unit="s", utc=True)
)
return cls(df)

if ".jsonl" in path.suffixes:
return cls(pd.read_json(path, lines=True, **kwargs))
if ".csv" in path.suffixes:
return cls(pd.read_csv(path, **kwargs))
if ".h5" == path.suffixes[-1]: # coverage: ignore
return cls(pd.read_hdf(path, **kwargs))
return None

raise FileNotFoundError(path)

# --- Special methods ---

Expand Down Expand Up @@ -189,39 +180,39 @@ def __rich_console__(

# --- Redirected to pandas.DataFrame ---

def assign(self: T, *args: Any, **kwargs: Any) -> T:
def assign(self, *args: Any, **kwargs: Any) -> Self:
    """Delegate to :meth:`pandas.DataFrame.assign` on the underlying
    DataFrame and wrap the result back into the same structure.
    """
    updated = self.data.assign(*args, **kwargs)
    return self.__class__(updated)

def convert_dtypes(self: T, *args: Any, **kwargs: Any) -> T:
def convert_dtypes(self, *args: Any, **kwargs: Any) -> Self:
    """Delegate to :meth:`pandas.DataFrame.convert_dtypes` on the
    underlying DataFrame and wrap the result back into the same
    structure.
    """
    converted = self.data.convert_dtypes(*args, **kwargs)
    return self.__class__(converted)

def drop(self: T, *args: Any, **kwargs: Any) -> T:
def drop(self, *args: Any, **kwargs: Any) -> Self:
    """Delegate to :meth:`pandas.DataFrame.drop` on the underlying
    DataFrame and wrap the result back into the same structure.
    """
    trimmed = self.data.drop(*args, **kwargs)
    return self.__class__(trimmed)

def drop_duplicates(self: T, *args: Any, **kwargs: Any) -> T:
def drop_duplicates(self, *args: Any, **kwargs: Any) -> Self:
    """Delegate to :meth:`pandas.DataFrame.drop_duplicates` on the
    underlying DataFrame and wrap the result back into the same
    structure.
    """
    deduplicated = self.data.drop_duplicates(*args, **kwargs)
    return self.__class__(deduplicated)

def fillna(self: T, *args: Any, **kwargs: Any) -> T:
def fillna(self, *args: Any, **kwargs: Any) -> Self:
"""
Applies the Pandas :meth:`~pandas.DataFrame.fillna` method to the
underlying pandas DataFrame and get the result back in the same
Expand All @@ -238,15 +229,15 @@ def groupby(
"""
return self.data.groupby(*args, **kwargs)

def merge(self: T, *args: Any, **kwargs: Any) -> T:
def merge(self, *args: Any, **kwargs: Any) -> Self:
    """Delegate to :meth:`pandas.DataFrame.merge` on the underlying
    DataFrame and wrap the result back into the same structure.
    """
    joined = self.data.merge(*args, **kwargs)
    return self.__class__(joined)

def query(self: T, query_str: str, *args: Any, **kwargs: Any) -> None | T:
def query(self, query_str: str, *args: Any, **kwargs: Any) -> None | Self:
"""
Applies the Pandas :meth:`~pandas.DataFrame.query` method to the
underlying pandas DataFrame and get the result back in the same
Expand All @@ -257,31 +248,31 @@ def query(self: T, query_str: str, *args: Any, **kwargs: Any) -> None | T:
return None
return self.__class__(df)

def rename(self: T, *args: Any, **kwargs: Any) -> T:
def rename(self, *args: Any, **kwargs: Any) -> Self:
    """Delegate to :meth:`pandas.DataFrame.rename` on the underlying
    DataFrame and wrap the result back into the same structure.
    """
    relabelled = self.data.rename(*args, **kwargs)
    return self.__class__(relabelled)

def replace(self: T, *args: Any, **kwargs: Any) -> T:
def replace(self, *args: Any, **kwargs: Any) -> Self:
    """Delegate to :meth:`pandas.DataFrame.replace` on the underlying
    DataFrame and wrap the result back into the same structure.
    """
    substituted = self.data.replace(*args, **kwargs)
    return self.__class__(substituted)

def reset_index(self: T, *args: Any, **kwargs: Any) -> T:
def reset_index(self, *args: Any, **kwargs: Any) -> Self:
    """Delegate to :meth:`pandas.DataFrame.reset_index` on the
    underlying DataFrame and wrap the result back into the same
    structure.
    """
    reindexed = self.data.reset_index(*args, **kwargs)
    return self.__class__(reindexed)

def sort_values(self: T, by: str | Sequence[str], **kwargs: Any) -> T:
def sort_values(self, by: str | Sequence[str], **kwargs: Any) -> Self:
"""
Applies the Pandas :meth:`~pandas.DataFrame.sort_values` method to the
underlying pandas DataFrame and get the result back in the same
Expand Down Expand Up @@ -516,7 +507,7 @@ class GeographyMixin(DataFrameMixin):

__slots__ = ()

def projection(self: T, proj: str = "lcc") -> pyproj.Proj:
def projection(self, proj: str = "lcc") -> pyproj.Proj:
return pyproj.Proj(
proj=proj,
ellps="WGS84",
Expand All @@ -527,8 +518,8 @@ def projection(self: T, proj: str = "lcc") -> pyproj.Proj:
)

def compute_xy(
self: T, projection: None | pyproj.Proj | "crs.Projection" = None
) -> T:
self, projection: None | pyproj.Proj | "crs.Projection" = None
) -> Self:
"""Enrich the structure with new x and y columns computed through a
projection of the latitude and longitude columns.
Expand All @@ -547,7 +538,7 @@ def compute_xy(
projection = pyproj.Proj(projection.proj4_init)

if projection is None:
projection = self.projection(proj="lcc") # type: ignore
projection = self.projection(proj="lcc")

transformer = pyproj.Transformer.from_proj(
pyproj.Proj("epsg:4326"), projection, always_xy=True
Expand All @@ -560,8 +551,8 @@ def compute_xy(
return self.__class__(self.data.assign(x=x, y=y))

def compute_latlon_from_xy(
self: T, projection: pyproj.Proj | crs.Projection
) -> T:
self, projection: pyproj.Proj | crs.Projection
) -> Self:
"""Enrich a DataFrame with new longitude and latitude columns computed
from x and y columns.
Expand Down Expand Up @@ -675,14 +666,14 @@ def geoencode(self, **kwargs: Any) -> "alt.Chart": # coverage: ignore
)

def interpolate_grib(
self: T, wind: "xarray.Dataset", features: list[str] = ["u", "v"]
) -> T:
self, wind: "xarray.Dataset", features: list[str] = ["u", "v"]
) -> Self:
from openap import aero
from sklearn.linear_model import Ridge
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures

projection: pyproj.Proj = self.projection("lcc") # type: ignore
projection: pyproj.Proj = self.projection("lcc")
transformer = pyproj.Transformer.from_proj(
pyproj.Proj("epsg:4326"), projection, always_xy=True
)
Expand Down Expand Up @@ -778,10 +769,10 @@ class GeoDBMixin(DataFrameMixin):
__slots__ = ()

def extent(
self: G,
self,
extent: str | ShapelyMixin | tuple[float, float, float, float],
buffer: float = 0.5,
) -> None | G:
) -> None | Self:
"""
Selects the subset of data inside the given extent.
Expand Down
Loading

0 comments on commit d9db024

Please sign in to comment.