Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix pyarrow issues #468

Merged
merged 4 commits on Oct 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions src/traffic/algorithms/openap.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,7 @@ def phases(self, twindow: int = 60) -> "Flight":

fp = FlightPhase()
fp.set_trajectory(
(self.data.timestamp.values - np.datetime64("1970-01-01"))
/ np.timedelta64(1, "s"),
(self.data.timestamp.astype(int) // 1_000_000_000).values,
altitude,
groundspeed,
vertical_rate,
Expand Down
25 changes: 13 additions & 12 deletions src/traffic/core/flight.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,17 +116,17 @@ def _split(
# This method helps splitting a flight into several.
if data.shape[0] < 2:
return
diff = data.timestamp.diff().values
diff = data.timestamp.diff()
if unit is None:
delta = pd.Timedelta(value).to_timedelta64()
else:
delta = np.timedelta64(value, unit)
# There seems to be a change with numpy >= 1.18
# max() now may return NaN, therefore the following fix
max_ = np.nanmax(diff)
max_ = diff.max()
if max_ > delta:
# np.nanargmax seems bugged with timestamps
argmax = np.where(diff == max_)[0][0]
argmax = diff.argmax()
yield from _split(data.iloc[:argmax], value, unit)
yield from _split(data.iloc[argmax:], value, unit)
else:
Expand Down Expand Up @@ -1490,7 +1490,7 @@ def split(

- in the NumPy style: ``Flight.split(10, 'm')`` (see
``np.timedelta64``);
- in the pandas style: ``Flight.split('10T')`` (see ``pd.Timedelta``)
- in the pandas style: ``Flight.split('10 min')`` (see ``pd.Timedelta``)

If the `condition` parameter is set, the flight is split between two
segments only if `condition(f1, f2)` is verified.
Expand Down Expand Up @@ -2720,6 +2720,7 @@ def query_ehs(
self,
data: Union[None, pd.DataFrame, "RawData"] = None,
failure_mode: str = "info",
**kwargs: Any,
) -> Flight:
"""Extends data with extra columns from EHS messages.

Expand Down Expand Up @@ -2779,7 +2780,9 @@ def fail_silent() -> Flight:
failure = failure_dict[failure_mode]

if data is None:
ext = opensky.extended(self.start, self.stop, icao24=self.icao24)
ext = opensky.extended(
self.start, self.stop, icao24=self.icao24, **kwargs
)
df = ext.data if ext is not None else None
else:
df = data if isinstance(data, pd.DataFrame) else data.data
Expand All @@ -2793,19 +2796,17 @@ def fail_silent() -> Flight:

timestamped_df = (
df.sort_values("mintime")
.assign(
timestamp=lambda df: pd.to_datetime(
df.mintime, unit="s", utc=True
)
)
.assign(timestamp=lambda df: df.mintime)
# TODO shouldn't be necessary after pyopensky 2.10
.convert_dtypes(dtype_backend="pyarrow")
)

referenced_df = (
timestamped_df.merge(
# TODO shouldn't be necessary after pyopensky 2.10
self.data.convert_dtypes(dtype_backend="pyarrow"),
self.data.convert_dtypes(dtype_backend="pyarrow").assign(
timestamp=lambda df: df.timestamp.astype("int64") * 1e-9
),
on="timestamp",
how="outer",
)
Expand All @@ -2829,7 +2830,7 @@ def fail_silent() -> Flight:

decoded = rs1090.decode(
referenced_df.rawmsg,
referenced_df.timestamp.astype("int64") * 1e-9,
referenced_df.timestamp.astype("int64"),
)

if len(decoded) == 0:
Expand Down
8 changes: 4 additions & 4 deletions src/traffic/core/time.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
_log = logging.getLogger(__name__)


def to_datetime(time: timelike) -> datetime:
def to_datetime(time: timelike) -> pd.Timestamp:
"""Facility to convert anything to a datetime.

This function will soon be replaced by pd.to_datetime.
Expand All @@ -41,10 +41,10 @@ def to_datetime(time: timelike) -> datetime:

if isinstance(time, str):
time = pd.Timestamp(time, tz="utc")
if isinstance(time, pd.Timestamp):
time = time.to_pydatetime()
if isinstance(time, datetime):
time = pd.Timestamp(time)
if isinstance(time, Real):
time = datetime.fromtimestamp(float(time), timezone.utc)
time = pd.Timestamp(float(time), tz="utc", unit="s")
if time.tzinfo is None: # coverage: ignore
_log.warning(
"This timestamp is tz-naive. Things may not work as expected. "
Expand Down
5 changes: 0 additions & 5 deletions src/traffic/data/adsb/opensky.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,11 +150,6 @@ def format_history(
if column_name in df.columns:
df[column_name] = df[column_name].astype(float)

if "onground" in df.columns and df.onground.dtype != bool:
df.onground = df.onground == "true"
df.alert = df.alert == "true"
df.spi = df.spi == "true"

# better (to me) formalism about columns
df = df.rename(
columns={
Expand Down