Skip to content

Commit

Permalink
Improve support for SCAT dataset (#437)
Browse files Browse the repository at this point in the history
  • Loading branch information
niclaswue authored May 24, 2024
1 parent 493f722 commit 7fefd31
Show file tree
Hide file tree
Showing 2 changed files with 163 additions and 40 deletions.
151 changes: 111 additions & 40 deletions src/traffic/data/datasets/scat.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import pandas as pd

from ...core import Flight, Traffic, tqdm
from ...data.basic.navaid import Navaids
from .mendeley import Mendeley


Expand Down Expand Up @@ -51,55 +52,113 @@ class SCAT:
traffic: Traffic
flight_plans: pd.DataFrame
clearances: pd.DataFrame
waypoints: Navaids
weather: pd.DataFrame

def parse_zipinfo(self, zf: ZipFile, file_info: ZipInfo) -> Entry:
    """Parse one flight member of the archive into an ``Entry``.

    The member is read as JSON and turned into three tables: the flight
    plan updates (``fpl.fpl_plan_update``), the clearances
    (``fpl.fpl_clearance``) and the track plots (``plots``), the latter
    being wrapped in a ``Flight``.

    :param zf: the opened archive
    :param file_info: the archive member describing a single flight
    :return: ``Entry(Flight(df), flight_plan, clearance)``
    """
    # The file handle is only needed to fetch the raw bytes; everything
    # below works on the decoded JSON document.
    with zf.open(file_info.filename, "r") as fh:
        content_bytes = fh.read()
    decoded = json.loads(content_bytes.decode())
    # Looks unused to linters, but is referenced as @flight_id from the
    # eval expressions below.
    flight_id = str(decoded["id"])  # noqa: F841

    flight_plan = (
        pd.json_normalize(decoded["fpl"]["fpl_plan_update"])
        .rename(columns=rename_columns)
        .eval(
            """
timestamp = @pd.to_datetime(timestamp, utc=True, format="mixed")
flight_id = @flight_id
""",
            engine="python",
        )
    )

    clearance = (
        pd.json_normalize(decoded["fpl"]["fpl_clearance"])
        .rename(columns=rename_columns)
        .eval(
            """
timestamp = @pd.to_datetime(timestamp, utc=True, format="mixed")
flight_id = @flight_id
""",
            engine="python",
        )
    )

    # Only the first fpl_base record is used to label the trajectory.
    fpl_base, *_ = decoded["fpl"]["fpl_base"]
    df = (
        pd.json_normalize(decoded["plots"])
        .rename(columns=rename_columns)
        .eval(
            """
timestamp = @pd.to_datetime(time_of_track, utc=True, format="mixed")
altitude = 100 * flight_level
origin = @fpl_base['adep']
destination = @fpl_base['ades']
typecode = @fpl_base['aircraft_type']
callsign = @fpl_base['callsign']
flight_id = @flight_id
icao24 = "000000"
""",
            engine="python",
        )
    )
    return Entry(Flight(df), flight_plan, clearance)

def parse_waypoints(self, zf: ZipFile, file_info: ZipInfo) -> Navaids:
    """Build a ``Navaids`` collection from the airspace member.

    The member is a JSON list of centers; the points of every center are
    tagged as ``FIX`` and labelled with the name of their center, then all
    tables are concatenated and exact duplicate rows are dropped.

    :param zf: the opened archive
    :param file_info: the archive member holding the airspace description
    :return: the fixes wrapped in ``Navaids``, with ``priority`` set to -1
    """
    with zf.open(file_info.filename, "r") as stream:
        raw = stream.read()
    centers = json.loads(raw.decode())

    column_names = {"lat": "latitude", "lon": "longitude"}
    per_center = [
        pd.json_normalize(center["points"])
        .assign(
            type="FIX",
            altitude=None,
            frequency=None,
            magnetic_variation=None,
            description=f"Center: {center['name']}",
        )
        .rename(columns=column_names)
        for center in centers
    ]

    merged = pd.concat(per_center).drop_duplicates(ignore_index=True)
    navaids = Navaids(data=merged)
    navaids.priority = -1  # prefer over default navaids
    return navaids

def parse_weather(self, zf: ZipFile, file_info: ZipInfo) -> pd.DataFrame:
    """Read the weather member of the archive into a DataFrame.

    The JSON records are flattened, the abbreviated keys are renamed to
    explicit column names, and the ``timestamp`` column is parsed into
    timezone-aware (UTC) datetimes.

    :param zf: the opened archive
    :param file_info: the archive member holding the weather records
    :return: one row per weather record
    """
    rename_columns = {
        "alt": "altitude",
        "lat": "latitude",
        "lon": "longitude",
        "temp": "temperature",
        "time": "timestamp",
        "wind_dir": "wind_direction",
        "wind_spd": "wind_speed",
    }
    # The file handle is only needed to fetch the raw bytes.
    with zf.open(file_info.filename, "r") as fh:
        content_bytes = fh.read()
    decoded = json.loads(content_bytes.decode())
    return (
        pd.json_normalize(decoded)
        .rename(columns=rename_columns)
        .eval(
            """
timestamp = @pd.to_datetime(timestamp, utc=True, format="mixed")
""",
            engine="python",
        )
    )

def __init__(self, ident: str, nflights: None | int = None) -> None:
def __init__(
self,
ident: str,
nflights: None | int = None,
include_waypoints: bool = False,
include_weather: bool = False,
) -> None:
mendeley = Mendeley("8yn985bwz5")
filename = mendeley.get_data(ident)

Expand All @@ -108,14 +167,26 @@ def __init__(self, ident: str, nflights: None | int = None) -> None:
flight_plans = []

with ZipFile(filename, "r") as zf:
info_list = zf.infolist()
if nflights is not None:
info_list = info_list[:nflights]
all_files = zf.infolist()
total_flights = len(all_files) - 2
nflights = (
min(nflights, total_flights)
if nflights is not None
else total_flights
)
info_list = all_files[:nflights]
if include_waypoints:
info_list.append(all_files[-2])
if include_weather:
info_list.append(all_files[-1])

for file_info in tqdm(info_list):
if "airspace" in file_info.filename:
self.waypoints = self.parse_waypoints(zf, file_info)
continue

if "grib_meteo" in file_info.filename:
self.weather = self.parse_weather(zf, file_info)
continue

entry = self.parse_zipinfo(zf, file_info)
Expand Down
52 changes: 52 additions & 0 deletions tests/test_datasets.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,60 @@
from traffic.data.datasets.scat import SCAT

import pandas as pd


def test_scat() -> None:
    """Loading ten flights yields ten entries in every per-flight table."""
    expected = 10
    dataset = SCAT("scat20161015_20161021.zip", nflights=expected)
    assert len(dataset.traffic) == expected
    assert dataset.flight_plans.flight_id.nunique() == expected
    assert dataset.clearances.flight_id.nunique() == expected


def test_scat_waypoints() -> None:
    """Waypoints of the airspace file are exposed as a ``Navaids`` object."""
    # Imported locally so the test does not depend on the module header.
    from traffic.data.basic.navaid import Navaids

    s = SCAT("scat20161015_20161021.zip", nflights=10, include_waypoints=True)
    # SCAT.parse_waypoints returns Navaids(data=df), not a bare DataFrame.
    assert isinstance(s.waypoints, Navaids)
    # NOTE(review): assumes Navaids exposes the table passed as ``data=``
    # through its ``.data`` attribute -- confirm.
    fixes = s.waypoints.data
    assert len(fixes) == 15871
    # parse_waypoints adds the navaid columns below and stores the center
    # name in ``description`` as "Center: <name>".
    # NOTE(review): assumes each point only carries name/lat/lon -- confirm.
    assert set(fixes.columns) == {
        "latitude",
        "longitude",
        "name",
        "type",
        "altitude",
        "frequency",
        "magnetic_variation",
        "description",
    }
    aa212 = fixes[fixes["name"] == "AA212"]
    assert len(aa212) == 1
    assert aa212["latitude"].item() == 58.4902778
    assert aa212["longitude"].item() == 14.4866667
    assert aa212["type"].item() == "FIX"
    assert aa212["description"].item() == "Center: ESMM"

    # KERAX is present for both centers
    kerax = fixes[fixes["name"] == "KERAX"]
    assert set(kerax["description"].values) == {
        "Center: ESMM",
        "Center: ESOS",
    }
    assert kerax.iloc[0]["latitude"] == kerax.iloc[1]["latitude"] == 50.475
    assert kerax.iloc[0]["longitude"] == kerax.iloc[1]["longitude"] == 9.5819444


def test_scat_weather() -> None:
    """Weather records are loaded as a complete, UTC-timestamped table."""
    dataset = SCAT(
        "scat20161015_20161021.zip",
        nflights=10,
        include_weather=True,
    )
    assert isinstance(dataset.weather, pd.DataFrame)
    assert not dataset.weather.isna().any().max()
    assert len(dataset.weather) == 1519310

    expected_columns = {
        "altitude",
        "latitude",
        "longitude",
        "temperature",
        "timestamp",
        "wind_direction",
        "wind_speed",
    }
    assert set(dataset.weather.columns) == expected_columns
    timestamp_dtype = dataset.weather["timestamp"].dtype
    assert isinstance(timestamp_dtype, pd.DatetimeTZDtype)

    # compare measurement for a specific timestamp
    # ``ts`` is referenced as @ts inside the query string below
    ts = pd.to_datetime("2016-10-14 10:30:00+00:00")  # noqa: F841
    measurement = dataset.weather.query(
        "timestamp == @ts & altitude == 50 & latitude == 42.5 & longitude == 60"
    )
    assert len(measurement) == 1
    expected_values = {
        "temperature": 4,
        "wind_direction": 166,
        "wind_speed": 16,
    }
    for column, expected in expected_values.items():
        assert measurement[column].item() == expected

0 comments on commit 7fefd31

Please sign in to comment.