feat: add municipality info (#258)
* feat: add municipality info to households and activities

* update tests & improve municipality info for homes

* first pass at correcting tests & changelog

* fix: test gpkg hashes

---------

Co-authored-by: Marie Laurent <[email protected]>
MarieMcLaurent and Marie Laurent authored Sep 23, 2024
1 parent 550f343 commit e82ae98
Showing 4 changed files with 52 additions and 33 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -2,6 +2,7 @@

**Under development**

- feat: add municipality information to households and activities
- chore: update to `eqasim-java` commit `ece4932`
- feat: vehicles and vehicle types are now always generated
- feat: read vehicles data from zip files
69 changes: 40 additions & 29 deletions synthesis/output.py
@@ -62,23 +62,6 @@ def execute(context):
output_prefix = context.config("output_prefix")
output_formats = context.config("output_formats")

# Prepare households
df_households = context.stage("synthesis.population.enriched").rename(
columns = { "household_income": "income" }
).drop_duplicates("household_id")

df_households = df_households[[
"household_id",
"car_availability", "bike_availability",
"number_of_vehicles", "number_of_bikes",
"income",
"census_household_id"
]]
if "csv" in output_formats:
df_households.to_csv("%s/%shouseholds.csv" % (output_path, output_prefix), sep = ";", index = None, lineterminator = "\n")
if "parquet" in output_formats:
df_households.to_parquet("%s/%shouseholds.parquet" % (output_path, output_prefix))

# Prepare persons
df_persons = context.stage("synthesis.population.enriched").rename(
columns = { "has_license": "has_driving_license" }
@@ -106,9 +89,29 @@ def execute(context):
df_activities["preceding_trip_index"] = df_activities["following_trip_index"].shift(1)
df_activities.loc[df_activities["is_first"], "preceding_trip_index"] = -1
df_activities["preceding_trip_index"] = df_activities["preceding_trip_index"].astype(int)
# Prepare spatial data sets
df_locations = context.stage("synthesis.population.spatial.locations")[[
"person_id", "iris_id", "commune_id","departement_id","region_id","activity_index", "geometry"
]]

df_activities = pd.merge(df_activities, df_locations[[
"person_id", "iris_id", "commune_id","departement_id","region_id","activity_index", "geometry"
]], how = "left", on = ["person_id", "activity_index"])

# Prepare spatial activities
df_spatial = gpd.GeoDataFrame(df_activities[[
"person_id", "household_id", "activity_index",
"iris_id", "commune_id","departement_id","region_id",
"preceding_trip_index", "following_trip_index",
"purpose", "start_time", "end_time",
"is_first", "is_last", "geometry"
]], crs = df_locations.crs)
df_spatial = df_spatial.astype({'purpose': 'str', "departement_id": 'str'})

# Write activities
df_activities = df_activities[[
"person_id", "household_id", "activity_index",
"iris_id", "commune_id","departement_id","region_id",
"preceding_trip_index", "following_trip_index",
"purpose", "start_time", "end_time",
"is_first", "is_last"
@@ -119,6 +122,25 @@
if "parquet" in output_formats:
df_activities.to_parquet("%s/%sactivities.parquet" % (output_path, output_prefix))

# Prepare households
df_households = context.stage("synthesis.population.enriched").rename(
columns = { "household_income": "income" }
).drop_duplicates("household_id")

df_households = pd.merge(df_households,df_activities[df_activities["purpose"] == "home"][["household_id",
"iris_id", "commune_id","departement_id","region_id"]].drop_duplicates("household_id"),how="left")
df_households = df_households[[
"household_id","iris_id", "commune_id", "departement_id","region_id",
"car_availability", "bike_availability",
"number_of_vehicles", "number_of_bikes",
"income",
"census_household_id"
]]
if "csv" in output_formats:
df_households.to_csv("%s/%shouseholds.csv" % (output_path, output_prefix), sep = ";", index = None, lineterminator = "\n")
if "parquet" in output_formats:
df_households.to_parquet("%s/%shouseholds.parquet" % (output_path, output_prefix))

# Prepare trips
df_trips = context.stage("synthesis.population.trips").rename(
columns = {
@@ -170,18 +192,7 @@ def execute(context):
df_vehicle_types.to_parquet("%s/%svehicle_types.parquet" % (output_path, output_prefix))
df_vehicles.to_parquet("%s/%svehicles.parquet" % (output_path, output_prefix))

# Prepare spatial data sets
df_locations = context.stage("synthesis.population.spatial.locations")[[
"person_id", "activity_index", "geometry"
]]

df_activities = pd.merge(df_activities, df_locations[[
"person_id", "activity_index", "geometry"
]], how = "left", on = ["person_id", "activity_index"])

# Write spatial activities
df_spatial = gpd.GeoDataFrame(df_activities, crs = df_locations.crs)
df_spatial["purpose"] = df_spatial["purpose"].astype(str)
if "gpkg" in output_formats:
path = "%s/%sactivities.gpkg" % (output_path, output_prefix)
df_spatial.to_file(path, driver = "GPKG")
@@ -194,7 +205,7 @@ def execute(context):
df_spatial_homes = df_spatial[
df_spatial["purpose"] == "home"
].drop_duplicates("household_id")[[
"household_id", "geometry"
"household_id","iris_id", "commune_id","departement_id","region_id", "geometry"
]]
if "gpkg" in output_formats:
path = "%s/%shomes.gpkg" % (output_path, output_prefix)
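For reference, the household change in synthesis/output.py boils down to one pattern: pick each household's "home" activity, keep its zoning identifiers, and left-merge them onto the household records. A minimal, self-contained sketch of that pattern with made-up toy data (column names match the pipeline, values are invented):

    import pandas as pd

    # Toy stand-ins for the pipeline's dataframes (values are invented).
    df_households = pd.DataFrame({
        "household_id": [1, 2],
        "income": [32000, 41000],
    })

    df_activities = pd.DataFrame({
        "household_id": [1, 1, 2],
        "purpose": ["home", "work", "home"],
        "iris_id": ["751010101", "751050501", "920120101"],
        "commune_id": ["75101", "75105", "92012"],
        "departement_id": ["75", "75", "92"],
        "region_id": ["11", "11", "11"],
    })

    # Keep one "home" activity per household and carry its zoning identifiers
    # over to the household record, as the commit now does before writing
    # the households output.
    df_home_zones = df_activities[df_activities["purpose"] == "home"][[
        "household_id", "iris_id", "commune_id", "departement_id", "region_id"
    ]].drop_duplicates("household_id")

    df_households = pd.merge(df_households, df_home_zones, how = "left")
    print(df_households)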
7 changes: 7 additions & 0 deletions synthesis/population/spatial/locations.py
@@ -9,6 +9,7 @@ def configure(context):

context.stage("synthesis.population.activities")
context.stage("synthesis.population.sampled")
context.stage("data.spatial.iris")

def execute(context):
df_home = context.stage("synthesis.population.spatial.home.locations")
@@ -57,4 +58,10 @@ def execute(context):
assert not df_locations["geometry"].isna().any()
df_locations = gpd.GeoDataFrame(df_locations, crs = df_home.crs)

# add municipalities
df_iris = context.stage("data.spatial.iris")
df_iris = gpd.GeoDataFrame(df_iris, crs = df_home.crs)

df_locations = gpd.sjoin(df_locations,df_iris,how="left")

return df_locations
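The new lines in locations.py rely on a left spatial join: every activity location keeps its row and picks up the attributes of the IRIS polygon it falls into (NaN when nothing matches). A minimal sketch of that pattern with hypothetical toy geometries and an assumed Lambert-93 CRS:

    import geopandas as gpd
    from shapely.geometry import Point, box

    # Hypothetical location points and IRIS polygons (geometries are invented).
    df_locations = gpd.GeoDataFrame({
        "person_id": [1, 2],
        "geometry": [Point(0.5, 0.5), Point(1.5, 0.5)],
    }, crs = "EPSG:2154")

    df_iris = gpd.GeoDataFrame({
        "iris_id": ["751010101", "751010102"],
        "commune_id": ["75101", "75101"],
        "geometry": [box(0, 0, 1, 1), box(1, 0, 2, 1)],
    }, crs = "EPSG:2154")

    # Left join: keep every location and attach the attributes of the polygon
    # that contains it, which is how the commit adds municipality information.
    df_locations = gpd.sjoin(df_locations, df_iris, how = "left")
    print(df_locations[["person_id", "iris_id", "commune_id"]])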
8 changes: 4 additions & 4 deletions tests/test_determinism.py
@@ -68,18 +68,18 @@ def _test_determinism(index, data_path, tmpdir):
synpp.run(stages, config, working_directory = cache_path)

REFERENCE_CSV_HASHES = {
"ile_de_france_activities.csv": "e520003e1876a9542ff1a955a6efcfdc",
"ile_de_france_households.csv": "709ce7ded8a2487e6691d4fb3374754b",
"ile_de_france_activities.csv": "53c44fb4026d2037729ee8ff1c8fb93f",
"ile_de_france_households.csv": "ca2a29ef13467326f937638f1ff8be1a",
"ile_de_france_persons.csv": "ddbe9b418c915b14e888b54efbdf9b1e",
"ile_de_france_trips.csv": "6c5f3427e41e683da768eeb53796a806",
"ile_de_france_vehicle_types.csv": "00bee1ea6d7bc9af43ae6c7101dd75da",
"ile_de_france_vehicles.csv": "3567b0f29e51d521b13d91c82c77cecb",
}

REFERENCE_GPKG_HASHES = {
"ile_de_france_activities.gpkg": "9cf9a5fd8927c709927f7a940f86efbf",
"ile_de_france_activities.gpkg": "884eec1fd0c29904284eb4362ff89be1",
"ile_de_france_commutes.gpkg": "5a4180390a69349cc655c07c5671e8d3",
"ile_de_france_homes.gpkg": "033d1aa7a5350579cbd5e8213b9736f2",
"ile_de_france_homes.gpkg": "a85e973f0e2f51031cd60170d351845e",
"ile_de_france_trips.gpkg": "d0aec4033cfc184bf1b91ae13a537ef8",
}

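The test body that consumes these dictionaries is outside this diff; as an illustration only, a reference-hash check of this kind usually reduces to hashing each output file and comparing it against the pinned value (the function names below are hypothetical, not part of the repository):

    import hashlib

    def file_md5(path):
        # Hash the file in chunks so large outputs do not need to fit in memory.
        digest = hashlib.md5()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):
                digest.update(chunk)
        return digest.hexdigest()

    def check_hashes(output_path, reference_hashes):
        # Any change to the generated columns (such as the new municipality
        # fields) changes the digest and forces the pinned hashes to be
        # refreshed, which is why the values above were updated in this commit.
        for name, expected in reference_hashes.items():
            actual = file_md5("%s/%s" % (output_path, name))
            assert actual == expected, "%s: %s != %s" % (name, actual, expected)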
