Skip to content

Commit

Permalink
Merge branch 'develop' into update-nantes-gtfs-links
Browse files Browse the repository at this point in the history
  • Loading branch information
Nitnelav authored Sep 25, 2024
2 parents 29bfd11 + 0aebbff commit d25edb8
Show file tree
Hide file tree
Showing 42 changed files with 884 additions and 312 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@

**Under development**

- feat: add municipality information to households and activities
- chore: update to `eqasim-java` commit `ece4932`
- feat: vehicles and vehicle types are now always generated
- feat: read vehicles data from zip files
- feat: option parameter to remove filtering for requested departments in HTS
- fix: secondary location model used same random seed in every parallel thread
- feat: add a new method for attributing income to households using the bhepop2 package
- fix: fixed special case in repairing ENTD for completely overlapping trips
Expand Down
116 changes: 116 additions & 0 deletions analysis/grid/comparison_flow_volume.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import pandas as pd
import geopandas as gpd

import plotly.express as px


SAMPLING_RATE = 0.05

def configure(context):
    """Declare the stages and configuration options used by this analysis.

    The synthesis stages are only required when the data is not read back
    from previously exported files ("analysis_from_file"). The departments
    stage is required in both modes, since execute() uses it unconditionally
    to mask the grid.
    """
    if not context.config("analysis_from_file", False):
        context.stage("synthesis.population.trips")
        context.stage("synthesis.population.spatial.locations")
        context.stage("synthesis.population.enriched")

    # Bug fix: previously this stage was only declared inside the conditional
    # above, although execute() requests it in both modes.
    context.stage("data.spatial.departments")

    context.config("comparison_file_prefix", None)
    context.config("output_prefix", "ile_de_france_")
    context.config("output_formats", ["csv", "gpkg"])
    context.config("output_path")
    context.config("data_path")

def stat_grid(df_trips, df_locations, df_persons, df_grid):
    """Spatially join trip destinations (with person attributes) onto a grid.

    Each trip is matched with the location of its following activity, enriched
    with person attributes, and spatially joined against the grid cells.
    Returns one row per (grid cell, trip destination) match.
    """
    # Destination geometry of each trip: the location whose activity_index
    # equals the trip's following_activity_index.
    destinations = df_locations[["person_id", "activity_index", "geometry"]].rename(
        columns={"activity_index": "following_activity_index"}
    )
    df_spatial = pd.merge(
        df_trips, destinations,
        how="left", on=["person_id", "following_activity_index"]
    )

    # Attach person attributes (household_id, age, ...)
    df_spatial = pd.merge(df_spatial, df_persons, how="left", on=["person_id"])

    # Source coordinates are Lambert-93 (EPSG:2154); the grid is in WGS84
    df_spatial = gpd.GeoDataFrame(df_spatial, crs="EPSG:2154").to_crs("4326")

    df_stats = gpd.sjoin(df_grid, df_spatial, how="left")
    return df_stats[[
        "id_carr_1km", "geometry", "person_id",
        "following_purpose", "household_id", "age"
    ]]
def execute(context):
    """Produce per-age-group and per-purpose flow maps on a 1 km grid.

    For every age group and activity purpose, trip destinations are counted
    per grid cell and written as interactive HTML choropleth maps. When a
    comparison data set is configured ("comparison_file_prefix"), a difference
    map between the two simulations is produced instead of absolute volumes.
    """
    # Age groups for which separate maps are generated (bounds are inclusive)
    figures = {
        "Yrs:0-10": {"min_age": 0, "max_age": 10},
        "Yrs:11-14": {"min_age": 11, "max_age": 14},
        "Yrs:15-18": {"min_age": 15, "max_age": 17},
        "Yrs:18-25": {"min_age": 18, "max_age": 25},
        "Yrs:25-50": {"min_age": 26, "max_age": 50},
        "Yrs:50-65": {"min_age": 51, "max_age": 65},
        "Yrs:65-75": {"min_age": 66, "max_age": 75},
        "Yrs:75+": {"min_age": 76, "max_age": 110},
    }

    comparison_file = (
        context.config("output_prefix")
        if context.config("comparison_file_prefix") is None
        else context.config("comparison_file_prefix")
    )

    if not context.config("analysis_from_file"):
        print("Récupération simu données ...")
        # Data comes from the simulation cache
        df_trips = context.stage("synthesis.population.trips")
        df_persons = context.stage("synthesis.population.enriched")[[
            "person_id", "household_id", "age"
        ]]
        df_locations = context.stage("synthesis.population.spatial.locations")[[
            "person_id", "activity_index", "geometry"
        ]]
        df_trips["preceding_activity_index"] = df_trips["trip_index"]
        df_trips["following_activity_index"] = df_trips["trip_index"] + 1

        # Bug fix: these frames were previously only defined in the
        # "analysis_from_file" branch, which raised a NameError below when
        # running from the simulation cache. There is no separate comparison
        # export in this mode, so the simulation is compared with itself
        # (comparison_file == output_prefix unless explicitly overridden).
        df_trips_comp = df_trips
        df_locations_comp = df_locations
        df_persons_comp = df_persons
    else:
        # Data comes from previously exported trips, activities and persons
        print("Récupération données ...")
        df_trips = pd.read_csv(
            f'{context.config("output_path")}/{context.config("output_prefix")}trips.csv',
            sep=';',
        )[["person_id", "trip_index", "following_activity_index", "following_purpose"]]
        df_locations = (
            gpd.read_parquet(f'{context.config("output_path")}/{context.config("output_prefix")}activities.geoparquet')
            if "geoparquet" in context.config("output_formats")
            else gpd.read_file(f'{context.config("output_path")}/{context.config("output_prefix")}activities.gpkg')
        )
        df_persons = pd.read_csv(
            f'{context.config("output_path")}/{context.config("output_prefix")}persons.csv',
            sep=';',
        )[["person_id", "household_id", "age"]]

        print("Récupération comp données ...")
        df_trips_comp = pd.read_csv(
            f'{context.config("output_path")}/{comparison_file}trips.csv',
            sep=';',
        )[["person_id", "trip_index", "following_activity_index", "following_purpose"]]
        df_locations_comp = (
            gpd.read_parquet(f'{context.config("output_path")}/{comparison_file}activities.geoparquet')
            if "geoparquet" in context.config("output_formats")
            else gpd.read_file(f'{context.config("output_path")}/{comparison_file}activities.gpkg')
        )
        df_persons_comp = pd.read_csv(
            f'{context.config("output_path")}/{comparison_file}persons.csv',
            sep=';',
        )[["person_id", "household_id", "age"]]

    list_purpose = list(df_trips["following_purpose"].unique())

    # 1 km grid of location data, masked by the studied departments.
    # NOTE(review): this stage is only declared in configure() when
    # analysis_from_file is disabled — verify it is available in file mode.
    df_departments = context.stage("data.spatial.departments")
    poly_dep = df_departments.unary_union
    df_grids = gpd.read_file(
        f'{context.config("data_path")}/grid/grille200m_metropole.gpkg',
        mask=poly_dep,
    )
    df_grids = df_grids.to_crs("4326")
    df_grid = df_grids[["id_carr_1km", "geometry"]].dissolve(by="id_carr_1km").reset_index()

    df_stats = stat_grid(df_trips, df_locations, df_persons, df_grid)
    df_grids = stat_grid(df_trips_comp, df_locations_comp, df_persons_comp, df_grid)

    # Map center; changes with poly_dep
    point = df_grid.unary_union.centroid

    print("Printing grids...")
    for prefix, figure in figures.items():
        # Destination counts per (cell, purpose) for the studied simulation
        df_select_age = df_stats[df_stats["age"].between(figure["min_age"], figure["max_age"])]
        df_select_age = df_select_age.dissolve(by=["id_carr_1km", "following_purpose"], aggfunc="count").reset_index()
        df_select_age = df_select_age[~(df_select_age["geometry"].isna())]
        df_select_age["following_purpose"] = df_select_age["following_purpose"].astype('str')

        # Same aggregation for the comparison simulation
        df_grids_age = df_grids[df_grids["age"].between(figure["min_age"], figure["max_age"])]
        df_grids_age = df_grids_age.dissolve(by=["id_carr_1km", "following_purpose"], aggfunc="count").reset_index()
        df_grids_age = df_grids_age[~(df_grids_age["geometry"].isna())]
        df_grids_age["following_purpose"] = df_grids_age["following_purpose"].astype('str')

        for purpose in list_purpose:
            df_select = df_select_age[df_select_age["following_purpose"] == purpose].rename(columns={"person_id": "count"})
            df_grids_select = df_grids_age[df_grids_age["following_purpose"] == purpose].rename(columns={"person_id": "count"})

            if context.config("output_prefix") == comparison_file:
                # No distinct comparison data: plot absolute volumes only
                df_select = gpd.sjoin(df_select, df_grid, how='right', predicate="contains").fillna(0)
                df_select = df_select[df_select["count"] != 0]
                fig = px.choropleth_mapbox(
                    df_select, geojson=df_select.geometry, locations=df_select.index,
                    color="count", opacity=0.7, color_continuous_scale='reds',
                    mapbox_style='open-street-map', center=dict(lat=point.y, lon=point.x),
                    title=f"Localisation flow distribution for {prefix} group with {purpose} purpose",
                )
                fig.write_html(f'{context.config("output_path")}/{context.config("output_prefix")}{prefix}_{purpose}.html')
            else:
                # Plot the difference between the studied and compared simulations
                df_grids_select = gpd.sjoin(df_grids_select, df_grid, how='right', predicate="contains").fillna(0)
                df_select = gpd.sjoin(
                    df_select, df_grids_select.drop(columns=['index_left']),
                    how='right', predicate="contains",
                ).rename(columns={"count_left": "volume_studied_simu", "count_right": "volume_compared_simu"}).fillna(0)
                df_select["volume_difference"] = df_select["volume_studied_simu"] - df_select["volume_compared_simu"]
                df_select = df_select[(df_select["volume_studied_simu"] != 0) | (df_select["volume_compared_simu"] != 0)]
                df_select["pourcentage_vol"] = df_select["volume_difference"] / df_select["volume_compared_simu"]
                px.choropleth_mapbox(
                    df_select, geojson=df_select.geometry, locations=df_select.index,
                    color="volume_difference", opacity=0.7, color_continuous_scale="picnic",
                    color_continuous_midpoint=0, hover_name="id_carr_1km_right",
                    hover_data=["volume_studied_simu", "volume_compared_simu", "pourcentage_vol"],
                    mapbox_style='open-street-map', center=dict(lat=point.y, lon=point.x),
                    title=f"Comparison flow distribution with previous simulation for {prefix} group with {purpose} purpose",
                ).write_html(f'{context.config("output_path")}/{context.config("output_prefix")}{prefix}_{purpose}.html')


7 changes: 1 addition & 6 deletions config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,5 @@ config:
# Activate if you want to run mode choice
mode_choice: false

# Uncommented below to enable vehicle fleet generation
# generate_vehicles_file: True
# generate_vehicles_method: fleet_sample
# vehicles_data_year: 2015

# Uncomment to use the bhepop2 package for attributing income
# income_assignation_method: bhepop2
# income_assignation_method: bhepop2
5 changes: 4 additions & 1 deletion data/bpe/cleaned.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ def execute(context):

df["activity_type"] = df["activity_type"].astype("category")

#Add
df = df.rename(columns={"TYPEQU":"education_type"})
df["weight"] = 500
# Clean coordinates
df["x"] = df["LAMBERT_X"].astype(str).str.replace(",", ".").astype(float)
df["y"] = df["LAMBERT_Y"].astype(str).str.replace(",", ".").astype(float)
Expand Down Expand Up @@ -134,7 +137,7 @@ def execute(context):
df.loc[outside_indices, "imputed"] = True

# Package up data set
df = df[["enterprise_id", "activity_type", "commune_id", "imputed", "x", "y"]]
df = df[["enterprise_id", "activity_type","education_type", "commune_id", "imputed", "x", "y","weight"]]

df = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.x, df.y),crs="EPSG:2154")

Expand Down
33 changes: 33 additions & 0 deletions data/external/education.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import shapely.geometry as geo
import numpy as np
import pandas as pd
import geopandas as gpd

def configure(context):
    """Declare input stages and configuration for the education locations."""
    for stage_name in ("data.bpe.cleaned", "data.spatial.municipalities"):
        context.stage(stage_name)

    context.config("data_path")
    context.config("education_file", "education/education_addresses.geojson")

def execute(context):
    """Merge BPE education locations with an external address-level file.

    BPE entries whose education_type is covered by the external file are
    replaced by the file's entries (restricted to the studied communes);
    all other BPE education locations are kept as-is.
    """
    df_locations = context.stage("data.bpe.cleaned")[[
        "activity_type", "education_type", "commune_id", "weight", "geometry"
    ]]

    df_locations = df_locations[df_locations["activity_type"] == "education"]
    # NOTE(review): the "weight" column is intentionally not carried over for
    # the BPE rows here (only the external file provides it) — confirm this
    # is the desired behavior downstream.
    df_locations = df_locations[[
        "activity_type", "education_type", "commune_id", "geometry"
    ]].copy()
    df_locations["fake"] = False

    df_zones = context.stage("data.spatial.municipalities")
    required_communes = set(df_zones["commune_id"].unique())

    education_path = "{}/{}".format(
        context.config("data_path"), context.config("education_file")
    )
    df_education = gpd.read_file(education_path)[[
        "education_type", "commune_id", "weight", "geometry"
    ]]
    df_education["fake"] = False
    df_education = df_education.to_crs("2154")
    df_education["activity_type"] = "education"

    # Education types present in the external file supersede the BPE entries
    covered_types = set(df_education["education_type"].unique())
    keep_bpe = ~df_locations["education_type"].str.startswith(tuple(covered_types))
    df_locations = pd.concat([
        df_locations[keep_bpe],
        df_education[df_education["commune_id"].isin(required_communes)],
    ])

    return df_locations
31 changes: 17 additions & 14 deletions data/hts/edgt_44/filtered.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,28 +8,31 @@
def configure(context):
context.stage("data.hts.edgt_44.cleaned")
context.stage("data.spatial.codes")


context.config("filter_hts",True)
def execute(context):
filter_edgt = context.config("filter_hts")
df_codes = context.stage("data.spatial.codes")
df_households, df_persons, df_trips = context.stage("data.hts.edgt_44.cleaned")

# Filter for non-residents
requested_departments = df_codes["departement_id"].unique()
f = df_persons["departement_id"].astype(str).isin(requested_departments)
df_persons = df_persons[f]
if filter_edgt :
# Filter for non-residents
requested_departments = df_codes["departement_id"].unique()
f = df_persons["departement_id"].astype(str).isin(requested_departments)
df_persons = df_persons[f]

# Filter for people going outside of the area
remove_ids = set()
# Filter for people going outside of the area
remove_ids = set()

remove_ids |= set(df_trips[
~df_trips["origin_departement_id"].astype(str).isin(requested_departments) | ~df_trips["destination_departement_id"].astype(str).isin(requested_departments)
]["person_id"].unique())
remove_ids |= set(df_trips[
~df_trips["origin_departement_id"].astype(str).isin(requested_departments) | ~df_trips["destination_departement_id"].astype(str).isin(requested_departments)
]["person_id"].unique())

df_persons = df_persons[~df_persons["person_id"].isin(remove_ids)]
df_persons = df_persons[~df_persons["person_id"].isin(remove_ids)]

# Only keep trips and households that still have a person
df_trips = df_trips[df_trips["person_id"].isin(df_persons["person_id"].unique())]
df_households = df_households[df_households["household_id"].isin(df_persons["household_id"])]
# Only keep trips and households that still have a person
df_trips = df_trips[df_trips["person_id"].isin(df_persons["person_id"].unique())]
df_households = df_households[df_households["household_id"].isin(df_persons["household_id"])]

# Finish up
df_households = df_households[hts.HOUSEHOLD_COLUMNS]
Expand Down
31 changes: 17 additions & 14 deletions data/hts/edgt_lyon/filtered.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,28 +18,31 @@ def configure(context):
raise RuntimeError("Unknown Lyon EDGT source (only 'cerema' and 'adisp' are supported): %s" % edgt_lyon_source)

context.stage("data.spatial.codes")


context.config("filter_hts",True)
def execute(context):
filter_edgt = context.config("filter_hts")
df_codes = context.stage("data.spatial.codes")
df_households, df_persons, df_trips = context.stage("data.hts.edgt_lyon.cleaned")

# Filter for non-residents
requested_departments = df_codes["departement_id"].unique()
f = df_persons["departement_id"].astype(str).isin(requested_departments)
df_persons = df_persons[f]
if filter_edgt :
# Filter for non-residents
requested_departments = df_codes["departement_id"].unique()
f = df_persons["departement_id"].astype(str).isin(requested_departments)
df_persons = df_persons[f]

# Filter for people going outside of the area
remove_ids = set()
# Filter for people going outside of the area
remove_ids = set()

remove_ids |= set(df_trips[
~df_trips["origin_departement_id"].astype(str).isin(requested_departments) | ~df_trips["destination_departement_id"].astype(str).isin(requested_departments)
]["person_id"].unique())
remove_ids |= set(df_trips[
~df_trips["origin_departement_id"].astype(str).isin(requested_departments) | ~df_trips["destination_departement_id"].astype(str).isin(requested_departments)
]["person_id"].unique())

df_persons = df_persons[~df_persons["person_id"].isin(remove_ids)]
df_persons = df_persons[~df_persons["person_id"].isin(remove_ids)]

# Only keep trips and households that still have a person
df_trips = df_trips[df_trips["person_id"].isin(df_persons["person_id"].unique())]
df_households = df_households[df_households["household_id"].isin(df_persons["household_id"])]
# Only keep trips and households that still have a person
df_trips = df_trips[df_trips["person_id"].isin(df_persons["person_id"].unique())]
df_households = df_households[df_households["household_id"].isin(df_persons["household_id"])]

# Finish up
df_households = df_households[hts.HOUSEHOLD_COLUMNS]
Expand Down
35 changes: 19 additions & 16 deletions data/hts/egt/filtered.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,32 +10,35 @@ def configure(context):
context.stage("data.hts.egt.cleaned")
context.stage("data.spatial.codes")

context.config("filter_hts",True)
def execute(context):
filter_egt = context.config("filter_hts")
df_codes = context.stage("data.spatial.codes")

df_households, df_persons, df_trips = context.stage("data.hts.egt.cleaned")

# Filter for non-residents
requested_departments = df_codes["departement_id"].unique()
f = df_persons["departement_id"].astype(str).isin(requested_departments) # pandas bug!
df_persons = df_persons[f]
if filter_egt :
# Filter for non-residents
requested_departments = df_codes["departement_id"].unique()
f = df_persons["departement_id"].astype(str).isin(requested_departments) # pandas bug!
df_persons = df_persons[f]

# Filter for people going outside of the area (because they have NaN distances)
remove_ids = set()
# Filter for people going outside of the area (because they have NaN distances)
remove_ids = set()

remove_ids |= set(df_trips[
~df_trips["origin_departement_id"].astype(str).isin(requested_departments) | ~df_trips["destination_departement_id"].astype(str).isin(requested_departments)
]["person_id"].unique())
remove_ids |= set(df_trips[
~df_trips["origin_departement_id"].astype(str).isin(requested_departments) | ~df_trips["destination_departement_id"].astype(str).isin(requested_departments)
]["person_id"].unique())

remove_ids |= set(df_persons[
~df_persons["departement_id"].isin(requested_departments)
])
remove_ids |= set(df_persons[
~df_persons["departement_id"].isin(requested_departments)
])

df_persons = df_persons[~df_persons["person_id"].isin(remove_ids)]
df_persons = df_persons[~df_persons["person_id"].isin(remove_ids)]

# Only keep trips and households that still have a person
df_trips = df_trips[df_trips["person_id"].isin(df_persons["person_id"].unique())]
df_households = df_households[df_households["household_id"].isin(df_persons["household_id"])]
# Only keep trips and households that still have a person
df_trips = df_trips[df_trips["person_id"].isin(df_persons["person_id"].unique())]
df_households = df_households[df_households["household_id"].isin(df_persons["household_id"])]

# Finish up
household_columns = hts.HOUSEHOLD_COLUMNS + ["income_class"] + ["egt_household_id"]
Expand Down
3 changes: 3 additions & 0 deletions data/hts/entd/cleaned.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,9 @@ def execute(context):
# Socioprofessional class
df_persons["socioprofessional_class"] = df_persons["CS24"].fillna(80).astype(int) // 10

# Fix activity types (because of 1 inconsistent ENTD data)
hts.fix_activity_types(df_trips)

return df_households, df_persons, df_trips

def calculate_income_class(df):
Expand Down
Loading

0 comments on commit d25edb8

Please sign in to comment.