Skip to content

Commit

Permalink
feat : add new analysis output (#266)
Browse files Browse the repository at this point in the history
* feat : first analysis output tables

* feat: add proportion & improvement outputs

* fix : vehicle analysis & output folder

* fix : coments & changelog

* fix: correction with egt

* fix: separate analysis from data output & update docs

---------

Co-authored-by: Marie Laurent <[email protected]>
  • Loading branch information
MarieMcLaurent and Marie Laurent authored Nov 18, 2024
1 parent 2d3d9c1 commit d7453f4
Show file tree
Hide file tree
Showing 5 changed files with 151 additions and 5 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

**Under development**

- feat: add population analysis output
- fix: avoid regenerating OSM when population changes
- feat: add municipality information to households and activities
- chore: update to `eqasim-java` commit `ece4932`
Expand Down
13 changes: 10 additions & 3 deletions analysis/grid/comparison_flow_volume.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import pandas as pd
import geopandas as gpd
import os

import plotly.express as px

ANALYSIS_FOLDER = "compare_flow_volume"

SAMPLING_RATE = 0.05

Expand Down Expand Up @@ -84,6 +85,12 @@ def execute(context):
df_grids = stat_grid(df_trips_comp,df_locations_comp,df_persons_comp,df_grid)
point = df_grid.unary_union.centroid # a changé avec ploy_dep
print("Printing grids...")

# check output folder existence
analysis_output_path = os.path.join(context.config("output_path"), ANALYSIS_FOLDER)
if not os.path.exists(analysis_output_path):
os.mkdir(analysis_output_path)

for prefix, figure in figures.items():
df_select_age = df_stats[df_stats["age"].between(figure["min_age"],figure["max_age"])]
df_select_age = df_select_age.dissolve(by=["id_carr_1km","following_purpose"],aggfunc="count").reset_index()
Expand All @@ -103,14 +110,14 @@ def execute(context):
df_select = df_select[df_select["count"] != 0]
fig = px.choropleth_mapbox(df_select,geojson=df_select.geometry,locations=df_select.index,color="count", opacity= 0.7,color_continuous_scale='reds',
mapbox_style = 'open-street-map',center=dict(lat= point.y,lon=point.x),title=f"Localisation flow distribution for {prefix} group with {purpose} purpose")
fig.write_html(f'{context.config("output_path")}/{context.config("output_prefix")}{prefix}_{purpose}.html')
fig.write_html(f'{analysis_output_path}/{context.config("output_prefix")}{prefix}_{purpose}.html')
else :
df_grids_select = gpd.sjoin(df_grids_select,df_grid,how='right',predicate="contains").fillna(0)
df_select = gpd.sjoin(df_select,df_grids_select.drop(columns=[ 'index_left']),how='right',predicate="contains").rename(columns={"count_left":"volume_studied_simu","count_right":"volume_compared_simu"}).fillna(0)
df_select["volume_difference"] = df_select["volume_studied_simu"] - df_select["volume_compared_simu"]
df_select = df_select[(df_select["volume_studied_simu"] != 0 )| (df_select["volume_compared_simu"] != 0)]
df_select["pourcentage_vol"] = df_select["volume_difference"] / df_select["volume_compared_simu"]
px.choropleth_mapbox(df_select,geojson=df_select.geometry,locations=df_select.index,color="volume_difference", opacity= 0.7,color_continuous_scale="picnic", color_continuous_midpoint= 0,hover_name="id_carr_1km_right", hover_data=["volume_studied_simu", "volume_compared_simu","pourcentage_vol"],
mapbox_style = 'open-street-map',center=dict(lat= point.y,lon=point.x),title=f"Comparison flow distribution with previous simulation for {prefix} group with {purpose} purpose").write_html(f'{context.config("output_path")}/{context.config("output_prefix")}{prefix}_{purpose}.html')
mapbox_style = 'open-street-map',center=dict(lat= point.y,lon=point.x),title=f"Comparison flow distribution with previous simulation for {prefix} group with {purpose} purpose").write_html(f'{analysis_output_path}/{context.config("output_prefix")}{prefix}_{purpose}.html')


127 changes: 127 additions & 0 deletions analysis/synthesis/population.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@

import os
import numpy as np
import pandas as pd
import geopandas as gpd
from analysis.marginals import NUMBER_OF_VEHICLES_LABELS
from shapely import distance
AGE_CLASS = [0, 10, 14, 17, 25, 50, 65, np.inf]
NUMBER_OF_VEHICLES= [0,1,2,3,np.inf]
NAME_AGE_CLASS = ["0-10","11-14","15-17","18-25","26-50","51-65","65+"]
ANALYSIS_FOLDER = "analysis_population"
def configure(context):

context.config("output_path")
context.config("output_prefix", "ile_de_france_")
context.config("sampling_rate")

context.stage("synthesis.population.trips")
context.stage("synthesis.population.enriched")
context.stage("synthesis.population.spatial.locations")

context.stage("data.census.filtered", alias = "census")
context.stage("data.hts.selected", alias = "hts")

def execute(context):

# check output folder existence
analysis_output_path = os.path.join(context.config("output_path"), ANALYSIS_FOLDER)
if not os.path.exists(analysis_output_path):
os.mkdir(analysis_output_path)

prefix = context.config("output_prefix")
sampling_rate = context.config("sampling_rate")
df_person_eq = context.stage("synthesis.population.enriched")
df_trip_eq = context.stage("synthesis.population.trips")
df_location_eq = context.stage("synthesis.population.spatial.locations")[["person_id", "activity_index", "geometry"]]

df_location_eq = df_location_eq.to_crs("EPSG:2154")
df_trip_eq["preceding_activity_index"] = df_trip_eq["trip_index"]
df_trip_eq["following_activity_index"] = df_trip_eq["trip_index"] + 1

df_census = context.stage("census")
df_hts_households, df_hts_person, df_hts_trip = context.stage("hts")
df_hts_person["person_weight"] *=df_census["weight"].sum()/df_hts_person["person_weight"].sum()
df_hts_households["household_weight"] *=df_census["weight"].sum()/df_hts_households["household_weight"].sum()
# get age class
df_person_eq["age_class"] = pd.cut(df_person_eq["age"],AGE_CLASS,include_lowest=True,labels=NAME_AGE_CLASS)
df_census["age_class"] = pd.cut(df_census["age"],AGE_CLASS,include_lowest=True,labels=NAME_AGE_CLASS)
df_hts_person["age_class"] = pd.cut(df_hts_person["age"],AGE_CLASS,include_lowest=True,labels=NAME_AGE_CLASS)

# get vehicule class
df_person_eq["vehicles_class"] = pd.cut(df_person_eq["number_of_vehicles"],NUMBER_OF_VEHICLES,right=True,labels=NUMBER_OF_VEHICLES_LABELS)
df_hts_households["vehicles_class"] = pd.cut(df_hts_households["number_of_vehicles"],NUMBER_OF_VEHICLES,right=True,labels=NUMBER_OF_VEHICLES_LABELS)


df_eq_travel = pd.merge(df_trip_eq,df_person_eq[["person_id","age_class"]],on=["person_id"])
df_hts_travel = pd.merge(df_hts_trip,df_hts_person[["person_id","age_class","person_weight"]],on=["person_id"])

print("Generate tables ...")
# Age purpose analysis
analysis_age_purpose = pd.pivot_table(df_eq_travel,"person_id",index="age_class",columns="following_purpose",aggfunc="count")
analysis_age_purpose = analysis_age_purpose/sampling_rate
analysis_age_purpose.to_csv(f"{analysis_output_path}/{prefix}age_purpose.csv")

# Compare age volume
analysis_age_class = pd.concat([df_census.groupby("age_class")["weight"].sum(),df_person_eq.groupby("age_class")["person_id"].count()],axis=1).reset_index()
analysis_age_class.columns = ["Age class","INSEE","EQASIM"]
analysis_age_class["Proportion_INSEE"] = analysis_age_class["INSEE"] /df_census["weight"].sum()
analysis_age_class["Proportion_EQASIM"] = analysis_age_class["EQASIM"] /len(df_person_eq)
analysis_age_class["EQASIM"] = analysis_age_class["EQASIM"]/sampling_rate
analysis_age_class.to_csv(f"{analysis_output_path}/{prefix}age.csv")

# Compare vehicle volume
analysis_vehicles_class = pd.concat([df_hts_households.groupby("vehicles_class")["household_weight"].sum(),df_person_eq.groupby("vehicles_class")["household_id"].nunique()],axis=1).reset_index()
analysis_vehicles_class.columns = ["Number of vehicles class","HTS","EQASIM"]
analysis_vehicles_class["Proportion_HTS"] = analysis_vehicles_class["HTS"] / df_hts_households["household_weight"].sum()
analysis_vehicles_class["Proportion_EQASIM"] = analysis_vehicles_class["EQASIM"] / df_person_eq["household_id"].nunique()
analysis_vehicles_class.to_csv(f"{analysis_output_path}/{prefix}nbr_vehicle.csv")

# Compare license volume
analysis_license_class = pd.concat([df_hts_person.groupby("has_license")["person_weight"].sum(),df_person_eq.groupby("has_license")["person_id"].count()],axis=1).reset_index()
analysis_license_class.columns = ["Possession of license","HTS","EQASIM"]
analysis_license_class["Proportion_HTS"] = analysis_license_class["HTS"] /df_hts_person["person_weight"].sum()
analysis_license_class["Proportion_EQASIM"] = analysis_license_class["EQASIM"] /len(df_person_eq)
analysis_license_class["EQASIM"] = analysis_license_class["EQASIM"]/sampling_rate
analysis_license_class.to_csv(f"{analysis_output_path}/{prefix}license.csv")

# Compare travel volume
analysis_travel = pd.concat([df_hts_travel.groupby("age_class")["person_weight"].sum(),df_eq_travel.groupby("age_class")["person_id"].count()],axis=1).reset_index()
analysis_travel.columns = ["Age class","HTS","EQASIM"]
analysis_travel["Proportion_HTS"] = analysis_travel["HTS"] /df_hts_travel["person_weight"].sum()
analysis_travel["Proportion_EQASIM"] = analysis_travel["EQASIM"] /len(df_eq_travel)
analysis_travel["EQASIM"] = analysis_travel["EQASIM"]/sampling_rate
analysis_travel.to_csv(f"{analysis_output_path}/{prefix}travel.csv")

# Compare distance
df_hts_travel["routed_distance"] = df_hts_travel["routed_distance"]/1000 if "routed_distance" in df_hts_travel.columns else df_hts_travel["euclidean_distance"]/1000
df_hts_travel["distance_class"] = pd.cut(df_hts_travel["routed_distance"],list(np.arange(100))+[np.inf])

df_spatial = pd.merge(df_trip_eq, df_location_eq.rename(columns = {
"activity_index": "preceding_activity_index",
"geometry": "preceding_geometry"
}), how = "left", on = ["person_id", "preceding_activity_index"])

df_spatial = pd.merge(df_spatial, df_location_eq.rename(columns = {
"activity_index": "following_activity_index",
"geometry": "following_geometry"
}), how = "left", on = ["person_id", "following_activity_index"])
df_spatial["distance"] = df_spatial.apply(lambda x:distance( x["preceding_geometry"],x["following_geometry"])/1000,axis=1)
df_spatial["distance_class"] = pd.cut(df_spatial["distance"],list(np.arange(100))+[np.inf])

analysis_distance = pd.concat([df_hts_travel.groupby("distance_class")["person_weight"].sum(),df_spatial.groupby("distance_class")["person_id"].count()],axis=1).reset_index()
analysis_distance.columns = ["Distance class","HTS","EQASIM"]
analysis_distance["Proportion_HTS"] = analysis_distance["HTS"] / analysis_distance["HTS"].sum()
analysis_distance["Proportion_EQASIM"] = analysis_distance["EQASIM"] / len(df_spatial)
analysis_distance["EQASIM"] = analysis_distance["EQASIM"]/ sampling_rate
analysis_distance.to_csv(f"{analysis_output_path}/{prefix}distance.csv")










8 changes: 8 additions & 0 deletions docs/population.md
Original file line number Diff line number Diff line change
Expand Up @@ -450,3 +450,11 @@ folder as: `{output_prefix}_{age group}_{trip pupose}.html`

Note:
With `analysis_from_file` at False, the last synthetic population is studied by default. Also if `output_prefix` and `comparison_file_prefix` refer to the same outputs, or `comparison_file_prefix` is not specified, then only a volume visualisation of this particular population is produced.


### Comparaison population to source data

Using the population pipeline in the Analysis directory, you can generate multiple tables comparing composition of synthetic population to source data. Right now the tables generated compare : population volume by age range, households volume by number of vehicles, population volume with a license and without, trip volume by age range and trip volume by length.
Complementary from the synthetic population only, a table of population volume by age range and trip purpose is also created.

To be able to use this pipeline, you must already have create a synthetic population. Then you need to open the `config.yml` and add the `analysis.synthesis.population` stage in the `run` section.
7 changes: 5 additions & 2 deletions synthesis/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
import sqlite3
import math
import numpy as np
from analysis.synthesis.population import ANALYSIS_FOLDER

def configure(context):

context.stage("synthesis.population.enriched")

context.stage("synthesis.population.activities")
Expand All @@ -22,7 +24,8 @@ def configure(context):
context.config("output_path")
context.config("output_prefix", "ile_de_france_")
context.config("output_formats", ["csv", "gpkg"])

context.config("sampling_rate")

if context.config("mode_choice", False):
context.stage("matsim.simulation.prepare")

Expand Down Expand Up @@ -270,4 +273,4 @@ def execute(context):
clean_gpkg(path)
if "geoparquet" in output_formats:
path = "%s/%strips.geoparquet" % (output_path, output_prefix)
df_spatial.to_parquet(path)
df_spatial.to_parquet(path)

0 comments on commit d7453f4

Please sign in to comment.