From 66969ab776998927329b507d6c094c44d6cbf438 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20H=C3=B6rl?= Date: Thu, 5 Sep 2024 19:51:02 +0200 Subject: [PATCH 1/3] feat: integrate vehicles by default (#233) * feat: integrate vehicles by default * bugfix * several fixes * fix tests * update standalone mode choice * add fix in fleet sampling * update changelog * some cleanup * additional cleanup * further fixes * update to latest pr commit * update commit * update commit for testing * update commit for testing * set final eqasim-java commit * update changelog --- CHANGELOG.md | 3 + config.yml | 7 +- data/vehicles/raw.py | 80 +++++++++++-------- docs/simulation.md | 39 ++++----- matsim/output.py | 25 +----- matsim/runtime/eqasim.py | 2 +- matsim/scenario/population.py | 29 ++++++- matsim/scenario/vehicles.py | 4 +- matsim/simulation/prepare.py | 18 ++--- synthesis/output.py | 12 +-- synthesis/vehicles/cars/default.py | 31 +++++++ .../vehicles.py => cars/fleet_sampling.py} | 10 ++- synthesis/vehicles/passengers/default.py | 31 +++++++ synthesis/vehicles/selected.py | 11 --- synthesis/vehicles/vehicles.py | 22 +++++ tests/test_determinism.py | 5 +- tests/test_pipeline.py | 15 ++-- tests/test_simulation.py | 1 + tests/testdata.py | 26 +++--- 19 files changed, 232 insertions(+), 139 deletions(-) create mode 100644 synthesis/vehicles/cars/default.py rename synthesis/vehicles/{fleet_sample/vehicles.py => cars/fleet_sampling.py} (93%) create mode 100644 synthesis/vehicles/passengers/default.py delete mode 100644 synthesis/vehicles/selected.py create mode 100644 synthesis/vehicles/vehicles.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 10f8649e..b3f30918 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ **Under development** +- chore: update to `eqasim-java` commit `ece4932` +- feat: vehicles and vehicle types are now always generated +- feat: read vehicles data from zip files - feat : option parameter to remove filtering for requesting departements in hts 
- fix: secondary location model used same random seed in every parallel thread - feat: add a new method for attributing income to housholds using the bhepop2 package diff --git a/config.yml b/config.yml index 10d794da..95be71a5 100644 --- a/config.yml +++ b/config.yml @@ -31,10 +31,5 @@ config: # Activate if you want to run mode choice mode_choice: false - # Uncommented below to enable vehicle fleet generation - # generate_vehicles_file: True - # generate_vehicles_method: fleet_sample - # vehicles_data_year: 2015 - # Uncomment to use the bhepop2 package for attributing income - # income_assignation_method: bhepop2 \ No newline at end of file + # income_assignation_method: bhepop2 diff --git a/data/vehicles/raw.py b/data/vehicles/raw.py index 37721432..95a9fc31 100644 --- a/data/vehicles/raw.py +++ b/data/vehicles/raw.py @@ -1,7 +1,8 @@ import numpy as np import pandas as pd -import mock +import mock, os, glob from openpyxl.reader import excel +import zipfile """ This stage loads the raw data of the specified vehicle fleet data @@ -10,60 +11,73 @@ def configure(context): context.config("data_path") - context.config("vehicles_data_year", 2015) + context.config("vehicles_path", "vehicles") + context.config("vehicles_year", 2021) context.stage("data.spatial.codes") def execute(context): - - year = context.config("vehicles_data_year") - df_codes = context.stage("data.spatial.codes") # the downloaded excel files meta-data are actually have a badly formatted ISO datetime # https://foss.heptapod.net/openpyxl/openpyxl/-/issues/1659 with mock.patch.object(excel.ExcelReader, 'read_properties', lambda self: None): - df_vehicle_com_counts = pd.read_excel( - "%s/vehicles_%s/Parc_VP_Communes_%s.xlsx" % (context.config("data_path"), year, year) - ) - df_vehicle_reg_counts = pd.read_excel( - "%s/vehicles_%s/Parc_VP_Regions_%s.xlsx" % (context.config("data_path"), year, year) - ) + year = str(context.config("vehicles_year")) + + with 
zipfile.ZipFile("{}/{}/{}".format(context.config("data_path"), context.config("vehicles_path"), "parc_vp_communes.zip")) as archive: + with archive.open("Parc_VP_Communes_{}.xlsx".format(year)) as f: + df_municipalities = pd.read_excel(f) + + with zipfile.ZipFile("{}/{}/{}".format(context.config("data_path"), context.config("vehicles_path"), "parc_vp_regions.zip")) as archive: + with archive.open("Parc_VP_Regions_{}.xlsx".format(year)) as f: + df_regions = pd.read_excel(f) - df_vehicle_com_counts["region_id"] = df_vehicle_com_counts["Code région"].astype("category") - df_vehicle_com_counts["departement_id"] = df_vehicle_com_counts["Code départment"].astype("category") - df_vehicle_com_counts["commune_id"] = df_vehicle_com_counts["Code commune"].astype("category") + df_municipalities["region_id"] = df_municipalities["Code région"].astype("category") + df_municipalities["departement_id"] = df_municipalities["Code départment"].astype("category") + df_municipalities["commune_id"] = df_municipalities["Code commune"].astype("category") - df_vehicle_reg_counts["region_id"] = df_vehicle_reg_counts["Code région"].astype("category") + df_regions["region_id"] = df_regions["Code région"].astype("category") requested_departements = set(df_codes["departement_id"].unique()) requested_regions = set(df_codes["region_id"].astype(str).unique()) if len(requested_departements) > 0: - df_vehicle_com_counts = df_vehicle_com_counts[df_vehicle_com_counts["departement_id"].isin(requested_departements)] + df_municipalities = df_municipalities[df_municipalities["departement_id"].isin(requested_departements)] if len(requested_regions) > 0: - df_vehicle_reg_counts = df_vehicle_reg_counts[df_vehicle_reg_counts["region_id"].isin(requested_regions)] + df_regions = df_regions[df_regions["region_id"].isin(requested_regions)] + + df_municipalities["region_id"] = df_municipalities["region_id"].cat.remove_unused_categories() + df_municipalities["departement_id"] = 
df_municipalities["departement_id"].cat.remove_unused_categories() + df_municipalities["commune_id"] = df_municipalities["commune_id"].cat.remove_unused_categories() - df_vehicle_com_counts["region_id"] = df_vehicle_com_counts["region_id"].cat.remove_unused_categories() - df_vehicle_com_counts["departement_id"] = df_vehicle_com_counts["departement_id"].cat.remove_unused_categories() - df_vehicle_com_counts["commune_id"] = df_vehicle_com_counts["commune_id"].cat.remove_unused_categories() + df_regions["region_id"] = df_regions["region_id"].cat.remove_unused_categories() - df_vehicle_reg_counts["region_id"] = df_vehicle_reg_counts["region_id"].cat.remove_unused_categories() + df_municipalities["critair"] = df_municipalities["Vignette Crit'air"] + df_municipalities["technology"] = df_municipalities["Energie"] - df_vehicle_com_counts["critair"] = df_vehicle_com_counts["Vignette Crit'air"] - df_vehicle_com_counts["technology"] = df_vehicle_com_counts["Energie"] + df_regions["critair"] = df_regions["Vignette crit'air"] + df_regions["technology"] = df_regions["Energie"] - df_vehicle_reg_counts["critair"] = df_vehicle_reg_counts["Vignette crit'air"] - df_vehicle_reg_counts["technology"] = df_vehicle_reg_counts["Energie"] + count_column_name = "Parc au 01/01/%s" % context.config("vehicles_year") + age_column_name = "Age au 01/01/%s" % context.config("vehicles_year") - count_column_name = "Parc au 01/01/%s" % context.config("vehicles_data_year") - age_column_name = "Age au 01/01/%s" % context.config("vehicles_data_year") + df_municipalities["fleet"] = df_municipalities[count_column_name] + df_regions["fleet"] = df_regions[count_column_name] + df_regions["age"] = df_regions[age_column_name] - df_vehicle_com_counts["fleet"] = df_vehicle_com_counts[count_column_name] - df_vehicle_reg_counts["fleet"] = df_vehicle_reg_counts[count_column_name] - df_vehicle_reg_counts["age"] = df_vehicle_reg_counts[age_column_name] + df_vehicle_fleet_counts = 
df_municipalities.groupby(["region_id", "commune_id", "critair","technology"])["fleet"].sum().reset_index().dropna() + df_vehicle_age_counts = df_regions.groupby(["region_id", "critair", "technology", "age"])["fleet"].sum().reset_index().dropna() - df_vehicle_fleet_counts = df_vehicle_com_counts.groupby(["region_id", "commune_id", "critair","technology"])["fleet"].sum().reset_index().dropna() - df_vehicle_age_counts = df_vehicle_reg_counts.groupby(["region_id", "critair", "technology", "age"])["fleet"].sum().reset_index().dropna() + return df_vehicle_fleet_counts, df_vehicle_age_counts + +def validate(context): + municipalities_path = "{}/{}/{}".format(context.config("data_path"), context.config("vehicles_path"), "parc_vp_communes.zip") + regions_path = "{}/{}/{}".format(context.config("data_path"), context.config("vehicles_path"), "parc_vp_regions.zip") + + if not os.path.exists(municipalities_path): + raise RuntimeError("Municipalities vehicle data is not available at {}".format(municipalities_path)) + + if not os.path.exists(regions_path): + raise RuntimeError("Regions vehicle data is not available at {}".format(regions_path)) - return df_vehicle_fleet_counts, df_vehicle_age_counts \ No newline at end of file + return os.path.getsize(municipalities_path) + os.path.getsize(regions_path) diff --git a/docs/simulation.md b/docs/simulation.md index 20efaa9d..d5cae631 100644 --- a/docs/simulation.md +++ b/docs/simulation.md @@ -127,36 +127,31 @@ config: ## Using MATSim's emissions contrib -You can calculate air pollution emissions using matsim by using some additional data. +In order to use a detailed emissions analysis, you need to let the pipeline generate a meaningful vehicle fleet. 
Data on the private vehicle stock across France are available from the Ministry of Ecology: -You must download the crit'air data from this site : https://www.statistiques.developpement-durable.gouv.fr/donnees-sur-le-parc-automobile-francais-au-1er-janvier-2021 +- [Vehicle stock data](https://www.statistiques.developpement-durable.gouv.fr/donnees-sur-le-parc-automobile-francais-au-1er-janvier-2021) +- Click on *Données sur les voitures particulières* (first tab) to get information on the private vehicles +- Download *Données régionales des voitures particulières - 2011 à 2021* +- Download *Données communales des voitures particulières - 2011 à 2021* +- Put both zip files into `data/vehicles` +In the `config.yml`, you must enable the vehicle fleet generation: -You should download both files : - - Données régionales des voitures particulières - 2011 à 2021 (zip, 1.79 Mo) - Données communales des voitures particulières - 2011 à 2021 (zip, 130.33 Mo) +```yaml +config: + vehicles_method: fleet_sample +``` -Inside the zip you'll find one data file per year, you can extract the files concerning the year you're intereseted in (let's use `2015` for this exemple). Then unzip and place them in a `data/vehicles_2015/`. +After doing so, the `vehicles.xml.gz` and `vehicle_types.xml.gz` in the output will not only contain default vehicles and vehicle types, but realistic ones, based on the regional probabilities. -Then, in the `config.yml`, you must enable the vehicle fleet generation : +You can also choose to generate vehicles for a different year. The 2021 edition ZIP, for instance, contains all the years from 2012, and newer editions will contain more recent years. You can choose the year by setting: ```yaml -# ... - config: - generate_vehicles_file: True - generate_vehicles_method: fleet_sample - vehicles_data_year: 2015 - -# ... + vehicles_year: 2015 ``` -You should end up, at the end of the `matsim.output` stage, with a vechicles.xml file. 
- -After you run the full simulation, you'll be able to use some classes defined in `eqasim-java` to analyse and compute emissions based on the MATSim outputs. - -for exemple : +Once you have run a full simulation, you'll be able to use some classes defined in `eqasim-java` to analyse and compute emissions based on the MATSim outputs. For example: ```bash java -cp ile_de_france-1.0.6.jar org.eqasim.ile_de_france.emissions.RunComputeEmissionsEvents --config-path config.xml --hbefa-cold-avg ./EFA_ColdStart_Vehcat_2015_Cold_Average.csv --hbefa-hot-avg ./EFA_HOT_Vehcat_2015_Hot_Average.csv --hbefa-cold-detailed ./EFA_ColdStart_Subsegm_2015_Cold_Detailed.csv --hbefa-hot-detailed ./EFA_HOT_Subsegm_2015_Hot_Detailed.csv @@ -170,6 +165,4 @@ java -cp ile_de_france-1.0.6.jar org.eqasim.ile_de_france.emissions.RunExportEmi java -cp ile_de_france-1.0.6.jar org.eqasim.ile_de_france.emissions.RunComputeEmissionsGrid --config-path config.xml --domain-shp-path idf_2154.shp ``` -Please note that you need a copy of the HBEFA database in order to run those. - -For further information you can look at [eqasim-java](https://github.com/eqasim-org/eqasim-java) and [matsim-libs/contribs/emissions](https://github.com/matsim-org/matsim-libs/tree/master/contribs/emissions) +Please note that you need a copy of the HBEFA database in order to run those. 
For further information you can look at [eqasim-java](https://github.com/eqasim-org/eqasim-java) and [matsim-libs/contribs/emissions](https://github.com/matsim-org/matsim-libs/tree/master/contribs/emissions) diff --git a/matsim/output.py b/matsim/output.py index 1ef3b459..2f616403 100644 --- a/matsim/output.py +++ b/matsim/output.py @@ -11,7 +11,6 @@ def configure(context): context.config("output_path") context.config("output_prefix", "ile_de_france_") context.config("write_jar", True) - context.config("generate_vehicles_file", False) need_osm = context.config("export_detailed_network", False) if need_osm: context.stage("matsim.scenario.supply.osm") @@ -28,6 +27,7 @@ def execute(context): file_names = [ "%shouseholds.xml.gz" % context.config("output_prefix"), "%spopulation.xml.gz" % context.config("output_prefix"), + "%svehicles.xml.gz" % context.config("output_prefix"), "%sfacilities.xml.gz" % context.config("output_prefix"), "%snetwork.xml.gz" % context.config("output_prefix"), "%stransit_schedule.xml.gz" % context.config("output_prefix"), @@ -35,29 +35,6 @@ def execute(context): "%sconfig.xml" % context.config("output_prefix") ] - if context.config("generate_vehicles_file"): - vehicle_file = "%svehicles.xml.gz" % context.config("output_prefix") - - # it would make more sense to modify this in the eqasim-java part (in org.eqasim.core.scenario.config) - # but it's not obvious how to preserve backward compatibility hence the following method : - config_file = "%sconfig.xml" % context.config("output_prefix") - with open( "%s/%s" % (context.path("matsim.simulation.prepare"), config_file)) as f_read: - content = f_read.read() - content = content.replace( - '', - '' % vehicle_file - ) - content = content.replace( - '', - '' - ) - with open("%s/%s" % (context.config("output_path"), config_file), "w+") as f_write: - f_write.write(content) - - file_names.append(vehicle_file) - # since we did a copy & modify, no need to copy it again - file_names.remove(config_file) - for 
name in file_names: shutil.copy( "%s/%s" % (context.path("matsim.simulation.prepare"), name), diff --git a/matsim/runtime/eqasim.py b/matsim/runtime/eqasim.py index 5dc2ffe3..72e4846e 100644 --- a/matsim/runtime/eqasim.py +++ b/matsim/runtime/eqasim.py @@ -7,7 +7,7 @@ DEFAULT_EQASIM_VERSION = "1.5.0" DEFAULT_EQASIM_BRANCH = "develop" -DEFAULT_EQASIM_COMMIT = "73ac087" +DEFAULT_EQASIM_COMMIT = "ece4932" def configure(context): context.stage("matsim.runtime.git") diff --git a/matsim/scenario/population.py b/matsim/scenario/population.py index 2f19f9e9..2fc0fa4d 100644 --- a/matsim/scenario/population.py +++ b/matsim/scenario/population.py @@ -14,6 +14,7 @@ def configure(context): context.stage("synthesis.population.spatial.locations") context.stage("synthesis.population.trips") + context.stage("synthesis.vehicles.vehicles") PERSON_FIELDS = [ "person_id", "household_income", "car_availability", "bike_availability", @@ -31,7 +32,11 @@ def configure(context): "person_id", "mode", "departure_time", "travel_time" ] -def add_person(writer, person, activities, trips): +VEHICLE_FIELDS = [ + "owner_id", "vehicle_id", "mode" +] + +def add_person(writer, person, activities, trips, vehicles): writer.start_person(person[PERSON_FIELDS.index("person_id")]) writer.start_attributes() @@ -56,6 +61,11 @@ def add_person(writer, person, activities, trips): writer.add_attribute("employed", "java.lang.String", person[PERSON_FIELDS.index("employed")]) writer.add_attribute("sex", "java.lang.String", person[PERSON_FIELDS.index("sex")][0]) + writer.add_attribute("vehicles", "org.matsim.vehicles.PersonVehicles", "{{{content}}}".format(content = ",".join([ + "\"{mode}\":\"{id}\"".format(mode = v[VEHICLE_FIELDS.index("mode")], id = v[VEHICLE_FIELDS.index("vehicle_id")]) + for v in vehicles + ]))) + writer.end_attributes() writer.start_plan(selected = True) @@ -108,6 +118,9 @@ def execute(context): df_trips = context.stage("synthesis.population.trips") df_trips["travel_time"] = 
df_trips["arrival_time"] - df_trips["departure_time"] + df_vehicles = context.stage("synthesis.vehicles.vehicles")[1] + df_vehicles = df_vehicles.sort_values(by = ["owner_id"]) + with gzip.open(output_path, 'wb+') as writer: with io.BufferedWriter(writer, buffer_size = 2 * 1024**3) as writer: writer = writers.PopulationWriter(writer) @@ -115,6 +128,7 @@ def execute(context): activity_iterator = backlog_iterator(iter(df_activities[ACTIVITY_FIELDS].itertuples(index = False))) trip_iterator = backlog_iterator(iter(df_trips[TRIP_FIELDS].itertuples(index = False))) + vehicle_iterator = backlog_iterator(iter(df_vehicles[VEHICLE_FIELDS].itertuples(index = False))) with context.progress(total = len(df_persons), label = "Writing population ...") as progress: for person in df_persons.itertuples(index = False): @@ -122,6 +136,7 @@ def execute(context): activities = [] trips = [] + vehicles = [] # Track all activities for person while activity_iterator.has_next(): @@ -147,7 +162,17 @@ def execute(context): assert len(trips) == len(activities) - 1 - add_person(writer, person, activities, trips) + # Track all vehicles for person + while vehicle_iterator.has_next(): + vehicle = vehicle_iterator.next() + + if not vehicle[VEHICLE_FIELDS.index("owner_id")] == person_id: + vehicle_iterator.previous() + break + else: + vehicles.append(vehicle) + + add_person(writer, person, activities, trips, vehicles) progress.update() writer.end_population() diff --git a/matsim/scenario/vehicles.py b/matsim/scenario/vehicles.py index d9ecbaee..63205fc3 100644 --- a/matsim/scenario/vehicles.py +++ b/matsim/scenario/vehicles.py @@ -6,7 +6,7 @@ import matsim.writers as writers def configure(context): - context.stage("synthesis.vehicles.selected") + context.stage("synthesis.vehicles.vehicles") TYPE_FIELDS = ["type_id", "nb_seats", "length", "width", "pce", "mode"] VEHICLE_FIELDS = ["vehicle_id", "type_id", "critair", "technology", "age", "euro"] @@ -14,7 +14,7 @@ def configure(context): def 
execute(context): output_path = "%s/vehicles.xml.gz" % context.path() - df_vehicle_types, df_vehicles = context.stage("synthesis.vehicles.selected") + df_vehicle_types, df_vehicles = context.stage("synthesis.vehicles.vehicles") with gzip.open(output_path, 'wb+') as writer: with io.BufferedWriter(writer, buffer_size = 2 * 1024**3) as writer: diff --git a/matsim/simulation/prepare.py b/matsim/simulation/prepare.py index 8cb41af9..7a73e6d8 100644 --- a/matsim/simulation/prepare.py +++ b/matsim/simulation/prepare.py @@ -8,9 +8,7 @@ def configure(context): context.stage("matsim.scenario.population") context.stage("matsim.scenario.households") - - if context.config("generate_vehicles_file", False): - context.stage("matsim.scenario.vehicles") + context.stage("matsim.scenario.vehicles") context.stage("matsim.scenario.facilities") context.stage("matsim.scenario.supply.processed") @@ -78,12 +76,11 @@ def execute(context): ) shutil.copy(transit_vehicles_path, "%s/%stransit_vehicles.xml.gz" % (context.cache_path, context.config("output_prefix"))) - if context.config("generate_vehicles_file"): - vehicles_path = "%s/%s" % ( - context.path("matsim.scenario.vehicles"), - context.stage("matsim.scenario.vehicles") - ) - shutil.copy(vehicles_path, "%s/%svehicles.xml.gz" % (context.cache_path, context.config("output_prefix"))) + vehicles_path = "%s/%s" % ( + context.path("matsim.scenario.vehicles"), + context.stage("matsim.scenario.vehicles") + ) + shutil.copy(vehicles_path, "%s/%svehicles.xml.gz" % (context.cache_path, context.config("output_prefix"))) # Generate base configuration eqasim.run(context, "org.eqasim.core.scenario.config.RunGenerateConfig", [ @@ -98,7 +95,8 @@ def execute(context): # Adapt config for Île-de-France eqasim.run(context, "org.eqasim.ile_de_france.scenario.RunAdaptConfig", [ "--input-path", "generic_config.xml", - "--output-path", "%sconfig.xml" % context.config("output_prefix") + "--output-path", "%sconfig.xml" % context.config("output_prefix"), + 
"--prefix", context.config("output_prefix") ]) assert os.path.exists("%s/%sconfig.xml" % (context.path(), context.config("output_prefix"))) diff --git a/synthesis/output.py b/synthesis/output.py index b970e59b..1c47962f 100644 --- a/synthesis/output.py +++ b/synthesis/output.py @@ -13,8 +13,7 @@ def configure(context): context.stage("synthesis.population.activities") context.stage("synthesis.population.trips") - if context.config("generate_vehicles_file", False): - context.stage("synthesis.vehicles.selected") + context.stage("synthesis.vehicles.vehicles") context.stage("synthesis.population.spatial.locations") @@ -161,12 +160,15 @@ def execute(context): if "parquet" in output_formats: df_trips.to_csv("%s/%strips.parquet" % (output_path, output_prefix)) - if context.config("generate_vehicles_file"): - # Prepare vehicles - df_vehicle_types, df_vehicles = context.stage("synthesis.vehicles.selected") + # Prepare vehicles + df_vehicle_types, df_vehicles = context.stage("synthesis.vehicles.vehicles") + if "csv" in output_formats: df_vehicle_types.to_csv("%s/%svehicle_types.csv" % (output_path, output_prefix), sep = ";", index = None, lineterminator = "\n") df_vehicles.to_csv("%s/%svehicles.csv" % (output_path, output_prefix), sep = ";", index = None, lineterminator = "\n") + if "parquet" in output_formats: + df_vehicle_types.to_parquet("%s/%svehicle_types.parquet" % (output_path, output_prefix)) + df_vehicles.to_parquet("%s/%svehicles.parquet" % (output_path, output_prefix)) # Prepare spatial data sets df_locations = context.stage("synthesis.population.spatial.locations")[[ diff --git a/synthesis/vehicles/cars/default.py b/synthesis/vehicles/cars/default.py new file mode 100644 index 00000000..1bf32836 --- /dev/null +++ b/synthesis/vehicles/cars/default.py @@ -0,0 +1,31 @@ +import re +import pandas as pd + +""" +Creates a vehicle fleet based on a default vehicle type +""" + +def configure(context): + context.stage("synthesis.population.enriched") + +def execute(context): 
+ df_persons = context.stage("synthesis.population.enriched") + + df_vehicle_types = pd.DataFrame.from_records([{ + "type_id": "default_car", "nb_seats": 4, "length": 5.0, "width": 1.0, "pce": 1.0, "mode": "car", + "hbefa_cat": "PASSENGER_CAR", "hbefa_tech": "average", "hbefa_size": "average", "hbefa_emission": "average", + }]) + + df_vehicles = df_persons[["person_id"]].copy() + df_vehicles = df_vehicles.rename(columns = { "person_id": "owner_id" }) + + df_vehicles["mode"] = "car" + + df_vehicles["vehicle_id"] = df_vehicles["owner_id"].astype(str) + ":car" + df_vehicles["type_id"] = "default_car" + df_vehicles["critair"] = "Crit'air 1" + df_vehicles["technology"] = "Gazole" + df_vehicles["age"] = 0 + df_vehicles["euro"] = 6 + + return df_vehicle_types, df_vehicles \ No newline at end of file diff --git a/synthesis/vehicles/fleet_sample/vehicles.py b/synthesis/vehicles/cars/fleet_sampling.py similarity index 93% rename from synthesis/vehicles/fleet_sample/vehicles.py rename to synthesis/vehicles/cars/fleet_sampling.py index fd2f4128..dcd20a5a 100644 --- a/synthesis/vehicles/fleet_sample/vehicles.py +++ b/synthesis/vehicles/cars/fleet_sampling.py @@ -13,11 +13,11 @@ def configure(context): context.stage("data.vehicles.raw") context.stage("data.vehicles.types") - context.config("vehicles_data_year", 2015) + context.config("vehicles_year", 2021) def _sample_vehicle(context, args): vehicle = args - year = context.config("vehicles_data_year") + year = context.config("vehicles_year") df_vehicle_fleet_counts, df_vehicle_age_counts = context.data("fleet"), context.data("age") commune_id = vehicle["commune_id"] @@ -120,9 +120,11 @@ def execute(context): df_vehicles = pd.merge(df_persons[["household_id", "person_id"]], df_homes[["household_id", "commune_id"]], on = "household_id") - df_vehicles = df_vehicles.rename(columns = { "person_id": "vehicle_id" }) - df_vehicles = df_vehicles.drop_duplicates("vehicle_id") + df_vehicles = df_vehicles.rename(columns = { "person_id": 
"owner_id" }) + df_vehicles["vehicle_id"] = df_vehicles["owner_id"].astype(str) + ":car" + df_vehicles = df_vehicles.drop_duplicates("vehicle_id") # is this needed? df_vehicles["type_id"] = "default_car" + df_vehicles["mode"] = "car" df_vehicle_fleet_counts, df_vehicle_age_counts = context.stage("data.vehicles.raw") diff --git a/synthesis/vehicles/passengers/default.py b/synthesis/vehicles/passengers/default.py new file mode 100644 index 00000000..6916f5bb --- /dev/null +++ b/synthesis/vehicles/passengers/default.py @@ -0,0 +1,31 @@ +import re +import pandas as pd + +""" +Creates a vehicle fleet based on a default vehicle type for the dummy passenger mode +""" + +def configure(context): + context.stage("synthesis.population.enriched") + +def execute(context): + df_persons = context.stage("synthesis.population.enriched") + + df_vehicle_types = pd.DataFrame.from_records([{ + "type_id": "default_car_passenger", "nb_seats": 4, "length": 5.0, "width": 1.0, "pce": 1.0, "mode": "car_passenger", + "hbefa_cat": "PASSENGER_CAR", "hbefa_tech": "average", "hbefa_size": "average", "hbefa_emission": "average", + }]) + + df_vehicles = df_persons[["person_id"]].copy() + df_vehicles = df_vehicles.rename(columns = { "person_id": "owner_id" }) + + df_vehicles["mode"] = "car_passenger" + + df_vehicles["vehicle_id"] = df_vehicles["owner_id"].astype(str) + ":car_passenger" + df_vehicles["type_id"] = "default_car_passenger" + df_vehicles["critair"] = "Crit'air 1" + df_vehicles["technology"] = "Gazole" + df_vehicles["age"] = 0 + df_vehicles["euro"] = 6 + + return df_vehicle_types, df_vehicles \ No newline at end of file diff --git a/synthesis/vehicles/selected.py b/synthesis/vehicles/selected.py deleted file mode 100644 index 6f558858..00000000 --- a/synthesis/vehicles/selected.py +++ /dev/null @@ -1,11 +0,0 @@ - -def configure(context): - method = context.config("generate_vehicles_method") - - if method == "fleet_sample": - context.stage("synthesis.vehicles.fleet_sample.vehicles", alias 
= "vehicles") - else: - raise RuntimeError("Unknown vehicles generation method : %s" % method) - -def execute(context): - return context.stage("vehicles") diff --git a/synthesis/vehicles/vehicles.py b/synthesis/vehicles/vehicles.py new file mode 100644 index 00000000..922cd36c --- /dev/null +++ b/synthesis/vehicles/vehicles.py @@ -0,0 +1,22 @@ +import pandas as pd + +def configure(context): + method = context.config("vehicles_method", "default") + + if method == "default": + context.stage("synthesis.vehicles.cars.default", alias = "cars") + elif method == "fleet_sample": + context.stage("synthesis.vehicles.cars.fleet_sampling", alias = "cars") + else: + raise RuntimeError("Unknown vehicles generation method : %s" % method) + + context.stage("synthesis.vehicles.passengers.default") + +def execute(context): + df_car_types, df_cars = context.stage("cars") + df_passenger_types, df_passengers = context.stage("synthesis.vehicles.passengers.default") + + df_vehicles = pd.concat([df_cars, df_passengers]) + df_types = pd.concat([df_car_types, df_passenger_types]) + + return df_types, df_vehicles diff --git a/tests/test_determinism.py b/tests/test_determinism.py index 763e567e..e6ca821c 100644 --- a/tests/test_determinism.py +++ b/tests/test_determinism.py @@ -72,6 +72,8 @@ def _test_determinism(index, data_path, tmpdir): "ile_de_france_households.csv": "709ce7ded8a2487e6691d4fb3374754b", "ile_de_france_persons.csv": "ddbe9b418c915b14e888b54efbdf9b1e", "ile_de_france_trips.csv": "6c5f3427e41e683da768eeb53796a806", + "ile_de_france_vehicle_types.csv": "00bee1ea6d7bc9af43ae6c7101dd75da", + "ile_de_france_vehicles.csv": "3567b0f29e51d521b13d91c82c77cecb", } REFERENCE_GPKG_HASHES = { @@ -133,7 +135,8 @@ def _test_determinism_matsim(index, data_path, tmpdir): #"ile_de_france_network.xml.gz": "5f10ec295b49d2bb768451c812955794", "ile_de_france_households.xml.gz": "64a0c9fab72aad51bc6adb926a1c9d44", #"ile_de_france_facilities.xml.gz": "5ad41afff9ae5c470082510b943e6778", - 
"ile_de_france_config.xml": "481fac5fb3e7b90810caa38ff460c00a" + "ile_de_france_config.xml": "30871dfbbd2b5bf6922be1dfe20ffe73", + "ile_de_france_vehicles.xml.gz": "d7c8d0dba531a21dc83355b2f82778c2" } # activities.gpkg, trips.gpkg, meta.json, diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 01bd6448..d9856f52 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -66,10 +66,12 @@ def run_population(tmpdir, hts, update = {}): assert 2235 == len(pd.read_csv("%s/ile_de_france_activities.csv" % output_path, usecols = ["household_id"], sep = ";")) assert 447 == len(pd.read_csv("%s/ile_de_france_persons.csv" % output_path, usecols = ["household_id"], sep = ";")) assert 149 == len(pd.read_csv("%s/ile_de_france_households.csv" % output_path, usecols = ["household_id"], sep = ";")) - - if "generate_vehicles_file" in update and update["generate_vehicles_file"]: - assert 17 == len(pd.read_csv("%s/ile_de_france_vehicle_types.csv" % output_path, usecols = ["type_id"], sep = ";")) - assert 447 == len(pd.read_csv("%s/ile_de_france_vehicles.csv" % output_path, usecols = ["vehicle_id"], sep = ";")) + + assert 447 * 2 == len(pd.read_csv("%s/ile_de_france_vehicles.csv" % output_path, usecols = ["vehicle_id"], sep = ";")) + if "vehicles_method" in update and update["vehicles_method"] == "fleet_sample": + assert 17 + 1 == len(pd.read_csv("%s/ile_de_france_vehicle_types.csv" % output_path, usecols = ["type_id"], sep = ";")) + else: + assert 2 == len(pd.read_csv("%s/ile_de_france_vehicle_types.csv" % output_path, usecols = ["type_id"], sep = ";")) def test_population_with_entd(tmpdir): run_population(tmpdir, "entd") @@ -82,9 +84,8 @@ def test_population_with_mode_choice(tmpdir): def test_population_with_fleet_sample(tmpdir): run_population(tmpdir, "entd", { - "generate_vehicles_file": True, - "generate_vehicles_method": "fleet_sample", - "vehicles_data_year": 2015 + "vehicles_method": "fleet_sample", + "vehicles_year": 2021 }) def 
test_population_with_bhepop2_income(tmpdir): diff --git a/tests/test_simulation.py b/tests/test_simulation.py index 6056b3e5..e31d6be9 100644 --- a/tests/test_simulation.py +++ b/tests/test_simulation.py @@ -30,3 +30,4 @@ def test_simulation(tmpdir): assert os.path.isfile("%s/ile_de_france_transit_vehicles.xml.gz" % output_path) assert os.path.isfile("%s/ile_de_france_households.xml.gz" % output_path) assert os.path.isfile("%s/ile_de_france_facilities.xml.gz" % output_path) + assert os.path.isfile("%s/ile_de_france_vehicles.xml.gz" % output_path) diff --git a/tests/testdata.py b/tests/testdata.py index ba09224e..e00d1b86 100644 --- a/tests/testdata.py +++ b/tests/testdata.py @@ -877,29 +877,29 @@ def create(output_path): df["region"].unique(), np.arange(20), ], names = [ - "Code région", "Age au 01/01/2015" + "Code région", "Age au 01/01/2021" ])).reset_index() # to enforce string df_vehicles_region = pd.concat([df_vehicles_region, pd.DataFrame({ "Code région": ["AB"], - "Age au 01/01/2015": [0], + "Age au 01/01/2021": [0], })]) df_vehicles_region["Code région"] = df_vehicles_region["Code région"].astype(str) - df_vehicles_region["Parc au 01/01/2015"] = 100 + df_vehicles_region["Parc au 01/01/2021"] = 100 df_vehicles_region["Energie"] = "Gazole" df_vehicles_region["Vignette crit'air"] = "Crit'air 1" - df_vehicles_region["Age au 01/01/2015"] = df_vehicles_region["Age au 01/01/2015"].astype(str) - df_vehicles_region["Age au 01/01/2015"] = df_vehicles_region["Age au 01/01/2015"].replace("20", ">20") - df_vehicles_region["Age au 01/01/2015"] = df_vehicles_region["Age au 01/01/2015"] + " ans" + df_vehicles_region["Age au 01/01/2021"] = df_vehicles_region["Age au 01/01/2021"].astype(str) + df_vehicles_region["Age au 01/01/2021"] = df_vehicles_region["Age au 01/01/2021"].replace("20", ">20") + df_vehicles_region["Age au 01/01/2021"] = df_vehicles_region["Age au 01/01/2021"] + " ans" df_vehicles_commune = pd.DataFrame({ "municipality": df["municipality"].unique() }) - 
df_vehicles_commune["Parc au 01/01/2015"] = 100 + df_vehicles_commune["Parc au 01/01/2021"] = 100 df_vehicles_commune["Energie"] = "Gazole" df_vehicles_commune["Vignette Crit'air"] = "Crit'air 1" @@ -913,9 +913,15 @@ def create(output_path): "region": "Code région", }) - os.mkdir("%s/vehicles_2015" % output_path) - df_vehicles_region.to_excel("%s/vehicles_2015/Parc_VP_Regions_2015.xlsx" % output_path) - df_vehicles_commune.to_excel("%s/vehicles_2015/Parc_VP_Communes_2015.xlsx" % output_path) + os.mkdir("%s/vehicles" % output_path) + + with zipfile.ZipFile("%s/vehicles/parc_vp_regions.zip" % output_path, "w") as archive: + with archive.open("Parc_VP_Regions_2021.xlsx", "w") as f: + df_vehicles_region.to_excel(f) + + with zipfile.ZipFile("%s/vehicles/parc_vp_communes.zip" % output_path, "w") as archive: + with archive.open("Parc_VP_Communes_2021.xlsx", "w") as f: + df_vehicles_commune.to_excel(f) if __name__ == "__main__": import shutil From 550f3433329bf006725b03e880830cf5d50eebe1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20H=C3=B6rl?= Date: Sat, 14 Sep 2024 09:55:27 +0200 Subject: [PATCH 2/3] chore: add delay into data verification (#256) * chore: add delay into data verification * triggering verification * printing the error to see what is going on * add a timeout of 2 minutes * trying to add retries * update * revert --- scripts/verify_data.py | 44 +++++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/scripts/verify_data.py b/scripts/verify_data.py index ce56f31e..93b77d4f 100644 --- a/scripts/verify_data.py +++ b/scripts/verify_data.py @@ -1,8 +1,13 @@ import requests +import time # The goal of this script is to verify the availability of the data # that is needed to set up the pipeline +sleep_time = 5 # seconds +timeout = 30 # seconds +retries = 3 + class Report: def __init__(self): self.sources = [] @@ -13,19 +18,32 @@ def register(self, name, url): def validate(self): failed = [] - for index, source 
in enumerate(self.sources): - print("[{}/{}] Checking {} ...".format(index + 1, len(self.sources), source["name"])) - - try: - response = requests.head(source["url"]) - source["status"] = response.status_code - except: - source["status"] = "error" - - print(" Status {}".format(source["status"])) - - if source["status"] != 200: - failed.append(source["name"]) + with requests.Session() as session: + session.headers.update({ "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:130.0) Gecko/20100101 Firefox/130.0" }) + for index, source in enumerate(self.sources): + print("[{}/{}] Checking {} ...".format(index + 1, len(self.sources), source["name"])) + + retry = 0 + success = False + + while not success and retry < retries: + try: + response = session.head(source["url"], timeout = timeout) + source["status"] = response.status_code + success = True + except TimeoutError: + source["status"] = "timeout" + except Exception as e: + source["status"] = "error" + print(e) + + retry += 1 + print(" Status {} (retry {}/{})".format(source["status"], retry, retries)) + + time.sleep(sleep_time) + + if source["status"] != 200: + failed.append(source["name"]) print("Done.") print("Missing: ", len(failed)) From e82ae98861b85ff93086cc1f4f7c143cf2101589 Mon Sep 17 00:00:00 2001 From: MarieMcLaurent <117629025+MarieMcLaurent@users.noreply.github.com> Date: Mon, 23 Sep 2024 14:22:11 +0200 Subject: [PATCH 3/3] feat: add municipality info (#258) * feat: addition municipalities info to households and activities * upadate tests & improve municipalities for house * first try correction test & changelog * fix: test gpkg hashes --------- Co-authored-by: Marie Laurent --- CHANGELOG.md | 1 + synthesis/output.py | 69 +++++++++++++---------- synthesis/population/spatial/locations.py | 7 +++ tests/test_determinism.py | 8 +-- 4 files changed, 52 insertions(+), 33 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b3f30918..468795dc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 
@@ **Under development** +- feat: add municipality information to households and activities - chore: update to `eqasim-java` commit `ece4932` - feat: vehicles and vehicle types are now always generated - feat: read vehicles data from zip files diff --git a/synthesis/output.py b/synthesis/output.py index 1c47962f..84c52a36 100644 --- a/synthesis/output.py +++ b/synthesis/output.py @@ -62,23 +62,6 @@ def execute(context): output_prefix = context.config("output_prefix") output_formats = context.config("output_formats") - # Prepare households - df_households = context.stage("synthesis.population.enriched").rename( - columns = { "household_income": "income" } - ).drop_duplicates("household_id") - - df_households = df_households[[ - "household_id", - "car_availability", "bike_availability", - "number_of_vehicles", "number_of_bikes", - "income", - "census_household_id" - ]] - if "csv" in output_formats: - df_households.to_csv("%s/%shouseholds.csv" % (output_path, output_prefix), sep = ";", index = None, lineterminator = "\n") - if "parquet" in output_formats: - df_households.to_parquet("%s/%shouseholds.parquet" % (output_path, output_prefix)) - # Prepare persons df_persons = context.stage("synthesis.population.enriched").rename( columns = { "has_license": "has_driving_license" } @@ -106,9 +89,29 @@ def execute(context): df_activities["preceding_trip_index"] = df_activities["following_trip_index"].shift(1) df_activities.loc[df_activities["is_first"], "preceding_trip_index"] = -1 df_activities["preceding_trip_index"] = df_activities["preceding_trip_index"].astype(int) + # Prepare spatial data sets + df_locations = context.stage("synthesis.population.spatial.locations")[[ + "person_id", "iris_id", "commune_id","departement_id","region_id","activity_index", "geometry" + ]] + df_activities = pd.merge(df_activities, df_locations[[ + "person_id", "iris_id", "commune_id","departement_id","region_id","activity_index", "geometry" + ]], how = "left", on = ["person_id", 
"activity_index"]) + + # Prepare spatial activities + df_spatial = gpd.GeoDataFrame(df_activities[[ + "person_id", "household_id", "activity_index", + "iris_id", "commune_id","departement_id","region_id", + "preceding_trip_index", "following_trip_index", + "purpose", "start_time", "end_time", + "is_first", "is_last", "geometry" + ]], crs = df_locations.crs) + df_spatial = df_spatial.astype({'purpose': 'str', "departement_id": 'str'}) + + # Write activities df_activities = df_activities[[ "person_id", "household_id", "activity_index", + "iris_id", "commune_id","departement_id","region_id", "preceding_trip_index", "following_trip_index", "purpose", "start_time", "end_time", "is_first", "is_last" @@ -119,6 +122,25 @@ def execute(context): if "parquet" in output_formats: df_activities.to_parquet("%s/%sactivities.parquet" % (output_path, output_prefix)) + # Prepare households + df_households = context.stage("synthesis.population.enriched").rename( + columns = { "household_income": "income" } + ).drop_duplicates("household_id") + + df_households = pd.merge(df_households,df_activities[df_activities["purpose"] == "home"][["household_id", + "iris_id", "commune_id","departement_id","region_id"]].drop_duplicates("household_id"),how="left") + df_households = df_households[[ + "household_id","iris_id", "commune_id", "departement_id","region_id", + "car_availability", "bike_availability", + "number_of_vehicles", "number_of_bikes", + "income", + "census_household_id" + ]] + if "csv" in output_formats: + df_households.to_csv("%s/%shouseholds.csv" % (output_path, output_prefix), sep = ";", index = None, lineterminator = "\n") + if "parquet" in output_formats: + df_households.to_parquet("%s/%shouseholds.parquet" % (output_path, output_prefix)) + # Prepare trips df_trips = context.stage("synthesis.population.trips").rename( columns = { @@ -170,18 +192,7 @@ def execute(context): df_vehicle_types.to_parquet("%s/%svehicle_types.parquet" % (output_path, output_prefix)) 
df_vehicles.to_parquet("%s/%svehicles.parquet" % (output_path, output_prefix)) - # Prepare spatial data sets - df_locations = context.stage("synthesis.population.spatial.locations")[[ - "person_id", "activity_index", "geometry" - ]] - - df_activities = pd.merge(df_activities, df_locations[[ - "person_id", "activity_index", "geometry" - ]], how = "left", on = ["person_id", "activity_index"]) - # Write spatial activities - df_spatial = gpd.GeoDataFrame(df_activities, crs = df_locations.crs) - df_spatial["purpose"] = df_spatial["purpose"].astype(str) if "gpkg" in output_formats: path = "%s/%sactivities.gpkg" % (output_path, output_prefix) df_spatial.to_file(path, driver = "GPKG") @@ -194,7 +205,7 @@ def execute(context): df_spatial_homes = df_spatial[ df_spatial["purpose"] == "home" ].drop_duplicates("household_id")[[ - "household_id", "geometry" + "household_id","iris_id", "commune_id","departement_id","region_id", "geometry" ]] if "gpkg" in output_formats: path = "%s/%shomes.gpkg" % (output_path, output_prefix) diff --git a/synthesis/population/spatial/locations.py b/synthesis/population/spatial/locations.py index 5277fd19..2397e095 100644 --- a/synthesis/population/spatial/locations.py +++ b/synthesis/population/spatial/locations.py @@ -9,6 +9,7 @@ def configure(context): context.stage("synthesis.population.activities") context.stage("synthesis.population.sampled") + context.stage("data.spatial.iris") def execute(context): df_home = context.stage("synthesis.population.spatial.home.locations") @@ -57,4 +58,10 @@ def execute(context): assert not df_locations["geometry"].isna().any() df_locations = gpd.GeoDataFrame(df_locations, crs = df_home.crs) + # add municipalities + df_iris = context.stage("data.spatial.iris") + df_iris = gpd.GeoDataFrame(df_iris, crs = df_home.crs) + + df_locations = gpd.sjoin(df_locations,df_iris,how="left") + return df_locations diff --git a/tests/test_determinism.py b/tests/test_determinism.py index e6ca821c..e2755d7a 100644 --- 
a/tests/test_determinism.py +++ b/tests/test_determinism.py @@ -68,8 +68,8 @@ def _test_determinism(index, data_path, tmpdir): synpp.run(stages, config, working_directory = cache_path) REFERENCE_CSV_HASHES = { - "ile_de_france_activities.csv": "e520003e1876a9542ff1a955a6efcfdc", - "ile_de_france_households.csv": "709ce7ded8a2487e6691d4fb3374754b", + "ile_de_france_activities.csv": "53c44fb4026d2037729ee8ff1c8fb93f", + "ile_de_france_households.csv": "ca2a29ef13467326f937638f1ff8be1a", "ile_de_france_persons.csv": "ddbe9b418c915b14e888b54efbdf9b1e", "ile_de_france_trips.csv": "6c5f3427e41e683da768eeb53796a806", "ile_de_france_vehicle_types.csv": "00bee1ea6d7bc9af43ae6c7101dd75da", @@ -77,9 +77,9 @@ def _test_determinism(index, data_path, tmpdir): } REFERENCE_GPKG_HASHES = { - "ile_de_france_activities.gpkg": "9cf9a5fd8927c709927f7a940f86efbf", + "ile_de_france_activities.gpkg": "884eec1fd0c29904284eb4362ff89be1", "ile_de_france_commutes.gpkg": "5a4180390a69349cc655c07c5671e8d3", - "ile_de_france_homes.gpkg": "033d1aa7a5350579cbd5e8213b9736f2", + "ile_de_france_homes.gpkg": "a85e973f0e2f51031cd60170d351845e", "ile_de_france_trips.gpkg": "d0aec4033cfc184bf1b91ae13a537ef8", }