From 66969ab776998927329b507d6c094c44d6cbf438 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20H=C3=B6rl?= Date: Thu, 5 Sep 2024 19:51:02 +0200 Subject: [PATCH 1/3] feat: integrate vehicles by default (#233) * feat: integrate vehicles by default * bugfix * several fixes * fix tests * update standalone mode choice * add fix in fleet sampling * update changelog * some cleanup * additional cleanup * further fixes * update to latest pr commit * update commit * update commit for testing * update commit for testing * set final eqasim-java commit * update changelog --- CHANGELOG.md | 3 + config.yml | 7 +- data/vehicles/raw.py | 80 +++++++++++-------- docs/simulation.md | 39 ++++----- matsim/output.py | 25 +----- matsim/runtime/eqasim.py | 2 +- matsim/scenario/population.py | 29 ++++++- matsim/scenario/vehicles.py | 4 +- matsim/simulation/prepare.py | 18 ++--- synthesis/output.py | 12 +-- synthesis/vehicles/cars/default.py | 31 +++++++ .../vehicles.py => cars/fleet_sampling.py} | 10 ++- synthesis/vehicles/passengers/default.py | 31 +++++++ synthesis/vehicles/selected.py | 11 --- synthesis/vehicles/vehicles.py | 22 +++++ tests/test_determinism.py | 5 +- tests/test_pipeline.py | 15 ++-- tests/test_simulation.py | 1 + tests/testdata.py | 26 +++--- 19 files changed, 232 insertions(+), 139 deletions(-) create mode 100644 synthesis/vehicles/cars/default.py rename synthesis/vehicles/{fleet_sample/vehicles.py => cars/fleet_sampling.py} (93%) create mode 100644 synthesis/vehicles/passengers/default.py delete mode 100644 synthesis/vehicles/selected.py create mode 100644 synthesis/vehicles/vehicles.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 10f8649e..b3f30918 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ **Under development** +- chore: update to `eqasim-java` commit `ece4932` +- feat: vehicles and vehicle types are now always generated +- feat: read vehicles data from zip files - feat : option parameter to remove filtering for requesting departements in hts 
- fix: secondary location model used same random seed in every parallel thread - feat: add a new method for attributing income to housholds using the bhepop2 package diff --git a/config.yml b/config.yml index 10d794da..95be71a5 100644 --- a/config.yml +++ b/config.yml @@ -31,10 +31,5 @@ config: # Activate if you want to run mode choice mode_choice: false - # Uncommented below to enable vehicle fleet generation - # generate_vehicles_file: True - # generate_vehicles_method: fleet_sample - # vehicles_data_year: 2015 - # Uncomment to use the bhepop2 package for attributing income - # income_assignation_method: bhepop2 \ No newline at end of file + # income_assignation_method: bhepop2 diff --git a/data/vehicles/raw.py b/data/vehicles/raw.py index 37721432..95a9fc31 100644 --- a/data/vehicles/raw.py +++ b/data/vehicles/raw.py @@ -1,7 +1,8 @@ import numpy as np import pandas as pd -import mock +import mock, os, glob from openpyxl.reader import excel +import zipfile """ This stage loads the raw data of the specified vehicle fleet data @@ -10,60 +11,73 @@ def configure(context): context.config("data_path") - context.config("vehicles_data_year", 2015) + context.config("vehicles_path", "vehicles") + context.config("vehicles_year", 2021) context.stage("data.spatial.codes") def execute(context): - - year = context.config("vehicles_data_year") - df_codes = context.stage("data.spatial.codes") # the downloaded excel files meta-data are actually have a badly formatted ISO datetime # https://foss.heptapod.net/openpyxl/openpyxl/-/issues/1659 with mock.patch.object(excel.ExcelReader, 'read_properties', lambda self: None): - df_vehicle_com_counts = pd.read_excel( - "%s/vehicles_%s/Parc_VP_Communes_%s.xlsx" % (context.config("data_path"), year, year) - ) - df_vehicle_reg_counts = pd.read_excel( - "%s/vehicles_%s/Parc_VP_Regions_%s.xlsx" % (context.config("data_path"), year, year) - ) + year = str(context.config("vehicles_year")) + + with 
zipfile.ZipFile("{}/{}/{}".format(context.config("data_path"), context.config("vehicles_path"), "parc_vp_communes.zip")) as archive: + with archive.open("Parc_VP_Communes_{}.xlsx".format(year)) as f: + df_municipalities = pd.read_excel(f) + + with zipfile.ZipFile("{}/{}/{}".format(context.config("data_path"), context.config("vehicles_path"), "parc_vp_regions.zip")) as archive: + with archive.open("Parc_VP_Regions_{}.xlsx".format(year)) as f: + df_regions = pd.read_excel(f) - df_vehicle_com_counts["region_id"] = df_vehicle_com_counts["Code région"].astype("category") - df_vehicle_com_counts["departement_id"] = df_vehicle_com_counts["Code départment"].astype("category") - df_vehicle_com_counts["commune_id"] = df_vehicle_com_counts["Code commune"].astype("category") + df_municipalities["region_id"] = df_municipalities["Code région"].astype("category") + df_municipalities["departement_id"] = df_municipalities["Code départment"].astype("category") + df_municipalities["commune_id"] = df_municipalities["Code commune"].astype("category") - df_vehicle_reg_counts["region_id"] = df_vehicle_reg_counts["Code région"].astype("category") + df_regions["region_id"] = df_regions["Code région"].astype("category") requested_departements = set(df_codes["departement_id"].unique()) requested_regions = set(df_codes["region_id"].astype(str).unique()) if len(requested_departements) > 0: - df_vehicle_com_counts = df_vehicle_com_counts[df_vehicle_com_counts["departement_id"].isin(requested_departements)] + df_municipalities = df_municipalities[df_municipalities["departement_id"].isin(requested_departements)] if len(requested_regions) > 0: - df_vehicle_reg_counts = df_vehicle_reg_counts[df_vehicle_reg_counts["region_id"].isin(requested_regions)] + df_regions = df_regions[df_regions["region_id"].isin(requested_regions)] + + df_municipalities["region_id"] = df_municipalities["region_id"].cat.remove_unused_categories() + df_municipalities["departement_id"] = 
df_municipalities["departement_id"].cat.remove_unused_categories() + df_municipalities["commune_id"] = df_municipalities["commune_id"].cat.remove_unused_categories() - df_vehicle_com_counts["region_id"] = df_vehicle_com_counts["region_id"].cat.remove_unused_categories() - df_vehicle_com_counts["departement_id"] = df_vehicle_com_counts["departement_id"].cat.remove_unused_categories() - df_vehicle_com_counts["commune_id"] = df_vehicle_com_counts["commune_id"].cat.remove_unused_categories() + df_regions["region_id"] = df_regions["region_id"].cat.remove_unused_categories() - df_vehicle_reg_counts["region_id"] = df_vehicle_reg_counts["region_id"].cat.remove_unused_categories() + df_municipalities["critair"] = df_municipalities["Vignette Crit'air"] + df_municipalities["technology"] = df_municipalities["Energie"] - df_vehicle_com_counts["critair"] = df_vehicle_com_counts["Vignette Crit'air"] - df_vehicle_com_counts["technology"] = df_vehicle_com_counts["Energie"] + df_regions["critair"] = df_regions["Vignette crit'air"] + df_regions["technology"] = df_regions["Energie"] - df_vehicle_reg_counts["critair"] = df_vehicle_reg_counts["Vignette crit'air"] - df_vehicle_reg_counts["technology"] = df_vehicle_reg_counts["Energie"] + count_column_name = "Parc au 01/01/%s" % context.config("vehicles_year") + age_column_name = "Age au 01/01/%s" % context.config("vehicles_year") - count_column_name = "Parc au 01/01/%s" % context.config("vehicles_data_year") - age_column_name = "Age au 01/01/%s" % context.config("vehicles_data_year") + df_municipalities["fleet"] = df_municipalities[count_column_name] + df_regions["fleet"] = df_regions[count_column_name] + df_regions["age"] = df_regions[age_column_name] - df_vehicle_com_counts["fleet"] = df_vehicle_com_counts[count_column_name] - df_vehicle_reg_counts["fleet"] = df_vehicle_reg_counts[count_column_name] - df_vehicle_reg_counts["age"] = df_vehicle_reg_counts[age_column_name] + df_vehicle_fleet_counts = 
df_municipalities.groupby(["region_id", "commune_id", "critair","technology"])["fleet"].sum().reset_index().dropna() + df_vehicle_age_counts = df_regions.groupby(["region_id", "critair", "technology", "age"])["fleet"].sum().reset_index().dropna() - df_vehicle_fleet_counts = df_vehicle_com_counts.groupby(["region_id", "commune_id", "critair","technology"])["fleet"].sum().reset_index().dropna() - df_vehicle_age_counts = df_vehicle_reg_counts.groupby(["region_id", "critair", "technology", "age"])["fleet"].sum().reset_index().dropna() + return df_vehicle_fleet_counts, df_vehicle_age_counts + +def validate(context): + municipalities_path = "{}/{}/{}".format(context.config("data_path"), context.config("vehicles_path"), "parc_vp_communes.zip") + regions_path = "{}/{}/{}".format(context.config("data_path"), context.config("vehicles_path"), "parc_vp_regions.zip") + + if not os.path.exists(municipalities_path): + raise RuntimeError("Municipalities vehicle data is not available at {}".format(municipalities_path)) + + if not os.path.exists(regions_path): + raise RuntimeError("Regions vehicle data is not available at {}".format(regions_path)) - return df_vehicle_fleet_counts, df_vehicle_age_counts \ No newline at end of file + return os.path.getsize(municipalities_path) + os.path.getsize(regions_path) diff --git a/docs/simulation.md b/docs/simulation.md index 20efaa9d..d5cae631 100644 --- a/docs/simulation.md +++ b/docs/simulation.md @@ -127,36 +127,31 @@ config: ## Using MATSim's emissions contrib -You can calculate air pollution emissions using matsim by using some additional data. +In order to use a detailed emissions analysis, you need to let the pipeline generate a meaningful vehicle fleet. 
Data on the private vehicle stock across France are available from the Ministry of Ecology: -You must download the crit'air data from this site : https://www.statistiques.developpement-durable.gouv.fr/donnees-sur-le-parc-automobile-francais-au-1er-janvier-2021 +- [Vehicle stock data](https://www.statistiques.developpement-durable.gouv.fr/donnees-sur-le-parc-automobile-francais-au-1er-janvier-2021) +- Click on *Données sur les voitures particulières* (first tab) to get information on the private vehicles +- Download *Données régionales des voitures particulières - 2011 à 2021* +- Download *Données communales des voitures particulières - 2011 à 2021* +- Put both zip files into `data/vehicles` +In the `config.yml`, you must enable the vehicle fleet generation: -You should download both files : - - Données régionales des voitures particulières - 2011 à 2021 (zip, 1.79 Mo) - Données communales des voitures particulières - 2011 à 2021 (zip, 130.33 Mo) +```yaml +config: + vehicles_method: fleet_sample +``` -Inside the zip you'll find one data file per year, you can extract the files concerning the year you're intereseted in (let's use `2015` for this exemple). Then unzip and place them in a `data/vehicles_2015/`. +After doing so, the `vehicles.xml.gz` and `vehicle_types.xml.gz` in the output will not only contain default vehicles and vehicle types, but realistic ones, based on the regional probabilities. -Then, in the `config.yml`, you must enable the vehicle fleet generation : +You can also choose to generate vehicles for a different year. The 2021 edition ZIP, for instance, contains all the years from 2012, and newer editions will contain more recent years. You can choose the year by setting: ```yaml -# ... - config: - generate_vehicles_file: True - generate_vehicles_method: fleet_sample - vehicles_data_year: 2015 - -# ... + vehicles_year: 2015 ``` -You should end up, at the end of the `matsim.output` stage, with a vechicles.xml file. 
- -After you run the full simulation, you'll be able to use some classes defined in `eqasim-java` to analyse and compute emissions based on the MATSim outputs. - -for exemple : +Once you have run a full simulation, you'll be able to use some classes defined in `eqasim-java` to analyse and compute emissions based on the MATSim outputs. For example: ```bash java -cp ile_de_france-1.0.6.jar org.eqasim.ile_de_france.emissions.RunComputeEmissionsEvents --config-path config.xml --hbefa-cold-avg ./EFA_ColdStart_Vehcat_2015_Cold_Average.csv --hbefa-hot-avg ./EFA_HOT_Vehcat_2015_Hot_Average.csv --hbefa-cold-detailed ./EFA_ColdStart_Subsegm_2015_Cold_Detailed.csv --hbefa-hot-detailed ./EFA_HOT_Subsegm_2015_Hot_Detailed.csv @@ -170,6 +165,4 @@ java -cp ile_de_france-1.0.6.jar org.eqasim.ile_de_france.emissions.RunExportEmi java -cp ile_de_france-1.0.6.jar org.eqasim.ile_de_france.emissions.RunComputeEmissionsGrid --config-path config.xml --domain-shp-path idf_2154.shp ``` -Please note that you need a copy of the HBEFA database in order to run those. - -For further information you can look at [eqasim-java](https://github.com/eqasim-org/eqasim-java) and [matsim-libs/contribs/emissions](https://github.com/matsim-org/matsim-libs/tree/master/contribs/emissions) +Please note that you need a copy of the HBEFA database in order to run those. 
For further information you can look at [eqasim-java](https://github.com/eqasim-org/eqasim-java) and [matsim-libs/contribs/emissions](https://github.com/matsim-org/matsim-libs/tree/master/contribs/emissions) diff --git a/matsim/output.py b/matsim/output.py index 1ef3b459..2f616403 100644 --- a/matsim/output.py +++ b/matsim/output.py @@ -11,7 +11,6 @@ def configure(context): context.config("output_path") context.config("output_prefix", "ile_de_france_") context.config("write_jar", True) - context.config("generate_vehicles_file", False) need_osm = context.config("export_detailed_network", False) if need_osm: context.stage("matsim.scenario.supply.osm") @@ -28,6 +27,7 @@ def execute(context): file_names = [ "%shouseholds.xml.gz" % context.config("output_prefix"), "%spopulation.xml.gz" % context.config("output_prefix"), + "%svehicles.xml.gz" % context.config("output_prefix"), "%sfacilities.xml.gz" % context.config("output_prefix"), "%snetwork.xml.gz" % context.config("output_prefix"), "%stransit_schedule.xml.gz" % context.config("output_prefix"), @@ -35,29 +35,6 @@ def execute(context): "%sconfig.xml" % context.config("output_prefix") ] - if context.config("generate_vehicles_file"): - vehicle_file = "%svehicles.xml.gz" % context.config("output_prefix") - - # it would make more sense to modify this in the eqasim-java part (in org.eqasim.core.scenario.config) - # but it's not obvious how to preserve backward compatibility hence the following method : - config_file = "%sconfig.xml" % context.config("output_prefix") - with open( "%s/%s" % (context.path("matsim.simulation.prepare"), config_file)) as f_read: - content = f_read.read() - content = content.replace( - '', - '' % vehicle_file - ) - content = content.replace( - '', - '' - ) - with open("%s/%s" % (context.config("output_path"), config_file), "w+") as f_write: - f_write.write(content) - - file_names.append(vehicle_file) - # since we did a copy & modify, no need to copy it again - file_names.remove(config_file) - for 
name in file_names: shutil.copy( "%s/%s" % (context.path("matsim.simulation.prepare"), name), diff --git a/matsim/runtime/eqasim.py b/matsim/runtime/eqasim.py index 5dc2ffe3..72e4846e 100644 --- a/matsim/runtime/eqasim.py +++ b/matsim/runtime/eqasim.py @@ -7,7 +7,7 @@ DEFAULT_EQASIM_VERSION = "1.5.0" DEFAULT_EQASIM_BRANCH = "develop" -DEFAULT_EQASIM_COMMIT = "73ac087" +DEFAULT_EQASIM_COMMIT = "ece4932" def configure(context): context.stage("matsim.runtime.git") diff --git a/matsim/scenario/population.py b/matsim/scenario/population.py index 2f19f9e9..2fc0fa4d 100644 --- a/matsim/scenario/population.py +++ b/matsim/scenario/population.py @@ -14,6 +14,7 @@ def configure(context): context.stage("synthesis.population.spatial.locations") context.stage("synthesis.population.trips") + context.stage("synthesis.vehicles.vehicles") PERSON_FIELDS = [ "person_id", "household_income", "car_availability", "bike_availability", @@ -31,7 +32,11 @@ def configure(context): "person_id", "mode", "departure_time", "travel_time" ] -def add_person(writer, person, activities, trips): +VEHICLE_FIELDS = [ + "owner_id", "vehicle_id", "mode" +] + +def add_person(writer, person, activities, trips, vehicles): writer.start_person(person[PERSON_FIELDS.index("person_id")]) writer.start_attributes() @@ -56,6 +61,11 @@ def add_person(writer, person, activities, trips): writer.add_attribute("employed", "java.lang.String", person[PERSON_FIELDS.index("employed")]) writer.add_attribute("sex", "java.lang.String", person[PERSON_FIELDS.index("sex")][0]) + writer.add_attribute("vehicles", "org.matsim.vehicles.PersonVehicles", "{{{content}}}".format(content = ",".join([ + "\"{mode}\":\"{id}\"".format(mode = v[VEHICLE_FIELDS.index("mode")], id = v[VEHICLE_FIELDS.index("vehicle_id")]) + for v in vehicles + ]))) + writer.end_attributes() writer.start_plan(selected = True) @@ -108,6 +118,9 @@ def execute(context): df_trips = context.stage("synthesis.population.trips") df_trips["travel_time"] = 
df_trips["arrival_time"] - df_trips["departure_time"] + df_vehicles = context.stage("synthesis.vehicles.vehicles")[1] + df_vehicles = df_vehicles.sort_values(by = ["owner_id"]) + with gzip.open(output_path, 'wb+') as writer: with io.BufferedWriter(writer, buffer_size = 2 * 1024**3) as writer: writer = writers.PopulationWriter(writer) @@ -115,6 +128,7 @@ def execute(context): activity_iterator = backlog_iterator(iter(df_activities[ACTIVITY_FIELDS].itertuples(index = False))) trip_iterator = backlog_iterator(iter(df_trips[TRIP_FIELDS].itertuples(index = False))) + vehicle_iterator = backlog_iterator(iter(df_vehicles[VEHICLE_FIELDS].itertuples(index = False))) with context.progress(total = len(df_persons), label = "Writing population ...") as progress: for person in df_persons.itertuples(index = False): @@ -122,6 +136,7 @@ def execute(context): activities = [] trips = [] + vehicles = [] # Track all activities for person while activity_iterator.has_next(): @@ -147,7 +162,17 @@ def execute(context): assert len(trips) == len(activities) - 1 - add_person(writer, person, activities, trips) + # Track all vehicles for person + while vehicle_iterator.has_next(): + vehicle = vehicle_iterator.next() + + if not vehicle[VEHICLE_FIELDS.index("owner_id")] == person_id: + vehicle_iterator.previous() + break + else: + vehicles.append(vehicle) + + add_person(writer, person, activities, trips, vehicles) progress.update() writer.end_population() diff --git a/matsim/scenario/vehicles.py b/matsim/scenario/vehicles.py index d9ecbaee..63205fc3 100644 --- a/matsim/scenario/vehicles.py +++ b/matsim/scenario/vehicles.py @@ -6,7 +6,7 @@ import matsim.writers as writers def configure(context): - context.stage("synthesis.vehicles.selected") + context.stage("synthesis.vehicles.vehicles") TYPE_FIELDS = ["type_id", "nb_seats", "length", "width", "pce", "mode"] VEHICLE_FIELDS = ["vehicle_id", "type_id", "critair", "technology", "age", "euro"] @@ -14,7 +14,7 @@ def configure(context): def 
execute(context): output_path = "%s/vehicles.xml.gz" % context.path() - df_vehicle_types, df_vehicles = context.stage("synthesis.vehicles.selected") + df_vehicle_types, df_vehicles = context.stage("synthesis.vehicles.vehicles") with gzip.open(output_path, 'wb+') as writer: with io.BufferedWriter(writer, buffer_size = 2 * 1024**3) as writer: diff --git a/matsim/simulation/prepare.py b/matsim/simulation/prepare.py index 8cb41af9..7a73e6d8 100644 --- a/matsim/simulation/prepare.py +++ b/matsim/simulation/prepare.py @@ -8,9 +8,7 @@ def configure(context): context.stage("matsim.scenario.population") context.stage("matsim.scenario.households") - - if context.config("generate_vehicles_file", False): - context.stage("matsim.scenario.vehicles") + context.stage("matsim.scenario.vehicles") context.stage("matsim.scenario.facilities") context.stage("matsim.scenario.supply.processed") @@ -78,12 +76,11 @@ def execute(context): ) shutil.copy(transit_vehicles_path, "%s/%stransit_vehicles.xml.gz" % (context.cache_path, context.config("output_prefix"))) - if context.config("generate_vehicles_file"): - vehicles_path = "%s/%s" % ( - context.path("matsim.scenario.vehicles"), - context.stage("matsim.scenario.vehicles") - ) - shutil.copy(vehicles_path, "%s/%svehicles.xml.gz" % (context.cache_path, context.config("output_prefix"))) + vehicles_path = "%s/%s" % ( + context.path("matsim.scenario.vehicles"), + context.stage("matsim.scenario.vehicles") + ) + shutil.copy(vehicles_path, "%s/%svehicles.xml.gz" % (context.cache_path, context.config("output_prefix"))) # Generate base configuration eqasim.run(context, "org.eqasim.core.scenario.config.RunGenerateConfig", [ @@ -98,7 +95,8 @@ def execute(context): # Adapt config for Île-de-France eqasim.run(context, "org.eqasim.ile_de_france.scenario.RunAdaptConfig", [ "--input-path", "generic_config.xml", - "--output-path", "%sconfig.xml" % context.config("output_prefix") + "--output-path", "%sconfig.xml" % context.config("output_prefix"), + 
"--prefix", context.config("output_prefix") ]) assert os.path.exists("%s/%sconfig.xml" % (context.path(), context.config("output_prefix"))) diff --git a/synthesis/output.py b/synthesis/output.py index b970e59b..1c47962f 100644 --- a/synthesis/output.py +++ b/synthesis/output.py @@ -13,8 +13,7 @@ def configure(context): context.stage("synthesis.population.activities") context.stage("synthesis.population.trips") - if context.config("generate_vehicles_file", False): - context.stage("synthesis.vehicles.selected") + context.stage("synthesis.vehicles.vehicles") context.stage("synthesis.population.spatial.locations") @@ -161,12 +160,15 @@ def execute(context): if "parquet" in output_formats: df_trips.to_csv("%s/%strips.parquet" % (output_path, output_prefix)) - if context.config("generate_vehicles_file"): - # Prepare vehicles - df_vehicle_types, df_vehicles = context.stage("synthesis.vehicles.selected") + # Prepare vehicles + df_vehicle_types, df_vehicles = context.stage("synthesis.vehicles.vehicles") + if "csv" in output_formats: df_vehicle_types.to_csv("%s/%svehicle_types.csv" % (output_path, output_prefix), sep = ";", index = None, lineterminator = "\n") df_vehicles.to_csv("%s/%svehicles.csv" % (output_path, output_prefix), sep = ";", index = None, lineterminator = "\n") + if "parquet" in output_formats: + df_vehicle_types.to_parquet("%s/%svehicle_types.parquet" % (output_path, output_prefix)) + df_vehicles.to_parquet("%s/%svehicles.parquet" % (output_path, output_prefix)) # Prepare spatial data sets df_locations = context.stage("synthesis.population.spatial.locations")[[ diff --git a/synthesis/vehicles/cars/default.py b/synthesis/vehicles/cars/default.py new file mode 100644 index 00000000..1bf32836 --- /dev/null +++ b/synthesis/vehicles/cars/default.py @@ -0,0 +1,31 @@ +import re +import pandas as pd + +""" +Creates a vehicle fleet based on a default vehicle type +""" + +def configure(context): + context.stage("synthesis.population.enriched") + +def execute(context): 
+ df_persons = context.stage("synthesis.population.enriched") + + df_vehicle_types = pd.DataFrame.from_records([{ + "type_id": "default_car", "nb_seats": 4, "length": 5.0, "width": 1.0, "pce": 1.0, "mode": "car", + "hbefa_cat": "PASSENGER_CAR", "hbefa_tech": "average", "hbefa_size": "average", "hbefa_emission": "average", + }]) + + df_vehicles = df_persons[["person_id"]].copy() + df_vehicles = df_vehicles.rename(columns = { "person_id": "owner_id" }) + + df_vehicles["mode"] = "car" + + df_vehicles["vehicle_id"] = df_vehicles["owner_id"].astype(str) + ":car" + df_vehicles["type_id"] = "default_car" + df_vehicles["critair"] = "Crit'air 1" + df_vehicles["technology"] = "Gazole" + df_vehicles["age"] = 0 + df_vehicles["euro"] = 6 + + return df_vehicle_types, df_vehicles \ No newline at end of file diff --git a/synthesis/vehicles/fleet_sample/vehicles.py b/synthesis/vehicles/cars/fleet_sampling.py similarity index 93% rename from synthesis/vehicles/fleet_sample/vehicles.py rename to synthesis/vehicles/cars/fleet_sampling.py index fd2f4128..dcd20a5a 100644 --- a/synthesis/vehicles/fleet_sample/vehicles.py +++ b/synthesis/vehicles/cars/fleet_sampling.py @@ -13,11 +13,11 @@ def configure(context): context.stage("data.vehicles.raw") context.stage("data.vehicles.types") - context.config("vehicles_data_year", 2015) + context.config("vehicles_year", 2021) def _sample_vehicle(context, args): vehicle = args - year = context.config("vehicles_data_year") + year = context.config("vehicles_year") df_vehicle_fleet_counts, df_vehicle_age_counts = context.data("fleet"), context.data("age") commune_id = vehicle["commune_id"] @@ -120,9 +120,11 @@ def execute(context): df_vehicles = pd.merge(df_persons[["household_id", "person_id"]], df_homes[["household_id", "commune_id"]], on = "household_id") - df_vehicles = df_vehicles.rename(columns = { "person_id": "vehicle_id" }) - df_vehicles = df_vehicles.drop_duplicates("vehicle_id") + df_vehicles = df_vehicles.rename(columns = { "person_id": 
"owner_id" }) + df_vehicles["vehicle_id"] = df_vehicles["owner_id"].astype(str) + ":car" + df_vehicles = df_vehicles.drop_duplicates("vehicle_id") # is this needed? df_vehicles["type_id"] = "default_car" + df_vehicles["mode"] = "car" df_vehicle_fleet_counts, df_vehicle_age_counts = context.stage("data.vehicles.raw") diff --git a/synthesis/vehicles/passengers/default.py b/synthesis/vehicles/passengers/default.py new file mode 100644 index 00000000..6916f5bb --- /dev/null +++ b/synthesis/vehicles/passengers/default.py @@ -0,0 +1,31 @@ +import re +import pandas as pd + +""" +Creates a vehicle fleet based on a default vehicle type for the dummy passenger mode +""" + +def configure(context): + context.stage("synthesis.population.enriched") + +def execute(context): + df_persons = context.stage("synthesis.population.enriched") + + df_vehicle_types = pd.DataFrame.from_records([{ + "type_id": "default_car_passenger", "nb_seats": 4, "length": 5.0, "width": 1.0, "pce": 1.0, "mode": "car_passenger", + "hbefa_cat": "PASSENGER_CAR", "hbefa_tech": "average", "hbefa_size": "average", "hbefa_emission": "average", + }]) + + df_vehicles = df_persons[["person_id"]].copy() + df_vehicles = df_vehicles.rename(columns = { "person_id": "owner_id" }) + + df_vehicles["mode"] = "car_passenger" + + df_vehicles["vehicle_id"] = df_vehicles["owner_id"].astype(str) + ":car_passenger" + df_vehicles["type_id"] = "default_car_passenger" + df_vehicles["critair"] = "Crit'air 1" + df_vehicles["technology"] = "Gazole" + df_vehicles["age"] = 0 + df_vehicles["euro"] = 6 + + return df_vehicle_types, df_vehicles \ No newline at end of file diff --git a/synthesis/vehicles/selected.py b/synthesis/vehicles/selected.py deleted file mode 100644 index 6f558858..00000000 --- a/synthesis/vehicles/selected.py +++ /dev/null @@ -1,11 +0,0 @@ - -def configure(context): - method = context.config("generate_vehicles_method") - - if method == "fleet_sample": - context.stage("synthesis.vehicles.fleet_sample.vehicles", alias 
= "vehicles") - else: - raise RuntimeError("Unknown vehicles generation method : %s" % method) - -def execute(context): - return context.stage("vehicles") diff --git a/synthesis/vehicles/vehicles.py b/synthesis/vehicles/vehicles.py new file mode 100644 index 00000000..922cd36c --- /dev/null +++ b/synthesis/vehicles/vehicles.py @@ -0,0 +1,22 @@ +import pandas as pd + +def configure(context): + method = context.config("vehicles_method", "default") + + if method == "default": + context.stage("synthesis.vehicles.cars.default", alias = "cars") + elif method == "fleet_sample": + context.stage("synthesis.vehicles.cars.fleet_sampling", alias = "cars") + else: + raise RuntimeError("Unknown vehicles generation method : %s" % method) + + context.stage("synthesis.vehicles.passengers.default") + +def execute(context): + df_car_types, df_cars = context.stage("cars") + df_passenger_types, df_passengers = context.stage("synthesis.vehicles.passengers.default") + + df_vehicles = pd.concat([df_cars, df_passengers]) + df_types = pd.concat([df_car_types, df_passenger_types]) + + return df_types, df_vehicles diff --git a/tests/test_determinism.py b/tests/test_determinism.py index 763e567e..e6ca821c 100644 --- a/tests/test_determinism.py +++ b/tests/test_determinism.py @@ -72,6 +72,8 @@ def _test_determinism(index, data_path, tmpdir): "ile_de_france_households.csv": "709ce7ded8a2487e6691d4fb3374754b", "ile_de_france_persons.csv": "ddbe9b418c915b14e888b54efbdf9b1e", "ile_de_france_trips.csv": "6c5f3427e41e683da768eeb53796a806", + "ile_de_france_vehicle_types.csv": "00bee1ea6d7bc9af43ae6c7101dd75da", + "ile_de_france_vehicles.csv": "3567b0f29e51d521b13d91c82c77cecb", } REFERENCE_GPKG_HASHES = { @@ -133,7 +135,8 @@ def _test_determinism_matsim(index, data_path, tmpdir): #"ile_de_france_network.xml.gz": "5f10ec295b49d2bb768451c812955794", "ile_de_france_households.xml.gz": "64a0c9fab72aad51bc6adb926a1c9d44", #"ile_de_france_facilities.xml.gz": "5ad41afff9ae5c470082510b943e6778", - 
"ile_de_france_config.xml": "481fac5fb3e7b90810caa38ff460c00a" + "ile_de_france_config.xml": "30871dfbbd2b5bf6922be1dfe20ffe73", + "ile_de_france_vehicles.xml.gz": "d7c8d0dba531a21dc83355b2f82778c2" } # activities.gpkg, trips.gpkg, meta.json, diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 01bd6448..d9856f52 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -66,10 +66,12 @@ def run_population(tmpdir, hts, update = {}): assert 2235 == len(pd.read_csv("%s/ile_de_france_activities.csv" % output_path, usecols = ["household_id"], sep = ";")) assert 447 == len(pd.read_csv("%s/ile_de_france_persons.csv" % output_path, usecols = ["household_id"], sep = ";")) assert 149 == len(pd.read_csv("%s/ile_de_france_households.csv" % output_path, usecols = ["household_id"], sep = ";")) - - if "generate_vehicles_file" in update and update["generate_vehicles_file"]: - assert 17 == len(pd.read_csv("%s/ile_de_france_vehicle_types.csv" % output_path, usecols = ["type_id"], sep = ";")) - assert 447 == len(pd.read_csv("%s/ile_de_france_vehicles.csv" % output_path, usecols = ["vehicle_id"], sep = ";")) + + assert 447 * 2 == len(pd.read_csv("%s/ile_de_france_vehicles.csv" % output_path, usecols = ["vehicle_id"], sep = ";")) + if "vehicles_method" in update and update["vehicles_method"] == "fleet_sample": + assert 17 + 1 == len(pd.read_csv("%s/ile_de_france_vehicle_types.csv" % output_path, usecols = ["type_id"], sep = ";")) + else: + assert 2 == len(pd.read_csv("%s/ile_de_france_vehicle_types.csv" % output_path, usecols = ["type_id"], sep = ";")) def test_population_with_entd(tmpdir): run_population(tmpdir, "entd") @@ -82,9 +84,8 @@ def test_population_with_mode_choice(tmpdir): def test_population_with_fleet_sample(tmpdir): run_population(tmpdir, "entd", { - "generate_vehicles_file": True, - "generate_vehicles_method": "fleet_sample", - "vehicles_data_year": 2015 + "vehicles_method": "fleet_sample", + "vehicles_year": 2021 }) def 
test_population_with_bhepop2_income(tmpdir): diff --git a/tests/test_simulation.py b/tests/test_simulation.py index 6056b3e5..e31d6be9 100644 --- a/tests/test_simulation.py +++ b/tests/test_simulation.py @@ -30,3 +30,4 @@ def test_simulation(tmpdir): assert os.path.isfile("%s/ile_de_france_transit_vehicles.xml.gz" % output_path) assert os.path.isfile("%s/ile_de_france_households.xml.gz" % output_path) assert os.path.isfile("%s/ile_de_france_facilities.xml.gz" % output_path) + assert os.path.isfile("%s/ile_de_france_vehicles.xml.gz" % output_path) diff --git a/tests/testdata.py b/tests/testdata.py index ba09224e..e00d1b86 100644 --- a/tests/testdata.py +++ b/tests/testdata.py @@ -877,29 +877,29 @@ def create(output_path): df["region"].unique(), np.arange(20), ], names = [ - "Code région", "Age au 01/01/2015" + "Code région", "Age au 01/01/2021" ])).reset_index() # to enforce string df_vehicles_region = pd.concat([df_vehicles_region, pd.DataFrame({ "Code région": ["AB"], - "Age au 01/01/2015": [0], + "Age au 01/01/2021": [0], })]) df_vehicles_region["Code région"] = df_vehicles_region["Code région"].astype(str) - df_vehicles_region["Parc au 01/01/2015"] = 100 + df_vehicles_region["Parc au 01/01/2021"] = 100 df_vehicles_region["Energie"] = "Gazole" df_vehicles_region["Vignette crit'air"] = "Crit'air 1" - df_vehicles_region["Age au 01/01/2015"] = df_vehicles_region["Age au 01/01/2015"].astype(str) - df_vehicles_region["Age au 01/01/2015"] = df_vehicles_region["Age au 01/01/2015"].replace("20", ">20") - df_vehicles_region["Age au 01/01/2015"] = df_vehicles_region["Age au 01/01/2015"] + " ans" + df_vehicles_region["Age au 01/01/2021"] = df_vehicles_region["Age au 01/01/2021"].astype(str) + df_vehicles_region["Age au 01/01/2021"] = df_vehicles_region["Age au 01/01/2021"].replace("20", ">20") + df_vehicles_region["Age au 01/01/2021"] = df_vehicles_region["Age au 01/01/2021"] + " ans" df_vehicles_commune = pd.DataFrame({ "municipality": df["municipality"].unique() }) - 
df_vehicles_commune["Parc au 01/01/2015"] = 100 + df_vehicles_commune["Parc au 01/01/2021"] = 100 df_vehicles_commune["Energie"] = "Gazole" df_vehicles_commune["Vignette Crit'air"] = "Crit'air 1" @@ -913,9 +913,15 @@ def create(output_path): "region": "Code région", }) - os.mkdir("%s/vehicles_2015" % output_path) - df_vehicles_region.to_excel("%s/vehicles_2015/Parc_VP_Regions_2015.xlsx" % output_path) - df_vehicles_commune.to_excel("%s/vehicles_2015/Parc_VP_Communes_2015.xlsx" % output_path) + os.mkdir("%s/vehicles" % output_path) + + with zipfile.ZipFile("%s/vehicles/parc_vp_regions.zip" % output_path, "w") as archive: + with archive.open("Parc_VP_Regions_2021.xlsx", "w") as f: + df_vehicles_region.to_excel(f) + + with zipfile.ZipFile("%s/vehicles/parc_vp_communes.zip" % output_path, "w") as archive: + with archive.open("Parc_VP_Communes_2021.xlsx", "w") as f: + df_vehicles_commune.to_excel(f) if __name__ == "__main__": import shutil From 550f3433329bf006725b03e880830cf5d50eebe1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20H=C3=B6rl?= Date: Sat, 14 Sep 2024 09:55:27 +0200 Subject: [PATCH 2/3] chore: add delay into data verification (#256) * chore: add delay into data verification * triggering verification * printing the error to see what is going on * add a timeout of 2 minutes * trying to add retries * update * revert --- scripts/verify_data.py | 44 +++++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/scripts/verify_data.py b/scripts/verify_data.py index ce56f31e..93b77d4f 100644 --- a/scripts/verify_data.py +++ b/scripts/verify_data.py @@ -1,8 +1,13 @@ import requests +import time # The goal of this script is to verify the availability of the data # that is needed to set up the pipeline +sleep_time = 5 # seconds +timeout = 30 # seconds +retries = 3 + class Report: def __init__(self): self.sources = [] @@ -13,19 +18,32 @@ def register(self, name, url): def validate(self): failed = [] - for index, source 
in enumerate(self.sources): - print("[{}/{}] Checking {} ...".format(index + 1, len(self.sources), source["name"])) - - try: - response = requests.head(source["url"]) - source["status"] = response.status_code - except: - source["status"] = "error" - - print(" Status {}".format(source["status"])) - - if source["status"] != 200: - failed.append(source["name"]) + with requests.Session() as session: + session.headers.update({ "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:130.0) Gecko/20100101 Firefox/130.0" }) + for index, source in enumerate(self.sources): + print("[{}/{}] Checking {} ...".format(index + 1, len(self.sources), source["name"])) + + retry = 0 + success = False + + while not success and retry < retries: + try: + response = session.head(source["url"], timeout = timeout) + source["status"] = response.status_code + success = True + except TimeoutError: + source["status"] = "timeout" + except Exception as e: + source["status"] = "error" + print(e) + + retry += 1 + print(" Status {} (retry {}/{})".format(source["status"], retry, retries)) + + time.sleep(sleep_time) + + if source["status"] != 200: + failed.append(source["name"]) print("Done.") print("Missing: ", len(failed)) From e82ae98861b85ff93086cc1f4f7c143cf2101589 Mon Sep 17 00:00:00 2001 From: MarieMcLaurent <117629025+MarieMcLaurent@users.noreply.github.com> Date: Mon, 23 Sep 2024 14:22:11 +0200 Subject: [PATCH 3/3] feat: add municipality info (#258) * feat: addition municipalities info to households and activities * upadate tests & improve municipalities for house * first try correction test & changelog * fix: test gpkg hashes --------- Co-authored-by: Marie Laurent --- CHANGELOG.md | 1 + synthesis/output.py | 69 +++++++++++++---------- synthesis/population/spatial/locations.py | 7 +++ tests/test_determinism.py | 8 +-- 4 files changed, 52 insertions(+), 33 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b3f30918..468795dc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 
@@ **Under development** +- feat: add municipality information to households and activities - chore: update to `eqasim-java` commit `ece4932` - feat: vehicles and vehicle types are now always generated - feat: read vehicles data from zip files diff --git a/synthesis/output.py b/synthesis/output.py index 1c47962f..84c52a36 100644 --- a/synthesis/output.py +++ b/synthesis/output.py @@ -62,23 +62,6 @@ def execute(context): output_prefix = context.config("output_prefix") output_formats = context.config("output_formats") - # Prepare households - df_households = context.stage("synthesis.population.enriched").rename( - columns = { "household_income": "income" } - ).drop_duplicates("household_id") - - df_households = df_households[[ - "household_id", - "car_availability", "bike_availability", - "number_of_vehicles", "number_of_bikes", - "income", - "census_household_id" - ]] - if "csv" in output_formats: - df_households.to_csv("%s/%shouseholds.csv" % (output_path, output_prefix), sep = ";", index = None, lineterminator = "\n") - if "parquet" in output_formats: - df_households.to_parquet("%s/%shouseholds.parquet" % (output_path, output_prefix)) - # Prepare persons df_persons = context.stage("synthesis.population.enriched").rename( columns = { "has_license": "has_driving_license" } @@ -106,9 +89,29 @@ def execute(context): df_activities["preceding_trip_index"] = df_activities["following_trip_index"].shift(1) df_activities.loc[df_activities["is_first"], "preceding_trip_index"] = -1 df_activities["preceding_trip_index"] = df_activities["preceding_trip_index"].astype(int) + # Prepare spatial data sets + df_locations = context.stage("synthesis.population.spatial.locations")[[ + "person_id", "iris_id", "commune_id","departement_id","region_id","activity_index", "geometry" + ]] + df_activities = pd.merge(df_activities, df_locations[[ + "person_id", "iris_id", "commune_id","departement_id","region_id","activity_index", "geometry" + ]], how = "left", on = ["person_id", 
"activity_index"]) + + # Prepare spatial activities + df_spatial = gpd.GeoDataFrame(df_activities[[ + "person_id", "household_id", "activity_index", + "iris_id", "commune_id","departement_id","region_id", + "preceding_trip_index", "following_trip_index", + "purpose", "start_time", "end_time", + "is_first", "is_last", "geometry" + ]], crs = df_locations.crs) + df_spatial = df_spatial.astype({'purpose': 'str', "departement_id": 'str'}) + + # Write activities df_activities = df_activities[[ "person_id", "household_id", "activity_index", + "iris_id", "commune_id","departement_id","region_id", "preceding_trip_index", "following_trip_index", "purpose", "start_time", "end_time", "is_first", "is_last" @@ -119,6 +122,25 @@ def execute(context): if "parquet" in output_formats: df_activities.to_parquet("%s/%sactivities.parquet" % (output_path, output_prefix)) + # Prepare households + df_households = context.stage("synthesis.population.enriched").rename( + columns = { "household_income": "income" } + ).drop_duplicates("household_id") + + df_households = pd.merge(df_households,df_activities[df_activities["purpose"] == "home"][["household_id", + "iris_id", "commune_id","departement_id","region_id"]].drop_duplicates("household_id"),how="left") + df_households = df_households[[ + "household_id","iris_id", "commune_id", "departement_id","region_id", + "car_availability", "bike_availability", + "number_of_vehicles", "number_of_bikes", + "income", + "census_household_id" + ]] + if "csv" in output_formats: + df_households.to_csv("%s/%shouseholds.csv" % (output_path, output_prefix), sep = ";", index = None, lineterminator = "\n") + if "parquet" in output_formats: + df_households.to_parquet("%s/%shouseholds.parquet" % (output_path, output_prefix)) + # Prepare trips df_trips = context.stage("synthesis.population.trips").rename( columns = { @@ -170,18 +192,7 @@ def execute(context): df_vehicle_types.to_parquet("%s/%svehicle_types.parquet" % (output_path, output_prefix)) 
df_vehicles.to_parquet("%s/%svehicles.parquet" % (output_path, output_prefix)) - # Prepare spatial data sets - df_locations = context.stage("synthesis.population.spatial.locations")[[ - "person_id", "activity_index", "geometry" - ]] - - df_activities = pd.merge(df_activities, df_locations[[ - "person_id", "activity_index", "geometry" - ]], how = "left", on = ["person_id", "activity_index"]) - # Write spatial activities - df_spatial = gpd.GeoDataFrame(df_activities, crs = df_locations.crs) - df_spatial["purpose"] = df_spatial["purpose"].astype(str) if "gpkg" in output_formats: path = "%s/%sactivities.gpkg" % (output_path, output_prefix) df_spatial.to_file(path, driver = "GPKG") @@ -194,7 +205,7 @@ def execute(context): df_spatial_homes = df_spatial[ df_spatial["purpose"] == "home" ].drop_duplicates("household_id")[[ - "household_id", "geometry" + "household_id","iris_id", "commune_id","departement_id","region_id", "geometry" ]] if "gpkg" in output_formats: path = "%s/%shomes.gpkg" % (output_path, output_prefix) diff --git a/synthesis/population/spatial/locations.py b/synthesis/population/spatial/locations.py index 5277fd19..2397e095 100644 --- a/synthesis/population/spatial/locations.py +++ b/synthesis/population/spatial/locations.py @@ -9,6 +9,7 @@ def configure(context): context.stage("synthesis.population.activities") context.stage("synthesis.population.sampled") + context.stage("data.spatial.iris") def execute(context): df_home = context.stage("synthesis.population.spatial.home.locations") @@ -57,4 +58,10 @@ def execute(context): assert not df_locations["geometry"].isna().any() df_locations = gpd.GeoDataFrame(df_locations, crs = df_home.crs) + # add municipalities + df_iris = context.stage("data.spatial.iris") + df_iris = gpd.GeoDataFrame(df_iris, crs = df_home.crs) + + df_locations = gpd.sjoin(df_locations,df_iris,how="left") + return df_locations diff --git a/tests/test_determinism.py b/tests/test_determinism.py index e6ca821c..e2755d7a 100644 --- 
a/tests/test_determinism.py +++ b/tests/test_determinism.py @@ -68,8 +68,8 @@ def _test_determinism(index, data_path, tmpdir): synpp.run(stages, config, working_directory = cache_path) REFERENCE_CSV_HASHES = { - "ile_de_france_activities.csv": "e520003e1876a9542ff1a955a6efcfdc", - "ile_de_france_households.csv": "709ce7ded8a2487e6691d4fb3374754b", + "ile_de_france_activities.csv": "53c44fb4026d2037729ee8ff1c8fb93f", + "ile_de_france_households.csv": "ca2a29ef13467326f937638f1ff8be1a", "ile_de_france_persons.csv": "ddbe9b418c915b14e888b54efbdf9b1e", "ile_de_france_trips.csv": "6c5f3427e41e683da768eeb53796a806", "ile_de_france_vehicle_types.csv": "00bee1ea6d7bc9af43ae6c7101dd75da", @@ -77,9 +77,9 @@ def _test_determinism(index, data_path, tmpdir): } REFERENCE_GPKG_HASHES = { - "ile_de_france_activities.gpkg": "9cf9a5fd8927c709927f7a940f86efbf", + "ile_de_france_activities.gpkg": "884eec1fd0c29904284eb4362ff89be1", "ile_de_france_commutes.gpkg": "5a4180390a69349cc655c07c5671e8d3", - "ile_de_france_homes.gpkg": "033d1aa7a5350579cbd5e8213b9736f2", + "ile_de_france_homes.gpkg": "a85e973f0e2f51031cd60170d351845e", "ile_de_france_trips.gpkg": "d0aec4033cfc184bf1b91ae13a537ef8", }