fix: correct & docs

eqasim-org · Jan 9, 2025 · b17d6ef · b17d6ef
1 parent 24db79c
commit b17d6ef
Show file tree

Hide file tree

Showing 3 changed files with 24 additions and 14 deletions.
diff --git a/data/hts/emp/cleaned.py b/data/hts/emp/cleaned.py
@@ -110,8 +110,8 @@ def execute(context):
     df_households["departement_id"] = df_households["DEP_RES"].fillna("undefined").astype("category")
     df_persons["departement_id"] = df_persons["DEP_RES"].fillna("undefined").astype("category")
 
-    df_trips["origin_departement_id"] = df_trips["REG_ORI"].fillna("undefined").astype("category")
-    df_trips["destination_departement_id"] = df_trips["REG_DES"].fillna("undefined").astype("category")
+    df_trips["origin_departement_id"] = '00'
+    df_trips["destination_departement_id"] = '00'
 
     # Clean urban type
     df_households["urban_type"] = df_households["STATUTCOM_UU_RES"].replace({
@@ -224,10 +224,10 @@ def execute(context):
     df_persons["is_passenger"] = df_persons["person_id"].isin(
         df_trips[df_trips["mode"] == "car_passenger"]["person_id"].unique()
     )
-    print(len(df_persons))
-    #Force clean 
+
+    # Drop persons without the right household size
     df_persons = df_persons.drop(df_persons[(df_persons["number_of_trips"] == -1) & (df_persons['household_id'].isin([1647,6182,12630]))].index)
-    print(len(df_persons))
+
     # Calculate consumption units
     hts.check_household_size(df_households, df_persons)
     df_households = pd.merge(df_households, hts.calculate_consumption_units(df_persons), on = "household_id")

diff --git a/data/hts/emp/raw.py b/data/hts/emp/raw.py
@@ -48,7 +48,7 @@ def configure(context):
 def execute(context):
      # Load the EMP 2019 survey tables from the zip archive
     with zipfile.ZipFile(
-        f'{context.config("data_path")}/emp_2019/emp_2019_donnees_individuelles_anonymisees.zip') as archive: 
+        f'{context.config("data_path")}/emp_2019/emp_2019_donnees_individuelles_anonymisees_novembre2024.zip') as archive: 
         with archive.open("k_individu_public_V3.csv") as f:    
             df_individu = pd.read_csv(f,
                 sep = ";", encoding = "latin1", usecols = K_INDIVIDU_COLUMNS,
@@ -71,15 +71,15 @@ def execute(context):
                 sep = ",", encoding = "latin1", usecols = Q_TCM_MENAGE_COLUMNS,
                 dtype = { "DEP_RES": str })
 
-        with archive.open("k_deploc_public_V3.csv") as f:
+        with archive.open("5. k_deploc_public_V4.csv") as f:
             df_deploc = pd.read_csv(f,
                 sep = ",", encoding = "latin1", usecols = K_DEPLOC_COLUMNS,
                 )
 
     return df_individu, df_tcm_individu,df_tcm_individu_kish, df_menage, df_tcm_menage, df_deploc
 
 def validate(context):
-    if not os.path.exists(f'{context.config("data_path")}/emp_2019/emp_2019_donnees_individuelles_anonymisees.zip'):
+    if not os.path.exists(f'{context.config("data_path")}/emp_2019/emp_2019_donnees_individuelles_anonymisees_novembre2024.zip'):
         raise RuntimeError("Files for EMP are not available")
 
-    return os.path.getsize(f'{context.config("data_path")}/emp_2019/emp_2019_donnees_individuelles_anonymisees.zip')
+    return os.path.getsize(f'{context.config("data_path")}/emp_2019/emp_2019_donnees_individuelles_anonymisees_novembre2024.zip')
diff --git a/docs/population.md b/docs/population.md
@@ -66,7 +66,7 @@ The census of services and facilities in France is available from INSEE:
 services while the lower data sets only contain observations for specific sectors.
 - Copy the *zip* file into the folder `data/bpe_2023`.
 
-### 6a) National household travel survey (ENTD 2008)
+### 6a)  National household travel survey (ENTD 2008)
 
 The national household travel survey is available from the Ministry of Ecology:
 
@@ -83,7 +83,17 @@ a few are actually relevant for the pipeline. Those are:
   - Données mobilité déplacements locaux (K_deploc.csv)
 - Put the downloaded *csv* files in to the folder `data/entd_2008`.
 
-### 6b) *(Optional)* Regional household travel survey (EGT)
+### 6b) *(Optional)* National persons mobility survey (EMP 2019)
+
+The national persons mobility survey is also available from the Ministry of Ecology:
+
+- [National persons mobility survey](https://www.statistiques.developpement-durable.gouv.fr/resultats-detailles-de-lenquete-mobilite-des-personnes-de-2019)
+- Scroll all the way down the page to the section **Télécharger les données individuelles anonymisées et leurs dictionnaires** (a clickable
+drop-down menu).
+- Download the data set in **csv** format by clicking on the link **Données individuelles anonymisées (fichiers au format CSV) - EMP 2019**.
+- Copy the *zip* file into the folder `data/emp_2019`.
+
+### 6c) *(Optional)* Regional household travel survey (EGT)
 
 Usually, you do not have access to the regional household travel
 survey, which is not available publicly. In case you have access (but we cannot
@@ -193,9 +203,9 @@ Your folder structure should now have at least the following files:
 - `data/ban_idf/adresses-93.csv.gz`
 - `data/ban_idf/adresses-94.csv.gz`
 
-In case you are using the regional household travel survey (EGT), the following
-files should also be in place:
-
+In case you are using the national persons mobility survey (EMP 2019) or the regional household travel survey (EGT), the following files should also be in place, respectively:
+- `data/emp_2019/emp_2019_donnees_individuelles_anonymisees_novembre2024.zip`
+or 
 - `data/egt_2010/Menages_semaine.csv`
 - `data/egt_2010/Personnes_semaine.csv`
 - `data/egt_2010/Deplacements_semaine.csv`