diff --git a/data/hts/emp/cleaned.py b/data/hts/emp/cleaned.py index c64f691..77d1d5b 100644 --- a/data/hts/emp/cleaned.py +++ b/data/hts/emp/cleaned.py @@ -110,8 +110,8 @@ def execute(context): df_households["departement_id"] = df_households["DEP_RES"].fillna("undefined").astype("category") df_persons["departement_id"] = df_persons["DEP_RES"].fillna("undefined").astype("category") - df_trips["origin_departement_id"] = df_trips["REG_ORI"].fillna("undefined").astype("category") - df_trips["destination_departement_id"] = df_trips["REG_DES"].fillna("undefined").astype("category") + df_trips["origin_departement_id"] = '00' + df_trips["destination_departement_id"] = '00' # Clean urban type df_households["urban_type"] = df_households["STATUTCOM_UU_RES"].replace({ @@ -224,10 +224,10 @@ def execute(context): df_persons["is_passenger"] = df_persons["person_id"].isin( df_trips[df_trips["mode"] == "car_passenger"]["person_id"].unique() ) - print(len(df_persons)) - #Force clean + + #Drop person without right household size df_persons = df_persons.drop(df_persons[(df_persons["number_of_trips"] == -1) & (df_persons['household_id'].isin([1647,6182,12630]))].index) - print(len(df_persons)) + # Calculate consumption units hts.check_household_size(df_households, df_persons) df_households = pd.merge(df_households, hts.calculate_consumption_units(df_persons), on = "household_id") diff --git a/data/hts/emp/raw.py b/data/hts/emp/raw.py index a05f54f..80c8023 100644 --- a/data/hts/emp/raw.py +++ b/data/hts/emp/raw.py @@ -48,7 +48,7 @@ def configure(context): def execute(context): # Load IRIS registry with zipfile.ZipFile( - f'{context.config("data_path")}/emp_2019/emp_2019_donnees_individuelles_anonymisees.zip') as archive: + f'{context.config("data_path")}/emp_2019/emp_2019_donnees_individuelles_anonymisees_novembre2024.zip') as archive: with archive.open("k_individu_public_V3.csv") as f: df_individu = pd.read_csv(f, sep = ";", encoding = "latin1", usecols = K_INDIVIDU_COLUMNS, @@ -71,7 
+71,7 @@ def execute(context): sep = ",", encoding = "latin1", usecols = Q_TCM_MENAGE_COLUMNS, dtype = { "DEP_RES": str }) - with archive.open("k_deploc_public_V3.csv") as f: + with archive.open("5. k_deploc_public_V4.csv") as f: df_deploc = pd.read_csv(f, sep = ",", encoding = "latin1", usecols = K_DEPLOC_COLUMNS, ) @@ -79,7 +79,7 @@ def execute(context): return df_individu, df_tcm_individu,df_tcm_individu_kish, df_menage, df_tcm_menage, df_deploc def validate(context): - if not os.path.exists(f'{context.config("data_path")}/emp_2019/emp_2019_donnees_individuelles_anonymisees.zip'): + if not os.path.exists(f'{context.config("data_path")}/emp_2019/emp_2019_donnees_individuelles_anonymisees_novembre2024.zip'): raise RuntimeError("Files for EMP are not available") - return os.path.getsize(f'{context.config("data_path")}/emp_2019/emp_2019_donnees_individuelles_anonymisees.zip') + return os.path.getsize(f'{context.config("data_path")}/emp_2019/emp_2019_donnees_individuelles_anonymisees_novembre2024.zip') diff --git a/docs/population.md b/docs/population.md index 6742958..5df955f 100644 --- a/docs/population.md +++ b/docs/population.md @@ -66,7 +66,7 @@ The census of services and facilities in France is available from INSEE: services while the lower data sets only contain observations for specific sectors. - Copy the *zip* file into the folder `data/bpe_2023`. -### 6a) National household travel survey (ENTD 2008) +### 6a) National household travel survey (ENTD 2008) The national household travel survey is available from the Ministry of Ecology: @@ -83,7 +83,17 @@ a few are actually relevant for the pipeline. Those are: - Données mobilité déplacements locaux (K_deploc.csv) - Put the downloaded *csv* files in to the folder `data/entd_2008`. 
-### 6b) *(Optional)* Regional household travel survey (EGT) +### 6b) *(Optional)* National persons mobility survey (EMP 2019) + +The national persons mobility survey is also available from the Ministry of Ecology: + +- [National persons mobility survey](https://www.statistiques.developpement-durable.gouv.fr/resultats-detailles-de-lenquete-mobilite-des-personnes-de-2019) +- Scroll all the way down the website to the **Télécharger les données individuelles anonymisées et leurs dictionnaires** (a clickable +drop-down menu). +- Download the data set in **csv** by clicking on the link **Données individuelles anonymisées (fichiers au format CSV) - EMP 2019** +- Copy the *zip* file into the folder `data/emp_2019`. + +### 6c) *(Optional)* Regional household travel survey (EGT) Usually, you do not have access to the regional household travel survey, which is not available publicly. In case you have access (but we cannot @@ -193,9 +203,9 @@ Your folder structure should now have at least the following files: - `data/ban_idf/adresses-93.csv.gz` - `data/ban_idf/adresses-94.csv.gz` -In case you are using the regional household travel survey (EGT), the following -files should also be in place: - +In case you are using the national persons mobility survey (EMP) or the regional household travel survey (EGT), the following files should also be in place, respectively: +- `data/emp_2019/emp_2019_donnees_individuelles_anonymisees_novembre2024.zip` +or - `data/egt_2010/Menages_semaine.csv` - `data/egt_2010/Personnes_semaine.csv` - `data/egt_2010/Deplacements_semaine.csv`