Skip to content

Commit

Permalink
fix: correct & docs
Browse files Browse the repository at this point in the history
  • Loading branch information
Marie Laurent committed Jan 9, 2025
1 parent 24db79c commit b17d6ef
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 14 deletions.
10 changes: 5 additions & 5 deletions data/hts/emp/cleaned.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,8 @@ def execute(context):
df_households["departement_id"] = df_households["DEP_RES"].fillna("undefined").astype("category")
df_persons["departement_id"] = df_persons["DEP_RES"].fillna("undefined").astype("category")

df_trips["origin_departement_id"] = df_trips["REG_ORI"].fillna("undefined").astype("category")
df_trips["destination_departement_id"] = df_trips["REG_DES"].fillna("undefined").astype("category")
df_trips["origin_departement_id"] = '00'
df_trips["destination_departement_id"] = '00'

# Clean urban type
df_households["urban_type"] = df_households["STATUTCOM_UU_RES"].replace({
Expand Down Expand Up @@ -224,10 +224,10 @@ def execute(context):
df_persons["is_passenger"] = df_persons["person_id"].isin(
df_trips[df_trips["mode"] == "car_passenger"]["person_id"].unique()
)
print(len(df_persons))
#Force clean

#Drop person without right household size
df_persons = df_persons.drop(df_persons[(df_persons["number_of_trips"] == -1) & (df_persons['household_id'].isin([1647,6182,12630]))].index)
print(len(df_persons))

# Calculate consumption units
hts.check_household_size(df_households, df_persons)
df_households = pd.merge(df_households, hts.calculate_consumption_units(df_persons), on = "household_id")
Expand Down
8 changes: 4 additions & 4 deletions data/hts/emp/raw.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def configure(context):
def execute(context):
# Load IRIS registry
with zipfile.ZipFile(
f'{context.config("data_path")}/emp_2019/emp_2019_donnees_individuelles_anonymisees.zip') as archive:
f'{context.config("data_path")}/emp_2019/emp_2019_donnees_individuelles_anonymisees_novembre2024.zip') as archive:
with archive.open("k_individu_public_V3.csv") as f:
df_individu = pd.read_csv(f,
sep = ";", encoding = "latin1", usecols = K_INDIVIDU_COLUMNS,
Expand All @@ -71,15 +71,15 @@ def execute(context):
sep = ",", encoding = "latin1", usecols = Q_TCM_MENAGE_COLUMNS,
dtype = { "DEP_RES": str })

with archive.open("k_deploc_public_V3.csv") as f:
with archive.open("5. k_deploc_public_V4.csv") as f:
df_deploc = pd.read_csv(f,
sep = ",", encoding = "latin1", usecols = K_DEPLOC_COLUMNS,
)

return df_individu, df_tcm_individu,df_tcm_individu_kish, df_menage, df_tcm_menage, df_deploc

def validate(context):
if not os.path.exists(f'{context.config("data_path")}/emp_2019/emp_2019_donnees_individuelles_anonymisees.zip'):
if not os.path.exists(f'{context.config("data_path")}/emp_2019/emp_2019_donnees_individuelles_anonymisees_novembre2024.zip'):
raise RuntimeError("Files for EMP are not available")

return os.path.getsize(f'{context.config("data_path")}/emp_2019/emp_2019_donnees_individuelles_anonymisees.zip')
return os.path.getsize(f'{context.config("data_path")}/emp_2019/emp_2019_donnees_individuelles_anonymisees_novembre2024.zip')
20 changes: 15 additions & 5 deletions docs/population.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ The census of services and facilities in France is available from INSEE:
services while the lower data sets only contain observations for specific sectors.
- Copy the *zip* file into the folder `data/bpe_2023`.

### 6a) National household travel survey (ENTD 2008)
### 6a) National household travel survey (ENTD 2008)

The national household travel survey is available from the Ministry of Ecology:

Expand All @@ -83,7 +83,17 @@ a few are actually relevant for the pipeline. Those are:
- Données mobilité déplacements locaux (K_deploc.csv)
- Put the downloaded *csv* files into the folder `data/entd_2008`.

### 6b) *(Optional)* Regional household travel survey (EGT)
### 6b) *(Optional)* National persons mobility survey (EMP 2019)

The national persons mobility survey is also available from the Ministry of Ecology:

- [National persons mobility survey](https://www.statistiques.developpement-durable.gouv.fr/resultats-detailles-de-lenquete-mobilite-des-personnes-de-2019)
- Scroll all the way down the page to the section **Télécharger les données individuelles anonymisées et leurs dictionnaires** (a clickable
drop-down menu).
- Download the data set in **csv** format by clicking on the link **Données individuelles anonymisées (fichiers au format CSV) - EMP 2019**.
- Copy the *zip* file into the folder `data/emp_2019`.

### 6c) *(Optional)* Regional household travel survey (EGT)

Usually, you do not have access to the regional household travel
survey, which is not available publicly. In case you have access (but we cannot
Expand Down Expand Up @@ -193,9 +203,9 @@ Your folder structure should now have at least the following files:
- `data/ban_idf/adresses-93.csv.gz`
- `data/ban_idf/adresses-94.csv.gz`

In case you are using the regional household travel survey (EGT), the following
files should also be in place:

In case you are using the national persons mobility survey (EMP 2019) or the regional household travel survey (EGT), the following files should also be in place, respectively:
- `data/emp_2019/emp_2019_donnees_individuelles_anonymisees_novembre2024.zip`
or
- `data/egt_2010/Menages_semaine.csv`
- `data/egt_2010/Personnes_semaine.csv`
- `data/egt_2010/Deplacements_semaine.csv`
Expand Down

0 comments on commit b17d6ef

Please sign in to comment.