update

eqasim-org · Feb 5, 2024 · 8e078ca · 8e078ca
1 parent 2af75c7
commit 8e078ca
Show file tree

Hide file tree

Showing 5 changed files with 4 additions and 66 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,9 @@
 
 **Under development**
 
+- feat: functionality to make use of INSEE population projection data
+- update: households with people working/studying outside Île-de-France are no longer removed, to be more consistent with other use cases
+- fix: bug where one household_id always existed twice
 - Fix read order when exploring files using `glob`
 - Modes are only written now to `trips.csv` if `mode_choice` is activated
 - Update to `eqasim-java` commit `7cbe85b`

diff --git a/data/census/cleaned.py b/data/census/cleaned.py
@@ -93,10 +93,6 @@ def execute(context):
     # Socioprofessional category
     df["socioprofessional_class"] = df["CS1"].astype(int)
 
-    # Place of work or education
-    df["work_outside_region"] = df["ILT"].isin(("4", "5", "6"))
-    df["education_outside_region"] = df["ILETUD"].isin(("4", "5", "6"))
-
     # Consumption units
     df = pd.merge(df, hts.calculate_consumption_units(df), on = "household_id")
 
@@ -106,6 +102,5 @@ def execute(context):
         "age", "sex", "couple",
         "commute_mode", "employed",
         "studies", "number_of_vehicles", "household_size",
-        "work_outside_region", "education_outside_region",
         "consumption_units", "socioprofessional_class"
     ]]
diff --git a/data/census/filtered.py b/data/census/filtered.py
@@ -14,16 +14,6 @@ def configure(context):
 def execute(context):
     df = context.stage("data.census.cleaned")
 
-    # We remove people who study or work in another region
-    f = df["work_outside_region"] | df["education_outside_region"]
-    remove_ids = df[f]["household_id"].unique()
-
-    initial_households = len(df["household_id"].unique())
-    removed_households = len(remove_ids)
-
-    initial_persons = len(df["person_id"].unique())
-    removed_persons = np.count_nonzero(df["household_id"].isin(remove_ids))
-
     # Filter requested codes
     df_codes = context.stage("data.spatial.codes")
 
@@ -38,44 +28,4 @@ def execute(context):
     if not excess_iris == {"undefined"}:
         raise RuntimeError("Found additional IRIS: %s" % excess_iris)
 
-    # TODO: This filtering is not really compatible with defining multiple regions
-    # or departments. This used to be a filter to avoid people going outside of
-    # Île-de-France, but we should consider removing this filter altogether, or
-    # find some smarter way (e.g. using OD matrices and filter out people in
-    # each municipality by the share of outside workers).
-    df_codes = context.stage("data.spatial.codes")
-
-    if len(df_codes["region_id"].unique()) > 1:
-        raise RuntimeError("""
-            Multiple regions are defined, so the filtering for people going outside
-            of Île-de-France does not make sense in that case. Consider adjusting the
-            data.census.filtered stage!
-        """)
-
-    print(
-        "Removing %d/%d (%.2f%%) households (with %d/%d persons, %.2f%%) because at least one person is working outside of Île-de-France" % (
-        removed_households, initial_households, 100 * removed_households / initial_households,
-        removed_persons, initial_persons, 100 * removed_persons / initial_persons
-    ))
-
-    context.set_info("filtered_households_share", removed_households / initial_households)
-    context.set_info("filtered_persons_share", removed_persons / initial_persons)
-
-    df = df[~df["household_id"].isin(remove_ids)]
-
-
-    # Household size
-    df_size = df[["household_id"]].groupby("household_id").size().reset_index(name = "household_size2")
-    df = pd.merge(df, df_size)
-
-    f = df["household_size"] != df["household_size2"]
-    print(np.count_nonzero(f))
-    print(df[f])
-
-    print(df[df["household_id"] == 8958513])
-
-    assert np.all(df["household_size"] == df["household_size2"])
-    print("all good")
-    exit()
-
     return df
diff --git a/data/census/raw.py b/data/census/raw.py
@@ -22,9 +22,7 @@ def configure(context):
     "COUPLE":"str", 
     "CS1":"str",
     "DEPT":"str", 
-    "ETUD":"str", 
-    "ILETUD":"str",
-    "ILT":"str", 
+    "ETUD":"str",
     "IPONDI":"str", 
     "IRIS":"str",
     "REGION":"str", 

diff --git a/synthesis/population/sampled.py b/synthesis/population/sampled.py
@@ -25,14 +25,6 @@ def execute(context):
     sampling_rate = context.config("sampling_rate")
     random = np.random.RandomState(context.config("random_seed"))
 
-    # Household size
-    df_size = df_census[["household_id"]].groupby("household_id").size().reset_index(name = "household_size2")
-    df_census = pd.merge(df_census, df_size)
-
-    assert np.all(df_census["household_size"] == df_census["household_size2"])
-    print("all good")
-    exit()
-
     # Perform stochastic rounding for the population (and scale weights)
     df_rounding = df_census[["household_id", "weight", "household_size"]].drop_duplicates("household_id")
     df_rounding["multiplicator"] = np.floor(df_rounding["weight"])