Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
sebhoerl committed Feb 5, 2024
1 parent 2af75c7 commit 8e078ca
Show file tree
Hide file tree
Showing 5 changed files with 4 additions and 66 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

**Under development**

- feat: functionality to make use of INSEE population projection data
- update: households with people working/studying outside Île-de-France are no longer removed, to be more consistent with other use cases
- fix: bug where one household_id always existed twice
- Fix read order when exploring files using `glob`
- Modes are now only written to `trips.csv` if `mode_choice` is activated
- Update to `eqasim-java` commit `7cbe85b`
Expand Down
5 changes: 0 additions & 5 deletions data/census/cleaned.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,6 @@ def execute(context):
# Socioprofessional category
df["socioprofessional_class"] = df["CS1"].astype(int)

# Place of work or education
df["work_outside_region"] = df["ILT"].isin(("4", "5", "6"))
df["education_outside_region"] = df["ILETUD"].isin(("4", "5", "6"))

# Consumption units
df = pd.merge(df, hts.calculate_consumption_units(df), on = "household_id")

Expand All @@ -106,6 +102,5 @@ def execute(context):
"age", "sex", "couple",
"commute_mode", "employed",
"studies", "number_of_vehicles", "household_size",
"work_outside_region", "education_outside_region",
"consumption_units", "socioprofessional_class"
]]
50 changes: 0 additions & 50 deletions data/census/filtered.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,6 @@ def configure(context):
def execute(context):
df = context.stage("data.census.cleaned")

# We remove people who study or work in another region
f = df["work_outside_region"] | df["education_outside_region"]
remove_ids = df[f]["household_id"].unique()

initial_households = len(df["household_id"].unique())
removed_households = len(remove_ids)

initial_persons = len(df["person_id"].unique())
removed_persons = np.count_nonzero(df["household_id"].isin(remove_ids))

# Filter requested codes
df_codes = context.stage("data.spatial.codes")

Expand All @@ -38,44 +28,4 @@ def execute(context):
if not excess_iris == {"undefined"}:
raise RuntimeError("Found additional IRIS: %s" % excess_iris)

# TODO: This filtering is not really compatible with defining multiple regions
# or departments. This used to be a filter to avoid people going outside of
# Île-de-France, but we should consider removing this filter altogether, or
# find some smarter way (e.g. using OD matrices and filter out people in
# each municipality by the share of outside workers).
df_codes = context.stage("data.spatial.codes")

if len(df_codes["region_id"].unique()) > 1:
raise RuntimeError("""
Multiple regions are defined, so the filtering for people going outside
of Île-de-France does not make sense in that case. Consider adjusting the
data.census.filtered stage!
""")

print(
"Removing %d/%d (%.2f%%) households (with %d/%d persons, %.2f%%) because at least one person is working outside of Île-de-France" % (
removed_households, initial_households, 100 * removed_households / initial_households,
removed_persons, initial_persons, 100 * removed_persons / initial_persons
))

context.set_info("filtered_households_share", removed_households / initial_households)
context.set_info("filtered_persons_share", removed_persons / initial_persons)

df = df[~df["household_id"].isin(remove_ids)]


# Household size
df_size = df[["household_id"]].groupby("household_id").size().reset_index(name = "household_size2")
df = pd.merge(df, df_size)

f = df["household_size"] != df["household_size2"]
print(np.count_nonzero(f))
print(df[f])

print(df[df["household_id"] == 8958513])

assert np.all(df["household_size"] == df["household_size2"])
print("all good")
exit()

return df
4 changes: 1 addition & 3 deletions data/census/raw.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,7 @@ def configure(context):
"COUPLE":"str",
"CS1":"str",
"DEPT":"str",
"ETUD":"str",
"ILETUD":"str",
"ILT":"str",
"ETUD":"str",
"IPONDI":"str",
"IRIS":"str",
"REGION":"str",
Expand Down
8 changes: 0 additions & 8 deletions synthesis/population/sampled.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,6 @@ def execute(context):
sampling_rate = context.config("sampling_rate")
random = np.random.RandomState(context.config("random_seed"))

# Household size
df_size = df_census[["household_id"]].groupby("household_id").size().reset_index(name = "household_size2")
df_census = pd.merge(df_census, df_size)

assert np.all(df_census["household_size"] == df_census["household_size2"])
print("all good")
exit()

# Perform stochastic rounding for the population (and scale weights)
df_rounding = df_census[["household_id", "weight", "household_size"]].drop_duplicates("household_id")
df_rounding["multiplicator"] = np.floor(df_rounding["weight"])
Expand Down

0 comments on commit 8e078ca

Please sign in to comment.