diff --git a/code/synthetic_population/bogota_synth_pop.qmd b/code/synthetic_population/bogota_synth_pop.qmd index 85b60d34..54d74777 100644 --- a/code/synthetic_population/bogota_synth_pop.qmd +++ b/code/synthetic_population/bogota_synth_pop.qmd @@ -27,6 +27,7 @@ Load all the require packages library(rsample) library(tidyverse) library(summarytools) +library(arrow) # Set seed set.seed(2023) @@ -38,6 +39,8 @@ st_options(plain.ascii = FALSE) Daniel has kindly added `participation weights` to each row in the travel dataset, which represents household weights +## Summary without weights + ```{r, results='asis'} bt <- read_csv("https://raw.githubusercontent.com/ITHIM/TravelSurveyPreprocessing/fc8bae5381440fcb96e6789d26d115305597e5f1/Data/ITHIM/bogota/trips_bogota.csv") @@ -53,29 +56,48 @@ print(dfSummary(raw_dataset_without_weights, # print(dfSummary(dfSummary(bt |> filter(age != 9999))), method = "render") -rts <- bt |> filter(!is.na(trip_mode)) |> group_by(trip_mode) |> summarise(n = n()) |> mutate(raw_trip_freq = round(n / sum(n) * 100, 1)) -rss <- bt |> filter(!is.na(stage_mode)) |> group_by(stage_mode) |> summarise(n = n()) |> mutate(raw_stage_freq = round(n / sum(n) * 100, 1)) +# rts <- bt |> filter(!is.na(trip_mode)) |> group_by(trip_mode) |> summarise(n = n()) |> mutate(raw_trip_freq = round(n / sum(n) * 100, 1)) +# rss <- bt |> filter(!is.na(stage_mode)) |> group_by(stage_mode) |> summarise(n = n()) |> mutate(raw_stage_freq = round(n / sum(n) * 100, 1)) +# +# ets <- exp1 |> filter(!is.na(trip_mode)) |> group_by(trip_mode) |> summarise(n = n()) |> mutate(exp_trip_freq = round(n / sum(n) * 100, 1)) +# ess <- exp1 |> filter(!is.na(stage_mode)) |> group_by(stage_mode) |> summarise(n = n()) |> mutate(exp_stage_freq = round(n / sum(n) * 100, 1)) +# +# ts <- full_join(rts, ets) +# ss <- left_join(rss, ess) + +# +# st |> filter(!is.na(trip_mode)) |> group_by(trip_mode) |> summarise(n = n()) |> mutate(exp_trip_freq = round(n / sum(n) * 100, 1)) +# +# rd <- plyr::rbind.fill(exp1, exp2) %>% arrange(hh_id, person_id, trip_id) + + +``` + + +## Summary with weights - expanded travel survey + +```{r, results='asis'} + +exp1 <- read_parquet("../../data/local/bogota/exp_trips.parquet") +# Remove undefined ages +exp1 <- exp1 |> filter(age != 9999) -et <- bt %>% mutate(pw = round(participant_wt - 1)) -exp1 <- et |> filter(pw > 0) |> uncount(pw, .id = "pid") -exp2 <- et |> filter(pw > 0) |> filter(pw == 0) %>% mutate(pid = 1) +# exp2 <- et |> filter(pw > 0) |> filter(pw == 0) %>% mutate(pid = 1) -print(dfSummary(exp1 |> filter(age != 9999), +print(dfSummary(exp1, varnumbers = FALSE, valid.col = FALSE, graph.magnif = 0.76), method = 'render') +``` -ets <- exp1 |> filter(!is.na(trip_mode)) |> group_by(trip_mode) |> summarise(n = n()) |> mutate(exp_trip_freq = round(n / sum(n) * 100, 1)) -ess <- exp1 |> filter(!is.na(stage_mode)) |> group_by(stage_mode) |> summarise(n = n()) |> mutate(exp_stage_freq = round(n / sum(n) * 100, 1)) -ts <- full_join(rts, ets) -ss <- left_join(rss, ess) +## Summary of sampled travel survey - with trip_mode strata +```{r, results='asis'} st <- rsample::training(rsample::initial_split(exp1, prop = 0.1, strata = trip_mode)) -ss <- rsample::training(rsample::initial_split(exp1, prop = 0.1, strata = stage_mode)) print(dfSummary(st, varnumbers = FALSE, @@ -83,15 +105,19 @@ print(dfSummary(st, graph.magnif = 0.76), method = 'render') +``` + + +## Summary of sampled travel survey - with stage_mode strata + +```{r, results='asis'} +ss <- rsample::training(rsample::initial_split(exp1, prop = 0.1, strata = stage_mode)) print(dfSummary(ss, varnumbers = FALSE, valid.col = FALSE, graph.magnif = 0.76), method = 'render') -# -# st |> filter(!is.na(trip_mode)) |> group_by(trip_mode) |> summarise(n = n()) |> mutate(exp_trip_freq = round(n / sum(n) * 100, 1)) -# -# rd <- plyr::rbind.fill(exp1, exp2) %>% arrange(hh_id, person_id, trip_id) ``` +