Use arrow and remove unused code. Ref #128

ITHIM · Mar 22, 2023 · 3202a1a · 3202a1a
1 parent dfab704
commit 3202a1a
Showing 1 changed file with 41 additions and 15 deletions.
diff --git a/code/synthetic_population/bogota_synth_pop.qmd b/code/synthetic_population/bogota_synth_pop.qmd
@@ -27,6 +27,7 @@ Load all the require packages
 library(rsample)
 library(tidyverse)
 library(summarytools)
+library(arrow)
 
 # Set seed
 set.seed(2023)
@@ -38,6 +39,8 @@ st_options(plain.ascii = FALSE)
 
 Daniel has kindly added `participation weights` to each row in the travel dataset, which represents household weights
 
+## Summary without weights
+
 ```{r, results='asis'}
 bt <- read_csv("https://raw.githubusercontent.com/ITHIM/TravelSurveyPreprocessing/fc8bae5381440fcb96e6789d26d115305597e5f1/Data/ITHIM/bogota/trips_bogota.csv")
 
@@ -53,45 +56,68 @@ print(dfSummary(raw_dataset_without_weights,
 
 # print(dfSummary(dfSummary(bt |> filter(age != 9999))), method = "render")
 
-rts <- bt |> filter(!is.na(trip_mode)) |> group_by(trip_mode) |> summarise(n = n()) |> mutate(raw_trip_freq = round(n / sum(n) * 100, 1))
-rss <- bt |> filter(!is.na(stage_mode)) |> group_by(stage_mode) |> summarise(n = n()) |> mutate(raw_stage_freq = round(n / sum(n) * 100, 1))
+# rts <- bt |> filter(!is.na(trip_mode)) |> group_by(trip_mode) |> summarise(n = n()) |> mutate(raw_trip_freq = round(n / sum(n) * 100, 1))
+# rss <- bt |> filter(!is.na(stage_mode)) |> group_by(stage_mode) |> summarise(n = n()) |> mutate(raw_stage_freq = round(n / sum(n) * 100, 1))
+# 
+# ets <- exp1 |> filter(!is.na(trip_mode)) |> group_by(trip_mode) |> summarise(n = n()) |> mutate(exp_trip_freq = round(n / sum(n) * 100, 1))
+# ess <- exp1 |> filter(!is.na(stage_mode)) |> group_by(stage_mode) |> summarise(n = n()) |> mutate(exp_stage_freq = round(n / sum(n) * 100, 1))
+# 
+# ts <- full_join(rts, ets)
+# ss <- left_join(rss, ess)
+
+# 
+# st |> filter(!is.na(trip_mode)) |> group_by(trip_mode) |> summarise(n = n()) |> mutate(exp_trip_freq = round(n / sum(n) * 100, 1))
+# 
+# rd <- plyr::rbind.fill(exp1, exp2) %>% arrange(hh_id, person_id, trip_id)
+
+
+```
+
+
+## Summary with weights - expanded travel survey
+
+```{r, results='asis'}
+
+exp1 <- read_parquet("../../data/local/bogota/exp_trips.parquet")
+# Remove undefined ages
+exp1 <- exp1 |> filter(age != 9999)
 
-et <- bt %>% mutate(pw = round(participant_wt - 1))
-exp1 <- et  |> filter(pw > 0) |> uncount(pw, .id = "pid")
-exp2 <- et  |> filter(pw > 0) |> filter(pw == 0) %>% mutate(pid = 1)
+# exp2 <- et  |> filter(pw > 0) |> filter(pw == 0) %>% mutate(pid = 1)
 
 
-print(dfSummary(exp1 |> filter(age != 9999),
+print(dfSummary(exp1,
                 varnumbers   = FALSE, 
                 valid.col    = FALSE, 
                 graph.magnif = 0.76),
       method = 'render')
 
+```
 
-ets <- exp1 |> filter(!is.na(trip_mode)) |> group_by(trip_mode) |> summarise(n = n()) |> mutate(exp_trip_freq = round(n / sum(n) * 100, 1))
-ess <- exp1 |> filter(!is.na(stage_mode)) |> group_by(stage_mode) |> summarise(n = n()) |> mutate(exp_stage_freq = round(n / sum(n) * 100, 1))
 
-ts <- full_join(rts, ets)
-ss <- left_join(rss, ess)
+## Summary of sampled travel survey - with trip_mode strata
 
+```{r, results='asis'}
 st <- rsample::training(rsample::initial_split(exp1, prop = 0.1, strata = trip_mode))
-ss <- rsample::training(rsample::initial_split(exp1, prop = 0.1, strata = stage_mode))
 
 print(dfSummary(st,
                 varnumbers   = FALSE, 
                 valid.col    = FALSE, 
                 graph.magnif = 0.76),
       method = 'render')
 
+```
+
+
+## Summary of sampled travel survey - with stage_mode strata
+
+```{r, results='asis'}
+ss <- rsample::training(rsample::initial_split(exp1, prop = 0.1, strata = stage_mode))
 print(dfSummary(ss,
                 varnumbers   = FALSE, 
                 valid.col    = FALSE, 
                 graph.magnif = 0.76),
       method = 'render')
-# 
-# st |> filter(!is.na(trip_mode)) |> group_by(trip_mode) |> summarise(n = n()) |> mutate(exp_trip_freq = round(n / sum(n) * 100, 1))
-# 
-# rd <- plyr::rbind.fill(exp1, exp2) %>% arrange(hh_id, person_id, trip_id)
 
 
 ```
+