Skip to content

Commit

Permalink
Merge pull request #45 from cmu-delphi/lcb-day1-regression
Browse files Browse the repository at this point in the history
Add NCHS regression model slides
  • Loading branch information
dajmcdon authored Dec 10, 2024
2 parents d428894 + d7edbab commit 089fd10
Show file tree
Hide file tree
Showing 5 changed files with 397 additions and 596 deletions.
39 changes: 18 additions & 21 deletions _casestudies/forecast-covid/forecast-covid.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ Now, use `epi_slide_mean()`, to calculate trailing 7 day averages of cases and d
```{r trailing-averages}
ca <- ca %>%
epi_slide_mean(c(cases, deaths), .window_size = 7) %>%
epi_slide_mean(deaths, .window_size = 7) %>%
select(-cases, -deaths) %>%
rename(cases = cases_7dav, deaths = deaths_7dav)
```
Expand Down Expand Up @@ -431,7 +430,7 @@ head(test %>% select(pred_trailing_cv, time_value))
```

The method fitting on all past data up to the forecasting date can be
implemented by changing `before = Inf` in `epi_slide`.
implemented by changing `.before = Inf` in `epi_slide`.

```{r epipredict-cv}
# slide an arx_forecaster with appropriate outcome, predictions and lags
Expand All @@ -443,8 +442,8 @@ epi_pred_cv <- epi_slide(
trainer = linear_reg() %>% set_engine("lm"),
args_list = arx_args_list(lags = k-1, ahead = 1L)
)$predictions,
before = Inf,
ref_time_values = fc_time_values,
.before = Inf,
.ref_time_values = fc_time_values,
.new_col_name = "fc"
) |>
unpack(fc, names_sep = "_")
Expand Down Expand Up @@ -484,8 +483,8 @@ epi_pred_cv_7 <- epi_slide(
trainer = linear_reg() %>% set_engine("lm"),
args_list = arx_args_list(lags = k-n_ahead, ahead = n_ahead)
)$predictions,
before = Inf,
ref_time_values = fc_time_values_7,
.before = Inf,
.ref_time_values = fc_time_values_7,
.new_col_name = "fc"
) |>
unpack(fc, names_sep = "_")
Expand Down Expand Up @@ -725,8 +724,8 @@ ar_all_past <- epi_slide(
trainer = linear_reg() %>% set_engine("lm"),
args_list = arx_args_list(lags = 0L, ahead = 1L)
)$predictions,
before = Inf,
ref_time_values = fc_time_values,
.before = Inf,
.ref_time_values = fc_time_values,
.new_col_name = "all_past"
) |>
unpack(all_past, names_sep = "_")
Expand All @@ -739,8 +738,8 @@ ar_trailing <- epi_slide(
trainer = linear_reg() %>% set_engine("lm"),
args_list = arx_args_list(lags = 0L, ahead = 1L)
)$predictions,
before = w,
ref_time_values = fc_time_values,
.before = w,
.ref_time_values = fc_time_values,
.new_col_name = "trailing"
) |>
unpack(trailing, names_sep = "_")
Expand Down Expand Up @@ -952,8 +951,8 @@ arx_all_past <- epi_slide(
args_list = arx_args_list(lags = list(0, k-1),
ahead = 1L)
)$predictions,
before = Inf,
ref_time_values = fc_time_values,
.before = Inf,
.ref_time_values = fc_time_values,
.new_col_name = "all_past"
) |>
unpack(all_past, names_sep = "_")
Expand All @@ -967,8 +966,8 @@ arx_trailing <- epi_slide(
args_list = arx_args_list(lags = list(0, k-1),
ahead = 1L)
)$predictions,
before = (w+k-1),
ref_time_values = fc_time_values,
.before = (w+k-1),
.ref_time_values = fc_time_values,
.new_col_name = "trailing"
) |>
unpack(trailing, names_sep = "_")
Expand Down Expand Up @@ -1552,8 +1551,8 @@ fc_time_values <- seq(
data_archive <- data_archive %>%
epix_slide(
before = Inf,
ref_time_values = fc_time_values,
.before = Inf,
.versions = fc_time_values,
function(x, gk, rtv) {
x %>%
epi_slide_mean(case_rate, .window_size = 7L, .suffix = "_7d_av") %>%
Expand Down Expand Up @@ -1585,7 +1584,7 @@ pred_all_past = pred_trailing <- matrix(NA, ncol = 4, nrow = 0)
w <- 30 # trailing window size
for (fc_date in fc_time_values) {
data <- epix_as_of(ca_archive, max_version = as.Date(fc_date))
data <- epix_as_of(ca_archive, as.Date(fc_date))
data$lagged_deaths <- dplyr::lag(data$deaths, 1)
data$lagged_cases <- dplyr::lag(data$cases, k)
Expand Down Expand Up @@ -1729,14 +1728,12 @@ forecaster <- function(x) {
arx_preds <- data %>%
epix_slide(~ forecaster(.x),
before = 120, ref_time_values = fc_time_values,
names_sep = NULL
.before = 120, .versions = fc_time_values
) %>%
mutate(engine_type = quantile_reg()$engine) %>%
mutate(ahead_val = target_date - forecast_date)
x_latest <- epix_as_of(data,
max_version = max(data$versions_end))
x_latest <- epix_as_of(data, data$versions_end)
```


Expand Down
4 changes: 2 additions & 2 deletions _code/cover-image.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ library(epiprocess)
library(tidyverse)
primary <- "#941120"
x <- archive_cases_dv_subset
x_latest <- epix_as_of(x, max_version = max(x$DT$version))
x_latest <- epix_as_of(x, max(x$DT$version))
self_max = max(x$DT$version)
versions = seq(as.Date("2020-06-01"), self_max - 1, by = "1 month")
snapshots_all <- map_dfr(versions, function(v) {
epix_as_of(x, max_version = v) %>% mutate(version = v)}) %>%
epix_as_of(x, v) %>% mutate(version = v)}) %>%
bind_rows(x_latest %>% mutate(version = self_max)) %>%
mutate(latest = version == self_max)
snapshots <- snapshots_all %>%
Expand Down
5 changes: 2 additions & 3 deletions _code/weekly_hhs.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ if (!file.exists(file_path)) {
day_of_week_end = 7L) {
agg_method <- arg_match(agg_method)
keys <- key_colnames(epi_arch, exclude = "time_value")
ref_time_values <- epi_arch$DT$version %>%
versions <- epi_arch$DT$version %>%
unique() %>%
sort()
if (agg_method == "sum") {
Expand All @@ -90,8 +90,7 @@ if (!file.exists(file_path)) {
}
too_many_tibbles <- epix_slide(
epi_arch,
.before = 99999999L,
.versions = ref_time_values,
.versions = versions,
function(x, group, ref_time) {
ref_time_last_week_end <-
floor_date(ref_time, "week", day_of_week_end - 1) # this is over by 1
Expand Down
Loading

0 comments on commit 089fd10

Please sign in to comment.