brm_data_chronologize()

openpharma · Jul 9, 2024 · 340f5b8 · 340f5b8
1 parent 1f1f231
commit 340f5b8
Show file tree

Hide file tree

Showing 13 changed files with 485 additions and 259 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: brms.mmrm
 Title: Bayesian MMRMs using 'brms'
-Version: 1.0.1.9003
+Version: 1.0.1.9004
 Authors@R: c(
   person(
     given = c("William", "Michael"),

diff --git a/NAMESPACE b/NAMESPACE
@@ -25,6 +25,7 @@ export(brm_archetype_successive_cells)
 export(brm_archetype_successive_effects)
 export(brm_data)
 export(brm_data_change)
+export(brm_data_chronologize)
 export(brm_formula)
 export(brm_formula_sigma)
 export(brm_marginal_data)
@@ -56,8 +57,10 @@ importFrom(brms,make_standata)
 importFrom(brms,prior)
 importFrom(brms,unstr)
 importFrom(dplyr,across)
+importFrom(dplyr,arrange)
 importFrom(dplyr,bind_cols)
 importFrom(dplyr,bind_rows)
+importFrom(dplyr,distinct)
 importFrom(dplyr,left_join)
 importFrom(dplyr,rename)
 importFrom(dplyr,select)

diff --git a/NEWS.md b/NEWS.md
@@ -1,9 +1,10 @@
-# brms.mmrm 1.0.1.9003 (development)
+# brms.mmrm 1.0.1.9004 (development)
 
 * Add `brm_marginal_grid()`.
 * Show posterior samples of `sigma` in `brm_marginal_draws()` and `brm_marginal_summaries()`.
 * Allow `outcome = "response"` with `reference_time = NULL`. Sometimes raw response is analyzed but the data has no baseline time point.
-* Preserve factors in `brm_data()` and encourage ordered factors for the time variable (#113). 
+* Preserve factors in `brm_data()` and encourage ordered factors for the time variable (#113).
+* Add `brm_data_chronologize()` to ensure the correctness of the time variable.
 
 # brms.mmrm 1.0.1
 

diff --git a/R/brm_data.R b/R/brm_data.R
@@ -41,7 +41,11 @@
 #'   character vector or unordered factor.
 #' @param time Character of length 1, name of the discrete time variable.
 #'   For most analyses, please use an ordered factor for the `time` column
-#'   in the data. This ensures the time points sort in chronological order,
+#'   in the data. You can easily turn
+#'   the time variable into an ordered factor using
+#'   [brm_data_chronologize()], either before or immediately after
+#'   [brm_data()] (but before any `brm_archetype_*()` functions).
+#'   This ensures the time points sort in chronological order,
 #'   which ensures the correctness of informative prior archetypes and
 #'   autoregressive / moving average correlation structures.
 #'

diff --git a/R/brm_data_chronologize.R b/R/brm_data_chronologize.R
@@ -0,0 +1,134 @@
+#' @title Chronologize a dataset
+#' @export
+#' @family data
+#' @description Convert the discrete time variable into an ordered factor.
+#' @details Most MMRMs should use an ordered factor for the `time` column
+#'   in the data. This way, individual time points are treated as
+#'   distinct factor levels for the purposes of fixed effect parameterizations
+#'   (see the "Contrasts" section), and the explicit ordering ensures
+#'   that informative prior archetypes and ARMA-like correlation structures
+#'   are expressed correctly. Without the ordering, problems can arise when
+#'   character vectors are sorted: e.g. if `AVISIT` has levels
+#'   `"VISIT1", "VISIT2", ..., "VISIT10"`, then `brms` will mistake
+#'   the order of scheduled study visits to be
+#'   `"VISIT1", "VISIT10", "VISIT2", ...`, which is not chronological.
+#'
+#'   You can easily turn
+#'   the time variable into an ordered factor using
+#'   [brm_data_chronologize()]. Either supply an explicit character vector
+#'   of chronologically-ordered factor levels in the `levels` argument,
+#'   or supply the name of a time-ordered variable in the `order` argument.
+#'
+#'   [brm_data_chronologize()] can be called either before or just after
+#'   [brm_data()], but in the former case, the discrete time variable
+#'   needs to be specified explicitly in the `time` argument. And in the latter,
+#'   [brm_data_chronologize()] must be called before any of the informative
+#'   prior archetype functions such as [brm_archetype_successive_cells()].
+#' @section Contrasts:
+#'   Ordinarily, ordered factors automatically use polynomial contrasts from
+#'   [contr.poly()]. This is undesirable for MMRMs, so if the time variable
+#'   is an ordered factor, then [brm_data()]
+#'   manually sets `contrasts(data[[time]])` to a set of treatment contrasts
+#'   using [contr.treatment()]. If you prefer different contrasts, please
+#'   manually set `contrasts(data[[time]])` to something else after
+#'   calling [brm_data()].
+#' @return A data frame with the time column as an ordered factor.
+#' @inheritParams brm_data
+#' @param order Optional character string with the name of a variable in
+#'   the data for ordering the time variable.
+#'   Either `order` or `levels` must be supplied, but not both together.
+#'   If `order` is supplied,
+#'   the levels of `data[[order]]` must have a 1:1 correspondence with
+#'   those of `data[[time]]`, and `sort(unique(data[[order]]))` must
+#'   reflect the desired order of the levels of `data[[time]]`. For example,
+#'   suppose you have a CDISC dataset with categorical time variable `AVISIT`
+#'   and integer variable `AVISITN`. Then,
+#'   `brm_data_chronologize(time = "AVISIT", order = "AVISITN")` will turn
+#'   `AVISIT` into an ordered factor with levels that respect the ordering
+#'   in `AVISITN`.
+#' @param levels Optional character vector of levels of `data[[time]]`
+#'   in chronological order. Used to turn `data[[time]]` into an
+#'   ordered factor.
+#'   Either `order` or `levels` must be supplied, but not both together.
+#' @param time Character string with the name of the discrete time
+#'   variable in the data. This is the variable that [brm_data_chronologize()]
+#'   turns into an ordered factor. It needs to be specified explicitly
+#'   if and only if the `data` argument was not produced by a call to
+#'   [brm_data()].
+#' @examples
+#' data <- brm_simulate_outline(n_time = 12, n_patient = 4)
+#' data$AVISIT <- gsub("_0", "_", data$time)
+#' data$AVISITN <- as.integer(gsub("time_", "", data$time))
+#' data[, c("AVISIT", "AVISITN")]
+#' sort(unique(data$AVISIT)) # wrong order
+#' data1 <- brm_data_chronologize(data, time = "AVISIT", order = "AVISITN")
+#' sort(unique(data1$AVISIT)) # correct order
+#' levels <- paste0("time_", seq_len(12))
+#' data2 <- brm_data_chronologize(data, time = "AVISIT", levels = levels)
+#' sort(unique(data2$AVISIT)) # correct order
+brm_data_chronologize <- function(
+  data,
+  order = NULL,
+  levels = NULL,
+  time = attr(data, "brm_time")
+) {
+  # Classed datasets go through the package validator; anything else only
+  # has to be a data frame.
+  if_any(
+    inherits(data, "brms_mmrm_data"),
+    brm_data_validate(data),
+    assert(is.data.frame(data), message = "data must be a data frame")
+  )
+  assert_chr(time, message = "time must be a character string")
+  # NOTE(review): `%|||%` appears to substitute its right-hand side when the
+  # left-hand side is NULL, so the placeholders below let the NULL case pass.
+  assert_chr(
+    order %|||% "x",
+    message = "order must be NULL or a character string"
+  )
+  assert(
+    time %in% colnames(data),
+    message = "time must be a column name in the data"
+  )
+  assert(
+    (order %|||% time) %in% colnames(data),
+    message = "order must be NULL or a column name in the data"
+  )
+  assert_chr_vec(levels %|||% data[[time]][1L])
+  assert(
+    (levels %|||% data[[time]][1L]) %in% data[[time]],
+    message = "all elements of levels must be in data[[time]]"
+  )
+  # Exactly one of `order` and `levels` must be supplied.
+  assert(
+    !(is.null(order) && is.null(levels)),
+    message = "at least one of 'order' or 'levels' must be given"
+  )
+  assert(
+    !(!is.null(order) && !is.null(levels)),
+    message = paste(
+      "'order' and 'levels' cannot both be given.",
+      "Please choose one or the other."
+    )
+  )
+  if (!is.null(levels)) {
+    # ordered() silently maps every value that is absent from `levels` to NA
+    # and fails with an opaque error on duplicated levels, so reject both
+    # situations here with an informative message.
+    assert(
+      !anyDuplicated(levels),
+      message = "levels must not contain duplicated elements"
+    )
+    observed <- unique(as.character(data[[time]]))
+    observed <- observed[!is.na(observed)]
+    assert(
+      all(observed %in% levels),
+      message = paste(
+        "levels must include every non-missing value of data[[time]].",
+        "Values absent from levels would silently become NA."
+      )
+    )
+  }
+  if (!is.null(order)) {
+    # One row per distinct (time, order) pair. A 1:1 correspondence means
+    # neither column repeats a value within this grid.
+    grid <- dplyr::distinct(data[, c(time, order)])
+    assert(
+      !anyDuplicated(grid[[time]]),
+      !anyDuplicated(grid[[order]]),
+      as.character(sort(grid[[time]])) ==
+        as.character(sort(unique(data[[time]]))),
+      as.character(sort(grid[[order]])) ==
+        as.character(sort(unique(data[[order]]))),
+      message = paste(
+        "Cannot create an ordered factor for the discrete time variable",
+        "because the elements of the discrete time variable do not have a",
+        "1:1 correspondence with the elements of the ordering variable.",
+        "Please make sure variables",
+        time,
+        "and",
+        order,
+        "have a 1:1 correspondence between their levels."
+      )
+    )
+    # The argument `order` is a string, but R skips non-function bindings
+    # when resolving a call, so order(...) below still calls base::order().
+    grid <- grid[order(grid[[order]]),, drop = FALSE] # nolint
+    levels <- grid[[time]]
+  }
+  data[[time]] <- ordered(data[[time]], levels = levels)
+  data
+}
diff --git a/R/brm_package.R b/R/brm_package.R
@@ -26,8 +26,8 @@
 #'     CRC Press, Taylor & Francis Group.
 #' @family help
 #' @importFrom brms brm brmsformula get_prior make_standata prior unstr
-#' @importFrom dplyr across bind_cols bind_rows left_join rename select
-#'   summarize
+#' @importFrom dplyr across arrange bind_cols bind_rows distinct left_join
+#'   rename select summarize
 #' @importFrom ggplot2 aes facet_wrap geom_point geom_errorbar ggplot
 #'   position_dodge theme_gray xlab ylab
 #' @importFrom ggridges geom_density_ridges2

diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -27,6 +27,7 @@ reference:
   contents:
   - 'brm_data'
   - 'brm_data_change'
+  - 'brm_data_chronologize'
 - title: Simulation
   contents:
   - 'brm_simulate_categorical'

diff --git a/man/brm_data.Rd b/man/brm_data.Rd
diff --git a/man/brm_data_change.Rd b/man/brm_data_change.Rd
diff --git a/man/brm_data_chronologize.Rd b/man/brm_data_chronologize.Rd
diff --git a/tests/testthat/test-brm_data_chronologize.R b/tests/testthat/test-brm_data_chronologize.R
@@ -0,0 +1,25 @@
+test_that("brm_data_chronologize() with class", {
+  data <- brm_simulate_outline(n_time = 12, n_patient = 4)
+  # Strip the zero padding so that alphabetical order
+  # ("time_1", "time_10", "time_2", ...) differs from chronological order.
+  data$time <- gsub("_0", "_", data$time)
+  # Presumably keeps the reference time consistent with the renamed values
+  # so that validation of the classed dataset still passes.
+  attr(data, "brm_reference_time") <- "time_1"
+  data$AVISITN <- as.integer(gsub("time_", "", data$time))
+  expected <- paste0("time_", seq_len(12))
+  data1 <- brm_data_chronologize(data, time = "time", order = "AVISITN")
+  expect_true(is.ordered(data1$time))
+  expect_equal(levels(data1$time), expected)
+  expect_equal(as.character(sort(unique(data1$time))), expected)
+  data2 <- brm_data_chronologize(data, time = "time", levels = expected)
+  expect_true(is.ordered(data2$time))
+  expect_equal(levels(data2$time), expected)
+  expect_equal(as.character(sort(unique(data2$time))), expected)
+})
+
+test_that("brm_data_chronologize() without class", {
+  data <- brm_simulate_outline(n_time = 12, n_patient = 4)
+  # Convert to a plain tibble so `time` has to be passed explicitly
+  # instead of being read from an attribute of the dataset.
+  data <- tibble::as_tibble(data)
+  # Strip the zero padding so that alphabetical order
+  # ("time_1", "time_10", "time_2", ...) differs from chronological order.
+  data$AVISIT <- gsub("_0", "_", data$time)
+  data$AVISITN <- as.integer(gsub("time_", "", data$time))
+  expected <- paste0("time_", seq_len(12))
+  data1 <- brm_data_chronologize(data, time = "AVISIT", order = "AVISITN")
+  expect_true(is.ordered(data1$AVISIT))
+  expect_equal(levels(data1$AVISIT), expected)
+  expect_equal(as.character(sort(unique(data1$AVISIT))), expected)
+  data2 <- brm_data_chronologize(data, time = "AVISIT", levels = expected)
+  expect_true(is.ordered(data2$AVISIT))
+  expect_equal(levels(data2$AVISIT), expected)
+  expect_equal(as.character(sort(unique(data2$AVISIT))), expected)
+})