From 6be59e9559be389170df57d3edc640925fd8a5cf Mon Sep 17 00:00:00 2001 From: Andrew Johnson Date: Mon, 20 May 2024 21:01:33 +0300 Subject: [PATCH] Update Rd --- .github/workflows/R-CMD-check.yaml | 1 + DESCRIPTION | 1 + R/log_lik.R | 369 +++++++++++---------- R/loo-prediction.R | 1 + R/misc.R | 471 ++++++++++++++------------- R/pp_data.R | 273 ++++++++-------- R/predictive_error.R | 119 +++---- R/stan_gamm4.R | 152 ++++----- R/stan_glm.fit.R | 279 ++++++++-------- R/stanreg_list.R | 121 +++---- man/evaluate_log_survival.Rd | 2 +- man/evaluate_log_survival.default.Rd | 2 +- man/evaluate_log_survival.matrix.Rd | 2 +- man/extract_pars.Rd | 3 +- man/extract_pars.stanmvreg.Rd | 8 +- man/extract_pars.stansurv.Rd | 8 +- man/get_model_data.Rd | 11 +- man/get_model_data.stanmvreg.Rd | 10 +- man/get_model_data.stansurv.Rd | 10 +- man/linear_predictor.Rd | 2 +- man/linear_predictor.default.Rd | 2 +- man/linear_predictor.matrix.Rd | 2 +- man/linkinv.stanmvreg.Rd | 2 + man/ll_args.Rd | 12 +- man/ll_args.stanjm.Rd | 2 + man/ll_args.stanreg.Rd | 4 +- man/ll_args.stansurv.Rd | 4 +- man/log_lik.stanreg.Rd | 18 +- man/predictive_error.stanreg.Rd | 48 +-- man/psis.stanreg.Rd | 2 + man/rename_loos.Rd | 16 +- man/rename_loos.stanreg.Rd | 4 +- man/rename_loos.stanreg_list.Rd | 8 +- man/split2.Rd | 2 +- man/split2.matrix.Rd | 4 +- man/split2.vector.Rd | 2 +- man/stan_glm.Rd | 4 +- man/truncate.numeric.Rd | 4 +- man/unpad_reTrms.Rd | 3 +- man/unpad_reTrms.array.Rd | 4 +- man/unpad_reTrms.default.Rd | 2 + 41 files changed, 1017 insertions(+), 977 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index f2d353757..7698caf7b 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -35,6 +35,7 @@ jobs: R_REMOTES_NO_ERRORS_FROM_WARNINGS: true RSPM: ${{ matrix.config.rspm }} GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + TESTTHAT_CPUS: 4 steps: - uses: n1hility/cancel-previous-runs@v2 diff --git a/DESCRIPTION b/DESCRIPTION index eaf27a20d..d68409ba2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -80,3 +80,4 @@ NeedsCompilation: yes URL: https://mc-stan.org/rstanarm/, https://discourse.mc-stan.org BugReports: https://github.com/stan-dev/rstanarm/issues RoxygenNote: 7.3.1 +Config/testthat/parallel: true diff --git a/R/log_lik.R b/R/log_lik.R index 049528cf5..b30443dab 100644 --- a/R/log_lik.R +++ b/R/log_lik.R @@ -21,7 +21,7 @@ #' \eqn{S} by \eqn{N} pointwise log-likelihood matrix, where \eqn{S} is the size #' of the posterior sample and \eqn{N} is the number of data points, or in the #' case of the \code{stanmvreg} method (when called on \code{\link{stan_jm}} -#' model objects) an \eqn{S} by \eqn{Npat} matrix where \eqn{Npat} is the number +#' model objects) an \eqn{S} by \eqn{Npat} matrix where \eqn{Npat} is the number #' of individuals. #' #' @aliases log_lik @@ -36,12 +36,12 @@ #' @param offset A vector of offsets. Only required if \code{newdata} is #' specified and an \code{offset} was specified when fitting the model. #' -#' @return For the \code{stanreg} and \code{stanmvreg} methods an \eqn{S} by -#' \eqn{N} matrix, where \eqn{S} is the size of the posterior sample and -#' \eqn{N} is the number of data points. For the \code{stanjm} method +#' @return For the \code{stanreg} and \code{stanmvreg} methods an \eqn{S} by +#' \eqn{N} matrix, where \eqn{S} is the size of the posterior sample and +#' \eqn{N} is the number of data points. For the \code{stanjm} method #' an \eqn{S} by \eqn{Npat} matrix where \eqn{Npat} is the number of individuals. -#' -#' +#' +#' #' @examples #' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { #' \donttest{ @@ -73,19 +73,19 @@ log_lik.stanreg <- function(object, newdata = NULL, offset = NULL, ...) { newdata <- validate_newdata(object, newdata, m = NULL) calling_fun <- as.character(sys.call(-1))[1] dots <- list(...) - + if (is.stanmvreg(object)) { m <- dots[["m"]]; if (is.null(m)) STOP_arg_required_for_stanmvreg(m) } else { m <- NULL } - + newdata <- validate_newdata(object, newdata = newdata, m = m) if (is.stansurv(object)) { args <- ll_args.stansurv(object, newdata = newdata, ...) } else { - args <- ll_args.stanreg(object, newdata = newdata, offset = offset, - reloo_or_kfold = calling_fun %in% c("kfold", "reloo"), + args <- ll_args.stanreg(object, newdata = newdata, offset = offset, + reloo_or_kfold = calling_fun %in% c("kfold", "reloo"), ...) } @@ -98,7 +98,7 @@ log_lik.stanreg <- function(object, newdata = NULL, offset = NULL, ...) { FUN = function(i) { as.vector(fun( draws = args$draws, - data_i = args$data[args$data$cids == + data_i = args$data[args$data$cids == unique(args$data$cids)[i], , drop = FALSE] )) } @@ -138,7 +138,7 @@ log_lik.stanreg <- function(object, newdata = NULL, offset = NULL, ...) { #' @export #' @templateVar mArg m #' @template args-m -#' +#' log_lik.stanmvreg <- function(object, m = 1, newdata = NULL, ...) { validate_stanmvreg_object(object) out <- log_lik.stanreg(object, newdata = newdata, m = m, ...) @@ -147,16 +147,16 @@ log_lik.stanmvreg <- function(object, m = 1, newdata = NULL, ...) { #' @rdname log_lik.stanreg #' @export -#' @param newdataLong,newdataEvent Optional data frames containing new data -#' (e.g. holdout data) to use when evaluating the log-likelihood for a -#' model estimated using \code{\link{stan_jm}}. If the fitted model +#' @param newdataLong,newdataEvent Optional data frames containing new data +#' (e.g. holdout data) to use when evaluating the log-likelihood for a +#' model estimated using \code{\link{stan_jm}}. If the fitted model #' was a multivariate joint model (i.e. more than one longitudinal outcome), -#' then \code{newdataLong} is allowed to be a list of data frames. If supplying +#' then \code{newdataLong} is allowed to be a list of data frames. If supplying #' new data, then \code{newdataEvent} should also include variables corresponding #' to the event time and event indicator as these are required for evaluating the -#' log likelihood for the event submodel. For more details, see the description +#' log likelihood for the event submodel. For more details, see the description #' of \code{newdataLong} and \code{newdataEvent} for \code{\link{posterior_survfit}}. -#' +#' log_lik.stanjm <- function(object, newdataLong = NULL, newdataEvent = NULL, ...) { if (!used.sampling(object)) STOP_sampling_only("Pointwise log-likelihood matrix") @@ -169,11 +169,11 @@ log_lik.stanjm <- function(object, newdataLong = NULL, newdataEvent = NULL, ...) stop("Both newdataLong and newdataEvent must be supplied together.") if (!is.null(newdataLong)) { newdatas <- validate_newdatas(object, newdataLong, newdataEvent) - newdataLong <- newdatas[1:M] + newdataLong <- newdatas[1:M] newdataEvent <- newdatas[["Event"]] } pars <- extract_pars(object) # full array of draws - data <- .pp_data_jm(object, newdataLong, newdataEvent) + data <- .pp_data_jm(object, newdataLong, newdataEvent) calling_fun <- as.character(sys.call(-1))[1] reloo_or_kfold <- calling_fun %in% c("kfold", "reloo") val <- .ll_jm(object, data, pars, reloo_or_kfold = reloo_or_kfold, ...) @@ -192,24 +192,19 @@ ll_fun <- function(x, m = NULL) { return(.ll_surv_i) } else if (!is(f, "family") || is_scobit(x)) return(.ll_polr_i) - else if (is_clogit(x)) + else if (is_clogit(x)) return(.ll_clogit_i) - else if (is.nlmer(x)) + else if (is.nlmer(x)) return(.ll_nlmer_i) - + fun <- paste0(".ll_", family(x, m = m)$family, "_i") get(fun, mode = "function") } #' get arguments needed for ll_fun #' @param object stanreg object -#' @param newdata same as posterior predict -#' @param offset vector of offsets (only required if model has offset term and -# newdata is specified) -#' @param m Integer specifying which submodel for stanmvreg objects -#' @param reloo_or_kfold logical. TRUE if ll_args is for reloo or kfold -#' @param ... For models without group-specific terms (i.e., not stan_[g]lmer), -#' if reloo_or_kfold is TRUE and 'newdata' is specified then ... is used to +#' @param ... For models without group-specific terms (i.e., not stan_[g]lmer), +#' if reloo_or_kfold is TRUE and 'newdata' is specified then ... is used to #' pass 'newx' and 'stanmat' from reloo or kfold (bypassing pp_data). This is a #' workaround in case there are issues with newdata containing factors with #' only a single level. Or for stanmvreg objects, then ... can be used to pass @@ -227,8 +222,8 @@ ll_args <- function(object, ...) UseMethod("ll_args") # newdata is specified) #' @param m Integer specifying which submodel for stanmvreg objects #' @param reloo_or_kfold logical. TRUE if ll_args is for reloo or kfold -#' @param ... For models without group-specific terms (i.e., not stan_[g]lmer), -#' if reloo_or_kfold is TRUE and 'newdata' is specified then ... is used to +#' @param ... For models without group-specific terms (i.e., not stan_[g]lmer), +#' if reloo_or_kfold is TRUE and 'newdata' is specified then ... is used to #' pass 'newx' and 'stanmat' from reloo or kfold (bypassing pp_data). This is a #' workaround in case there are issues with newdata containing factors with #' only a single level. Or for stanmvreg objects, then ... can be used to pass @@ -237,15 +232,15 @@ ll_args <- function(object, ...) UseMethod("ll_args") #' @return a named list with elements data, draws, S (posterior sample size) and #' N = number of observations #' @export -ll_args.stanreg <- function(object, newdata = NULL, offset = NULL, m = NULL, +ll_args.stanreg <- function(object, newdata = NULL, offset = NULL, m = NULL, reloo_or_kfold = FALSE, ...) { validate_stanreg_object(object) f <- family(object, m = m) draws <- nlist(f) has_newdata <- !is.null(newdata) - + dots <- list(...) - + z_betareg <- NULL if (has_newdata && reloo_or_kfold && !is.mer(object)) { x <- dots$newx @@ -273,11 +268,11 @@ ll_args.stanreg <- function(object, newdata = NULL, offset = NULL, m = NULL, if (is.stanmvreg(object) && !is.null(dots$stanmat)) { stanmat <- dots$stanmat # potentially use a stanmat with a single draw } - + if (!is.null(object$dropped_cols)) { x <- x[, !(colnames(x) %in% object$dropped_cols), drop = FALSE] } - + if (!is_polr(object)) { # not polr or scobit model fname <- f$family if (is.nlmer(object)) { @@ -290,7 +285,7 @@ ll_args.stanreg <- function(object, newdata = NULL, offset = NULL, m = NULL, } data$i_ <- seq_len(nrow(data)) # for nlmer need access to i inside .ll_nlmer_i return(nlist(data, draws, S = NROW(draws$mu), N = nrow(data))) - + } else if (!is.binomial(fname)) { data <- data.frame(y, x) if (!is.null(z_betareg)) { @@ -310,26 +305,26 @@ ll_args.stanreg <- function(object, newdata = NULL, offset = NULL, m = NULL, trials <- 1L } else { trials <- 1 - if (is.factor(y)) + if (is.factor(y)) y <- fac2bin(y) stopifnot(all(y %in% c(0, 1))) } data <- data.frame(y, trials, x) } - nms <- if (is.stanmvreg(object)) + nms <- if (is.stanmvreg(object)) collect_nms(colnames(stanmat), - M = get_M(object), - stub = get_stub(object)) else NULL + M = get_M(object), + stub = get_stub(object)) else NULL beta_sel <- if (is.null(nms)) seq_len(ncol(x)) else nms$y[[m]] draws$beta <- stanmat[, beta_sel, drop = FALSE] m_stub <- get_m_stub(m, stub = get_stub(object)) - if (is.gaussian(fname)) + if (is.gaussian(fname)) draws$sigma <- stanmat[, paste0(m_stub, "sigma")] - if (is.gamma(fname)) + if (is.gamma(fname)) draws$shape <- stanmat[, paste0(m_stub, "shape")] - if (is.ig(fname)) + if (is.ig(fname)) draws$lambda <- stanmat[, paste0(m_stub, "lambda")] - if (is.nb(fname)) + if (is.nb(fname)) draws$size <- stanmat[, paste0(m_stub, "reciprocal_dispersion")] if (is.beta(fname)) { draws$f_phi <- object$family_phi @@ -339,7 +334,7 @@ ll_args.stanreg <- function(object, newdata = NULL, offset = NULL, m = NULL, } else { if (has_newdata) { if (!is.null(z_betareg)) { - colnames(data) <- c("y", colnames(get_x(object)), + colnames(data) <- c("y", colnames(get_x(object)), paste0("(phi)_", colnames(z_betareg))) } } else { @@ -360,26 +355,26 @@ ll_args.stanreg <- function(object, newdata = NULL, offset = NULL, m = NULL, } data <- data.frame(y, x) draws$beta <- stanmat[, colnames(x), drop = FALSE] - zetas <- grep(pattern = if (length(unique(y)) == 2L) "(Intercept)" else "|", - x = colnames(stanmat), + zetas <- grep(pattern = if (length(unique(y)) == 2L) "(Intercept)" else "|", + x = colnames(stanmat), fixed = TRUE, value = TRUE) draws$zeta <- stanmat[, zetas, drop = FALSE] draws$max_y <- max(y) - if ("alpha" %in% colnames(stanmat)) { + if ("alpha" %in% colnames(stanmat)) { stopifnot(is_scobit(object)) # scobit draws$alpha <- stanmat[, "alpha"] draws$f <- object$method } } - + data$offset <- if (has_newdata) offset else object$offset if (model_has_weights(object)) { - if (is.stanmvreg(object)) + if (is.stanmvreg(object)) STOP_if_stanmvreg("posterior_survfit with weights") data$weights <- object$weights } - + if (is.mer(object)) { b_sel <- if (is.null(nms)) b_names(colnames(stanmat)) else nms$y_b[[m]] b <- stanmat[, b_sel, drop = FALSE] @@ -398,12 +393,12 @@ ll_args.stanreg <- function(object, newdata = NULL, offset = NULL, m = NULL, data <- cbind(data, as.matrix(z)[1:NROW(x),, drop = FALSE]) draws$beta <- cbind(draws$beta, b) } - + if (is_clogit(object)) { data$strata <- strata out <- nlist(data, draws, S = NROW(draws$beta), N = nlevels(strata)) } else { - out <- nlist(data, draws, S = NROW(draws$beta), N = nrow(data)) + out <- nlist(data, draws, S = NROW(draws$beta), N = nrow(data)) } return(out) } @@ -411,8 +406,8 @@ ll_args.stanreg <- function(object, newdata = NULL, offset = NULL, m = NULL, #' get arguments needed for ll_fun #' @param object stansurv object #' @param newdata same as posterior predict -#' @param ... For models without group-specific terms (i.e., not stan_[g]lmer), -#' if reloo_or_kfold is TRUE and 'newdata' is specified then ... is used to +#' @param ... For models without group-specific terms (i.e., not stan_[g]lmer), +#' if reloo_or_kfold is TRUE and 'newdata' is specified then ... is used to #' pass 'newx' and 'stanmat' from reloo or kfold (bypassing pp_data). This is a #' workaround in case there are issues with newdata containing factors with #' only a single level. Or for stanmvreg objects, then ... can be used to pass @@ -422,34 +417,34 @@ ll_args.stanreg <- function(object, newdata = NULL, offset = NULL, m = NULL, #' N = number of observations #' @export ll_args.stansurv <- function(object, newdata = NULL, ...) { - + validate_stansurv_object(object) if (is.null(newdata)) { newdata <- get_model_data(object) } newdata <- as.data.frame(newdata) - + # response, ie. a Surv object form <- as.formula(formula(object)) y <- eval(form[[2L]], newdata) - + # outcome, ie. time variables and status indicator t_beg <- make_t(y, type = "beg") # entry time t_end <- make_t(y, type = "end") # exit time t_upp <- make_t(y, type = "upp") # upper time for interval censoring status <- make_d(y) - if (any(status < 0 | status > 3)) + if (any(status < 0 | status > 3)) stop2("Invalid status indicator in Surv object.") - + # delayed entry indicator for each row of data delayed <- as.logical(!t_beg == 0) - + # we reconstruct the design matrices even if no newdata, since it is # too much of a pain to store everything in the fitted model object # (e.g. w/ delayed entry, interval censoring, quadrature points, etc) pp <- pp_data(object, newdata, times = t_end) - + # returned object depends on quadrature if (object$has_quadrature) { pp_qpts_beg <- pp_data(object, newdata, times = t_beg, at_quadpoints = TRUE) @@ -472,7 +467,7 @@ ll_args.stansurv <- function(object, newdata = NULL, ...) { data <- data.frame(cids, t_beg, t_end, t_upp, status, delayed) data <- cbind(data, x) } - + # also evaluate random effects structure if relevant if (object$has_bars) { z <- t(pp$z$Zt) @@ -499,20 +494,20 @@ ll_args.stansurv <- function(object, newdata = NULL, ...) { draws$has_tve <- pp$has_tve draws$has_bars <- pp$has_bars draws$qnodes <- pp$qnodes - + out <- nlist(data, draws, S = NROW(draws$beta), N = n_distinct(cids)) return(out) } # check intercept for polr models ----------------------------------------- -# Check if a model fit with stan_polr has an intercept (i.e. if it's actually a +# Check if a model fit with stan_polr has an intercept (i.e. if it's actually a # bernoulli model). If it doesn't have an intercept then the intercept column in # x is dropped. This is only necessary if newdata is specified because otherwise # the correct x is taken from the fitted model object. .validate_polr_x <- function(object, x) { x0 <- get_x(object) - has_intercept <- colnames(x0)[1L] == "(Intercept)" + has_intercept <- colnames(x0)[1L] == "(Intercept)" if (!has_intercept && colnames(x)[1L] == "(Intercept)") x <- x[, -1L, drop = FALSE] x @@ -525,7 +520,7 @@ ll_args.stansurv <- function(object, newdata = NULL, ...) { val } else { val * w - } + } } .xdata <- function(data) { @@ -538,7 +533,7 @@ ll_args.stansurv <- function(object, newdata = NULL, ...) { } # for stan_betareg only -.xdata_beta <- function(data) { +.xdata_beta <- function(data) { sel <- c("y", "weights","offset", "trials") data[, -c(which(colnames(data) %in% sel), grep("(phi)_", colnames(data), fixed = TRUE))] } @@ -556,17 +551,17 @@ ll_args.stansurv <- function(object, newdata = NULL, ...) { } # for stan_surv only -.xdata_surv <- function(data) { +.xdata_surv <- function(data) { nms <- colnames(data) sel <- grep("^x__", nms) data[, sel] } -.sdata_surv <- function(data) { +.sdata_surv <- function(data) { nms <- colnames(data) sel <- grep("^s__", nms) data[, sel] } -.zdata_surv <- function(data) { +.zdata_surv <- function(data) { nms <- colnames(data) sel <- grep("^z__", nms) data[, sel] @@ -595,15 +590,15 @@ ll_args.stansurv <- function(object, newdata = NULL, ...) { .weighted(val, data_i$weights) } .ll_Gamma_i <- function(data_i, draws) { - val <- dgamma(data_i$y, shape = draws$shape, + val <- dgamma(data_i$y, shape = draws$shape, rate = draws$shape / .mu(data_i,draws), log = TRUE) .weighted(val, data_i$weights) } .ll_inverse.gaussian_i <- function(data_i, draws) { mu <- .mu(data_i, draws) - val <- 0.5 * log(draws$lambda / (2 * pi)) - + val <- 0.5 * log(draws$lambda / (2 * pi)) - 1.5 * log(data_i$y) - - 0.5 * draws$lambda * (data_i$y - mu)^2 / + 0.5 * draws$lambda * (data_i$y - mu)^2 / (data_i$y * mu^2) .weighted(val, data_i$weights) } @@ -619,7 +614,7 @@ ll_args.stansurv <- function(object, newdata = NULL, ...) { } else if (y_i == J) { val <- log1p(-linkinv(draws$zeta[, J-1] - eta)) } else { - val <- log(linkinv(draws$zeta[, y_i] - eta) - + val <- log(linkinv(draws$zeta[, y_i] - eta) - linkinv(draws$zeta[, y_i - 1L] - eta)) } } else { @@ -649,38 +644,38 @@ ll_args.stansurv <- function(object, newdata = NULL, ...) { } .ll_surv_i <- function(data_i, draws) { - + # fixed effects (time-fixed) part of linear predictor eta <- linear_predictor(draws$beta, .xdata_surv(data_i)) - + # fixed effects (time-varying) part of linear predictor if (draws$has_tve) { eta <- eta + linear_predictor(draws$beta_tve, .sdata_surv(data_i)) } - + # random effects part of linear predictor if (draws$has_bars) { eta <- eta + linear_predictor(draws$b, .zdata_surv(data_i)) - } - + } + # convert linear predictor to log acceleration factor for AFT eta <- switch(get_basehaz_name(draws$basehaz), "exp-aft" = sweep(eta, 1L, -1, `*`), "weibull-aft" = sweep(eta, 1L, -as.vector(draws$aux), `*`), - eta) - + eta) + if (draws$has_quadrature) { - + qnodes <- draws$qnodes status <- data_i[1L, "status"] delayed <- data_i[1L, "delayed"] - + # row indexing of quadrature points in data_i idx_epts <- 1 idx_qpts_beg <- 1 + (qnodes * 0) + (1:qnodes) idx_qpts_end <- 1 + (qnodes * 1) + (1:qnodes) idx_qpts_upp <- 1 + (qnodes * 2) + (1:qnodes) - + # arguments to be used later in evaluating log baseline hazard args <- list(times = data_i$cpts, basehaz = draws$basehaz, @@ -689,7 +684,7 @@ ll_args.stansurv <- function(object, newdata = NULL, ...) { # evaluate log hazard lhaz <- eta + do.call(evaluate_log_basehaz, args) - + # evaluate log likelihood if (status == 1) { # uncensored @@ -699,21 +694,21 @@ ll_args.stansurv <- function(object, newdata = NULL, ...) { qnodes = qnodes, qwts = data_i$cwts[idx_qpts_end]) ll <- lhaz_epts + lsurv - } else if (status == 0) { + } else if (status == 0) { # right censored lhaz_qpts_end <- lhaz[, idx_qpts_end, drop = FALSE] lsurv <- -quadrature_sum(exp(lhaz_qpts_end), qnodes = qnodes, qwts = data_i$cwts[idx_qpts_end]) ll <- lsurv - } else if (status == 2) { + } else if (status == 2) { # left censored lhaz_qpts_end <- lhaz[, idx_qpts_end, drop = FALSE] lsurv <- -quadrature_sum(exp(lhaz_qpts_end), qnodes = qnodes, qwts = data_i$cwts[idx_qpts_end]) ll <- log(1 - exp(lsurv)) # = log CDF - } else if (status == 3) { + } else if (status == 3) { # interval censored lhaz_qpts_end <- lhaz[, idx_qpts_end, drop = FALSE] lsurv_lower <- -quadrature_sum(exp(lhaz_qpts_end), @@ -725,17 +720,17 @@ ll_args.stansurv <- function(object, newdata = NULL, ...) { qwts = data_i$cwts[idx_qpts_upp]) ll <- log(exp(lsurv_lower) - exp(lsurv_upper)) } - if (delayed) { + if (delayed) { # delayed entry lhaz_qpts_beg <- lhaz[, idx_qpts_beg, drop = FALSE] lsurv_beg <- -quadrature_sum(exp(lhaz_qpts_beg), qnodes = qnodes, qwts = data_i$cwts[idx_qpts_beg]) ll <- ll - lsurv_beg - } - + } + } else { # no quadrature - + status <- data_i$status delayed <- data_i$delayed @@ -743,25 +738,25 @@ ll_args.stansurv <- function(object, newdata = NULL, ...) { args <- list(basehaz = draws$basehaz, aux = draws$aux, intercept = draws$alpha) - + # evaluate log likelihood - if (status == 1) { + if (status == 1) { # uncensored args$times <- data_i$t_end lhaz <- do.call(evaluate_log_basehaz, args) + eta lsurv <- do.call(evaluate_log_basesurv, args) * exp(eta) ll <- lhaz + lsurv - } else if (status == 0) { + } else if (status == 0) { # right censored args$times <- data_i$t_end lsurv <- do.call(evaluate_log_basesurv, args) * exp(eta) ll <- lsurv - } else if (status == 2) { + } else if (status == 2) { # left censored args$times <- data_i$t_end lsurv <- do.call(evaluate_log_basesurv, args) * exp(eta) ll <- log(1 - exp(lsurv)) # = log CDF - } else if (status == 3) { + } else if (status == 3) { # interval censored args$times <- data_i$t_end lsurv_lower <- do.call(evaluate_log_basesurv, args) * exp(eta) @@ -769,13 +764,13 @@ ll_args.stansurv <- function(object, newdata = NULL, ...) { lsurv_upper <- do.call(evaluate_log_basesurv, args) * exp(eta) ll <- log(exp(lsurv_lower) - exp(lsurv_upper)) } - if (delayed) { + if (delayed) { # delayed entry args$times <- data_i$t_beg lsurv_beg <- do.call(evaluate_log_basesurv, args) * exp(eta) ll <- ll - lsurv_beg } - + } return(ll) } @@ -793,8 +788,9 @@ ll_args.stansurv <- function(object, newdata = NULL, ...) { #' @param pars Output from extract_pars #' @param m Integer specifying which submodel #' @param reloo_or_kfold logical. TRUE if ll_args is for reloo or kfold +#' @param ... Additional arguments passed to the method #' @export -ll_args.stanjm <- function(object, data, pars, m = 1, +ll_args.stanjm <- function(object, data, pars, m = 1, reloo_or_kfold = FALSE, ...) { validate_stanjm_object(object) if (model_has_weights(object)) @@ -810,9 +806,9 @@ ll_args.stanjm <- function(object, data, pars, m = 1, y <- data$y[[m]] x <- data$yX[[m]] z <- t(data$yZt[[m]]) - Z_names <- data$yZ_names[[m]] + Z_names <- data$yZ_names[[m]] offset <- data$yOffset[[m]] - } else { + } else { # for stan_mvmer models, log_lik is only ever called for # one submodel at a time, so data is for one submodel y <- data$y @@ -833,7 +829,7 @@ ll_args.stanjm <- function(object, data, pars, m = 1, y <- y[, 1L] } else { trials <- 1 - if (is.factor(y)) + if (is.factor(y)) y <- fac2bin(y) stopifnot(all(y %in% c(0, 1))) } @@ -842,17 +838,17 @@ ll_args.stanjm <- function(object, data, pars, m = 1, } else { dat <- data.frame(y, trials, x) } - } + } dat <- cbind(dat, as.matrix(z)) draws$beta <- stanmat[, nms$y[[m]], drop = FALSE] m_stub <- get_m_stub(m) - if (is.gaussian(fname)) + if (is.gaussian(fname)) draws$sigma <- stanmat[, paste0(m_stub, "sigma")] - if (is.gamma(fname)) + if (is.gamma(fname)) draws$shape <- stanmat[, paste0(m_stub, "shape")] - if (is.ig(fname)) + if (is.ig(fname)) draws$lambda <- stanmat[, paste0(m_stub, "lambda")] - if (is.nb(fname)) + if (is.nb(fname)) draws$size <- stanmat[, paste0(m_stub, "reciprocal_dispersion")] b <- stanmat[, nms$y_b[[m]], drop = FALSE] b <- pp_b_ord(b, Z_names) @@ -866,9 +862,9 @@ ll_args.stanjm <- function(object, data, pars, m = 1, # with elements $basehaz, $family, $assoc # @param data Output from .pp_data_jm # @param pars Output from extract_pars -# @param include_long A logical, if TRUE then the log likelihood for the +# @param include_long A logical, if TRUE then the log likelihood for the # longitudinal submodels are included in the log likelihood calculation. -# @param include_b A logical, if TRUE then the log likelihood for the random +# @param include_b A logical, if TRUE then the log likelihood for the random # effects distribution is also included in the log likelihood calculation. # @param sum A logical. If TRUE then the log likelihood is summed across all # individuals. If FALSE then the log likelihood is returned for each @@ -878,20 +874,20 @@ ll_args.stanjm <- function(object, data, pars, m = 1, # a logical specifying whether the function calling ll_jm was reloo or kfold. # @return Either a matrix, a vector or a scalar, depending on the input types # and whether sum is set to TRUE. -.ll_jm <- function(object, data, pars, include_long = TRUE, +.ll_jm <- function(object, data, pars, include_long = TRUE, include_b = FALSE, sum = FALSE, ...) { - + M <- get_M(object) - + # Log likelihood for event submodel ll_event <- .ll_survival(object, data, pars) - + # Log likelihoods for longitudinal submodels if (include_long) { - ll_long <- lapply(1:M, function(m) + ll_long <- lapply(1:M, function(m) .ll_long(object, data, pars, m = m, ...)) } - + # Log likelihood for random effects submodel # NB this is only used in the Metropolis algorithm in 'posterior_survfit' # when drawing random effects for new individuals. But it is not used @@ -913,7 +909,7 @@ ll_args.stanjm <- function(object, data, pars, m = 1, b <- as.vector(pp_b_ord(b, Z_names)) Sigma_id <- VarCorr(object, stanmat = pars$stanmat)[[id_var]] if (length(cnms) > 1L) { - b2_var <- grep(utils::glob2rx(id_var), names(cnms), + b2_var <- grep(utils::glob2rx(id_var), names(cnms), value = TRUE, invert = TRUE) Sigma_b2 <- VarCorr(object, stanmat = pars$stanmat)[[b2_var]] Sigma_list <- rep(list(Sigma_b2), data$Ni) @@ -927,17 +923,17 @@ ll_args.stanjm <- function(object, data, pars, m = 1, } else { Sigma <- Sigma_id } - ll_b <- -0.5 * (c(determinant(Sigma, logarithm = TRUE)$modulus) + + ll_b <- -0.5 * (c(determinant(Sigma, logarithm = TRUE)$modulus) + (b %*% chol2inv(chol(Sigma)) %*% b)[1] + length(b) * log(2 * pi)) } else { ll_b <- NULL } - + # Check the dimensions of the various components if (is.matrix(ll_event)) { # S * Npat matrices if (include_long) { mats <- unique(sapply(c(ll_long, list(ll_event)), is.matrix)) - dims <- unique(lapply(c(ll_long, list(ll_event)), dim)) + dims <- unique(lapply(c(ll_long, list(ll_event)), dim)) if ((length(dims) > 1L) || (length(mats) > 1L)) stop("Bug found: elements of 'll_long' should be same class and ", "dimension as 'll_event'.") @@ -953,8 +949,8 @@ ll_args.stanjm <- function(object, data, pars, m = 1, } if (include_b && !identical(length(ll_b), length(ll_event))) stop("Bug found: length of 'll_b' should be equal to length of 'll_event'.") - } - + } + # Sum the various components (long + event + random effects) if (include_long) { val <- Reduce('+', c(ll_long, list(ll_event))) @@ -962,18 +958,18 @@ ll_args.stanjm <- function(object, data, pars, m = 1, val <- ll_event } if (include_b && is.matrix(val)) { - val <- sweep(val, 2L, ll_b, `+`) + val <- sweep(val, 2L, ll_b, `+`) } else if (include_b && is.vector(val)) { val <- val + ll_b } - + # Return log likelihood for joint model if (!sum) { return(val) # S * Npat matrix or length Npat vector } else if (is.matrix(val)) { return(rowSums(val)) # length S vector } else { - return(sum(val)) # scalar + return(sum(val)) # scalar } } @@ -983,11 +979,11 @@ ll_args.stanjm <- function(object, data, pars, m = 1, # @param data Output from .pp_data_jm. # @param pars Output from extract_pars. # @param m Integer specifying the longitudinal submodel. -# @param reloo_or_kfold Logical specifying whether the call came from +# @param reloo_or_kfold Logical specifying whether the call came from # reloo or kfold. # @return An S*Npat matrix. .ll_long <- function(object, data, pars, m = 1, reloo_or_kfold = FALSE) { - args <- ll_args.stanjm(object, data, pars, m = m, + args <- ll_args.stanjm(object, data, pars, m = m, reloo_or_kfold = reloo_or_kfold) fun <- ll_fun(object, m = m) ll <- lapply(seq_len(args$N), function(j) as.vector( @@ -996,7 +992,7 @@ ll_args.stanjm <- function(object, data, pars, m = 1, # return S*Npat matrix by summing log-lik for y within each individual res <- apply(ll, 1L, function(row) tapply(row, data$flist[[m]], sum)) res <- if (is.vector(res) & (args$S > 1L)) cbind(res) else t(res) - return(res) + return(res) } # Return survival probability or log-likelihood for event submodel @@ -1004,38 +1000,38 @@ ll_args.stanjm <- function(object, data, pars, m = 1, # @param object A stanjm object. # @param data Output from .pp_data_jm. # @param pars Output from extract_pars. -# @param one_draw A logical specifying whether the parameters provided in the +# @param one_draw A logical specifying whether the parameters provided in the # pars argument are vectors for a single realisation of the parameter (e.g. # a single MCMC draw, or a posterior mean) (TRUE) or a stanmat array (FALSE). -# @param survprob A logical specifying whether to return the survival probability +# @param survprob A logical specifying whether to return the survival probability # (TRUE) or the log likelihood for the event submodel (FALSE). # @param An S by Npat matrix, or a length Npat vector, depending on the inputs -# (where S is the size of the posterior sample and Npat is the number of +# (where S is the size of the posterior sample and Npat is the number of # individuals). .ll_survival <- function(object, data, pars, one_draw = FALSE, survprob = FALSE) { basehaz <- object$basehaz family <- object$family - assoc <- object$assoc + assoc <- object$assoc etimes <- attr(data$assoc_parts, "etimes") estatus <- attr(data$assoc_parts, "estatus") qnodes <- attr(data$assoc_parts, "qnodes") qtimes <- attr(data$assoc_parts, "qtimes") - qwts <- attr(data$assoc_parts, "qwts") + qwts <- attr(data$assoc_parts, "qwts") times <- c(etimes, qtimes) - - # To avoid an error in log(times) replace times equal to zero with a small + + # To avoid an error in log(times) replace times equal to zero with a small # non-zero value. Note that these times correspond to individuals where the, - # event time (etimes) was zero, and therefore the cumhaz (at baseline) will - # be forced to zero for these individuals further down in the code anyhow. - times[times == 0] <- 1E-10 - + # event time (etimes) was zero, and therefore the cumhaz (at baseline) will + # be forced to zero for these individuals further down in the code anyhow. + times[times == 0] <- 1E-10 + # Linear predictor for the survival submodel - e_eta <- linear_predictor(pars$ebeta, data$eXq) - + e_eta <- linear_predictor(pars$ebeta, data$eXq) + # Scaling parameter for linear predictor assoc_as_list <- apply(assoc, 2L, c) scale_assoc <- validate_scale_assoc(object$scale_assoc, assoc_as_list) - + # Add on contribution from assoc structure if (length(pars$abeta)) { M <- get_M(object) @@ -1050,26 +1046,26 @@ ll_args.stanjm <- function(object, data, pars, m = 1, pp_b_ord(if (is.matrix(b_m)) b_m else t(b_m), Z_names_m) }) if (one_draw) { - aXq <- make_assoc_terms(parts = data$assoc_parts, assoc = assoc, + aXq <- make_assoc_terms(parts = data$assoc_parts, assoc = assoc, family = family, beta = pars$beta, b = pars$b) e_eta <- e_eta + scale_assoc * linear_predictor.default(pars$abeta, aXq) } else { - aXq <- make_assoc_terms(parts = data$assoc_parts, assoc = assoc, + aXq <- make_assoc_terms(parts = data$assoc_parts, assoc = assoc, family = family, beta = pars$beta, b = pars$b) for (k in 1:length(aXq)) { e_eta <- e_eta + scale_assoc[k] * sweep(aXq[[k]], 1L, pars$abeta[,k], `*`) } - } + } } - + # Log baseline hazard at etimes (if not NULL) and qtimes - log_basehaz <- evaluate_log_basehaz2(times = times, - basehaz = basehaz, + log_basehaz <- evaluate_log_basehaz2(times = times, + basehaz = basehaz, coefs = pars$bhcoef) - + # Log hazard at etimes (if not NULL) and qtimes - log_haz <- log_basehaz + e_eta - + log_haz <- log_basehaz + e_eta + # Extract log hazard at qtimes only if (is.vector(log_haz)) { q_log_haz <- tail(log_haz, length(qtimes)) @@ -1077,19 +1073,19 @@ ll_args.stanjm <- function(object, data, pars, m = 1, sel_cols <- tail(1:ncol(log_haz), length(qtimes)) q_log_haz <- log_haz[, sel_cols, drop = FALSE] } - + # Evaluate log survival log_surv <- evaluate_log_survival(log_haz = q_log_haz, qnodes = qnodes, qwts = qwts) - + # Force surv prob to 1 (ie. log surv prob to 0) if evaluating # at time t = 0; this avoids possible numerical errors log_surv[etimes == 0] <- 0 - + # Possibly return surv prob at time t (upper limit of integral) if (survprob) - return(exp(log_surv)) - + return(exp(log_surv)) + # Otherwise return log likelihood at time t if (is.null(etimes) || is.null(estatus)) stop("'etimes' and 'estatus' cannot be NULL if 'survprob = FALSE'.") @@ -1105,7 +1101,7 @@ ll_args.stanjm <- function(object, data, pars, m = 1, e_log_haz <- log_haz[, 1:length(etimes), drop = FALSE] return(sweep(e_log_haz, 2L, estatus, `*`) + log_surv) } -} +} # Evaluate the log baseline hazard at the specified times # given the vector or matrix of MCMC draws for the baseline @@ -1117,12 +1113,12 @@ ll_args.stanjm <- function(object, data, pars, m = 1, # @return A vector or matrix, depending on the input type of coefs. evaluate_log_basehaz2 <- function(times, basehaz, coefs) { type <- basehaz$type_name - if (type == "weibull") { + if (type == "weibull") { X <- log(times) # log times B1 <- log(coefs) # log shape B2 <- coefs - 1 # shape - 1 log_basehaz <- as.vector(B1) + linear_predictor(B2,X) - } else if (type == "bs") { + } else if (type == "bs") { X <- predict(basehaz$bs_basis, times) # b-spline basis B <- coefs # b-spline coefs log_basehaz <- linear_predictor(B,X) @@ -1140,7 +1136,7 @@ evaluate_log_basehaz2 <- function(times, basehaz, coefs) { #' individual, evaluated at each of the quadrature points. The #' vector should be ordered such that the first N elements contain #' the log_haz evaluated for each individual at quadrature point 1, -#' then the next N elements are the log_haz evaluated for each +#' then the next N elements are the log_haz evaluated for each #' individual at quadrature point 2, and so on. #' @param qnodes Integer specifying the number of quadrature nodes #' at which the log hazard was evaluated for each individual. @@ -1158,7 +1154,7 @@ evaluate_log_survival <- function(log_haz, qnodes, qwts) { #' individual, evaluated at each of the quadrature points. The #' vector should be ordered such that the first N elements contain #' the log_haz evaluated for each individual at quadrature point 1, -#' then the next N elements are the log_haz evaluated for each +#' then the next N elements are the log_haz evaluated for each #' individual at quadrature point 2, and so on. #' @param qnodes Integer specifying the number of quadrature nodes #' at which the log hazard was evaluated for each individual. @@ -1169,7 +1165,7 @@ evaluate_log_survival.default <- function(log_haz, qnodes, qwts) { # convert log hazard to hazard haz <- exp(log_haz) # apply GK quadrature weights - weighted_haz <- qwts * haz + weighted_haz <- qwts * haz # sum quadrature points for each individual to get cum_haz splitting_vec <- rep(1:qnodes, each = length(haz) / qnodes) cum_haz <- Reduce('+', split(weighted_haz, splitting_vec)) @@ -1185,7 +1181,7 @@ evaluate_log_survival.default <- function(log_haz, qnodes, qwts) { #' individual, evaluated at each of the quadrature points. The #' vector should be ordered such that the first N elements contain #' the log_haz evaluated for each individual at quadrature point 1, -#' then the next N elements are the log_haz evaluated for each +#' then the next N elements are the log_haz evaluated for each #' individual at quadrature point 2, and so on. #' @param qnodes Integer specifying the number of quadrature nodes #' at which the log hazard was evaluated for each individual. @@ -1201,11 +1197,11 @@ evaluate_log_survival.matrix <- function(log_haz, qnodes, qwts) { cum_haz <- Reduce('+', array2list(weighted_haz, nsplits = qnodes)) # return: -cum_haz == log survival probability -cum_haz -} +} #------------- -# Evaluate the log baseline hazard at the specified times given the +# Evaluate the log baseline hazard at the specified times given the # vector or matrix of MCMC draws for the baseline hazard parameters # # @param times A vector of times. @@ -1252,7 +1248,7 @@ log_basehaz_pw <- function(x, coefs, knots) { linear_predictor(coefs, dummy_matrix(x, knots = knots)) } -evaluate_log_haz <- function(times, basehaz, betas, betas_tve, b = NULL, aux, +evaluate_log_haz <- function(times, basehaz, betas, betas_tve, b = NULL, aux, intercept = NULL, x, s = NULL, z = NULL) { eta <- linear_predictor(betas, x) if ((!is.null(s)) && ncol(s)) @@ -1265,19 +1261,19 @@ evaluate_log_haz <- function(times, basehaz, betas, betas_tve, b = NULL, aux, eta <- switch(get_basehaz_name(basehaz), "exp-aft" = sweep(eta, 1L, -1, `*`), "weibull-aft" = sweep(eta, 1L, -as.vector(aux), `*`), - eta) + eta) args <- nlist(times, basehaz, aux, intercept) do.call(evaluate_log_basehaz, args) + eta } evaluate_basehaz <- function(times, basehaz, aux, intercept = NULL) { - exp(evaluate_log_basehaz(times = times, basehaz = basehaz, + exp(evaluate_log_basehaz(times = times, basehaz = basehaz, aux = aux, intercept = intercept)) } #------------- -# Evaluate the log baseline survival at the specified times given the +# Evaluate the log baseline survival at the specified times given the # vector or matrix of MCMC draws for the baseline hazard parameters # # @param times A vector of times. @@ -1315,7 +1311,7 @@ log_basesurv_ms <- function(x, coefs, basis, intercept) { linear_predictor(coefs, basis_matrix(x, basis = basis, integrate = TRUE)) } -evaluate_log_surv <- function(times, basehaz, betas, b = NULL, aux, +evaluate_log_surv <- function(times, basehaz, betas, b = NULL, aux, intercept = NULL, x, z = NULL, ...) { eta <- linear_predictor(betas, x) if (!is.null(z$Zt) && ncol(z$Zt)) { @@ -1334,17 +1330,17 @@ evaluate_log_surv <- function(times, basehaz, betas, b = NULL, aux, #--------------- #' Apply quadrature weights and sum over nodes -#' +#' #' @param x Inputs #' @param qnodes Nodes #' @param qwts Quadrature weights #' @export -quadrature_sum <- function(x, qnodes, qwts) { - UseMethod("quadrature_sum") +quadrature_sum <- function(x, qnodes, qwts) { + UseMethod("quadrature_sum") } #' Apply quadrature weights and sum over nodes -#' +#' #' @param x Inputs #' @param qnodes Nodes #' @param qwts Quadrature weights @@ -1356,7 +1352,7 @@ quadrature_sum.default <- function(x, qnodes, qwts) { } #' Apply quadrature weights and sum over nodes -#' +#' #' @param x Inputs #' @param qnodes Nodes #' @param qwts Quadrature weights @@ -1364,7 +1360,7 @@ quadrature_sum.default <- function(x, qnodes, qwts) { quadrature_sum.matrix <- function(x, qnodes, qwts) { weighted_x <- sweep_multiply(x, qwts, margin = 2L) # apply quadrature weights splitted_x <- array2list(weighted_x, nsplits = qnodes) # split at each quad node - Reduce('+', splitted_x) # sum over the quad nodes + Reduce('+', splitted_x) # sum over the quad nodes } #' Split a vector or matrix into a specified number of segments and return @@ -1373,10 +1369,10 @@ quadrature_sum.matrix <- function(x, qnodes, qwts) { #' #' @param x A vector or matrix. #' @param n_segments Integer specifying the number of segments. -#' @param bycol Logical, should a matrix be split along the column or row margin? +#' @param ... Additional arguments passed to the method. #' @return A list with n_segments elements. -split2 <- function(x, n_segments = 1, ...) { - UseMethod("split2") +split2 <- function(x, n_segments = 1, ...) { + UseMethod("split2") } #' Split a vector or matrix into a specified number of segments and return @@ -1385,11 +1381,11 @@ split2 <- function(x, n_segments = 1, ...) { #' #' @param x A vector #' @param n_segments Integer specifying the number of segments. -#' @param bycol Logical, should a matrix be split along the column or row margin? +#' @param ... Additional arguments passed to the method. #' @export split2.vector <- function(x, n_segments = 1, ...) { len <- length(x) - segment_length <- len %/% n_segments + segment_length <- len %/% n_segments if (!len == (segment_length * n_segments)) stop("Dividing x by n_segments does not result in an integer.") split(x, rep(1:n_segments, each = segment_length)) @@ -1401,10 +1397,11 @@ split2.vector <- function(x, n_segments = 1, ...) { #' @param x A matrix #' @param n_segments Integer specifying the number of segments. #' @param bycol Logical, should a matrix be split along the column or row margin? +#' @param ... Additional arguments passed to the method. #' @export -split2.matrix <- function(x, n_segments = 1, bycol = TRUE) { +split2.matrix <- function(x, n_segments = 1, bycol = TRUE, ...) { len <- if (bycol) ncol(x) else nrow(x) - segment_length <- len %/% n_segments + segment_length <- len %/% n_segments if (!len == (segment_length * n_segments)) stop("Dividing x by n_segments does not result in an integer.") lapply(1:n_segments, function(k) { diff --git a/R/loo-prediction.R b/R/loo-prediction.R index ea840c846..3030bf9e0 100644 --- a/R/loo-prediction.R +++ b/R/loo-prediction.R @@ -159,6 +159,7 @@ loo_predictive_interval.stanreg <- #' #' @export #' @param log_ratios log_ratios for PSIS computation +#' @param ... Additional arguments passed to \code{\link[loo]{psis}} psis.stanreg <- function(log_ratios, ...) { object <- log_ratios message("Running PSIS to compute weights...") diff --git a/R/misc.R b/R/misc.R index b64c8c627..078c7fe53 100644 --- a/R/misc.R +++ b/R/misc.R @@ -1,25 +1,25 @@ # Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University -# +# # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. -# +# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -# +# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #' Logit and inverse logit -#' +#' #' @export -#' @param x Numeric vector. +#' @param x Numeric vector. #' @return A numeric vector the same length as \code{x}. logit <- function(x) stats::qlogis(x) @@ -27,7 +27,7 @@ logit <- function(x) stats::qlogis(x) #' @export invlogit <- function(x) stats::plogis(x) -# Set arguments for sampling +# Set arguments for sampling # # Prepare a list of arguments to use with \code{rstan::sampling} via # \code{do.call}. @@ -40,25 +40,25 @@ invlogit <- function(x) stats::plogis(x) # @param prior Prior distribution list (can be NULL). # @param ... Other arguments to \code{\link[rstan]{sampling}} not coming from # \code{user_dots} (e.g. \code{data}, \code{pars}, \code{init}, etc.) -# @return A list of arguments to use for the \code{args} argument for +# @return A list of arguments to use for the \code{args} argument for # \code{do.call(sampling, args)}. -set_sampling_args <- function(object, prior, user_dots = list(), +set_sampling_args <- function(object, prior, user_dots = list(), user_adapt_delta = NULL, ...) { args <- list(object = object, ...) unms <- names(user_dots) for (j in seq_along(user_dots)) { args[[unms[j]]] <- user_dots[[j]] } - defaults <- default_stan_control(prior = prior, + defaults <- default_stan_control(prior = prior, adapt_delta = user_adapt_delta) - - if (!"control" %in% unms) { + + if (!"control" %in% unms) { # no user-specified 'control' argument args$control <- defaults - } else { + } else { # user specifies a 'control' argument - if (!is.null(user_adapt_delta)) { - # if user specified adapt_delta argument to stan_* then + if (!is.null(user_adapt_delta)) { + # if user specified adapt_delta argument to stan_* then # set control$adapt_delta to user-specified value args$control$adapt_delta <- user_adapt_delta } else { @@ -71,26 +71,26 @@ set_sampling_args <- function(object, prior, user_dots = list(), } } args$save_warmup <- FALSE - + return(args) } # Default control arguments for sampling -# +# # Called by set_sampling_args to set the default 'control' argument for # \code{rstan::sampling} if none specified by user. This allows the value of # \code{adapt_delta} to depend on the prior. -# +# # @param prior Prior distribution list (can be NULL). # @param adapt_delta User's \code{adapt_delta} argument. # @param max_treedepth Default for \code{max_treedepth}. # @return A list with \code{adapt_delta} and \code{max_treedepth}. -default_stan_control <- function(prior, adapt_delta = NULL, +default_stan_control <- function(prior, adapt_delta = NULL, max_treedepth = 15L) { if (!length(prior)) { if (is.null(adapt_delta)) adapt_delta <- 0.95 } else if (is.null(adapt_delta)) { - adapt_delta <- switch(prior$dist, + adapt_delta <- switch(prior$dist, "R2" = 0.99, "hs" = 0.99, "hs_plus" = 0.99, @@ -103,7 +103,7 @@ default_stan_control <- function(prior, adapt_delta = NULL, # Test if an object inherits a specific stanreg class # -# @param x The object to test. +# @param x The object to test. is.stanreg <- function(x) inherits(x, "stanreg") is.stansurv <- function(x) inherits(x, "stansurv") is.stanmvreg <- function(x) inherits(x, "stanmvreg") @@ -117,35 +117,35 @@ is.mvmer <- function(x) isTRUE(x$stan_function %in% c("stan_jm", "stan_mvmer")) is.surv <- function(x) isTRUE(x$stan_function %in% c("stan_jm", "stan_surv")) # Throw error if object isn't a stanreg object -# +# # @param x The object to test. validate_stanreg_object <- function(x, call. = FALSE) { if (!is.stanreg(x)) - stop("Object is not a stanreg object.", call. = call.) + stop("Object is not a stanreg object.", call. = call.) } # Throw error if object isn't a stanmvreg object -# +# # @param x The object to test. validate_stanmvreg_object <- function(x, call. = FALSE) { if (!is.stanmvreg(x)) - stop("Object is not a stanmvreg object.", call. = call.) + stop("Object is not a stanmvreg object.", call. = call.) } # Throw error if object isn't a stanjm object -# +# # @param x The object to test. validate_stanjm_object <- function(x, call. = FALSE) { if (!is.stanjm(x)) - stop("Object is not a stanjm object.", call. = call.) + stop("Object is not a stanjm object.", call. = call.) } # Throw error if object isn't a stansurv object -# +# # @param x The object to test. validate_stansurv_object <- function(x, call. = FALSE) { if (!is.stansurv(x)) - stop("Object is not a stansurv object.", call. = call.) + stop("Object is not a stansurv object.", call. = call.) } # Test for a given family @@ -164,7 +164,7 @@ is_clogit <- function(object) { is(object, "clogit") } -# test if a stanreg object has class polr +# test if a stanreg object has class polr is_polr <- function(object) { is(object, "polr") } @@ -172,7 +172,7 @@ is_polr <- function(object) { # test if a stanreg object is a scobit model is_scobit <- function(object) { validate_stanreg_object(object) - if (!is_polr(object)) + if (!is_polr(object)) return(FALSE) return("alpha" %in% rownames(object$stan_summary)) } @@ -214,59 +214,59 @@ is.mer <- function(x) { is.nlmer <- function(x) { is.mer(x) && inherits(x, "nlmerMod") } -# Consistent error message to use when something is only available for +# Consistent error message to use when something is only available for # models fit using MCMC # # @param what An optional message to prepend to the default message. STOP_sampling_only <- function(what) { msg <- "only available for models fit using MCMC (algorithm='sampling')." - if (!missing(what)) + if (!missing(what)) msg <- paste(what, msg) stop(msg, call. = FALSE) } # Consistent error message to use when something is only available for models # fit using MCMC or VB but not optimization -# +# # @param what An optional message to prepend to the default message. STOP_not_optimizing <- function(what) { msg <- "not available for models fit using algorithm='optimizing'." - if (!missing(what)) + if (!missing(what)) msg <- paste(what, msg) stop(msg, call. = FALSE) } # Consistent error message to use when something is only available for models # fit using MCMC or optimization but not VB -# +# # @param what An optional message to prepend to the default message. STOP_not_VB <- function(what) { msg <- "not available for models fit using algorithm='meanfield|fullrank'." - if (!missing(what)) + if (!missing(what)) msg <- paste(what, msg) stop(msg, call. = FALSE) } -# Message to issue when fitting model with ADVI but 'QR=FALSE'. +# Message to issue when fitting model with ADVI but 'QR=FALSE'. recommend_QR_for_vb <- function() { message( - "Setting 'QR' to TRUE can often be helpful when using ", - "one of the variational inference algorithms. ", + "Setting 'QR' to TRUE can often be helpful when using ", + "one of the variational inference algorithms. ", "See the documentation for the 'QR' argument." ) } # Issue warning if high rhat values -# +# # @param rhats Vector of rhat values. -# @param threshold Threshold value. If any rhat values are above threshold a +# @param threshold Threshold value. If any rhat values are above threshold a # warning is issued. check_rhats <- function(rhats, threshold = 1.1, check_lp = FALSE) { if (!check_lp) rhats <- rhats[!names(rhats) %in% c("lp__", "log-posterior")] - - if (any(rhats > threshold, na.rm = TRUE)) - warning("Markov chains did not converge! Do not analyze results!", + + if (any(rhats > threshold, na.rm = TRUE)) + warning("Markov chains did not converge! Do not analyze results!", call. = FALSE, noBreaks. = TRUE) } @@ -278,46 +278,46 @@ array1D_check <- function(y) { if (length(dim(y)) == 1L) { nms <- rownames(y) dim(y) <- NULL - if (!is.null(nms)) + if (!is.null(nms)) names(y) <- nms } return(y) } -# Check for a binomial model with Y given as proportion of successes and weights +# Check for a binomial model with Y given as proportion of successes and weights # given as total number of trials -# +# binom_y_prop <- function(y, family, weights) { - if (!is.binomial(family$family)) + if (!is.binomial(family$family)) return(FALSE) - yprop <- NCOL(y) == 1L && - is.numeric(y) && - any(y > 0 & y < 1) && + yprop <- NCOL(y) == 1L && + is.numeric(y) && + any(y > 0 & y < 1) && !any(y < 0 | y > 1) if (!yprop) return(FALSE) - - wtrials <- !identical(weights, double(0)) && - all(weights > 0) && + + wtrials <- !identical(weights, double(0)) && + all(weights > 0) && all(abs(weights - round(weights)) < .Machine$double.eps^0.5) isTRUE(wtrials) } # Convert 2-level factor to 0/1 fac2bin <- function(y) { - if (!is.factor(y)) - stop("Bug found: non-factor as input to fac2bin.", + if (!is.factor(y)) + stop("Bug found: non-factor as input to fac2bin.", call. = FALSE) - if (!identical(nlevels(y), 2L)) - stop("Bug found: factor with nlevels != 2 as input to fac2bin.", + if (!identical(nlevels(y), 2L)) + stop("Bug found: factor with nlevels != 2 as input to fac2bin.", call. = FALSE) as.integer(y != levels(y)[1L]) } # Check weights argument -# +# # @param w The \code{weights} argument specified by user or the result of # calling \code{model.weights} on a model frame. # @return If no error is thrown then \code{w} is returned. @@ -325,14 +325,14 @@ validate_weights <- function(w) { if (missing(w) || is.null(w)) { w <- double(0) } else { - if (!is.numeric(w)) - stop("'weights' must be a numeric vector.", + if (!is.numeric(w)) + stop("'weights' must be a numeric vector.", call. = FALSE) - if (any(w < 0)) - stop("Negative weights are not allowed.", + if (any(w < 0)) + stop("Negative weights are not allowed.", call. = FALSE) } - + return(w) } @@ -361,13 +361,13 @@ validate_offset <- function(o, y) { # already a family) or the family object created from \code{f} is returned (if # \code{f} is a string or function). validate_family <- function(f) { - if (is.character(f)) + if (is.character(f)) f <- get(f, mode = "function", envir = parent.frame(2)) - if (is.function(f)) + if (is.function(f)) f <- f() - if (!is(f, "family")) + if (!is(f, "family")) stop("'family' must be a family.", call. = FALSE) - + return(f) } @@ -401,14 +401,14 @@ has_outcome_variable <- function(f) { # exceptions: constant variable of all 1's is allowed and outcomes with all 0s # or 1s are allowed (e.g., for binomial models) and survival outcomes (e.g., # incase all event indicators are 1s) -# +# # @param mf A model frame or model matrix # @return If no constant variables are found mf is returned, otherwise an error # is thrown. check_constant_vars <- function(mf) { mf1 <- mf if (NCOL(mf[, 1]) == 2 || all(mf[, 1] %in% c(0, 1)) || survival::is.Surv(mf[, 1])) { - mf1 <- mf[, -1, drop=FALSE] + mf1 <- mf[, -1, drop=FALSE] } lu1 <- function(x) !all(x == 1) && length(unique(x)) == 1 @@ -416,8 +416,8 @@ check_constant_vars <- function(mf) { sel <- !colnames(mf1) %in% nocheck is_constant <- apply(mf1[, sel, drop=FALSE], 2, lu1) if (any(is_constant)) { - stop("Constant variable(s) found: ", - paste(names(is_constant)[is_constant], collapse = ", "), + stop("Constant variable(s) found: ", + paste(names(is_constant)[is_constant], collapse = ", "), call. = FALSE) } return(mf) @@ -446,12 +446,12 @@ last_dimnames <- function(x) { # \code{fit$algorithm}). # @return Either \code{"50%"} or \code{"Median"} depending on \code{algorithm}. select_median <- function(algorithm) { - switch(algorithm, + switch(algorithm, sampling = "50%", meanfield = "50%", fullrank = "50%", optimizing = "Median", - stop("Bug found (incorrect algorithm name passed to select_median)", + stop("Bug found (incorrect algorithm name passed to select_median)", call. = FALSE)) } @@ -468,11 +468,11 @@ grep_for_pars <- function(x, regex_pars) { } stopifnot(is.character(regex_pars)) out <- unlist(lapply(seq_along(regex_pars), function(j) { - grep(regex_pars[j], rownames(x$stan_summary), value = TRUE) + grep(regex_pars[j], rownames(x$stan_summary), value = TRUE) })) if (!length(out)) stop("No matches for 'regex_pars'.", call. = FALSE) - + return(out) } @@ -482,11 +482,11 @@ grep_for_pars <- function(x, regex_pars) { # @param pars Character vector of parameter names # @param regex_pars Character vector of patterns collect_pars <- function(x, pars = NULL, regex_pars = NULL) { - if (is.null(pars) && is.null(regex_pars)) + if (is.null(pars) && is.null(regex_pars)) return(NULL) - if (!is.null(pars)) + if (!is.null(pars)) pars[pars == "varying"] <- "b" - if (!is.null(regex_pars)) + if (!is.null(regex_pars)) pars <- c(pars, grep_for_pars(x, regex_pars)) unique(pars) } @@ -515,10 +515,10 @@ posterior_sample_size <- function(x) { if (a == Inf) b else a } -# Maybe broadcast +# Maybe broadcast # # @param x A vector or scalar. -# @param n Number of replications to possibly make. +# @param n Number of replications to possibly make. # @return If \code{x} has no length the \code{0} replicated \code{n} times is # returned. If \code{x} has length 1, the \code{x} replicated \code{n} times # is returned. Otherwise \code{x} itself is returned. @@ -535,22 +535,22 @@ maybe_broadcast <- function(x, n) { # Create a named list using specified names or, if names are omitted, using the # names of the objects in the list # -# @param ... Objects to include in the list. +# @param ... Objects to include in the list. # @return A named list. nlist <- function(...) { m <- match.call() out <- list(...) no_names <- is.null(names(out)) has_name <- if (no_names) FALSE else nzchar(names(out)) - if (all(has_name)) + if (all(has_name)) return(out) nms <- as.character(m)[-1L] if (no_names) { names(out) <- nms } else { names(out)[!has_name] <- nms[!has_name] - } - + } + return(out) } @@ -565,18 +565,18 @@ nlist <- function(...) { # either \code{scale} or \code{default} is returned. set_prior_scale <- function(scale, default, link) { stopifnot(is.numeric(default), is.character(link) || is.null(link)) - if (is.null(scale)) + if (is.null(scale)) scale <- default if (isTRUE(link == "probit")) scale <- scale * dnorm(0) / dlogis(0) - + return(scale) } #' Methods for creating linear predictor #' -#' Make linear predictor vector from x and point estimates for beta, or linear +#' Make linear predictor vector from x and point estimates for beta, or linear #' predictor matrix from x and full posterior sample of beta. #' #' @param beta A vector or matrix or parameter estimates. @@ -590,7 +590,7 @@ linear_predictor <- function(beta, x, offset = NULL) { #' Methods for creating linear predictor #' -#' Make linear predictor vector from x and point estimates for beta, or linear +#' Make linear predictor vector from x and point estimates for beta, or linear #' predictor matrix from x and full posterior sample of beta. #' #' @param beta A vector or matrix or parameter estimates. @@ -602,13 +602,13 @@ linear_predictor.default <- function(beta, x, offset = NULL) { eta <- as.vector(if (NCOL(x) == 1L) x * beta else x %*% beta) if (length(offset)) eta <- eta + offset - + return(eta) } #' Methods for creating linear predictor #' -#' Make linear predictor vector from x and point estimates for beta, or linear +#' Make linear predictor vector from x and point estimates for beta, or linear #' predictor matrix from x and full posterior sample of beta. #' #' @param beta A vector or matrix or parameter estimates. @@ -617,10 +617,10 @@ linear_predictor.default <- function(beta, x, offset = NULL) { #' @return A vector or matrix. #' @export linear_predictor.matrix <- function(beta, x, offset = NULL) { - if (NCOL(beta) == 1L) + if (NCOL(beta) == 1L) beta <- as.matrix(beta) eta <- beta %*% t(x) - if (length(offset)) + if (length(offset)) eta <- sweep(eta, 2L, offset, `+`) return(eta) @@ -628,7 +628,7 @@ linear_predictor.matrix <- function(beta, x, offset = NULL) { #' Extract X, Y or Z from a stanreg object -#' +#' #' @keywords internal #' @export #' @templateVar stanregArg object @@ -687,7 +687,7 @@ get_z.stanmvreg <- function(object, m = NULL, ...) { } #' Extract survival response from a stansurv or stanjm object -#' +#' #' @keywords internal #' @export #' @param object A \code{stansurv} or \code{stanjm} object. @@ -695,17 +695,17 @@ get_z.stanmvreg <- function(object, m = NULL, ...) { #' @return A \code{Surv} object, see \code{?survival::Surv}. get_surv <- function(object, ...) UseMethod("get_surv") #' @export -get_surv.stansurv <- function(object, ...) { +get_surv.stansurv <- function(object, ...) { model.response(model.frame(object)) %ORifNULL% stop("response not found") } #' @export -get_surv.stanjm <- function(object, ...) { +get_surv.stanjm <- function(object, ...) { object$survmod$mod$y %ORifNULL% stop("response not found") } #' Get inverse link function #' -#' @param x A stanreg object, family object, or string. +#' @param x A stanreg object, family object, or string. #' @param ... Other arguments passed to methods. For a \code{stanmvreg} object #' this can be an integer \code{m} specifying the submodel. #' @return The inverse link function associated with x. @@ -714,7 +714,7 @@ linkinv <- function(x, ...) UseMethod("linkinv") #' Get inverse link function #' -#' @param x A stanreg object +#' @param x A stanreg object #' @param ... Other arguments passed to methods. For a \code{stanmvreg} object #' this can be an integer \code{m} specifying the submodel. #' @return The inverse link function associated with x. @@ -725,7 +725,8 @@ linkinv.stanreg <- function(x, ...) { #' Get inverse link function #' -#' @param x A stanmvreg object +#' @param x A stanmvreg object +#' @param m An integer specifying the submodel. #' @param ... Other arguments passed to methods. For a \code{stanmvreg} object #' this can be an integer \code{m} specifying the submodel. #' @return The inverse link function associated with x. @@ -738,7 +739,7 @@ linkinv.stanmvreg <- function(x, m = NULL, ...) { #' Get inverse link function #' -#' @param x A family object +#' @param x A family object #' @param ... Other arguments passed to methods. For a \code{stanmvreg} object #' this can be an integer \code{m} specifying the submodel. #' @return The inverse link function associated with x. @@ -770,15 +771,15 @@ polr_linkinv <- function(x) { } else if (is.character(x) && length(x) == 1L) { method <- x } else { - stop("'x' should be a stanreg object created by stan_polr ", + stop("'x' should be a stanreg object created by stan_polr ", "or a single string.") } - if (is.null(method) || method == "logistic") + if (is.null(method) || method == "logistic") method <- "logit" - + if (method == "loglog") return(pgumbel) - + make.link(method)$linkinv } @@ -789,7 +790,7 @@ make_stan_summary <- function(stanfit) { levs <- c(0.5, 0.8, 0.95) qq <- (1 - levs) / 2 probs <- sort(c(0.5, qq, 1 - qq)) - rstan::summary(stanfit, probs = probs, digits = 10)$summary + rstan::summary(stanfit, probs = probs, digits = 10)$summary } check_reTrms <- function(reTrms) { @@ -801,9 +802,9 @@ check_reTrms <- function(reTrms) { dupe <- reTrms$cnms[[i]] overlap <- dupe %in% original if (any(overlap)) - stop("rstanarm does not permit formulas with duplicate group-specific terms.\n", + stop("rstanarm does not permit formulas with duplicate group-specific terms.\n", "In this case ", nms[i], " is used as a grouping factor multiple times and\n", - dupe[overlap], " is included multiple times.\n", + dupe[overlap], " is included multiple times.\n", "Consider using || or -1 in your formulas to prevent this from happening.") } return(invisible(NULL)) @@ -816,14 +817,14 @@ make_glmerControl <- function(..., ignore_lhs = FALSE, ignore_x_scale = FALSE) { check.nlev.gtr.1 = "stop", check.nobs.vs.rankZ = "ignore", check.nobs.vs.nlev = "ignore", - check.nobs.vs.nRE = "ignore", + check.nobs.vs.nRE = "ignore", check.formula.LHS = if (ignore_lhs) "ignore" else "stop", check.scaleX = if (ignore_x_scale) "ignore" else "warning", - ...) + ...) } # Check if a fitted model (stanreg object) has weights -# +# # @param x stanreg object # @return Logical. Only TRUE if x$weights has positive length and the elements # of x$weights are not all the same. @@ -878,10 +879,10 @@ validate_data <- function(data, if_missing = NULL) { if (!is.data.frame(data)) { stop("'data' must be a data frame.", call. = FALSE) } - + # drop other classes (e.g. 'tbl_df', 'tbl', 'data.table') data <- as.data.frame(data) - + drop_redundant_dims(data) } @@ -889,8 +890,8 @@ validate_data <- function(data, if_missing = NULL) { warn_data_arg_missing <- function() { warning( "Omitting the 'data' argument is not recommended ", - "and may not be allowed in future versions of rstanarm. ", - "Some post-estimation functions (in particular 'update', 'loo', 'kfold') ", + "and may not be allowed in future versions of rstanarm. ", + "Some post-estimation functions (in particular 'update', 'loo', 'kfold') ", "are not guaranteed to work properly unless 'data' is specified as a data frame.", call. = FALSE ) @@ -898,8 +899,8 @@ warn_data_arg_missing <- function() { # Validate newdata argument for posterior_predict, log_lik, etc. # -# Checks for NAs in used variables only (but returns all variables), -# and also drops any unused dimensions in variables (e.g. a one column +# Checks for NAs in used variables only (but returns all variables), +# and also drops any unused dimensions in variables (e.g. a one column # matrix inside a data frame is converted to a vector). # # @param object stanreg object @@ -914,20 +915,20 @@ validate_newdata <- function(object, newdata = NULL, m = NULL) { if (!is.data.frame(newdata)) { stop("If 'newdata' is specified it must be a data frame.", call. = FALSE) } - + # drop other classes (e.g. 'tbl_df', 'tbl') newdata <- as.data.frame(newdata) if (nrow(newdata) == 0) { stop("If 'newdata' is specified it must have more than 0 rows.", call. = FALSE) } - + # only check for NAs in used variables vars <- all.vars(formula(object, m = m)) newdata_check <- newdata[, colnames(newdata) %in% vars, drop=FALSE] if (any(is.na(newdata_check))) { stop("NAs are not allowed in 'newdata'.", call. = FALSE) } - + if (ncol(newdata) > 0) { newdata <- drop_redundant_dims(newdata) } @@ -939,15 +940,15 @@ validate_newdata <- function(object, newdata = NULL, m = NULL) { #---------------------- for stan_{mvmer,jm} only ----------------------------- # Return a list (or vector if unlist = TRUE) which -# contains the embedded elements in list x named y -fetch <- function(x, y, z = NULL, zz = NULL, null_to_zero = FALSE, +# contains the embedded elements in list x named y +fetch <- function(x, y, z = NULL, zz = NULL, null_to_zero = FALSE, pad_length = NULL, unlist = FALSE) { ret <- lapply(x, `[[`, y) if (!is.null(z)) ret <- lapply(ret, `[[`, z) if (!is.null(zz)) ret <- lapply(ret, `[[`, zz) - if (null_to_zero) + if (null_to_zero) ret <- lapply(ret, function(i) ifelse(is.null(i), 0L, i)) if (!is.null(pad_length)) { padding <- rep(list(0L), pad_length - length(ret)) @@ -956,18 +957,18 @@ fetch <- function(x, y, z = NULL, zz = NULL, null_to_zero = FALSE, if (unlist) unlist(ret) else ret } # Wrapper for using fetch with unlist = TRUE -fetch_ <- function(x, y, z = NULL, zz = NULL, null_to_zero = FALSE, +fetch_ <- function(x, y, z = NULL, zz = NULL, null_to_zero = FALSE, pad_length = NULL) { - fetch(x = x, y = y, z = z, zz = zz, null_to_zero = null_to_zero, + fetch(x = x, y = y, z = z, zz = zz, null_to_zero = null_to_zero, pad_length = pad_length, unlist = TRUE) } -# Wrapper for using fetch with unlist = TRUE and +# Wrapper for using fetch with unlist = TRUE and # returning array. Also converts logical to integer. fetch_array <- function(x, y, z = NULL, zz = NULL, null_to_zero = FALSE, pad_length = NULL) { - val <- fetch(x = x, y = y, z = z, zz = zz, null_to_zero = null_to_zero, + val <- fetch(x = x, y = y, z = z, zz = zz, null_to_zero = null_to_zero, pad_length = pad_length, unlist = TRUE) - if (is.logical(val)) + if (is.logical(val)) val <- as.integer(val) as.array(val) } @@ -1005,7 +1006,7 @@ is.lmer <- function(x) { # @return A list of nsplits arrays or matrices array2list <- function(x, nsplits, bycol = TRUE) { len <- if (bycol) ncol(x) else nrow(x) - len_k <- len %/% nsplits + len_k <- len %/% nsplits if (!len == (len_k * nsplits)) stop("Dividing x by nsplits does not result in an integer.") lapply(1:nsplits, function(k) { @@ -1026,7 +1027,7 @@ sweep_multiply <- function(x, y, margin = 2L) { sweep(x, margin, y, `*`) } -# Convert a standardised quadrature node to an unstandardised value based on +# Convert a standardised quadrature node to an unstandardised value based on # the specified integral limits # # @param x An unstandardised quadrature node @@ -1047,7 +1048,7 @@ unstandardise_qpts <- function(x, a, b, na.ok = TRUE) { ((b - a) / 2) * x + ((b + a) / 2) } -# Convert a standardised quadrature weight to an unstandardised value based on +# Convert a standardised quadrature weight to an unstandardised value based on # the specified integral limits # # @param x An unstandardised quadrature weight @@ -1077,7 +1078,7 @@ validate_positive_scalar <- function(x, not_greater_than = NULL) { stop(nm, " cannot be NULL", call. = FALSE) if (!is.numeric(x)) stop(nm, " should be numeric", call. = FALSE) - if (any(x <= 0)) + if (any(x <= 0)) stop(nm, " should be postive", call. = FALSE) if (!is.null(not_greater_than)) { if (!is.numeric(not_greater_than) || (not_greater_than <= 0)) @@ -1087,12 +1088,12 @@ validate_positive_scalar <- function(x, not_greater_than = NULL) { } } -# Return a matrix or list with the median and prob% CrI bounds for +# Return a matrix or list with the median and prob% CrI bounds for # each column of a matrix or 2D array # # @param x A matrix or 2D array # @param prob Value between 0 and 1 indicating the desired width of the CrI -# @param return_matrix Logical, if TRUE then a matrix with three columns is +# @param return_matrix Logical, if TRUE then a matrix with three columns is # returned (med, lb, ub) else if FALSE a list with three elements is returned. median_and_bounds <- function(x, prob, na.rm = FALSE, return_matrix = FALSE) { if (!any(is.matrix(x), is.array(x))) @@ -1122,12 +1123,12 @@ get_m_stub <- function(m, stub = "Long") { # # @param object A stanmvreg object get_stub <- function(object) { - if (is.jm(object)) "Long" else if (is.mvmer(object)) "y" else NULL -} + if (is.jm(object)) "Long" else if (is.mvmer(object)) "y" else NULL +} -# Separates a names object into separate parts based on the longitudinal, +# Separates a names object into separate parts based on the longitudinal, # event, or association parameters. -# +# # @param x Character vector (often rownames(fit$stan_summary)) # @param M An integer specifying the number of longitudinal submodels. # @param stub The character string used at the start of the names of variables @@ -1137,24 +1138,24 @@ get_stub <- function(object) { # to parameters from the M longitudinal submodels, the event submodel # or association parameters. collect_nms <- function(x, M, stub = "Long", ...) { - ppd <- grep(paste0("^", stub, ".{1}\\|mean_PPD"), x, ...) + ppd <- grep(paste0("^", stub, ".{1}\\|mean_PPD"), x, ...) y <- lapply(1:M, function(m) grep(mod2rx(m, stub = stub), x, ...)) - y_extra <- lapply(1:M, function(m) + y_extra <- lapply(1:M, function(m) c(grep(paste0("^", stub, m, "\\|sigma"), x, ...), grep(paste0("^", stub, m, "\\|shape"), x, ...), grep(paste0("^", stub, m, "\\|lambda"), x, ...), - grep(paste0("^", stub, m, "\\|reciprocal_dispersion"), x, ...))) + grep(paste0("^", stub, m, "\\|reciprocal_dispersion"), x, ...))) y <- lapply(1:M, function(m) setdiff(y[[m]], c(y_extra[[m]], ppd[m]))) - e <- grep(mod2rx("^Event"), x, ...) - e_extra <- c(grep("^Event\\|weibull-shape|^Event\\|b-splines-coef|^Event\\|piecewise-coef", x, ...)) + e <- grep(mod2rx("^Event"), x, ...) + e_extra <- c(grep("^Event\\|weibull-shape|^Event\\|b-splines-coef|^Event\\|piecewise-coef", x, ...)) e <- setdiff(e, e_extra) a <- grep(mod2rx("^Assoc"), x, ...) b <- b_names(x, ...) y_b <- lapply(1:M, function(m) b_names_M(x, m, stub = stub, ...)) - alpha <- grep("^.{5}\\|\\(Intercept\\)", x, ...) + alpha <- grep("^.{5}\\|\\(Intercept\\)", x, ...) alpha <- c(alpha, grep(pattern=paste0("^", stub, ".{1}\\|\\(Intercept\\)"), x=x, ...)) - beta <- setdiff(c(unlist(y), e, a), alpha) - nlist(y, y_extra, y_b, e, e_extra, a, b, alpha, beta, ppd) + beta <- setdiff(c(unlist(y), e, a), alpha) + nlist(y, y_extra, y_b, e, e_extra, a, b, alpha, beta, ppd) } # Grep for "b" parameters (ranef), can optionally be specified @@ -1176,8 +1177,8 @@ b_names_M <- function(x, submodel = NULL, stub = "Long", ...) { # # @param x Character vector (often rownames(fit$stan_summary)) # @param submodel Character vector specifying which submodels -# to obtain the coef names for. Can be "Long", "Event", "Assoc", or -# an integer specifying a specific longitudinal submodel. Specifying +# to obtain the coef names for. Can be "Long", "Event", "Assoc", or +# an integer specifying a specific longitudinal submodel. Specifying # NULL selects all submodels. # @param ... Passed to grep beta_names <- function(x, submodel = NULL, ...) { @@ -1219,7 +1220,7 @@ mod2rx <- function(x, stub = "Long") { c("y[1-9]\\|") } else { paste0("^", stub, x, "\\|") - } + } } # Return the number of longitudinal submodels @@ -1239,20 +1240,20 @@ get_M <- function(object) { # list items related to the longitudinal/GLM submodels list_nms <- function(object, M = NULL, stub = "Long") { ok_type <- is.null(object) || is.list(object) || is.vector(object) - if (!ok_type) + if (!ok_type) stop("'object' argument should be a list or vector.") if (is.null(object)) return(object) - if (is.null(M)) + if (is.null(M)) M <- length(object) nms <- paste0(stub, 1:M) - if (length(object) > M) + if (length(object) > M) nms <- c(nms, "Event") names(object) <- nms object } -# Removes the submodel identifying text (e.g. "Long1|", "Event|", etc +# Removes the submodel identifying text (e.g. "Long1|", "Event|", etc # from variable names # # @param x Character vector (often rownames(fit$stan_summary)) from which @@ -1276,17 +1277,17 @@ strip_nms <- function(x, string) { # Check argument contains one of the allowed options check_submodelopt2 <- function(x) { if (!x %in% c("long", "event")) - stop("submodel option must be 'long' or 'event'") + stop("submodel option must be 'long' or 'event'") } check_submodelopt3 <- function(x) { if (!x %in% c("long", "event", "both")) - stop("submodel option must be 'long', 'event' or 'both'") + stop("submodel option must be 'long', 'event' or 'both'") } # Error message when the argument contains an object of the incorrect type STOP_arg <- function(arg_name, type) { - stop(paste0("'", arg_name, "' should be a ", paste0(type, collapse = " or "), - " object or a list of those objects."), call. = FALSE) + stop(paste0("'", arg_name, "' should be a ", paste0(type, collapse = " or "), + " object or a list of those objects."), call. = FALSE) } # Return error msg if both elements of the object are TRUE @@ -1318,7 +1319,7 @@ STOP_id_var_required <- function() { # @param what A character string naming the function not yet implemented STOP_if_stanmvreg <- function(what) { msg <- "not yet implemented for stanmvreg objects." - if (!missing(what)) + if (!missing(what)) msg <- paste(what, msg) stop2(msg) } @@ -1328,7 +1329,7 @@ STOP_if_stanmvreg <- function(what) { # @param what A character string naming the function not yet implemented STOP_if_stansurv <- function(what) { msg <- "not yet implemented for stansurv objects." - if (!missing(what)) + if (!missing(what)) msg <- paste(what, msg) stop2(msg) } @@ -1338,18 +1339,18 @@ STOP_if_stansurv <- function(what) { # @param what An optional message to prepend to the default message. STOP_stan_mvmer <- function(what) { msg <- "is not yet implemented for models fit using stan_mvmer." - if (!missing(what)) + if (!missing(what)) msg <- paste(what, msg) stop2(msg) } -# Consistent error message to use when something that is only available for +# Consistent error message to use when something that is only available for # models fit using stan_jm # # @param what An optional message to prepend to the default message. STOP_jm_only <- function(what) { msg <- "can only be used with stan_jm models." - if (!missing(what)) + if (!missing(what)) msg <- paste(what, msg) stop2(msg) } @@ -1443,7 +1444,7 @@ validate_newdatas <- function(object, newdataLong = NULL, newdataEvent = NULL, } else { # newdataLong only needs the covariates fmL <- formula(object, m = m)[c(1,3)] } - all(!is.na(get_all_vars(fmL, newdataLong[[m]]))) + all(!is.na(get_all_vars(fmL, newdataLong[[m]]))) }) if (!all(nacheck)) stop("'newdataLong' cannot contain NAs.", call. = FALSE) @@ -1467,15 +1468,15 @@ validate_newdatas <- function(object, newdataLong = NULL, newdataEvent = NULL, newdatas <- c(newdatas, list(Event = newdataEvent)) } if (length(newdatas)) { - idvar_check <- sapply(newdatas, function(x) id_var %in% colnames(x)) - if (!all(idvar_check)) + idvar_check <- sapply(newdatas, function(x) id_var %in% colnames(x)) + if (!all(idvar_check)) STOP_no_var(id_var) ids <- lapply(newdatas, function(x) unique(x[[id_var]])) sorted_ids <- lapply(ids, sort) - if (!length(unique(sorted_ids)) == 1L) + if (!length(unique(sorted_ids)) == 1L) stop("The same subject ids should appear in each new data frame.") - if (!length(unique(ids)) == 1L) - stop("The subject ids should be ordered the same in each new data frame.") + if (!length(unique(ids)) == 1L) + stop("The subject ids should be ordered the same in each new data frame.") return(newdatas) } else return(NULL) } @@ -1487,32 +1488,32 @@ validate_newdatas <- function(object, newdataLong = NULL, newdataEvent = NULL, # @param id_var Character string, the name of the ID variable # @return A data frame, or a list of data frames, depending on the input subset_ids <- function(data, ids, id_var) { - + if (is.null(data)) return(NULL) - + is_list <- is(data, "list") - if (!is_list) + if (!is_list) data <- list(data) # convert to list - + is_df <- sapply(data, inherits, "data.frame") - if (!all(is_df)) + if (!all(is_df)) stop("'data' should be a data frame, or list of data frames.") - + data <- lapply(data, function(x) { if (!id_var %in% colnames(x)) STOP_no_var(id_var) sel <- which(!ids %in% x[[id_var]]) - if (length(sel)) - stop("The following 'ids' do not appear in the data: ", + if (length(sel)) + stop("The following 'ids' do not appear in the data: ", paste(ids[[sel]], collapse = ", ")) x[x[[id_var]] %in% ids, , drop = FALSE] }) - + if (is_list) return(data) else return(data[[1]]) } # Return a data.table with a key set using the appropriate id/time/grp variables -# +# # @param data A data frame. # @param id_var The name of the ID variable. # @param grp_var The name of the variable identifying groups clustered within @@ -1525,7 +1526,7 @@ prepare_data_table <- function(data, id_var, time_var, grp_var = NULL) { stop("the 'data.table' package must be installed to use this function") if (!is.data.frame(data)) stop("'data' should be a data frame.") - + # check required vars are in the data if (!id_var %in% colnames(data)) STOP_no_var(id_var) @@ -1533,12 +1534,12 @@ prepare_data_table <- function(data, id_var, time_var, grp_var = NULL) { STOP_no_var(time_var) if (!is.null(grp_var) && (!grp_var %in% colnames(data))) STOP_no_var(grp_var) - + # define and set the key for the data.table - key_vars <- if (!is.null(grp_var)) + key_vars <- if (!is.null(grp_var)) c(id_var, grp_var, time_var) else c(id_var, time_var) dt <- data.table::data.table(data, key = key_vars) - + dt[[time_var]] <- as.numeric(dt[[time_var]]) # ensures no rounding on merge dt[[id_var]] <- factor(dt[[id_var]]) # ensures matching of ids if (!is.null(grp_var)) @@ -1554,12 +1555,12 @@ prepare_data_table <- function(data, id_var, time_var, grp_var = NULL) { # @param times A vector of times to (rolling) merge against. # @param grps An optional vector of groups clustered within patients to # merge against. Only relevant when there is clustering within patient ids. -# @return A data.table formed by a merge of ids, (grps), times, and the closest +# @return A data.table formed by a merge of ids, (grps), times, and the closest # preceding (in terms of times) rows in data. rolling_merge <- function(data, ids, times, grps = NULL) { if (!requireNamespace("data.table")) stop("the 'data.table' package must be installed to use this function") - + # check data.table is keyed key_length <- length(data.table::key(data)) val_length <- if (is.null(grps)) 2L else 3L @@ -1567,19 +1568,19 @@ rolling_merge <- function(data, ids, times, grps = NULL) { stop2("Bug found: data.table should have a key.") if (!key_length == val_length) stop2("Bug found: data.table key is not the same length as supplied keylist.") - + # ensure data types are same as returned by the prepare_data_table function ids <- factor(ids) # ensures matching of ids times <- as.numeric(times) # ensures no rounding on merge - + # carry out the rolling merge against the specified times if (is.null(grps)) { tmp <- data.table::data.table(ids, times) - val <- data[tmp, roll = TRUE, rollends = c(TRUE, TRUE)] + val <- data[tmp, roll = TRUE, rollends = c(TRUE, TRUE)] } else { grps <- factor(grps) tmp <- data.table::data.table(ids, grps, times) - val <- data[tmp, roll = TRUE, rollends = c(TRUE, TRUE)] + val <- data[tmp, roll = TRUE, rollends = c(TRUE, TRUE)] } val } @@ -1590,8 +1591,8 @@ rolling_merge <- function(data, ids, times, grps = NULL) { # which to predict the outcome for each individual # @param t0,t1 Numeric vectors giving the start and end times across which to # generate prediction times -# @param simplify Logical specifying whether to return each increment as a -# column of an array (TRUE) or as an element of a list (FALSE) +# @param simplify Logical specifying whether to return each increment as a +# column of an array (TRUE) or as an element of a list (FALSE) get_time_seq <- function(increments, t0, t1, simplify = TRUE) { val <- sapply(0:(increments - 1), function(x, t0, t1) { t0 + (t1 - t0) * (x / (increments - 1)) @@ -1599,35 +1600,36 @@ get_time_seq <- function(increments, t0, t1, simplify = TRUE) { if (simplify && is.vector(val)) { # need to transform if there is only one individual val <- t(val) - rownames(val) <- if (!is.null(names(t0))) names(t0) else + rownames(val) <- if (!is.null(names(t0))) names(t0) else if (!is.null(names(t1))) names(t1) else NULL } return(val) } #' Extract parameters from stanmat and return as a list -#' +#' #' @param object A stanmvreg or stansurv object -#' @param stanmat A matrix of posterior draws, may be provided if the desired -#' stanmat is only a subset of the draws from as.matrix(object$stanfit) +#' @param ... Additional arguments passed to the method #' @return A named list #' @export -extract_pars <- function(object, ...) { +extract_pars <- function(object, ...) { UseMethod("extract_pars") } #' Extract parameters from stanmat and return as a list -#' +#' #' @param object A stanmvreg or stansurv object -#' @param stanmat A matrix of posterior draws, may be provided if the desired +#' @param stanmat A matrix of posterior draws, may be provided if the desired #' stanmat is only a subset of the draws from as.matrix(object$stanfit) +#' @param means Logical, if TRUE then the posterior means are returned +#' @param ... Additional arguments passed to the method #' @return A named list #' @export -extract_pars.stansurv <- function(object, stanmat = NULL, means = FALSE) { +extract_pars.stansurv <- function(object, stanmat = NULL, means = FALSE, ...) { validate_stansurv_object(object) - if (is.null(stanmat)) - stanmat <- as.matrix(object$stanfit) - if (means) + if (is.null(stanmat)) + stanmat <- as.matrix(object$stanfit) + if (means) stanmat <- t(colMeans(stanmat)) # return posterior means nms_beta <- colnames(object$x) nms_tve <- get_smooth_name(object$s_cpts, type = "smooth_coefs") @@ -1646,18 +1648,20 @@ extract_pars.stansurv <- function(object, stanmat = NULL, means = FALSE) { #' Extract parameters from stanmat and return as a list -#' +#' #' @param object A stanmvreg or stansurv object -#' @param stanmat A matrix of posterior draws, may be provided if the desired +#' @param stanmat A matrix of posterior draws, may be provided if the desired #' stanmat is only a subset of the draws from as.matrix(object$stanfit) +#' @param means Logical, if TRUE then the posterior means are returned +#' @param ... Additional arguments passed to the method #' @return A named list #' @export -extract_pars.stanmvreg <- function(object, stanmat = NULL, means = FALSE) { +extract_pars.stanmvreg <- function(object, stanmat = NULL, means = FALSE, ...) { validate_stanmvreg_object(object) M <- get_M(object) - if (is.null(stanmat)) + if (is.null(stanmat)) stanmat <- as.matrix(object$stanfit) - if (means) + if (means) stanmat <- t(colMeans(stanmat)) # return posterior means nms <- collect_nms(colnames(stanmat), M, stub = get_stub(object)) beta <- lapply(1:M, function(m) stanmat[, nms$y[[m]], drop = FALSE]) @@ -1693,7 +1697,7 @@ rmt <- function(mu, Sigma, df) { dmt <- function(x, mu, Sigma, df) { x_mu <- x - mu p <- length(x) - lgamma(0.5 * (df + p)) - lgamma(0.5 * df) - + lgamma(0.5 * (df + p)) - lgamma(0.5 * df) - 0.5 * p * log(df) - 0.5 * p * log(pi) - 0.5 * c(determinant(Sigma, logarithm = TRUE)$modulus) - 0.5 * (df + p) * log1p((x_mu %*% chol2inv(chol(Sigma)) %*% x_mu)[1] / df) @@ -1722,7 +1726,7 @@ transpose <- function(x) { # @param x A vector of group/factor IDs groups <- function(x) { if (!is.null(x)) { - as.integer(as.factor(x)) + as.integer(as.factor(x)) } else { x } @@ -1747,10 +1751,10 @@ drop_attributes <- function(x, ...) { # @param x The first object to use in the comparison # @param ... Any additional objects to include in the comparison # @param error If TRUE then return an error if all objects aren't -# equal with regard to the 'is.null' test. +# equal with regard to the 'is.null' test. # @return If error = TRUE, then an error if all objects aren't -# equal with regard to the 'is.null' test. Otherwise, a logical -# specifying whether all objects were equal with regard to the +# equal with regard to the 'is.null' test. Otherwise, a logical +# specifying whether all objects were equal with regard to the # 'is.null' test. supplied_together <- function(x, ..., error = FALSE) { dots <- list(...) @@ -1792,7 +1796,7 @@ check_for_intercept <- function(x, logical = FALSE) { } # Drop intercept from a vector/matrix/array of named coefficients -drop_intercept <- function(x) { +drop_intercept <- function(x) { sel <- check_for_intercept(x) if (length(sel) && is.matrix(x)) { x[, -sel, drop = FALSE] @@ -1820,10 +1824,10 @@ array_else_double <- function(x) # Return a matrix of uniform random variables or an empty matrix matrix_of_uniforms <- function(nrow = 0, ncol = 0) { if (nrow == 0 || ncol == 0) { - matrix(0,0,0) + matrix(0,0,0) } else { matrix(runif(nrow * ncol), nrow, ncol) - } + } } # If x is NULL then return an empty object of the specified 'type' @@ -1856,7 +1860,7 @@ convert_null <- function(x, type = c("double", "integer", "matrix", # of columns/rows # @param value The value to use for the padded cells # @return A matrix -pad_matrix <- function(x, cols = NULL, rows = NULL, +pad_matrix <- function(x, cols = NULL, rows = NULL, value = 0L) { nc <- ncol(x) nr <- nrow(x) @@ -1867,7 +1871,7 @@ pad_matrix <- function(x, cols = NULL, rows = NULL, } if (!is.null(rows) && nr < rows) { pad_mat <- matrix(value, rows - nr, nc) - x <- rbind(x, pad_mat) + x <- rbind(x, pad_mat) } x } @@ -1876,7 +1880,7 @@ pad_matrix <- function(x, cols = NULL, rows = NULL, # # @param x A numeric vector. # @param nq Integer specifying the number of quantiles. -# @return A vector of percentiles corresponding to percentages 100*k/m for +# @return A vector of percentiles corresponding to percentages 100*k/m for # k=1,2,...,nq-1. qtile <- function(x, nq = 2) { if (nq > 1) { @@ -1890,8 +1894,8 @@ qtile <- function(x, nq = 2) { } # Return the desired spline basis for the given knot locations -get_basis <- function(x, iknots, bknots = range(x), - degree = 3, intercept = FALSE, +get_basis <- function(x, iknots, bknots = range(x), + degree = 3, intercept = FALSE, type = c("bs", "is", "ms")) { type <- match.arg(type) if (type == "bs") { @@ -2011,9 +2015,9 @@ ulist <- function(...) { unlist(list(...)) } # Return the names for the group specific coefficients # -# @param cnms A named list with the names of the parameters nested within each +# @param cnms A named list with the names of the parameters nested within each # grouping factor. -# @param flevels A named list with the (unique) factor levels nested within each +# @param flevels A named list with the (unique) factor levels nested within each # grouping factor. # @return A character vector. get_ranef_name <- function(cnms, flevels) { @@ -2115,14 +2119,14 @@ get_assoc_name <- function(a_mod, assoc, ...) { if (a[evev,][[m]]) nms <- c(nms, p(stub, ev, ":", indx(evev, m), "|", ev)) if (a[evmv,][[m]]) nms <- c(nms, p(stub, ev, ":", indx(evmv, m), "|", mv)) if (a[es, ][[m]]) nms <- c(nms, p(stub, es )) - if (a[esd, ][[m]]) nms <- c(nms, p(stub, es, ":", cnms(esd, m) )) + if (a[esd, ][[m]]) nms <- c(nms, p(stub, es, ":", cnms(esd, m) )) if (a[ea, ][[m]]) nms <- c(nms, p(stub, ea )) if (a[mv, ][[m]]) nms <- c(nms, p(stub, mv )) - if (a[mvd, ][[m]]) nms <- c(nms, p(stub, mv, ":", cnms(mvd, m) )) + if (a[mvd, ][[m]]) nms <- c(nms, p(stub, mv, ":", cnms(mvd, m) )) if (a[mvev,][[m]]) nms <- c(nms, p(stub, mv, ":", indx(mvev, m), "|", ev)) if (a[mvmv,][[m]]) nms <- c(nms, p(stub, mv, ":", indx(mvmv, m), "|", mv)) if (a[ms, ][[m]]) nms <- c(nms, p(stub, ms )) - if (a[msd, ][[m]]) nms <- c(nms, p(stub, ms, ":", cnms(msd, m) )) + if (a[msd, ][[m]]) nms <- c(nms, p(stub, ms, ":", cnms(msd, m) )) if (a[ma, ][[m]]) nms <- c(nms, p(stub, ma )) } nms @@ -2143,7 +2147,7 @@ get_basehaz <- function(x) { # # @return A character string. get_basehaz_name <- function(x) { - if (is.character(x)) + if (is.character(x)) return(x) if (is.stansurv(x)) return(x$basehaz$type_name) @@ -2171,7 +2175,7 @@ ad <- function(x, ...) as.double (x, ...) am <- function(x, ...) as.matrix (x, ...) aa <- function(x, ...) as.array (x, ...) -# Sample rows from a two-dimensional object +# Sample rows from a two-dimensional object # # @param x The two-dimensional object (e.g. matrix, data frame, array). # @param size Integer specifying the number of rows to sample. @@ -2208,10 +2212,11 @@ sample_stanmat <- function(object, draws = NULL, default_draws = NA) { #' @param con A numeric vector. #' @param lower Scalar, the lower limit for the returned vector. #' @param upper Scalar, the upper limit for the returned vector. +#' @param ... Additional arguments passed to the method. #' #' @return A numeric vector. #' @export -truncate.numeric <- function(con, lower = NULL, upper = NULL) { +truncate.numeric <- function(con, lower = NULL, upper = NULL, ...) { if (!is.null(lower)) con[con < lower] <- lower if (!is.null(upper)) con[con > upper] <- upper con @@ -2248,7 +2253,7 @@ row_means <- function(x, na.rm = FALSE) { if (is.matrix(x)) { return(matrix(mns, ncol = 1)) } else if (is.array(x)) { - return(array(mns, dim = c(nrow(x), 1))) + return(array(mns, dim = c(nrow(x), 1))) } else if (is.data.frame(x)) { return(data.frame(mns)) } else { @@ -2262,7 +2267,7 @@ col_means <- function(x, na.rm = FALSE) { if (is.matrix(x)) { return(matrix(mns, nrow = 1)) } else if (is.array(x)) { - return(array(mns, dim = c(1, ncol(x)))) + return(array(mns, dim = c(1, ncol(x)))) } else { stop2("Cannot handle objects of class: ", class(x)) } @@ -2375,8 +2380,8 @@ mutate_ <- function(x, ...) mutate(x, ..., names_eval = TRUE, n = 5) # # @param x A data frame. # @param ... Character strings; names of the columns of 'x' on which to sort. -# @param skip Logical, if TRUE then any strings in the ...'s that are not -# present as variables in the data frame are ignored, rather than throwing +# @param skip Logical, if TRUE then any strings in the ...'s that are not +# present as variables in the data frame are ignored, rather than throwing # an error - somewhat dangerous, but convenient. # @return A data frame. row_sort <- function(x, ...) { @@ -2390,8 +2395,8 @@ row_sort <- function(x, ...) { # # @param x A data frame. # @param ... Character strings; the desired order of the columns of 'x' by name. -# @param skip Logical, if TRUE then any strings in the ...'s that are not -# present as variables in the data frame are ignored, rather than throwing +# @param skip Logical, if TRUE then any strings in the ...'s that are not +# present as variables in the data frame are ignored, rather than throwing # an error - somewhat dangerous, but convenient. # @return A data frame. col_sort <- function(x, ...) { diff --git a/R/pp_data.R b/R/pp_data.R index 36400c31c..f5b458ea2 100644 --- a/R/pp_data.R +++ b/R/pp_data.R @@ -1,17 +1,17 @@ # Part of the rstanarm package for estimating model parameters # Copyright 2015 Douglas Bates, Martin Maechler, Ben Bolker, Steve Walker # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University -# +# # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. -# +# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -# +# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. @@ -38,7 +38,7 @@ pp_data <- .pp_data(object, newdata = newdata, offset = offset, ...) } - + #------------- for models without lme4 structure ------------------------- .pp_data <- function(object, newdata = NULL, offset = NULL, ...) { @@ -46,7 +46,7 @@ pp_data <- requireNamespace("mgcv", quietly = TRUE) if (is.null(newdata)) x <- predict(object$jam, type = "lpmatrix") else x <- predict(object$jam, newdata = newdata, type = "lpmatrix") - if (is.null(offset)) + if (is.null(offset)) offset <- object$offset %ORifNULL% rep(0, nrow(x)) return(nlist(x, offset)) } @@ -58,27 +58,27 @@ pp_data <- if (inherits(object, "betareg")) { return(nlist(x, offset, z_betareg = object$z)) } - + return(nlist(x, offset)) } offset <- .pp_data_offset(object, newdata, offset) Terms <- delete.response(terms(object)) m <- model.frame(Terms, newdata, xlev = object$xlevels) - if (!is.null(cl <- attr(Terms, "dataClasses"))) + if (!is.null(cl <- attr(Terms, "dataClasses"))) .checkMFClasses(cl, m) x <- model.matrix(Terms, m, contrasts.arg = object$contrasts) - if (is(object, "polr") && !is_scobit(object)) + if (is(object, "polr") && !is_scobit(object)) x <- x[,colnames(x) != "(Intercept)", drop = FALSE] - + if (inherits(object, "betareg")) { - mf <- model.frame(delete.response(object$terms$precision), - data = newdata, na.action = object$na.action, + mf <- model.frame(delete.response(object$terms$precision), + data = newdata, na.action = object$na.action, xlev = object$levels$precision) z_betareg <- model.matrix(object$terms$precision, mf, contrasts = object$contrasts$precision) return(nlist(x, offset, z_betareg)) } - + return(nlist(x, offset)) } @@ -116,13 +116,13 @@ pp_data <- return(nlist(x, offset = offset, Zt = z$Zt, Z_names = z$Z_names)) } -# the functions below are heavily based on a combination of -# lme4:::predict.merMod and lme4:::mkNewReTrms, although they do also have +# the functions below are heavily based on a combination of +# lme4:::predict.merMod and lme4:::mkNewReTrms, although they do also have # substantial modifications .pp_data_mer_x <- function(object, newdata, m = NULL, ...) { x <- get_x(object, m = m) if (is.null(newdata)) return(x) - form <- if (is.null(m)) attr(object$glmod$fr, "formula") else + form <- if (is.null(m)) attr(object$glmod$fr, "formula") else formula(object, m = m) L <- length(form) form[[L]] <- lme4::nobars(form[[L]]) @@ -134,7 +134,7 @@ pp_data <- mf <- mf[vars] isFac <- vapply(mf, is.factor, FUN.VALUE = TRUE) isFac[attr(Terms, "response")] <- FALSE - orig_levs <- if (length(isFac) == 0) + orig_levs <- if (length(isFac) == 0) NULL else lapply(mf[isFac], levels) mfnew <- model.frame(delete.response(Terms), newdata, xlev = orig_levs) x <- model.matrix(RHS, data = mfnew, contrasts.arg = attr(x, "contrasts")) @@ -142,22 +142,22 @@ pp_data <- } .pp_data_mer_z <- function(object, newdata, re.form = NULL, - allow.new.levels = TRUE, na.action = na.pass, + allow.new.levels = TRUE, na.action = na.pass, m = NULL, ...) { NAcheck <- !is.null(re.form) && !is(re.form, "formula") && is.na(re.form) - fmla0check <- (is(re.form, "formula") && - length(re.form) == 2 && + fmla0check <- (is(re.form, "formula") && + length(re.form) == 2 && identical(re.form[[2]], 0)) if (NAcheck || fmla0check) return(list()) if (is.null(newdata) && is.null(re.form)) { - Z <- get_z(object, m = m) + Z <- get_z(object, m = m) if (!is.stanmvreg(object)) { # Z_names not needed for stanreg with no newdata return(list(Zt = t(Z))) } else { # must supply Z_names for stanmvreg since b pars # might be for multiple submodels and Zt will only - # be for one submodel, so their elements may not + # be for one submodel, so their elements may not # correspond exactly ReTrms <- object$glmod[[m]]$reTrms Z_names <- make_b_nms(ReTrms, m = m, stub = get_stub(object)) @@ -166,7 +166,7 @@ pp_data <- } else if (is.null(newdata)) { rfd <- mfnew <- model.frame(object, m = m) - } + } else if (inherits(object, "gamm4")) { requireNamespace("mgcv", quietly = TRUE) if (is.null(newdata)) x <- predict(object$jam, type = "lpmatrix") @@ -186,7 +186,7 @@ pp_data <- if (!is.null(fixed.na.action)) attr(rfd,"na.action") <- fixed.na.action } - if (is.null(re.form)) + if (is.null(re.form)) re.form <- justRE(formula(object, m = m)) if (!inherits(re.form, "formula")) stop("'re.form' must be NULL, NA, or a formula.") @@ -242,7 +242,7 @@ pp_data <- offset <- .pp_data_offset(object, newdata, offset) group <- with(nlf$reTrms, pad_reTrms(Ztlist, cnms, flist)) - if (!is.null(re.form) && !is(re.form, "formula") && is.na(re.form)) + if (!is.null(re.form) && !is(re.form, "formula") && is.na(re.form)) group$Z@x <- 0 return(nlist(x = nlf$X, offset = offset, Z = group$Z, Z_names = make_b_nms(group), arg1, arg2)) @@ -251,7 +251,7 @@ pp_data <- #------------------ for models fit using stan_surv ----------------------- -.pp_data_surv <- function(object, +.pp_data_surv <- function(object, newdata = NULL, times = NULL, at_quadpoints = FALSE, @@ -259,44 +259,44 @@ pp_data <- formula <- object$formula basehaz <- object$basehaz - + # data with row subsetting etc if (is.null(newdata)) newdata <- get_model_data(object) - + # flags has_tve <- object$has_tve has_quadrature <- object$has_quadrature has_bars <- object$has_bars - + #----- dimensions and times - + if (has_quadrature && at_quadpoints) { - + if (is.null(times)) stop("Bug found: 'times' must be specified.") - + # error check time variables if (!length(times) == nrow(newdata)) stop("Bug found: length of 'times' should equal number rows in the data.") # number of nodes qnodes <- object$qnodes - + # standardised weights and nodes for quadrature qq <- get_quadpoints(nodes = qnodes) qp <- qq$points qw <- qq$weights - + # quadrature points & weights, evaluated for each row of data pts <- uapply(qp, unstandardise_qpts, 0, times) wts <- uapply(qw, unstandardise_qwts, 0, times) - + # id vector for quadrature points ids <- rep(seq_along(times), times = qnodes) - + } else { # predictions don't require quadrature - + pts <- times wts <- rep(NA, length(times)) ids <- seq_along(times) @@ -304,38 +304,38 @@ pp_data <- } #----- time-fixed predictor matrix - + # check all vars are in newdata vars <- all.vars(delete.response(terms(object, fixed.only = FALSE))) miss <- which(!vars %in% colnames(newdata)) if (length(miss)) stop2("The following variables are missing from the data: ", comma(vars[miss])) - + # drop response from fixed effect formula tt <- delete.response(terms(object, fixed.only = TRUE)) - + # make model frame based on time-fixed part of model formula mf <- make_model_frame(tt, newdata, xlevs = object$xlevs)$mf - + # if using quadrature then expand rows of time-fixed predictor matrix if (has_quadrature && at_quadpoints) mf <- rep_rows(mf, times = qnodes) - + # check data classes in the model frame match those used in model fitting - if (!is.null(cl <- attr(tt, "dataClasses"))) + if (!is.null(cl <- attr(tt, "dataClasses"))) .checkMFClasses(cl, mf) # check model frame dimensions are correct (may be errors due to NAs?) if (!length(pts) == nrow(mf)) stop("Bug found: length of 'pts' should equal number rows in model frame.") - + # construct time-fixed predictor matrix x <- make_x(tt, mf, check_constant = FALSE)$x - + #----- time-varying predictor matrix - + if (has_tve) { - + if (all(is.na(pts))) { # temporary replacement to avoid error in creating spline basis pts_tmp <- rep(0, length(pts)) @@ -343,17 +343,17 @@ pp_data <- # else use prediction times or quadrature points pts_tmp <- pts } - + # generate a model frame with time transformations for tve effects mf_s <- make_model_frame(formula$tt_frame, data.frame(times__ = pts_tmp))$mf - + # check model frame dimensions are correct if (!length(pts) == nrow(mf_s)) stop("Bug found: length of 'pts' should equal number rows in model frame.") - + # NB next line avoids dropping terms attribute from 'mf' mf[, colnames(mf_s)] <- mf_s - + # construct time-varying predictor matrix s <- make_s(formula, mf) @@ -361,43 +361,43 @@ pp_data <- # if pts were all NA then replace the time-varying predictor # matrix with all NA, but retain appropriate dimensions s[] <- NaN - } - + } + } else { - + s <- matrix(0, nrow(mf), 0) - + } - + #----- random effects predictor matrix - + if (has_bars) { # drop response from random effects part of model formula tt_z <- delete.response(terms(object, random.only = TRUE)) - + # make model frame based on random effects part of model formula - mf_z <- make_model_frame(formula = tt_z, - data = newdata, - xlevs = object$xlevs, + mf_z <- make_model_frame(formula = tt_z, + data = newdata, + xlevs = object$xlevs, na.action = na.pass)$mf - + # if using quadrature then expand rows if (has_quadrature && at_quadpoints) mf_z <- rep_rows(mf_z, times = qnodes) - + # check model frame dimensions are correct if (!length(pts) == nrow(mf_z)) stop("Bug found: length of 'pts' should equal number rows in model frame.") - + # construct random effects predictor matrix ReTrms <- lme4::mkReTrms(formula$bars, mf_z) z <- nlist(Zt = ReTrms$Zt, Z_names = make_b_nms(ReTrms)) - + } else { - + z <- list() - + } # return object @@ -421,96 +421,96 @@ pp_data <- # log-likelihood in post-estimation functions for a \code{stan_jm} model # # @param object A stanmvreg object -# @param newdataLong A data frame or list of data frames with the new +# @param newdataLong A data frame or list of data frames with the new # covariate data for the longitudinal submodel # @param newdataEvent A data frame with the new covariate data for the # event submodel # @param ids An optional vector of subject IDs specifying which individuals # should be included in the returned design matrices. # @param etimes An optional vector of times at which the event submodel -# design matrices should be evaluated (also used to determine the +# design matrices should be evaluated (also used to determine the # quadrature times). If NULL then times are taken to be the eventimes in # the fitted object (if newdataEvent is NULL) or in newdataEvent. # @param long_parts,event_parts A logical specifying whether to return the # design matrices for the longitudinal and/or event submodels. # @param response Logical specifying whether the newdataLong requires the # response variable. -# @return A named list (with components M, Npat, ndL, ndE, yX, tZt, -# yZnames, eXq, assoc_parts) -.pp_data_jm <- function(object, - newdataLong = NULL, - newdataEvent = NULL, - ids = NULL, - etimes = NULL, - long_parts = TRUE, - event_parts = TRUE, +# @return A named list (with components M, Npat, ndL, ndE, yX, tZt, +# yZnames, eXq, assoc_parts) +.pp_data_jm <- function(object, + newdataLong = NULL, + newdataEvent = NULL, + ids = NULL, + etimes = NULL, + long_parts = TRUE, + event_parts = TRUE, response = TRUE, needs_time_var = TRUE) { - + M <- get_M(object) - + id_var <- object$id_var time_var <- object$time_var - + if (!is.null(newdataLong) || !is.null(newdataEvent)) - newdatas <- validate_newdatas(object, - newdataLong, - newdataEvent, + newdatas <- validate_newdatas(object, + newdataLong, + newdataEvent, response = response, needs_time_var = needs_time_var) - + # prediction data for longitudinal submodels - ndL <- if (is.null(newdataLong)) + ndL <- if (is.null(newdataLong)) get_model_data(object)[1:M] else newdatas[1:M] - + # prediction data for event submodel - ndE <- if (is.null(newdataEvent)) - get_model_data(object)[["Event"]] else newdatas[["Event"]] - + ndE <- if (is.null(newdataEvent)) + get_model_data(object)[["Event"]] else newdatas[["Event"]] + # possibly subset if (!is.null(ids)) { ndL <- subset_ids(ndL, ids, id_var) ndE <- subset_ids(ndE, ids, id_var) } id_list <- unique(ndE[[id_var]]) # unique subject id list - + # evaluate the last known survival time and status if (!is.null(newdataEvent) && is.null(etimes)) { - # prediction data for the event submodel was provided but + # prediction data for the event submodel was provided but # no event times were explicitly specified by the user, so # they must be evaluated using the data frame surv <- eval(formula(object, m = "Event")[[2L]], ndE) etimes <- unclass(surv)[,"time"] - estatus <- unclass(surv)[,"status"] + estatus <- unclass(surv)[,"status"] } else if (is.null(etimes)) { - # if no prediction data was provided then event times are + # if no prediction data was provided then event times are # taken from the fitted model etimes <- object$eventtime[as.character(id_list)] estatus <- object$status[as.character(id_list)] - } else { - # otherwise, event times ('etimes') are only directly specified for dynamic - # predictions via posterior_survfit in which case the 'etimes' correspond + } else { + # otherwise, event times ('etimes') are only directly specified for dynamic + # predictions via posterior_survfit in which case the 'etimes' correspond # to the last known survival time and therefore we assume everyone has survived - # up to that point (ie, set estatus = 0 for all individuals), this is true + # up to that point (ie, set estatus = 0 for all individuals), this is true # even if there is an event indicated in the data supplied by the user. estatus <- rep(0, length(etimes)) } res <- nlist(M, Npat = length(id_list), ndL, ndE) - + if (long_parts && event_parts) lapply(ndL, function(x) { if (!time_var %in% colnames(x)) STOP_no_var(time_var) - if (!id_var %in% colnames(x)) + if (!id_var %in% colnames(x)) STOP_no_var(id_var) if (any(x[[time_var]] < 0)) stop2("Values for the time variable (", time_var, ") should not be negative.") mt <- tapply(x[[time_var]], factor(x[[id_var]]), max) if (any(mt > etimes)) stop2("There appears to be observation times in the longitudinal data that ", - "are later than the event time specified in the 'etimes' argument.") - }) - + "are later than the event time specified in the 'etimes' argument.") + }) + # response and design matrices for longitudinal submodels if (long_parts) { y <- lapply(1:M, function(m) eval(formula(object, m = m)[[2L]], ndL[[m]])) @@ -522,7 +522,7 @@ pp_data <- flist <- lapply(ndL, function(x) factor(x[[id_var]])) res <- c(res, nlist(y, yX, yZt, yZ_names, yOffset, flist)) } - + # design matrices for event submodel and association structure if (event_parts) { qnodes <- object$qnodes @@ -542,13 +542,13 @@ pp_data <- grp_stuff <- object$grp_stuff[[m]] if (grp_stuff$has_grp) { grp_stuff <- get_extra_grp_info( # update grp_info with new data - grp_stuff, flist = ymf, id_var = id_var, + grp_stuff, flist = ymf, id_var = id_var, grp_assoc = grp_stuff$grp_assoc) } ymf <- prepare_data_table(ymf, id_var = id_var, time_var = time_var, grp_var = grp_stuff$grp_var) make_assoc_parts( - ymf, assoc = object$assoc[,m], id_var = id_var, time_var = time_var, + ymf, assoc = object$assoc[,m], id_var = id_var, time_var = time_var, ids = id_rep, times = times, grp_stuff = grp_stuff, use_function = pp_data, object = object, m = m) }) @@ -556,7 +556,7 @@ pp_data <- assoc_parts <- do.call("structure", assoc_attr) res <- c(res, nlist(eXq, assoc_parts)) } - + return(res) } @@ -564,20 +564,19 @@ pp_data <- #' (1) only includes variables used in the model formula #' (2) only includes rows contained in the glmod/coxmod model frames #' (3) ensures that additional variables that are required -#' such as the ID variable or variables used in the +#' such as the ID variable or variables used in the #' interaction-type association structures, are included. #' -#' It is necessary to drop unneeded variables though so that -#' errors are not encountered if the original data contained +#' It is necessary to drop unneeded variables though so that +#' errors are not encountered if the original data contained #' NA values for variables unrelated to the model formula. -#' We generate a data frame here for in-sample predictions +#' We generate a data frame here for in-sample predictions #' rather than using a model frame, since some quantities will #' need to be recalculated at quadrature points etc, for example #' in posterior_survfit. #' #' @param object A stansurv, stanmvreg or stanjm object. -#' @param m Integer specifying which submodel to get the -#' prediction data frame for (for stanmvreg or stanjm objects). +#' @param ... Additional arguments passed to the method #' @return A data frame or list of data frames with all the #' (unevaluated) variables required for predictions. #' @export @@ -587,18 +586,19 @@ get_model_data <- function(object, ...) UseMethod("get_model_data") #' (1) only includes variables used in the model formula #' (2) only includes rows contained in the glmod/coxmod model frames #' (3) ensures that additional variables that are required -#' such as the ID variable or variables used in the +#' such as the ID variable or variables used in the #' interaction-type association structures, are included. #' -#' It is necessary to drop unneeded variables though so that -#' errors are not encountered if the original data contained +#' It is necessary to drop unneeded variables though so that +#' errors are not encountered if the original data contained #' NA values for variables unrelated to the model formula. -#' We generate a data frame here for in-sample predictions +#' We generate a data frame here for in-sample predictions #' rather than using a model frame, since some quantities will #' need to be recalculated at quadrature points etc, for example #' in posterior_survfit. #' #' @param object A stansurv object. +#' @param ... Additional arguments passed to the method #' @return A data frame or list of data frames with all the #' (unevaluated) variables required for predictions. #' @export @@ -613,47 +613,48 @@ get_model_data.stansurv <- function(object, ...) { #' (1) only includes variables used in the model formula #' (2) only includes rows contained in the glmod/coxmod model frames #' (3) ensures that additional variables that are required -#' such as the ID variable or variables used in the +#' such as the ID variable or variables used in the #' interaction-type association structures, are included. #' -#' It is necessary to drop unneeded variables though so that -#' errors are not encountered if the original data contained +#' It is necessary to drop unneeded variables though so that +#' errors are not encountered if the original data contained #' NA values for variables unrelated to the model formula. -#' We generate a data frame here for in-sample predictions +#' We generate a data frame here for in-sample predictions #' rather than using a model frame, since some quantities will #' need to be recalculated at quadrature points etc, for example #' in posterior_survfit. #' #' @param object A stanmvreg. +#' @param ... Additional arguments passed to the method #' @param m Integer specifying which submodel to get the #' prediction data frame for (for stanmvreg or stanjm objects). #' @return A data frame or list of data frames with all the #' (unevaluated) variables required for predictions. #' @export get_model_data.stanmvreg <- function(object, m = NULL, ...) { - + validate_stanmvreg_object(object) M <- get_M(object) terms <- terms(object, fixed.only = FALSE) - + # identify variables to add to the terms objects if (is.jm(object)) { extra_vars <- lapply(1:M, function(m) { - # for each submodel loop over the four possible assoc + # for each submodel loop over the four possible assoc # interaction formulas and collect any variables used forms_m <- object$assoc["which_formulas",][[m]] uapply(forms_m, function(x) { if (length(x)) { - rownames(attr(terms.formula(x), "factors")) + rownames(attr(terms.formula(x), "factors")) } else NULL }) }) # also ensure that id_var is in the event data extra_vars$Event <- object$id_var - + if (!identical(length(terms), length(extra_vars))) stop2("Bug found: terms and extra_vars should be same length.") - + # add the extra variables to the terms formula for each submodel terms <- xapply(terms, extra_vars, FUN = function(x, y) { lhs <- x[[2L]] @@ -662,20 +663,20 @@ get_model_data.stanmvreg <- function(object, m = NULL, ...) { rhs <- c(rhs, y) reformulate(rhs, response = lhs) }) - + datas <- c(object$dataLong, list(object$dataEvent)) } else { datas <- object$data } - + # identify rows that were in the model frame row_nms <- lapply(model.frame(object), rownames) - + # drop rows and variables not required for predictions mfs <- xapply(w = terms, x = datas, y = row_nms, - FUN = function(w, x, y) + FUN = function(w, x, y) get_all_vars(w, x)[y, , drop = FALSE]) - + mfs <- list_nms(mfs, M, stub = get_stub(object)) if (is.null(m)) mfs else mfs[[m]] } @@ -690,7 +691,7 @@ null_or_zero <- function(x) { .pp_data_offset <- function(object, newdata = NULL, offset = NULL) { if (is.null(newdata)) { # get offset from model object (should be null if no offset) - if (is.null(offset)) + if (is.null(offset)) offset <- object$offset %ORifNULL% model.offset(model.frame(object)) } else { if (!is.null(offset)) @@ -698,12 +699,12 @@ null_or_zero <- function(x) { else { # if newdata specified but not offset then confirm that model wasn't fit # with an offset (warning, not error) - if (!is.null(object$call$offset) || - !null_or_zero(object$offset) || + if (!is.null(object$call$offset) || + !null_or_zero(object$offset) || !null_or_zero(model.offset(model.frame(object)))) { warning( - "'offset' argument is NULL but it looks like you estimated ", - "the model using an offset term.", + "'offset' argument is NULL but it looks like you estimated ", + "the model using an offset term.", call. = FALSE ) } diff --git a/R/predictive_error.R b/R/predictive_error.R index 6cada3ce9..b9e066b04 100644 --- a/R/predictive_error.R +++ b/R/predictive_error.R @@ -1,76 +1,76 @@ # Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University -# +# # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. -# +# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -# +# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #' In-sample or out-of-sample predictive errors -#' -#' This is a convenience function for computing \eqn{y - y^{rep}}{y - yrep} -#' (in-sample, for observed \eqn{y}) or \eqn{y - \tilde{y}}{y - ytilde} -#' (out-of-sample, for new or held-out \eqn{y}). The method for stanreg objects +#' +#' This is a convenience function for computing \eqn{y - y^{rep}}{y - yrep} +#' (in-sample, for observed \eqn{y}) or \eqn{y - \tilde{y}}{y - ytilde} +#' (out-of-sample, for new or held-out \eqn{y}). The method for stanreg objects #' calls \code{\link{posterior_predict}} internally, whereas the method for #' matrices accepts the matrix returned by \code{posterior_predict} as input and #' can be used to avoid multiple calls to \code{posterior_predict}. -#' +#' #' @aliases predictive_error #' @export -#' -#' @param object Either a fitted model object returned by one of the -#' \pkg{rstanarm} modeling functions (a \link[=stanreg-objects]{stanreg -#' object}) or, for the matrix method, a matrix of draws from the -#' posterior predictive distribution returned by +#' +#' @param object Either a fitted model object returned by one of the +#' \pkg{rstanarm} modeling functions (a \link[=stanreg-objects]{stanreg +#' object}) or, for the matrix method, a matrix of draws from the +#' posterior predictive distribution returned by #' \code{\link{posterior_predict}}. -#' @param newdata,draws,seed,offset,re.form Optional arguments passed to +#' @param newdata,draws,seed,offset,re.form Optional arguments passed to #' \code{\link{posterior_predict}}. For binomial models, please see the #' \strong{Note} section below if \code{newdata} will be specified. #' @template args-dots-ignored -#' -#' @return A \code{draws} by \code{nrow(newdata)} matrix. If \code{newdata} is +#' +#' @return A \code{draws} by \code{nrow(newdata)} matrix. If \code{newdata} is #' not specified then it will be \code{draws} by \code{nobs(object)}. -#' -#' @note The \strong{Note} section in \code{\link{posterior_predict}} about +#' +#' @note The \strong{Note} section in \code{\link{posterior_predict}} about #' \code{newdata} for binomial models also applies for #' \code{predictive_error}, with one important difference. For -#' \code{posterior_predict} if the left-hand side of the model formula is -#' \code{cbind(successes, failures)} then the particular values of -#' \code{successes} and \code{failures} in \code{newdata} don't matter, only +#' \code{posterior_predict} if the left-hand side of the model formula is +#' \code{cbind(successes, failures)} then the particular values of +#' \code{successes} and \code{failures} in \code{newdata} don't matter, only #' that they add to the desired number of trials. \strong{This is not the case #' for} \code{predictive_error}. For \code{predictive_error} the particular #' value of \code{successes} matters because it is used as \eqn{y} when #' computing the error. -#' +#' #' @seealso \code{\link[=posterior_predict.stanreg]{posterior_predict}} to draw #' from the posterior predictive distribution without computing predictive #' errors. -#' +#' #' @examples #' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { #' if (!exists("example_model")) example(example_model) #' err1 <- predictive_error(example_model, draws = 50) #' hist(err1) -#' +#' #' # Using newdata with a binomial model #' formula(example_model) #' nd <- data.frame( -#' size = c(10, 20), -#' incidence = c(5, 10), -#' period = factor(c(1,2)), +#' size = c(10, 20), +#' incidence = c(5, 10), +#' period = factor(c(1,2)), #' herd = c(1, 15) #' ) #' err2 <- predictive_error(example_model, newdata = nd, draws = 10, seed = 1234) -#' +#' #' # stanreg vs matrix methods #' fit <- stan_glm(mpg ~ wt, data = mtcars, iter = 300) #' preds <- posterior_predict(fit, seed = 123) @@ -93,14 +93,14 @@ predictive_error.stanreg <- stop("'predictive_error' is not currently available for stan_polr.") if ("y" %in% names(list(...))) stop("Argument 'y' should not be specified if 'object' is a stanreg object.") - + y <- if (is.null(newdata)) get_y(object) else eval(formula(object)[[2L]], newdata) - + fam <- family(object)$family if (is.binomial(fam) && NCOL(y) == 2) y <- y[, 1] - + ytilde <- posterior_predict( object, newdata = newdata, @@ -114,11 +114,11 @@ predictive_error.stanreg <- #' @rdname predictive_error.stanreg #' @export -#' @param y For the matrix method only, a vector of \eqn{y} values the -#' same length as the number of columns in the matrix used as \code{object}. -#' The method for stanreg objects takes \code{y} directly from the fitted +#' @param y For the matrix method only, a vector of \eqn{y} values the +#' same length as the number of columns in the matrix used as \code{object}. +#' The method for stanreg objects takes \code{y} directly from the fitted #' model object. -#' +#' predictive_error.matrix <- function(object, y, ...) { NextMethod("predictive_error") } @@ -135,12 +135,15 @@ predictive_error.ppd <- function(object, y, ...) { #' the prediction error. Can be an integer, or for \code{\link{stan_mvmer}} #' models it can be \code{"y1"}, \code{"y2"}, etc, or for \code{\link{stan_jm}} #' models it can be \code{"Event"}, \code{"Long1"}, \code{"Long2"}, etc. -#' @param t,u Only relevant for \code{\link{stan_jm}} models and when \code{m = "Event"}. +#' @param t,u Only relevant for \code{\link{stan_jm}} models and when \code{m = "Event"}. #' The argument \code{t} specifies the time up to which individuals must have survived #' as well as being the time up to which the longitudinal data in \code{newdata} -#' is available. The argument \code{u} specifies the time at which the +#' is available. The argument \code{u} specifies the time at which the #' prediction error should be calculated (i.e. the time horizon). -#' +#' @param newdataLong Data frame containing the longitudinal data +#' @param newdataEvent Data frame containing the event data +#' @param lossfn A character string specifying the loss function to use. +#' predictive_error.stanmvreg <- function(object, newdataLong = NULL, @@ -158,14 +161,14 @@ predictive_error.stanmvreg <- stop("Argument 'y' should not be specified if 'object' is a stanmvreg object.") if (!is.jm(object)) stop("This function is currently only implemented for stan_jm models.") - if (missing(t)) + if (missing(t)) t <- NULL if (missing(u)) u <- NULL M <- get_M(object) - + if (m == "Event") { # prediction error for event submodel - + if (!is.surv(object)) stop("No event submodel was found in the fitted object.") if (is.null(t) || is.null(u)) @@ -173,7 +176,7 @@ predictive_error.stanmvreg <- "prediction error for the event submodel.") if (u <= t) stop("'u' must be greater than 't'.") - + # Construct prediction data # ndL: dataLong to be used in predictions # ndE: dataEvent to be used in predictions @@ -186,9 +189,9 @@ predictive_error.stanmvreg <- } else { # user specified newdata newdatas <- validate_newdatas(object, newdataLong, newdataEvent) ndL <- newdatas[1:M] - ndE <- newdatas[["Event"]] + ndE <- newdatas[["Event"]] } - + # Subset prediction data to only include # observations prior to time t fm_LHS <- formula(object, m = "Event")[[2L]] @@ -198,7 +201,7 @@ predictive_error.stanmvreg <- ndL <- lapply(ndL, function(x) { sel <- which(x[[object$time_var]] > t) x[sel, , drop = FALSE] - }) + }) id_var <- object$id_var ids <- ndE[[id_var]] for (i in 1:length(ndL)) @@ -210,15 +213,15 @@ predictive_error.stanmvreg <- ndL <- lapply(ndL, function(x) { x[x[[id_var]] %in% ids, , drop = FALSE] }) - + # Observed y: event status at time u event_dvar <- as.character(fm_LHS[[length(fm_LHS)]]) y <- ndE[, c(id_var, event_tvar, event_dvar), drop = FALSE] # Predicted y: conditional survival probability at time u ytilde <- posterior_survfit( - object, - newdataLong = ndL, + object, + newdataLong = ndL, newdataEvent = ndE, times = u, last_time = t, @@ -234,28 +237,28 @@ predictive_error.stanmvreg <- loss <- switch(lossfn, square = function(x) {x*x}, absolute = function(x) {abs(x)}) - + y$dummy <- as.integer(y[[event_tvar]] > u) y$status <- as.integer(y[[event_dvar]]) - y$res <- + y$res <- y$dummy * loss(1 - y$survpred) + y$status * (1 - y$dummy) * loss(0 - y$survpred) + (1 - y$status) * (1 - y$dummy) * ( - y$survpred_eventtime * loss(1- y$survpred) + + y$survpred_eventtime * loss(1- y$survpred) + (1 - y$survpred_eventtime) + loss(0- y$survpred) ) return(list(PE = mean(y$res), N = nrow(y))) - + } else { # prediction error for longitudinal submodel - + y <- if (is.null(newdataLong)) - get_y(object, m = m) else + get_y(object, m = m) else eval(formula(object, m = m)[[2L]], newdataLong) - + fam <- family(object, m = m)$family if (is.binomial(fam) && NCOL(y) == 2) - y <- y[, 1] - + y <- y[, 1] + ytilde <- posterior_predict( object, m = m, @@ -265,7 +268,7 @@ predictive_error.stanmvreg <- seed = seed, re.form = re.form ) - + return(predictive_error(ytilde, y = y)) } } diff --git a/R/stan_gamm4.R b/R/stan_gamm4.R index 0583cb823..ba8c52ac1 100644 --- a/R/stan_gamm4.R +++ b/R/stan_gamm4.R @@ -1,27 +1,27 @@ # Part of the rstanarm package for estimating model parameters # Copyright (C) 2016 Simon N. Wood # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University -# +# # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. -# +# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -# +# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #' Bayesian generalized linear additive models with optional group-specific #' terms via Stan -#' +#' #' \if{html}{\figure{stanlogo.png}{options: width="25" alt="https://mc-stan.org/about/logo/"}} #' Bayesian inference for GAMMs with flexible priors. -#' +#' #' @export #' @templateVar fun stan_gamm4 #' @templateVar pkg gamm4 @@ -37,38 +37,38 @@ #' @template args-adapt_delta #' @template args-QR #' @template args-sparse -#' -#' @param formula,random,family,data,knots,drop.unused.levels Same as for +#' +#' @param formula,random,family,data,knots,drop.unused.levels Same as for #' \code{\link[gamm4]{gamm4}}. \emph{We strongly advise against #' omitting the \code{data} argument}. Unless \code{data} is specified (and is #' a data frame) many post-estimation functions (including \code{update}, #' \code{loo}, \code{kfold}) are not guaranteed to work properly. -#' @param subset,weights,na.action Same as \code{\link[stats]{glm}}, +#' @param subset,weights,na.action Same as \code{\link[stats]{glm}}, #' but rarely specified. -#' @param ... Further arguments passed to \code{\link[rstan:stanmodel-method-sampling]{sampling}} (e.g. +#' @param ... Further arguments passed to \code{\link[rstan:stanmodel-method-sampling]{sampling}} (e.g. #' \code{iter}, \code{chains}, \code{cores}, etc.) or to #' \code{\link[rstan:stanmodel-method-vb]{vb}} (if \code{algorithm} is \code{"meanfield"} or #' \code{"fullrank"}). #' @param prior_covariance Cannot be \code{NULL}; see \code{\link{decov}} for #' more information about the default arguments. #' -#' @details The \code{stan_gamm4} function is similar in syntax to -#' \code{\link[gamm4]{gamm4}} in the \pkg{gamm4} package. But rather than performing +#' @details The \code{stan_gamm4} function is similar in syntax to +#' \code{\link[gamm4]{gamm4}} in the \pkg{gamm4} package. But rather than performing #' (restricted) maximum likelihood estimation with the \pkg{lme4} package, -#' the \code{stan_gamm4} function utilizes MCMC to perform Bayesian -#' estimation. The Bayesian model adds priors on the common regression -#' coefficients (in the same way as \code{\link{stan_glm}}), priors on the +#' the \code{stan_gamm4} function utilizes MCMC to perform Bayesian +#' estimation. The Bayesian model adds priors on the common regression +#' coefficients (in the same way as \code{\link{stan_glm}}), priors on the #' standard deviations of the smooth terms, and a prior on the decomposition -#' of the covariance matrices of any group-specific parameters (as in +#' of the covariance matrices of any group-specific parameters (as in #' \code{\link{stan_glmer}}). Estimating these models via MCMC avoids #' the optimization issues that often crop up with GAMMs and provides better -#' estimates for the uncertainty in the parameter estimates. -#' +#' estimates for the uncertainty in the parameter estimates. +#' #' See \code{\link[gamm4]{gamm4}} for more information about the model #' specicification and \code{\link{priors}} for more information about the #' priors on the main coefficients. The \code{formula} should include at least #' one smooth term, which can be specified in any way that is supported by the -#' \code{\link[mgcv]{jagam}} function in the \pkg{mgcv} package. The +#' \code{\link[mgcv]{jagam}} function in the \pkg{mgcv} package. The #' \code{prior_smooth} argument should be used to specify a prior on the unknown #' standard deviations that govern how smooth the smooth function is. The #' \code{prior_covariance} argument can be used to specify the prior on the @@ -77,30 +77,30 @@ #' group-specific terms to implement the departure from linearity in the smooth #' terms, but that is not the case for \code{stan_gamm4} where the group-specific #' terms are exactly the same as in \code{\link{stan_glmer}}. -#' +#' #' The \code{plot_nonlinear} function creates a ggplot object with one facet for #' each smooth function specified in the call to \code{stan_gamm4} in the case -#' where all smooths are univariate. A subset of the smooth functions can be +#' where all smooths are univariate. A subset of the smooth functions can be #' specified using the \code{smooths} argument, which is necessary to plot a #' bivariate smooth or to exclude the bivariate smooth and plot the univariate -#' ones. In the bivariate case, a plot is produced using +#' ones. In the bivariate case, a plot is produced using #' \code{\link[ggplot2]{geom_contour}}. In the univariate case, the resulting -#' plot is conceptually similar to \code{\link[mgcv]{plot.gam}} except the -#' outer lines here demark the edges of posterior uncertainty intervals +#' plot is conceptually similar to \code{\link[mgcv]{plot.gam}} except the +#' outer lines here demark the edges of posterior uncertainty intervals #' (credible intervals) rather than confidence intervals and the inner line #' is the posterior median of the function rather than the function implied -#' by a point estimate. To change the colors used in the plot see +#' by a point estimate. To change the colors used in the plot see #' \code{\link[bayesplot:bayesplot-colors]{color_scheme_set}}. -#' -#' @references -#' Crainiceanu, C., Ruppert D., and Wand, M. (2005). Bayesian analysis for +#' +#' @references +#' Crainiceanu, C., Ruppert D., and Wand, M. (2005). Bayesian analysis for #' penalized spline regression using WinBUGS. \emph{Journal of Statistical -#' Software}. \strong{14}(14), 1--22. -#' \url{https://www.jstatsoft.org/article/view/v014i14} -#' +#' Software}. \strong{14}(14), 1--22. +#' \doi{10.18637/jss.v014.i14} +#' #' @seealso The vignette for \code{stan_glmer}, which also discusses #' \code{stan_gamm4}. \url{https://mc-stan.org/rstanarm/articles/} -#' +#' #' @examples #' if (.Platform$OS.type != "windows" || .Platform$r_arch != "i386") { #' # from example(gamm4, package = "gamm4"), prefixing gamm4() call with stan_ @@ -110,7 +110,7 @@ #' dat$fac <- fac <- as.factor(sample(1:20, 400, replace = TRUE)) #' dat$y <- dat$y + model.matrix(~ fac - 1) %*% rnorm(20) * .5 #' -#' br <- stan_gamm4(y ~ s(x0) + x1 + s(x2), data = dat, random = ~ (1 | fac), +#' br <- stan_gamm4(y ~ s(x0) + x1 + s(x2), data = dat, random = ~ (1 | fac), #' chains = 1, iter = 500) # for example speed #' print(br) #' plot_nonlinear(br) @@ -138,14 +138,14 @@ stan_gamm4 <- adapt_delta = NULL, QR = FALSE, sparse = FALSE) { - + data <- validate_data(data, if_missing = list()) family <- validate_family(family) - + if (length(mgcv::interpret.gam(formula)$smooth.spec) == 0) { stop("Formula must have at least one smooth term to use stan_gamm4.", call. = FALSE) } - + if (!is.null(random)) { fake.formula <- as.character(mgcv::interpret.gam(formula)$fake.formula) form <- paste(fake.formula[2], fake.formula[1], fake.formula[3], @@ -163,7 +163,7 @@ stan_gamm4 <- weights <- validate_weights(weights) glmod <- NULL } - + if (family$family == "binomial") { data$temp_y <- rep(1, NROW(data)) # work around jagam bug temp_formula <- update(formula, temp_y ~ .) @@ -171,13 +171,13 @@ stan_gamm4 <- file = tempfile(fileext = ".jags"), weights = NULL, na.action = na.action, offset = NULL, knots = knots, drop.unused.levels = drop.unused.levels, diagonalize = TRUE) - + if (!is.null(random)) { y <- data[, as.character(formula[2L])] } else { y <- eval(formula[[2L]], data) } - + if (binom_y_prop(y, family, weights)) { y1 <- as.integer(as.vector(y) * weights) y <- cbind(y1, y0 = weights - y1) @@ -209,20 +209,20 @@ stan_gamm4 <- if (any(sapply(S, length) > 1)) S <- unlist(S, recursive = FALSE) names(S) <- names(jd$pregam$sp) X <- X[,mark, drop = FALSE] - + for (s in seq_along(S)) { - # sometimes elements of S are lists themselves that need to be unpacked + # sometimes elements of S are lists themselves that need to be unpacked # before passing to stan_glm.fit (https://github.com/stan-dev/rstanarm/issues/362) if (is.list(S[[s]])) S[[s]] <- do.call(cbind, S[[s]]) } X <- c(list(X), S) - + if (is.null(prior)) prior <- list() if (is.null(prior_intercept)) prior_intercept <- list() if (is.null(prior_aux)) prior_aux <- list() if (is.null(prior_smooth)) prior_smooth <- list() - + if (is.null(random)) { group <- list() prior_covariance <- list() @@ -232,23 +232,23 @@ stan_gamm4 <- group$decov <- prior_covariance } algorithm <- match.arg(algorithm) - + stanfit <- stan_glm.fit(x = X, y = y, weights = weights, offset = offset, family = family, prior = prior, prior_intercept = prior_intercept, prior_aux = prior_aux, prior_smooth = prior_smooth, - prior_PD = prior_PD, algorithm = algorithm, + prior_PD = prior_PD, algorithm = algorithm, adapt_delta = adapt_delta, group = group, QR = QR, ...) if (algorithm != "optimizing" && !is(stanfit, "stanfit")) return(stanfit) if (family$family == "Beta regression") family$family <- "beta" X <- do.call(cbind, args = X) if (is.null(random)) Z <- Matrix::Matrix(nrow = NROW(y), ncol = 0, sparse = TRUE) else { - Z <- pad_reTrms(Ztlist = group$Ztlist, cnms = group$cnms, + Z <- pad_reTrms(Ztlist = group$Ztlist, cnms = group$cnms, flist = group$flist)$Z colnames(Z) <- b_names(names(stanfit), value = TRUE) } - XZ <- cbind(X, Z) + XZ <- cbind(X, Z) # make jam object with point estimates, see ?mgcv::sim2jam mat <- as.matrix(stanfit) @@ -263,8 +263,8 @@ stan_gamm4 <- XWX <- t(X) %*% (w * X) jd$pregam$edf <- rowSums(jd$pregam$Vp * t(XWX)) / jd$pregam$sig2 class(jd$pregam) <- c("jam", "gam") - fit <- nlist(stanfit, family, formula, offset, weights, - x = XZ, y = y, data, terms = jd$pregam$terms, + fit <- nlist(stanfit, family, formula, offset, weights, + x = XZ, y = y, data, terms = jd$pregam$terms, model = if (is.null(random)) jd$pregam$model else glmod$fr, call = match.call(expand.dots = TRUE), algorithm, glmod = glmod, @@ -283,25 +283,25 @@ stan_gamm4 <- #' include all smooth terms. #' @param prob For univarite smooths, a scalar between 0 and 1 governing the #' width of the uncertainty interval. -#' @param facet_args An optional named list of arguments passed to +#' @param facet_args An optional named list of arguments passed to #' \code{\link[ggplot2]{facet_wrap}} (other than the \code{facets} argument). -#' @param alpha,size For univariate smooths, passed to +#' @param alpha,size For univariate smooths, passed to #' \code{\link[ggplot2]{geom_ribbon}}. For bivariate smooths, \code{size/2} is #' passed to \code{\link[ggplot2]{geom_contour}}. -#' +#' #' @return \code{plot_nonlinear} returns a ggplot object. -#' +#' #' @importFrom ggplot2 aes_ aes_string facet_wrap ggplot geom_contour geom_line geom_ribbon labs scale_color_gradient2 -#' -plot_nonlinear <- function(x, smooths, ..., - prob = 0.9, facet_args = list(), +#' +plot_nonlinear <- function(x, smooths, ..., + prob = 0.9, facet_args = list(), alpha = 1, size = 0.75) { validate_stanreg_object(x) if (!is(x, "gamm4")) stop("Plot only available for models fit using the stan_gamm4 function.") on.exit(message("try plot(x$jam) instead")) scheme <- bayesplot::color_scheme_get() - + XZ <- x$x XZ <- XZ[,!grepl("_NEW_", colnames(XZ), fixed = TRUE)] labels <- sapply(x$jam$smooth, "[[", "label") @@ -309,15 +309,15 @@ plot_nonlinear <- function(x, smooths, ..., names(x$jam$smooth) <- labels names(xnames) <- labels fs <- sapply(x$jam$smooth, FUN = "inherits", what = "fs.interaction") - + if (!missing(smooths)) { found <- smooths %in% labels if (all(!found)) { - stop("All specified terms are invalid. Valid terms are: ", - paste(grep(",", labels, fixed = TRUE, value = TRUE, invert = TRUE), + stop("All specified terms are invalid. Valid terms are: ", + paste(grep(",", labels, fixed = TRUE, value = TRUE, invert = TRUE), collapse = ", ")) } else if (any(!found)) { - warning("The following specified terms were not found and ignored: ", + warning("The following specified terms were not found and ignored: ", paste(smooths[!found], collapse = ", ")) } labels <- smooths[found] @@ -325,10 +325,10 @@ plot_nonlinear <- function(x, smooths, ..., if (!is.matrix(xnames)) xnames <- xnames[found] } else smooths <- 1:length(labels) - + B <- as.matrix(x)[, colnames(XZ), drop = FALSE] original <- x$jam$model - + bivariate <- any(grepl(",", labels, fixed = TRUE)) if (bivariate && !any(fs)) { if (length(labels) > 1) { @@ -354,16 +354,16 @@ plot_nonlinear <- function(x, smooths, ..., xz <- XZ[, grepl(labels, colnames(XZ), fixed = TRUE), drop = FALSE] plot_data$z <- apply(linear_predictor.matrix(b, xz), 2, FUN = median) return( - ggplot(plot_data, aes_(x = ~x, y = ~y, z = ~z)) + - geom_contour(aes_string(color = "..level.."), size = size/2) + - labs(x = xnames[1], y = xnames[2]) + + ggplot(plot_data, aes_(x = ~x, y = ~y, z = ~z)) + + geom_contour(aes_string(color = "..level.."), size = size/2) + + labs(x = xnames[1], y = xnames[2]) + scale_color_gradient2(low = scheme[[1]], - mid = scheme[[3]], + mid = scheme[[3]], high = scheme[[6]]) + bayesplot::theme_default() ) } - + df_list <- lapply(x$jam$smooth[smooths], FUN = function(s) { incl <- s$first.para:s$last.para b <- B[, incl, drop = FALSE] @@ -408,21 +408,21 @@ plot_nonlinear <- function(x, smooths, ..., } }) plot_data <- do.call(rbind, df_list) - + facet_args[["facets"]] <- ~ term if (is.null(facet_args[["scales"]])) facet_args[["scales"]] <- "free" if (is.null(facet_args[["strip.position"]])) facet_args[["strip.position"]] <- "left" - on.exit(NULL) - ggplot(plot_data, aes_(x = ~ predictor)) + - geom_ribbon(aes_(ymin = ~ lower, ymax = ~ upper), + on.exit(NULL) + ggplot(plot_data, aes_(x = ~ predictor)) + + geom_ribbon(aes_(ymin = ~ lower, ymax = ~ upper), fill = scheme[[1]], color = scheme[[2]], - alpha = alpha, size = size) + - geom_line(aes_(y = ~ middle), color = scheme[[5]], - size = 0.75 * size, lineend = "round") + - labs(y = NULL) + - do.call(facet_wrap, facet_args) + + alpha = alpha, size = size) + + geom_line(aes_(y = ~ middle), color = scheme[[5]], + size = 0.75 * size, lineend = "round") + + labs(y = NULL) + + do.call(facet_wrap, facet_args) + bayesplot::theme_default() } diff --git a/R/stan_glm.fit.R b/R/stan_glm.fit.R index 282461dff..889802c84 100644 --- a/R/stan_glm.fit.R +++ b/R/stan_glm.fit.R @@ -1,16 +1,16 @@ # Part of the rstanarm package for estimating model parameters # Copyright (C) 2013, 2014, 2015, 2016, 2017 Trustees of Columbia University -# +# # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. -# +# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -# +# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. @@ -22,10 +22,10 @@ #' @param group A list, possibly of length zero (the default), but otherwise #' having the structure of that produced by \code{\link[lme4]{mkReTrms}} to #' indicate the group-specific part of the model. In addition, this list must -#' have elements for the \code{regularization}, \code{concentration} +#' have elements for the \code{regularization}, \code{concentration} #' \code{shape}, and \code{scale} components of a \code{\link{decov}} #' prior for the covariance matrices among the group-specific coefficients. -#' @param importance_resampling Logical scalar indicating whether to use +#' @param importance_resampling Logical scalar indicating whether to use #' importance resampling when approximating the posterior distribution with #' a multivariate normal around the posterior mode, which only applies #' when \code{algorithm} is \code{"optimizing"} but defaults to \code{TRUE} @@ -35,10 +35,10 @@ #' when \code{importance_resampling=TRUE}. #' @importFrom lme4 mkVarCorr #' @importFrom loo psis -stan_glm.fit <- - function(x, y, - weights = rep(1, NROW(y)), - offset = rep(0, NROW(y)), +stan_glm.fit <- + function(x, y, + weights = rep(1, NROW(y)), + offset = rep(0, NROW(y)), family = gaussian(), ..., prior = default_prior_coef(family), @@ -47,19 +47,19 @@ stan_glm.fit <- prior_smooth = exponential(autoscale = FALSE), prior_ops = NULL, group = list(), - prior_PD = FALSE, - algorithm = c("sampling", "optimizing", "meanfield", "fullrank"), + prior_PD = FALSE, + algorithm = c("sampling", "optimizing", "meanfield", "fullrank"), mean_PPD = algorithm != "optimizing" && !prior_PD, - adapt_delta = NULL, - QR = FALSE, + adapt_delta = NULL, + QR = FALSE, sparse = FALSE, importance_resampling = algorithm != "sampling", keep_every = algorithm != "sampling") { - - # prior_ops deprecated but make sure it still works until + + # prior_ops deprecated but make sure it still works until # removed in future release if (!is.null(prior_ops)) { - tmp <- .support_deprecated_prior_options(prior, prior_intercept, + tmp <- .support_deprecated_prior_options(prior, prior_intercept, prior_aux, prior_ops) prior <- tmp[["prior"]] prior_intercept <- tmp[["prior_intercept"]] @@ -77,18 +77,18 @@ stan_glm.fit <- supported_families_err[supported_families_err == "Beta regression"] <- "mgcv::betar" stop("'family' must be one of ", paste(supported_families_err, collapse = ", ")) } - + supported_links <- supported_glm_links(supported_families[fam]) link <- which(supported_links == family$link) - if (!length(link)) + if (!length(link)) stop("'link' must be one of ", paste(supported_links, collapse = ", ")) - + if (binom_y_prop(y, family, weights)) { stop("To specify 'y' as proportion of successes and 'weights' as ", "number of trials please use stan_glm rather than calling ", "stan_glm.fit directly.", call. = FALSE) } - + y <- validate_glm_outcome_support(y, family) trials <- NULL if (is.binomial(family$family) && NCOL(y) == 2L) { @@ -101,15 +101,15 @@ stan_glm.fit <- } # useless assignments to pass R CMD check - has_intercept <- + has_intercept <- prior_df <- prior_df_for_intercept <- prior_df_for_aux <- prior_df_for_smooth <- prior_dist <- prior_dist_for_intercept <- prior_dist_for_aux <- prior_dist_for_smooth <- prior_mean <- prior_mean_for_intercept <- prior_mean_for_aux <- prior_mean_for_smooth <- prior_scale <- prior_scale_for_intercept <- prior_scale_for_aux <- prior_scale_for_smooth <- - prior_autoscale <- prior_autoscale_for_intercept <- prior_autoscale_for_aux <- - prior_autoscale_for_smooth <- global_prior_scale <- global_prior_df <- slab_df <- + prior_autoscale <- prior_autoscale_for_intercept <- prior_autoscale_for_aux <- + prior_autoscale_for_smooth <- global_prior_scale <- global_prior_df <- slab_df <- slab_scale <- NULL - + if (is.list(x)) { x_stuff <- center_x(x[[1]], sparse) smooth_map <- unlist(lapply(1:(length(x) - 1L), FUN = function(j) { @@ -126,11 +126,11 @@ stan_glm.fit <- assign(i, x_stuff[[i]]) nvars <- ncol(xtemp) - ok_dists <- nlist("normal", student_t = "t", "cauchy", "hs", "hs_plus", + ok_dists <- nlist("normal", student_t = "t", "cauchy", "hs", "hs_plus", "laplace", "lasso", "product_normal") ok_intercept_dists <- ok_dists[1:3] ok_aux_dists <- c(ok_dists[1:3], exponential = "exponential") - + # prior distributions prior_stuff <- handle_glm_prior( prior, @@ -139,12 +139,12 @@ stan_glm.fit <- default_scale = 2.5, ok_dists = ok_dists ) - # prior_{dist, mean, scale, df, dist_name, autoscale}, + # prior_{dist, mean, scale, df, dist_name, autoscale}, # global_prior_df, global_prior_scale, slab_df, slab_scale for (i in names(prior_stuff)) assign(i, prior_stuff[[i]]) - if (isTRUE(is.list(prior_intercept)) && + if (isTRUE(is.list(prior_intercept)) && isTRUE(prior_intercept$default)) { m_y <- 0 if (family$family == "gaussian" && family$link == "identity") { @@ -163,7 +163,7 @@ stan_glm.fit <- names(prior_intercept_stuff) <- paste0(names(prior_intercept_stuff), "_for_intercept") for (i in names(prior_intercept_stuff)) assign(i, prior_intercept_stuff[[i]]) - + prior_aux_stuff <- handle_glm_prior( prior_aux, @@ -179,9 +179,9 @@ stan_glm.fit <- stop("'prior_aux' cannot be NULL if 'prior_PD' is TRUE.") prior_aux_stuff$prior_scale_for_aux <- Inf } - for (i in names(prior_aux_stuff)) + for (i in names(prior_aux_stuff)) assign(i, prior_aux_stuff[[i]]) - + if (ncol(S) > 0) { # prior_{dist, mean, scale, df, dist_name, autoscale}_for_smooth prior_smooth_stuff <- handle_glm_prior( @@ -191,7 +191,7 @@ stan_glm.fit <- link = NULL, ok_dists = ok_aux_dists ) - + names(prior_smooth_stuff) <- paste0(names(prior_smooth_stuff), "_for_smooth") if (is.null(prior_smooth)) { if (prior_PD) @@ -200,7 +200,7 @@ stan_glm.fit <- } for (i in names(prior_smooth_stuff)) assign(i, prior_smooth_stuff[[i]]) - + prior_scale_for_smooth <- array(prior_scale_for_smooth) } else { prior_dist_for_smooth <- 0L @@ -208,7 +208,7 @@ stan_glm.fit <- prior_scale_for_smooth <- array(NA_real_, dim = 0) prior_df_for_smooth <- array(NA_real_, dim = 0) } - + famname <- supported_families[fam] is_bernoulli <- is.binomial(famname) && all(y %in% 0:1) && is.null(trials) is_nb <- is.nb(famname) @@ -217,7 +217,7 @@ stan_glm.fit <- is_ig <- is.ig(famname) is_beta <- is.beta(famname) is_continuous <- is_gaussian || is_gamma || is_ig || is_beta - + # require intercept for certain family and link combinations if (!has_intercept) { linkname <- supported_links[link] @@ -225,36 +225,36 @@ stan_glm.fit <- is_gamma && linkname == "inverse" || is.binomial(famname) && linkname == "log" if (needs_intercept) - stop("To use this combination of family and link ", + stop("To use this combination of family and link ", "the model must have an intercept.") } - + # allow prior_PD even if no y variable if (is.null(y)) { if (!prior_PD) { stop("Outcome variable must be specified if 'prior_PD' is not TRUE.") } else { y <- fake_y_for_prior_PD(N = NROW(x), family = family) - if (is_gaussian && + if (is_gaussian && (prior_autoscale || prior_autoscale_for_intercept || prior_autoscale_for_aux)) { message("'y' not specified, will assume sd(y)=1 when calculating scaled prior(s). ") } } } - - + + if (is_gaussian) { ss <- sd(y) - if (prior_dist > 0L && prior_autoscale) + if (prior_dist > 0L && prior_autoscale) prior_scale <- ss * prior_scale - if (prior_dist_for_intercept > 0L && prior_autoscale_for_intercept) + if (prior_dist_for_intercept > 0L && prior_autoscale_for_intercept) prior_scale_for_intercept <- ss * prior_scale_for_intercept if (prior_dist_for_aux > 0L && prior_autoscale_for_aux) prior_scale_for_aux <- ss * prior_scale_for_aux } if (!QR && prior_dist > 0L && prior_autoscale) { min_prior_scale <- 1e-12 - prior_scale <- pmax(min_prior_scale, prior_scale / + prior_scale <- pmax(min_prior_scale, prior_scale / apply(xtemp, 2L, FUN = function(x) { num.categories <- length(unique(x)) x.scale <- 1 @@ -266,11 +266,11 @@ stan_glm.fit <- return(x.scale) })) } - prior_scale <- + prior_scale <- as.array(pmin(.Machine$double.xmax, prior_scale)) - prior_scale_for_intercept <- + prior_scale_for_intercept <- min(.Machine$double.xmax, prior_scale_for_intercept) - + if (QR) { if (ncol(xtemp) <= 1) stop("'QR' can only be specified when there are multiple predictors.") @@ -286,17 +286,17 @@ stan_glm.fit <- colnames(xtemp) <- cn xbar <- c(xbar %*% R_inv) } - + if (length(weights) > 0 && all(weights == 1)) weights <- double() if (length(offset) > 0 && all(offset == 0)) offset <- double() - + # create entries in the data block of the .stan file standata <- nlist( N = nrow(xtemp), K = ncol(xtemp), xbar = as.array(xbar), dense_X = !sparse, - family = stan_family_number(famname), + family = stan_family_number(famname), link, has_weights = length(weights) > 0, has_offset = length(offset) > 0, @@ -310,14 +310,14 @@ stan_glm.fit <- prior_dist_for_intercept, prior_scale_for_intercept = c(prior_scale_for_intercept), prior_mean_for_intercept = c(prior_mean_for_intercept), - prior_df_for_intercept = c(prior_df_for_intercept), + prior_df_for_intercept = c(prior_df_for_intercept), global_prior_df, global_prior_scale, slab_df, slab_scale, # for hs priors z_dim = 0, # betareg data link_phi = 0, betareg_z = array(0, dim = c(nrow(xtemp), 0)), has_intercept_z = 0, zbar = array(0, dim = c(0)), - prior_dist_z = 0, prior_mean_z = integer(), prior_scale_z = integer(), + prior_dist_z = 0, prior_mean_z = integer(), prior_scale_z = integer(), prior_df_z = integer(), global_prior_scale_z = 0, global_prior_df_z = 0, prior_dist_for_intercept_z = 0, prior_mean_for_intercept_z = 0, prior_scale_for_intercept_z = 0, prior_df_for_intercept_z = 0, @@ -334,7 +334,7 @@ stan_glm.fit <- # make a copy of user specification before modifying 'group' (used for keeping # track of priors) user_covariance <- if (!length(group)) NULL else group[["decov"]] - + if (length(group) && length(group$flist)) { if (length(group$strata)) { standata$clogit <- TRUE @@ -361,7 +361,7 @@ stan_glm.fit <- flist = group$flist) Z <- group$Z p <- sapply(group$cnms, FUN = length) - l <- sapply(attr(group$flist, "assign"), function(i) + l <- sapply(attr(group$flist, "assign"), function(i) nlevels(group$flist[[i]])) t <- length(l) b_nms <- make_b_nms(group) @@ -393,10 +393,10 @@ stan_glm.fit <- standata$shape <- as.array(maybe_broadcast(decov$shape, t)) standata$scale <- as.array(maybe_broadcast(decov$scale, t)) standata$len_concentration <- sum(p[p > 1]) - standata$concentration <- + standata$concentration <- as.array(maybe_broadcast(decov$concentration, sum(p[p > 1]))) standata$len_regularization <- sum(p > 1) - standata$regularization <- + standata$regularization <- as.array(maybe_broadcast(decov$regularization, sum(p > 1))) standata$special_case <- all(sapply(group$cnms, FUN = function(x) { length(x) == 1 && x == "(Intercept)" @@ -433,7 +433,7 @@ stan_glm.fit <- standata$input <- double() standata$Dose <- double() } - + if (!is_bernoulli) { if (sparse) { parts <- extract_sparse_parts(xtemp) @@ -467,8 +467,8 @@ stan_glm.fit <- standata$len_y <- length(y) stanfit <- stanmodels$continuous } else if (is.binomial(famname)) { - standata$prior_scale_for_aux <- - if (!length(group) || prior_scale_for_aux == Inf) + standata$prior_scale_for_aux <- + if (!length(group) || prior_scale_for_aux == Inf) 0 else prior_scale_for_aux standata$prior_mean_for_aux <- 0 standata$prior_df_for_aux <- 0 @@ -492,18 +492,18 @@ stan_glm.fit <- } else { standata$X0 <- array(xtemp[y0, , drop = FALSE], dim = c(1, sum(y0), ncol(xtemp))) standata$X1 <- array(xtemp[y1, , drop = FALSE], dim = c(1, sum(y1), ncol(xtemp))) - standata$nnz_X0 = 0L + standata$nnz_X0 = 0L standata$w_X0 = double(0) standata$v_X0 = integer(0) standata$u_X0 = integer(0) - standata$nnz_X1 = 0L + standata$nnz_X1 = 0L standata$w_X1 = double(0) standata$v_X1 = integer(0) standata$u_X1 = integer(0) } - if (length(weights)) { + if (length(weights)) { # nocov start - # this code is unused because weights are interpreted as number of + # this code is unused because weights are interpreted as number of # trials for binomial glms standata$weights0 <- weights[y0] standata$weights1 <- weights[y1] @@ -543,7 +543,7 @@ stan_glm.fit <- } else { stop(paste(famname, "is not supported.")) } - + prior_info <- summarize_glm_prior( user_prior = prior_stuff, user_prior_intercept = prior_intercept_stuff, @@ -556,8 +556,8 @@ stan_glm.fit <- adjusted_prior_aux_scale = prior_scale_for_aux, family = family ) - - pars <- c(if (has_intercept) "alpha", + + pars <- c(if (has_intercept) "alpha", "beta", if (ncol(S)) "beta_smooth", if (length(group)) "b", @@ -572,7 +572,7 @@ stan_glm.fit <- optimizing_args$data <- standata optimizing_args$constrained <- TRUE optimizing_args$importance_resampling <- importance_resampling - if (is.null(optimizing_args$tol_rel_grad)) + if (is.null(optimizing_args$tol_rel_grad)) optimizing_args$tol_rel_grad <- 10000L out <- do.call(optimizing, args = optimizing_args) check_stanfit(out) @@ -592,10 +592,10 @@ stan_glm.fit <- new_names[mark] <- colnames(S) } new_names[new_names == "alpha[1]"] <- "(Intercept)" - new_names[grepl("aux(\\[1\\])?$", new_names)] <- + new_names[grepl("aux(\\[1\\])?$", new_names)] <- if (is_gaussian) "sigma" else if (is_gamma) "shape" else - if (is_ig) "lambda" else + if (is_ig) "lambda" else if (is_nb) "reciprocal_dispersion" else if (is_beta) "(phi)" else NA names(out$par) <- new_names @@ -607,34 +607,34 @@ stan_glm.fit <- p <- suppressWarnings(psis(lr, r_eff = 1)) p$log_weights <- p$log_weights-log_sum_exp(p$log_weights) theta_pareto_k <- suppressWarnings(apply(out$theta_tilde, 2L, function(col) { - if (all(is.finite(col))) - psis(log1p(col ^ 2) / 2 + lr, r_eff = 1)$diagnostics$pareto_k + if (all(is.finite(col))) + psis(log1p(col ^ 2) / 2 + lr, r_eff = 1)$diagnostics$pareto_k else NaN })) ## todo: change fixed threshold to an option if (p$diagnostics$pareto_k > 1) { - warning("Pareto k diagnostic value is ", + warning("Pareto k diagnostic value is ", round(p$diagnostics$pareto_k, digits = 2), - ". Resampling is disabled. ", - "Decreasing tol_rel_grad may help if optimization has terminated prematurely. ", + ". Resampling is disabled. ", + "Decreasing tol_rel_grad may help if optimization has terminated prematurely. ", "Otherwise consider using sampling.", call. = FALSE, immediate. = TRUE) importance_resampling <- FALSE - } else if (p$diagnostics$pareto_k > 0.7) { - warning("Pareto k diagnostic value is ", - round(p$diagnostics$pareto_k, digits = 2), + } else if (p$diagnostics$pareto_k > 0.7) { + warning("Pareto k diagnostic value is ", + round(p$diagnostics$pareto_k, digits = 2), ". Resampling is unreliable. ", - "Increasing the number of draws or decreasing tol_rel_grad may help.", + "Increasing the number of draws or decreasing tol_rel_grad may help.", call. = FALSE, immediate. = TRUE) } - out$psis <- nlist(pareto_k = p$diagnostics$pareto_k, + out$psis <- nlist(pareto_k = p$diagnostics$pareto_k, n_eff = p$diagnostics$n_eff / keep_every) } else { theta_pareto_k <- rep(NaN,length(new_names)) importance_resampling <- FALSE } ## importance_resampling - if (importance_resampling) { - ir_idx <- .sample_indices(exp(p$log_weights), + if (importance_resampling) { + ir_idx <- .sample_indices(exp(p$log_weights), n_draws = ceiling(optimizing_args$draws / keep_every)) out$theta_tilde <- out$theta_tilde[ir_idx,] out$ir_idx <- ir_idx @@ -652,19 +652,19 @@ stan_glm.fit <- out$diagnostics <- cbind(mcse, theta_pareto_k, n_eff) colnames(out$diagnostics) <- c("mcse", "khat", "n_eff") ## end: psis diagnostics and SIR - out$stanfit <- suppressMessages(sampling(stanfit, data = standata, + out$stanfit <- suppressMessages(sampling(stanfit, data = standata, chains = 0)) return(structure(out, prior.info = prior_info, dropped_cols = x_stuff$dropped_cols)) - + } else { if (algorithm == "sampling") { sampling_args <- set_sampling_args( - object = stanfit, - prior = prior, - user_dots = list(...), - user_adapt_delta = adapt_delta, - data = standata, - pars = pars, + object = stanfit, + prior = prior, + user_dots = list(...), + user_adapt_delta = adapt_delta, + data = standata, + pars = pars, show_messages = FALSE) stanfit <- do.call(rstan::sampling, sampling_args) } else { @@ -686,7 +686,7 @@ stan_glm.fit <- check <- try(check_stanfit(stanfit)) if (!isTRUE(check)) return(standata) if (QR) { - thetas <- extract(stanfit, pars = "beta", inc_warmup = TRUE, + thetas <- extract(stanfit, pars = "beta", inc_warmup = TRUE, permuted = FALSE) betas <- apply(thetas, 1:2, FUN = function(theta) R_inv %*% theta) end <- tail(dim(betas), 1L) @@ -696,21 +696,21 @@ stan_glm.fit <- } } if (standata$len_theta_L) { - thetas <- extract(stanfit, pars = "theta_L", inc_warmup = TRUE, + thetas <- extract(stanfit, pars = "theta_L", inc_warmup = TRUE, permuted = FALSE) cnms <- group$cnms nc <- sapply(cnms, FUN = length) nms <- names(cnms) Sigma <- apply(thetas, 1:2, FUN = function(theta) { Sigma <- mkVarCorr(sc = 1, cnms, nc, theta, nms) - unlist(sapply(Sigma, simplify = FALSE, + unlist(sapply(Sigma, simplify = FALSE, FUN = function(x) x[lower.tri(x, TRUE)])) }) l <- length(dim(Sigma)) end <- tail(dim(Sigma), 1L) shift <- grep("^theta_L", names(stanfit@sim$samples[[1]]))[1] - 1L if (l == 3) for (chain in 1:end) for (param in 1:nrow(Sigma)) { - stanfit@sim$samples[[chain]][[shift + param]] <- Sigma[param, , chain] + stanfit@sim$samples[[chain]][[shift + param]] <- Sigma[param, , chain] } else for (chain in 1:end) { stanfit@sim$samples[[chain]][[shift + 1]] <- Sigma[, chain] @@ -724,18 +724,18 @@ stan_glm.fit <- } Sigma_nms <- unlist(Sigma_nms) } - new_names <- c(if (has_intercept) "(Intercept)", + new_names <- c(if (has_intercept) "(Intercept)", colnames(xtemp), if (ncol(S)) colnames(S), if (length(group) && length(group$flist)) c(paste0("b[", b_nms, "]")), - if (is_gaussian) "sigma", - if (is_gamma) "shape", + if (is_gaussian) "sigma", + if (is_gamma) "shape", if (is_ig) "lambda", if (is_nb) "reciprocal_dispersion", if (is_beta) "(phi)", if (ncol(S)) paste0("smooth_sd[", names(x)[-1], "]"), if (standata$len_theta_L) paste0("Sigma[", Sigma_nms, "]"), - if (mean_PPD && !standata$clogit) "mean_PPD", + if (mean_PPD && !standata$clogit) "mean_PPD", "log-posterior") stanfit@sim$fnames_oi <- new_names return(structure(stanfit, prior.info = prior_info, dropped_cols = x_stuff$dropped_cols)) @@ -792,17 +792,17 @@ validate_glm_outcome_support <- function(y, family) { if (is.character(y)) { stop("Outcome variable can't be type 'character'.", call. = FALSE) } - + if (is.null(y)) { return(y) } - + .is_count <- function(x) { all(x >= 0) && all(abs(x - round(x)) < .Machine$double.eps^0.5) } - + fam <- family$family - + if (!is.binomial(fam)) { # make sure y has ok dimensions (matrix only allowed for binomial models) if (length(dim(y)) > 1) { @@ -810,19 +810,19 @@ validate_glm_outcome_support <- function(y, family) { y <- y[, 1] } else { stop("Except for binomial models the outcome variable ", - "should not have multiple columns.", + "should not have multiple columns.", call. = FALSE) } } - + # check that values match support for non-binomial models if (is.gaussian(fam)) { return(y) } else if (is.gamma(fam) && any(y <= 0)) { - stop("All outcome values must be positive for gamma models.", + stop("All outcome values must be positive for gamma models.", call. = FALSE) } else if (is.ig(fam) && any(y <= 0)) { - stop("All outcome values must be positive for inverse-Gaussian models.", + stop("All outcome values must be positive for inverse-Gaussian models.", call. = FALSE) } else if (is.poisson(fam) && !.is_count(y)) { stop("All outcome values must be counts for Poisson models", @@ -833,12 +833,12 @@ validate_glm_outcome_support <- function(y, family) { } } else { # binomial models if (NCOL(y) == 1L) { - if (is.numeric(y) || is.logical(y)) + if (is.numeric(y) || is.logical(y)) y <- as.integer(y) - if (is.factor(y)) + if (is.factor(y)) y <- fac2bin(y) - if (!all(y %in% c(0L, 1L))) - stop("All outcome values must be 0 or 1 for Bernoulli models.", + if (!all(y %in% c(0L, 1L))) + stop("All outcome values must be 0 or 1 for Bernoulli models.", call. = FALSE) } else if (isTRUE(NCOL(y) == 2L)) { if (!.is_count(y)) @@ -846,11 +846,11 @@ validate_glm_outcome_support <- function(y, family) { call. = FALSE) } else { stop("For binomial models the outcome should be a vector or ", - "a matrix with 2 columns.", + "a matrix with 2 columns.", call. = FALSE) } } - + return(y) } @@ -867,7 +867,7 @@ fake_y_for_prior_PD <- function(N, family) { # valid for all discrete cases fake_y <- rep_len(c(0, 1), N) } else { - # valid for gamma, inverse gaussian, beta + # valid for gamma, inverse gaussian, beta fake_y <- runif(N) } return(fake_y) @@ -876,7 +876,7 @@ fake_y_for_prior_PD <- function(N, family) { # Add extra level _NEW_ to each group -# +# # @param Ztlist ranef indicator matrices # @param cnms group$cnms # @param flist group$flist @@ -886,7 +886,7 @@ pad_reTrms <- function(Ztlist, cnms, flist) { p <- sapply(cnms, FUN = length) n <- ncol(Ztlist[[1]]) for (i in attr(flist, "assign")) { - levels(flist[[i]]) <- c(gsub(" ", "_", levels(flist[[i]])), + levels(flist[[i]]) <- c(gsub(" ", "_", levels(flist[[i]])), paste0("_NEW_", names(flist)[i])) } for (i in 1:length(p)) { @@ -900,8 +900,7 @@ pad_reTrms <- function(Ztlist, cnms, flist) { #' #' @param x A matrix or array (e.g. the posterior sample or matrix of summary #' stats) -#' @param columns Do the columns (TRUE) or rows (FALSE) correspond to the -#' variables? +#' @param ... Additional arguments (not used) #' @export unpad_reTrms <- function(x, ...) UseMethod("unpad_reTrms") @@ -909,6 +908,7 @@ unpad_reTrms <- function(x, ...) UseMethod("unpad_reTrms") #' #' @param x A matrix (e.g. the posterior sample or matrix of summary #' stats) +#' @param ... Additional arguments (not used) #' @export unpad_reTrms.default <- function(x, ...) { if (is.matrix(x) || is.array(x)) @@ -921,22 +921,23 @@ unpad_reTrms.default <- function(x, ...) { #' #' @param x An array (e.g. the posterior sample or matrix of summary #' stats) -#' @param columns Do the columns (TRUE) or rows (FALSE) correspond to the +#' @param columns Do the columns (TRUE) or rows (FALSE) correspond to the #' variables? +#' @param ... Additional arguments (not used) #' @export unpad_reTrms.array <- function(x, columns = TRUE, ...) { ndim <- length(dim(x)) if (ndim > 3) stop("'x' should be a matrix or 3-D array") - - nms <- if (columns) + + nms <- if (columns) last_dimnames(x) else rownames(x) keep <- !grepl("_NEW_", nms, fixed = TRUE) if (length(dim(x)) == 2) { - x_keep <- if (columns) + x_keep <- if (columns) x[, keep, drop = FALSE] else x[keep, , drop = FALSE] } else { - x_keep <- if (columns) + x_keep <- if (columns) x[, , keep, drop = FALSE] else x[keep, , , drop = FALSE] } return(x_keep) @@ -953,24 +954,24 @@ make_b_nms <- function(group, m = NULL, stub = "Long") { if (length(nms_i) == 1) { b_nms <- c(b_nms, paste0(m_stub, nms_i, ":", levels(group$flist[[nm]]))) } else { - b_nms <- c(b_nms, c(t(sapply(paste0(m_stub, nms_i), paste0, ":", + b_nms <- c(b_nms, c(t(sapply(paste0(m_stub, nms_i), paste0, ":", levels(group$flist[[nm]]))))) } } - return(b_nms) + return(b_nms) } # Create "prior.info" attribute needed for prior_summary() # -# @param user_* The user's prior, prior_intercept, prior_covariance, and +# @param user_* The user's prior, prior_intercept, prior_covariance, and # prior_aux specifications. For prior and prior_intercept these should be # passed in after broadcasting the df/location/scale arguments if necessary. # @param has_intercept T/F, does model have an intercept? # @param has_predictors T/F, does model have predictors? # @param adjusted_prior_*_scale adjusted scales computed if using autoscaled priors # @param family Family object. -# @return A named list with components 'prior', 'prior_intercept', and possibly +# @return A named list with components 'prior', 'prior_intercept', and possibly # 'prior_covariance' and 'prior_aux' each of which itself is a list # containing the needed values for prior_summary. summarize_glm_prior <- @@ -978,14 +979,14 @@ summarize_glm_prior <- user_prior_intercept, user_prior_aux, user_prior_covariance, - has_intercept, + has_intercept, has_predictors, adjusted_prior_scale, - adjusted_prior_intercept_scale, + adjusted_prior_intercept_scale, adjusted_prior_aux_scale, family) { rescaled_coef <- - user_prior$prior_autoscale && + user_prior$prior_autoscale && has_predictors && !is.na(user_prior$prior_dist_name) && !all(user_prior$prior_scale == adjusted_prior_scale) @@ -997,7 +998,7 @@ summarize_glm_prior <- rescaled_aux <- user_prior_aux$prior_autoscale_for_aux && !is.na(user_prior_aux$prior_dist_name_for_aux) && (user_prior_aux$prior_scale_for_aux != adjusted_prior_aux_scale) - + if (has_predictors && user_prior$prior_dist_name %in% "t") { if (all(user_prior$prior_df == 1)) { user_prior$prior_dist_name <- "cauchy" @@ -1021,7 +1022,7 @@ summarize_glm_prior <- } } prior_list <- list( - prior = + prior = if (!has_predictors) NULL else with(user_prior, list( dist = prior_dist_name, location = prior_mean, @@ -1032,7 +1033,7 @@ summarize_glm_prior <- ("student_t", "hs", "hs_plus", "lasso", "product_normal")) prior_df else NULL )), - prior_intercept = + prior_intercept = if (!has_intercept) NULL else with(user_prior_intercept, list( dist = prior_dist_name_for_intercept, location = prior_mean_for_intercept, @@ -1045,28 +1046,28 @@ summarize_glm_prior <- ) if (length(user_prior_covariance)) prior_list$prior_covariance <- user_prior_covariance - + aux_name <- .rename_aux(family) - prior_list$prior_aux <- if (is.na(aux_name)) + prior_list$prior_aux <- if (is.na(aux_name)) NULL else with(user_prior_aux, list( dist = prior_dist_name_for_aux, - location = if (!is.na(prior_dist_name_for_aux) && + location = if (!is.na(prior_dist_name_for_aux) && prior_dist_name_for_aux != "exponential") prior_mean_for_aux else NULL, - scale = if (!is.na(prior_dist_name_for_aux) && + scale = if (!is.na(prior_dist_name_for_aux) && prior_dist_name_for_aux != "exponential") prior_scale_for_aux else NULL, adjusted_scale = if (rescaled_aux) adjusted_prior_aux_scale else NULL, - df = if (!is.na(prior_dist_name_for_aux) && + df = if (!is.na(prior_dist_name_for_aux) && prior_dist_name_for_aux %in% "student_t") - prior_df_for_aux else NULL, - rate = if (!is.na(prior_dist_name_for_aux) && + prior_df_for_aux else NULL, + rate = if (!is.na(prior_dist_name_for_aux) && prior_dist_name_for_aux %in% "exponential") 1 / prior_scale_for_aux else NULL, aux_name = aux_name )) - + return(prior_list) } @@ -1075,7 +1076,7 @@ summarize_glm_prior <- fam <- family$family if (is.gaussian(fam)) "sigma" else if (is.gamma(fam)) "shape" else - if (is.ig(fam)) "lambda" else + if (is.ig(fam)) "lambda" else if (is.nb(fam)) "reciprocal_dispersion" else NA } diff --git a/R/stanreg_list.R b/R/stanreg_list.R index b1b9c5944..89d93a376 100644 --- a/R/stanreg_list.R +++ b/R/stanreg_list.R @@ -1,23 +1,23 @@ # Part of the rstanarm package for estimating model parameters # Copyright (C) 2015, 2016, 2017 Trustees of Columbia University -# +# # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 3 # of the License, or (at your option) any later version. -# +# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -# +# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #' Create lists of fitted model objects, combine them, or append new models to #' existing lists of models. -#' +#' #' @export #' @param ... Objects to combine into a \code{"stanreg_list"}, #' \code{"stanmvreg_list"}, or \code{"stanjm_list"}. Can be fitted model @@ -32,14 +32,14 @@ #' @return A list of class \code{"stanreg_list"}, \code{"stanmvreg_list"}, or #' \code{"stanjm_list"}, containing the fitted model objects and some metadata #' stored as attributes. -#' +#' #' @seealso \code{\link{loo_model_weights}} for usage of \code{stanreg_list}. -#' +#' stanreg_list <- function(..., model_names = NULL) { mods <- list(...) names(mods) <- stanreg_list_names( - model_names, - n_models = length(mods), + model_names, + n_models = length(mods), call_dots = match.call(expand.dots = FALSE)$... ) .stanreg_list(mods, model_class = "stanreg") @@ -50,8 +50,8 @@ stanreg_list <- function(..., model_names = NULL) { stanmvreg_list <- function(..., model_names = NULL) { mods <- list(...) names(mods) <- stanreg_list_names( - model_names, - n_models = length(mods), + model_names, + n_models = length(mods), call_dots = match.call(expand.dots = FALSE)$... ) .stanreg_list(mods, model_class = "stanmvreg") @@ -62,8 +62,8 @@ stanmvreg_list <- function(..., model_names = NULL) { stanjm_list <- function(..., model_names = NULL) { mods <- list(...) names(mods) <- stanreg_list_names( - model_names, - n_models = length(mods), + model_names, + n_models = length(mods), call_dots = match.call(expand.dots = FALSE)$... ) .stanreg_list(mods, model_class = "stanjm") @@ -87,9 +87,9 @@ print.stanreg_list <- function(x, ...) { } cat(cl, " with ", length(x), " models: \n\n") df <- data.frame( - name = attr(x, "names"), - family = unname(attr(x, "families")), - formula = sapply(x, function(y) formula_string(formula(y))), + name = attr(x, "names"), + family = unname(attr(x, "families")), + formula = sapply(x, function(y) formula_string(formula(y))), row.names = seq_along(x) ) print(df, right = FALSE, ...) @@ -100,14 +100,14 @@ print.stanreg_list <- function(x, ...) { #' Create, combine, or append new models to a stanreg_list, stanmvreg_list, or #' stanjm_list object. -#' +#' #' @noRd #' @param mods List of objects to combine. Can be fitted model objects (stanreg, #' stanmvreg, stanjm) or stan*_list objects. -#' @param model_class The type of objects to allow. +#' @param model_class The type of objects to allow. #' @return A stanreg_list, stanmvreg_list, or stanjm_list with one component per #' model and attributes containing various metadata about the models. -#' +#' .stanreg_list <- function(mods, model_class = c("stanreg", "stanmvreg", "stanjm")) { stopifnot(length(mods) >= 1, is.list(mods)) model_class <- match.arg(model_class) @@ -121,39 +121,39 @@ print.stanreg_list <- function(x, ...) { out <- stanreg_list_combine(mods, model_class = model_class) } else { .stopifnot_valid_objects(mods, valid_for = "append", model_class = model_class) - out <- stanreg_list_append(base_list = mods[[1]], mods = mods[-1], + out <- stanreg_list_append(base_list = mods[[1]], mods = mods[-1], model_class = model_class) } - + # set model_name attributes of loo/waic/kfold objects to stanreg_list names out <- rename_loos.stanreg_list(out) - + return(out) } #' Create a stanreg_list from list of fitted model objects #' -#' @noRd -#' @param mods List of fitted model objects. +#' @noRd +#' @param mods List of fitted model objects. #' @param model_class What type of list is it? ('stanreg', 'stanmvreg', 'stanjm') #' @return A stanreg_list object stanreg_list_create <- function(mods, model_class) { list_class <- unique(c(paste0(model_class, "_list"), "stanreg_list")) - structure(mods, + structure(mods, class = list_class, - names = names(mods), + names = names(mods), families = stanreg_list_families(mods) ) } #' Combine existing stanreg_list objects #' -#' @noRd +#' @noRd #' @param lists List of stanreg_list objects. #' @param model_class What type of list is it? ('stanreg', 'stanmvreg', 'stanjm') #' @return A stanreg_list object -#' +#' stanreg_list_combine <- function(lists, model_class) { N_models_per_list <- sapply(lists, length) N_models <- sum(N_models_per_list) @@ -161,13 +161,13 @@ stanreg_list_combine <- function(lists, model_class) { classes <- lapply(lists, class) classes <- sapply(classes, function(x) x[1]) if (!all(classes == classes[1])) { - stop("Can't combine ", classes[1], " with ", + stop("Can't combine ", classes[1], " with ", paste(unique(classes[-1]), collapse = ", ")) } - + new_names <- unlist(lapply(lists, attr, "names", exact = TRUE), use.names = FALSE) new_families <- unlist(lapply(lists, attr, "families", exact = TRUE), use.names = FALSE) - + new_list <- vector(mode = "list", length = N_models) pos <- 1 for (j in seq_along(lists)) { @@ -178,7 +178,7 @@ stanreg_list_combine <- function(lists, model_class) { } structure( - new_list, + new_list, class = unique(c(paste0(model_class, "_list"), "stanreg_list")), names = new_names, families = new_families @@ -187,12 +187,12 @@ stanreg_list_combine <- function(lists, model_class) { #' Append new models to an existing stanreg_list object #' -#' @noRd +#' @noRd #' @param base_list The existing stanreg_list to append the new models to. #' @param mods List of fitted model objects to append to the existing list. #' @param model_class What type of list is it? ('stanreg', 'stanmvreg', 'stanjm') #' @return A stanreg_list object -#' +#' stanreg_list_append <- function(base_list, mods, model_class) { new_list <- stanreg_list_create(mods, model_class = model_class) stanreg_list_combine(list(base_list, new_list), model_class = model_class) @@ -211,16 +211,16 @@ is.stanjm_list <- function(x) is.stanreg_list(x) && inherits(x, "stanjm_list") model_class <- match.arg(model_class) list_class <- paste0(model_class, "_list") error_msg <- paste0( - "For ", list_class,"() objects in '...' must: ", + "For ", list_class,"() objects in '...' must: ", "\n(1) all be ", model_class, " objects, or", - "\n(2) all be ", list_class, " objects, or", - "\n(3) be one ", list_class, " object followed by all ", + "\n(2) all be ", list_class, " objects, or", + "\n(3) be one ", list_class, " object followed by all ", model_class, " objects" ) - + is_model_class <- sapply(mods, FUN = match.fun(paste0("is.", model_class))) is_list_class <- sapply(mods, FUN = match.fun(paste0("is.", list_class))) - + throw_error <- (valid_for == "create" && !all(is_model_class)) || @@ -228,7 +228,7 @@ is.stanjm_list <- function(x) is.stanreg_list(x) && inherits(x, "stanjm_list") !all(is_list_class)) || (valid_for == "append" && !(is_list_class[1] && all(is_model_class[-1]))) - + if (throw_error) { stop(error_msg, call. = FALSE) } @@ -236,14 +236,14 @@ is.stanjm_list <- function(x) is.stanreg_list(x) && inherits(x, "stanjm_list") -#' Determine names of the models in a stanreg_list -#' @noRd +#' Determine names of the models in a stanreg_list +#' @noRd #' @param user_model_names Either NULL or user-specified model_names argument #' @param n_models The number of models in the stanreg_list #' @param call_dots The result of match.call(expand.dots = FALSE)$... #' @return Either the user-specified model names or names inferred from the #' names of the fitted model objects passed to '...'. -#' +#' stanreg_list_names <- function(user_model_names, n_models, call_dots) { if (!is.null(user_model_names)) { stopifnot(is.character(user_model_names)) @@ -252,16 +252,16 @@ stanreg_list_names <- function(user_model_names, n_models, call_dots) { } nms <- user_model_names } else { - nms <- sapply(call_dots, FUN = deparse) + nms <- sapply(call_dots, FUN = deparse) } return(nms) } -#' Determine the families of the models in a stanreg_list -#' @noRd +#' Determine the families of the models in a stanreg_list +#' @noRd #' @param mods List of fitted model objects #' @return Character vector of family names -#' +#' stanreg_list_families <- function(mods) { fams <- sapply(mods, FUN = function(x) { fam <- family(x) @@ -273,23 +273,25 @@ stanreg_list_families <- function(mods) { -#' loo/waic/kfold objects created by rstanarm have a model_name attribute. -#' when a stanreg_list is created those attributes should be changed to match -#' the names of the models used for the stanreg_list in case user has specified +#' loo/waic/kfold objects created by rstanarm have a model_name attribute. +#' when a stanreg_list is created those attributes should be changed to match +#' the names of the models used for the stanreg_list in case user has specified #' the model_names argument -#' -#' @param loo/waic/kfold object +#' +#' @param x loo/waic/kfold object +#' @param ... Additional arguments #' @return Renamed object -#' +#' #' @export rename_loos <- function(x,...) UseMethod("rename_loos") #' Change model_name attributes of a loo/waic/kfold object stored in a stanreg object, -#' -#' @param loo/waic/kfold object +#' +#' @param x loo/waic/kfold object #' @param new_model_name new name for object +#' @param ... Additional arguments #' @return Renamed object -#' +#' #' @export rename_loos.stanreg <- function(x, new_model_name,...) { for (criterion in c("loo", "waic", "kfold")) { @@ -300,12 +302,13 @@ rename_loos.stanreg <- function(x, new_model_name,...) { return(x) } -#' Change model_name attributes of loo/waic/kfold objects to correspond to +#' Change model_name attributes of loo/waic/kfold objects to correspond to #' model names used for stanreg_list -#' -#' @param loo/waic/kfold object +#' +#' @param x loo/waic/kfold object +#' @param ... Additional arguments #' @return Renamed object -#' +#' #' @export rename_loos.stanreg_list <- function(x, ...) { for (j in seq_along(x)) { diff --git a/man/evaluate_log_survival.Rd b/man/evaluate_log_survival.Rd index 1c77c6218..d92b21419 100644 --- a/man/evaluate_log_survival.Rd +++ b/man/evaluate_log_survival.Rd @@ -13,7 +13,7 @@ evaluate_log_survival(log_haz, qnodes, qwts) individual, evaluated at each of the quadrature points. The vector should be ordered such that the first N elements contain the log_haz evaluated for each individual at quadrature point 1, -then the next N elements are the log_haz evaluated for each +then the next N elements are the log_haz evaluated for each individual at quadrature point 2, and so on.} \item{qnodes}{Integer specifying the number of quadrature nodes diff --git a/man/evaluate_log_survival.default.Rd b/man/evaluate_log_survival.default.Rd index e1d6404e3..47b5ce9b7 100644 --- a/man/evaluate_log_survival.default.Rd +++ b/man/evaluate_log_survival.default.Rd @@ -13,7 +13,7 @@ hazard coeffients / parameters} individual, evaluated at each of the quadrature points. The vector should be ordered such that the first N elements contain the log_haz evaluated for each individual at quadrature point 1, -then the next N elements are the log_haz evaluated for each +then the next N elements are the log_haz evaluated for each individual at quadrature point 2, and so on.} \item{qnodes}{Integer specifying the number of quadrature nodes diff --git a/man/evaluate_log_survival.matrix.Rd b/man/evaluate_log_survival.matrix.Rd index 19fa09714..d4475232b 100644 --- a/man/evaluate_log_survival.matrix.Rd +++ b/man/evaluate_log_survival.matrix.Rd @@ -13,7 +13,7 @@ hazard coeffients / parameters} individual, evaluated at each of the quadrature points. The vector should be ordered such that the first N elements contain the log_haz evaluated for each individual at quadrature point 1, -then the next N elements are the log_haz evaluated for each +then the next N elements are the log_haz evaluated for each individual at quadrature point 2, and so on.} \item{qnodes}{Integer specifying the number of quadrature nodes diff --git a/man/extract_pars.Rd b/man/extract_pars.Rd index 08c54819d..fe00a17e8 100644 --- a/man/extract_pars.Rd +++ b/man/extract_pars.Rd @@ -9,8 +9,7 @@ extract_pars(object, ...) \arguments{ \item{object}{A stanmvreg or stansurv object} -\item{stanmat}{A matrix of posterior draws, may be provided if the desired -stanmat is only a subset of the draws from as.matrix(object$stanfit)} +\item{...}{Additional arguments passed to the method} } \value{ A named list diff --git a/man/extract_pars.stanmvreg.Rd b/man/extract_pars.stanmvreg.Rd index 18688387c..8baa5ebae 100644 --- a/man/extract_pars.stanmvreg.Rd +++ b/man/extract_pars.stanmvreg.Rd @@ -4,13 +4,17 @@ \alias{extract_pars.stanmvreg} \title{Extract parameters from stanmat and return as a list} \usage{ -\method{extract_pars}{stanmvreg}(object, stanmat = NULL, means = FALSE) +\method{extract_pars}{stanmvreg}(object, stanmat = NULL, means = FALSE, ...) } \arguments{ \item{object}{A stanmvreg or stansurv object} -\item{stanmat}{A matrix of posterior draws, may be provided if the desired +\item{stanmat}{A matrix of posterior draws, may be provided if the desired stanmat is only a subset of the draws from as.matrix(object$stanfit)} + +\item{means}{Logical, if TRUE then the posterior means are returned} + +\item{...}{Additional arguments passed to the method} } \value{ A named list diff --git a/man/extract_pars.stansurv.Rd b/man/extract_pars.stansurv.Rd index fb594d1df..4d07da83a 100644 --- a/man/extract_pars.stansurv.Rd +++ b/man/extract_pars.stansurv.Rd @@ -4,13 +4,17 @@ \alias{extract_pars.stansurv} \title{Extract parameters from stanmat and return as a list} \usage{ -\method{extract_pars}{stansurv}(object, stanmat = NULL, means = FALSE) +\method{extract_pars}{stansurv}(object, stanmat = NULL, means = FALSE, ...) } \arguments{ \item{object}{A stanmvreg or stansurv object} -\item{stanmat}{A matrix of posterior draws, may be provided if the desired +\item{stanmat}{A matrix of posterior draws, may be provided if the desired stanmat is only a subset of the draws from as.matrix(object$stanfit)} + +\item{means}{Logical, if TRUE then the posterior means are returned} + +\item{...}{Additional arguments passed to the method} } \value{ A named list diff --git a/man/get_model_data.Rd b/man/get_model_data.Rd index 64ce90608..4299eead3 100644 --- a/man/get_model_data.Rd +++ b/man/get_model_data.Rd @@ -6,7 +6,7 @@ (1) only includes variables used in the model formula (2) only includes rows contained in the glmod/coxmod model frames (3) ensures that additional variables that are required - such as the ID variable or variables used in the + such as the ID variable or variables used in the interaction-type association structures, are included.} \usage{ get_model_data(object, ...) @@ -14,18 +14,17 @@ get_model_data(object, ...) \arguments{ \item{object}{A stansurv, stanmvreg or stanjm object.} -\item{m}{Integer specifying which submodel to get the -prediction data frame for (for stanmvreg or stanjm objects).} +\item{...}{Additional arguments passed to the method} } \value{ A data frame or list of data frames with all the (unevaluated) variables required for predictions. } \description{ -It is necessary to drop unneeded variables though so that -errors are not encountered if the original data contained +It is necessary to drop unneeded variables though so that +errors are not encountered if the original data contained NA values for variables unrelated to the model formula. -We generate a data frame here for in-sample predictions +We generate a data frame here for in-sample predictions rather than using a model frame, since some quantities will need to be recalculated at quadrature points etc, for example in posterior_survfit. diff --git a/man/get_model_data.stanmvreg.Rd b/man/get_model_data.stanmvreg.Rd index bb413ff32..0fa6ab7ba 100644 --- a/man/get_model_data.stanmvreg.Rd +++ b/man/get_model_data.stanmvreg.Rd @@ -6,7 +6,7 @@ (1) only includes variables used in the model formula (2) only includes rows contained in the glmod/coxmod model frames (3) ensures that additional variables that are required - such as the ID variable or variables used in the + such as the ID variable or variables used in the interaction-type association structures, are included.} \usage{ \method{get_model_data}{stanmvreg}(object, m = NULL, ...) @@ -16,16 +16,18 @@ \item{m}{Integer specifying which submodel to get the prediction data frame for (for stanmvreg or stanjm objects).} + +\item{...}{Additional arguments passed to the method} } \value{ A data frame or list of data frames with all the (unevaluated) variables required for predictions. } \description{ -It is necessary to drop unneeded variables though so that -errors are not encountered if the original data contained +It is necessary to drop unneeded variables though so that +errors are not encountered if the original data contained NA values for variables unrelated to the model formula. -We generate a data frame here for in-sample predictions +We generate a data frame here for in-sample predictions rather than using a model frame, since some quantities will need to be recalculated at quadrature points etc, for example in posterior_survfit. diff --git a/man/get_model_data.stansurv.Rd b/man/get_model_data.stansurv.Rd index dd7f49d86..132cf8a70 100644 --- a/man/get_model_data.stansurv.Rd +++ b/man/get_model_data.stansurv.Rd @@ -6,23 +6,25 @@ (1) only includes variables used in the model formula (2) only includes rows contained in the glmod/coxmod model frames (3) ensures that additional variables that are required - such as the ID variable or variables used in the + such as the ID variable or variables used in the interaction-type association structures, are included.} \usage{ \method{get_model_data}{stansurv}(object, ...) } \arguments{ \item{object}{A stansurv object.} + +\item{...}{Additional arguments passed to the method} } \value{ A data frame or list of data frames with all the (unevaluated) variables required for predictions. } \description{ -It is necessary to drop unneeded variables though so that -errors are not encountered if the original data contained +It is necessary to drop unneeded variables though so that +errors are not encountered if the original data contained NA values for variables unrelated to the model formula. -We generate a data frame here for in-sample predictions +We generate a data frame here for in-sample predictions rather than using a model frame, since some quantities will need to be recalculated at quadrature points etc, for example in posterior_survfit. diff --git a/man/linear_predictor.Rd b/man/linear_predictor.Rd index d241e4d25..9da54357c 100644 --- a/man/linear_predictor.Rd +++ b/man/linear_predictor.Rd @@ -17,6 +17,6 @@ linear_predictor(beta, x, offset = NULL) A vector or matrix. } \description{ -Make linear predictor vector from x and point estimates for beta, or linear +Make linear predictor vector from x and point estimates for beta, or linear predictor matrix from x and full posterior sample of beta. } diff --git a/man/linear_predictor.default.Rd b/man/linear_predictor.default.Rd index 0996c86bf..a64831d7f 100644 --- a/man/linear_predictor.default.Rd +++ b/man/linear_predictor.default.Rd @@ -17,6 +17,6 @@ A vector or matrix. } \description{ -Make linear predictor vector from x and point estimates for beta, or linear +Make linear predictor vector from x and point estimates for beta, or linear predictor matrix from x and full posterior sample of beta. } diff --git a/man/linear_predictor.matrix.Rd b/man/linear_predictor.matrix.Rd index 09c72cd73..9a00a63ee 100644 --- a/man/linear_predictor.matrix.Rd +++ b/man/linear_predictor.matrix.Rd @@ -17,6 +17,6 @@ A vector or matrix. } \description{ -Make linear predictor vector from x and point estimates for beta, or linear +Make linear predictor vector from x and point estimates for beta, or linear predictor matrix from x and full posterior sample of beta. } diff --git a/man/linkinv.stanmvreg.Rd b/man/linkinv.stanmvreg.Rd index 4c4693a14..fa0b0d2a2 100644 --- a/man/linkinv.stanmvreg.Rd +++ b/man/linkinv.stanmvreg.Rd @@ -9,6 +9,8 @@ \arguments{ \item{x}{A stanmvreg object} +\item{m}{An integer specifying the submodel.} + \item{...}{Other arguments passed to methods. For a \code{stanmvreg} object this can be an integer \code{m} specifying the submodel.} } diff --git a/man/ll_args.Rd b/man/ll_args.Rd index fd6b4810f..2c7ebfa14 100644 --- a/man/ll_args.Rd +++ b/man/ll_args.Rd @@ -9,21 +9,13 @@ ll_args(object, ...) \arguments{ \item{object}{stanreg object} -\item{...}{For models without group-specific terms (i.e., not stan_[g]lmer), -if reloo_or_kfold is TRUE and 'newdata' is specified then ... is used to +\item{...}{For models without group-specific terms (i.e., not stan_[g]lmer), +if reloo_or_kfold is TRUE and 'newdata' is specified then ... is used to pass 'newx' and 'stanmat' from reloo or kfold (bypassing pp_data). This is a workaround in case there are issues with newdata containing factors with only a single level. Or for stanmvreg objects, then ... can be used to pass 'stanmat', which may be a matrix with a reduced number of draws (potentially just a single MCMC draw).} - -\item{newdata}{same as posterior predict} - -\item{offset}{vector of offsets (only required if model has offset term and} - -\item{m}{Integer specifying which submodel for stanmvreg objects} - -\item{reloo_or_kfold}{logical. TRUE if ll_args is for reloo or kfold} } \value{ a named list with elements data, draws, S (posterior sample size) and diff --git a/man/ll_args.stanjm.Rd b/man/ll_args.stanjm.Rd index c28cca1b0..6fa07a10a 100644 --- a/man/ll_args.stanjm.Rd +++ b/man/ll_args.stanjm.Rd @@ -19,6 +19,8 @@ posterior_survfit, since it doesn't require repeated calls to pp_data.} \item{m}{Integer specifying which submodel} \item{reloo_or_kfold}{logical. TRUE if ll_args is for reloo or kfold} + +\item{...}{Additional arguments passed to the method} } \description{ Alternative ll_args method for stanjm objects that allows data and pars to be diff --git a/man/ll_args.stanreg.Rd b/man/ll_args.stanreg.Rd index 5994cf6e4..21f35b62c 100644 --- a/man/ll_args.stanreg.Rd +++ b/man/ll_args.stanreg.Rd @@ -24,8 +24,8 @@ \item{reloo_or_kfold}{logical. TRUE if ll_args is for reloo or kfold} -\item{...}{For models without group-specific terms (i.e., not stan_[g]lmer), -if reloo_or_kfold is TRUE and 'newdata' is specified then ... is used to +\item{...}{For models without group-specific terms (i.e., not stan_[g]lmer), +if reloo_or_kfold is TRUE and 'newdata' is specified then ... is used to pass 'newx' and 'stanmat' from reloo or kfold (bypassing pp_data). This is a workaround in case there are issues with newdata containing factors with only a single level. Or for stanmvreg objects, then ... can be used to pass diff --git a/man/ll_args.stansurv.Rd b/man/ll_args.stansurv.Rd index 9895548ea..76ae6bcdb 100644 --- a/man/ll_args.stansurv.Rd +++ b/man/ll_args.stansurv.Rd @@ -11,8 +11,8 @@ \item{newdata}{same as posterior predict} -\item{...}{For models without group-specific terms (i.e., not stan_[g]lmer), -if reloo_or_kfold is TRUE and 'newdata' is specified then ... is used to +\item{...}{For models without group-specific terms (i.e., not stan_[g]lmer), +if reloo_or_kfold is TRUE and 'newdata' is specified then ... is used to pass 'newx' and 'stanmat' from reloo or kfold (bypassing pp_data). This is a workaround in case there are issues with newdata containing factors with only a single level. Or for stanmvreg objects, then ... can be used to pass diff --git a/man/log_lik.stanreg.Rd b/man/log_lik.stanreg.Rd index 850892b5e..fb2cfacf9 100644 --- a/man/log_lik.stanreg.Rd +++ b/man/log_lik.stanreg.Rd @@ -28,20 +28,20 @@ specified and an \code{offset} was specified when fitting the model.} \item{m}{Integer specifying the number or name of the submodel} -\item{newdataLong, newdataEvent}{Optional data frames containing new data -(e.g. holdout data) to use when evaluating the log-likelihood for a -model estimated using \code{\link{stan_jm}}. If the fitted model +\item{newdataLong, newdataEvent}{Optional data frames containing new data +(e.g. holdout data) to use when evaluating the log-likelihood for a +model estimated using \code{\link{stan_jm}}. If the fitted model was a multivariate joint model (i.e. more than one longitudinal outcome), -then \code{newdataLong} is allowed to be a list of data frames. If supplying +then \code{newdataLong} is allowed to be a list of data frames. If supplying new data, then \code{newdataEvent} should also include variables corresponding to the event time and event indicator as these are required for evaluating the -log likelihood for the event submodel. For more details, see the description +log likelihood for the event submodel. For more details, see the description of \code{newdataLong} and \code{newdataEvent} for \code{\link{posterior_survfit}}.} } \value{ -For the \code{stanreg} and \code{stanmvreg} methods an \eqn{S} by - \eqn{N} matrix, where \eqn{S} is the size of the posterior sample and - \eqn{N} is the number of data points. For the \code{stanjm} method +For the \code{stanreg} and \code{stanmvreg} methods an \eqn{S} by + \eqn{N} matrix, where \eqn{S} is the size of the posterior sample and + \eqn{N} is the number of data points. For the \code{stanjm} method an \eqn{S} by \eqn{Npat} matrix where \eqn{Npat} is the number of individuals. } \description{ @@ -49,7 +49,7 @@ For models fit using MCMC only, the \code{log_lik} method returns the \eqn{S} by \eqn{N} pointwise log-likelihood matrix, where \eqn{S} is the size of the posterior sample and \eqn{N} is the number of data points, or in the case of the \code{stanmvreg} method (when called on \code{\link{stan_jm}} -model objects) an \eqn{S} by \eqn{Npat} matrix where \eqn{Npat} is the number +model objects) an \eqn{S} by \eqn{Npat} matrix where \eqn{Npat} is the number of individuals. } \examples{ diff --git a/man/predictive_error.stanreg.Rd b/man/predictive_error.stanreg.Rd index cbabe73f5..3a0354989 100644 --- a/man/predictive_error.stanreg.Rd +++ b/man/predictive_error.stanreg.Rd @@ -38,53 +38,59 @@ ) } \arguments{ -\item{object}{Either a fitted model object returned by one of the -\pkg{rstanarm} modeling functions (a \link[=stanreg-objects]{stanreg -object}) or, for the matrix method, a matrix of draws from the -posterior predictive distribution returned by +\item{object}{Either a fitted model object returned by one of the +\pkg{rstanarm} modeling functions (a \link[=stanreg-objects]{stanreg +object}) or, for the matrix method, a matrix of draws from the +posterior predictive distribution returned by \code{\link{posterior_predict}}.} -\item{newdata, draws, seed, offset, re.form}{Optional arguments passed to +\item{newdata, draws, seed, offset, re.form}{Optional arguments passed to \code{\link{posterior_predict}}. For binomial models, please see the \strong{Note} section below if \code{newdata} will be specified.} \item{...}{Currently ignored.} -\item{y}{For the matrix method only, a vector of \eqn{y} values the -same length as the number of columns in the matrix used as \code{object}. -The method for stanreg objects takes \code{y} directly from the fitted +\item{y}{For the matrix method only, a vector of \eqn{y} values the +same length as the number of columns in the matrix used as \code{object}. +The method for stanreg objects takes \code{y} directly from the fitted model object.} +\item{newdataLong}{Data frame containing the longitudinal data} + +\item{newdataEvent}{Data frame containing the event data} + \item{m}{For \code{stanmvreg} models, the submodel for which to calculate the prediction error. Can be an integer, or for \code{\link{stan_mvmer}} models it can be \code{"y1"}, \code{"y2"}, etc, or for \code{\link{stan_jm}} models it can be \code{"Event"}, \code{"Long1"}, \code{"Long2"}, etc.} -\item{t, u}{Only relevant for \code{\link{stan_jm}} models and when \code{m = "Event"}. +\item{t, u}{Only relevant for \code{\link{stan_jm}} models and when \code{m = "Event"}. The argument \code{t} specifies the time up to which individuals must have survived as well as being the time up to which the longitudinal data in \code{newdata} -is available. The argument \code{u} specifies the time at which the +is available. The argument \code{u} specifies the time at which the prediction error should be calculated (i.e. the time horizon).} + +\item{lossfn}{A character string specifying the loss function to use.} } \value{ -A \code{draws} by \code{nrow(newdata)} matrix. If \code{newdata} is +A \code{draws} by \code{nrow(newdata)} matrix. If \code{newdata} is not specified then it will be \code{draws} by \code{nobs(object)}. } \description{ -This is a convenience function for computing \eqn{y - y^{rep}}{y - yrep} -(in-sample, for observed \eqn{y}) or \eqn{y - \tilde{y}}{y - ytilde} -(out-of-sample, for new or held-out \eqn{y}). The method for stanreg objects +This is a convenience function for computing \eqn{y - y^{rep}}{y - yrep} +(in-sample, for observed \eqn{y}) or \eqn{y - \tilde{y}}{y - ytilde} +(out-of-sample, for new or held-out \eqn{y}). The method for stanreg objects calls \code{\link{posterior_predict}} internally, whereas the method for matrices accepts the matrix returned by \code{posterior_predict} as input and can be used to avoid multiple calls to \code{posterior_predict}. } \note{ -The \strong{Note} section in \code{\link{posterior_predict}} about +The \strong{Note} section in \code{\link{posterior_predict}} about \code{newdata} for binomial models also applies for \code{predictive_error}, with one important difference. For - \code{posterior_predict} if the left-hand side of the model formula is - \code{cbind(successes, failures)} then the particular values of - \code{successes} and \code{failures} in \code{newdata} don't matter, only + \code{posterior_predict} if the left-hand side of the model formula is + \code{cbind(successes, failures)} then the particular values of + \code{successes} and \code{failures} in \code{newdata} don't matter, only that they add to the desired number of trials. \strong{This is not the case for} \code{predictive_error}. For \code{predictive_error} the particular value of \code{successes} matters because it is used as \eqn{y} when @@ -99,9 +105,9 @@ hist(err1) # Using newdata with a binomial model formula(example_model) nd <- data.frame( - size = c(10, 20), - incidence = c(5, 10), - period = factor(c(1,2)), + size = c(10, 20), + incidence = c(5, 10), + period = factor(c(1,2)), herd = c(1, 15) ) err2 <- predictive_error(example_model, newdata = nd, draws = 10, seed = 1234) diff --git a/man/psis.stanreg.Rd b/man/psis.stanreg.Rd index d15fb5bd2..47e59c2c4 100644 --- a/man/psis.stanreg.Rd +++ b/man/psis.stanreg.Rd @@ -8,6 +8,8 @@ } \arguments{ \item{log_ratios}{log_ratios for PSIS computation} + +\item{...}{Additional arguments passed to \code{\link[loo]{psis}}} } \description{ Perform importance sampling with stanreg objects diff --git a/man/rename_loos.Rd b/man/rename_loos.Rd index c6c1aae70..e1303765c 100644 --- a/man/rename_loos.Rd +++ b/man/rename_loos.Rd @@ -2,22 +2,24 @@ % Please edit documentation in R/stanreg_list.R \name{rename_loos} \alias{rename_loos} -\title{loo/waic/kfold objects created by rstanarm have a model_name attribute. -when a stanreg_list is created those attributes should be changed to match -the names of the models used for the stanreg_list in case user has specified +\title{loo/waic/kfold objects created by rstanarm have a model_name attribute. +when a stanreg_list is created those attributes should be changed to match +the names of the models used for the stanreg_list in case user has specified the model_names argument} \usage{ rename_loos(x, ...) } \arguments{ -\item{loo/waic/kfold}{object} +\item{x}{loo/waic/kfold object} + +\item{...}{Additional arguments} } \value{ Renamed object } \description{ -loo/waic/kfold objects created by rstanarm have a model_name attribute. -when a stanreg_list is created those attributes should be changed to match -the names of the models used for the stanreg_list in case user has specified +loo/waic/kfold objects created by rstanarm have a model_name attribute. +when a stanreg_list is created those attributes should be changed to match +the names of the models used for the stanreg_list in case user has specified the model_names argument } diff --git a/man/rename_loos.stanreg.Rd b/man/rename_loos.stanreg.Rd index 73c93b4e4..2116db888 100644 --- a/man/rename_loos.stanreg.Rd +++ b/man/rename_loos.stanreg.Rd @@ -7,9 +7,11 @@ \method{rename_loos}{stanreg}(x, new_model_name, ...) } \arguments{ +\item{x}{loo/waic/kfold object} + \item{new_model_name}{new name for object} -\item{loo/waic/kfold}{object} +\item{...}{Additional arguments} } \value{ Renamed object diff --git a/man/rename_loos.stanreg_list.Rd b/man/rename_loos.stanreg_list.Rd index 53d191d46..a9f39f6b1 100644 --- a/man/rename_loos.stanreg_list.Rd +++ b/man/rename_loos.stanreg_list.Rd @@ -2,18 +2,20 @@ % Please edit documentation in R/stanreg_list.R \name{rename_loos.stanreg_list} \alias{rename_loos.stanreg_list} -\title{Change model_name attributes of loo/waic/kfold objects to correspond to +\title{Change model_name attributes of loo/waic/kfold objects to correspond to model names used for stanreg_list} \usage{ \method{rename_loos}{stanreg_list}(x, ...) } \arguments{ -\item{loo/waic/kfold}{object} +\item{x}{loo/waic/kfold object} + +\item{...}{Additional arguments} } \value{ Renamed object } \description{ -Change model_name attributes of loo/waic/kfold objects to correspond to +Change model_name attributes of loo/waic/kfold objects to correspond to model names used for stanreg_list } diff --git a/man/split2.Rd b/man/split2.Rd index 1ea45cd75..734ec8155 100644 --- a/man/split2.Rd +++ b/man/split2.Rd @@ -13,7 +13,7 @@ split2(x, n_segments = 1, ...) \item{n_segments}{Integer specifying the number of segments.} -\item{bycol}{Logical, should a matrix be split along the column or row margin?} +\item{...}{Additional arguments passed to the method.} } \value{ A list with n_segments elements. diff --git a/man/split2.matrix.Rd b/man/split2.matrix.Rd index 1ea57609c..9950e8700 100644 --- a/man/split2.matrix.Rd +++ b/man/split2.matrix.Rd @@ -6,7 +6,7 @@ each segment as an element of a list. The matrix method allows splitting across the column (bycol = TRUE) or row margin (bycol = FALSE).} \usage{ -\method{split2}{matrix}(x, n_segments = 1, bycol = TRUE) +\method{split2}{matrix}(x, n_segments = 1, bycol = TRUE, ...) } \arguments{ \item{x}{A matrix} @@ -14,6 +14,8 @@ across the column (bycol = TRUE) or row margin (bycol = FALSE).} \item{n_segments}{Integer specifying the number of segments.} \item{bycol}{Logical, should a matrix be split along the column or row margin?} + +\item{...}{Additional arguments passed to the method.} } \description{ Split a vector or matrix into a specified number of segments and return diff --git a/man/split2.vector.Rd b/man/split2.vector.Rd index 771753ae5..504902c84 100644 --- a/man/split2.vector.Rd +++ b/man/split2.vector.Rd @@ -13,7 +13,7 @@ across the column (bycol = TRUE) or row margin (bycol = FALSE).} \item{n_segments}{Integer specifying the number of segments.} -\item{bycol}{Logical, should a matrix be split along the column or row margin?} +\item{...}{Additional arguments passed to the method.} } \description{ Split a vector or matrix into a specified number of segments and return diff --git a/man/stan_glm.Rd b/man/stan_glm.Rd index a53ae7c61..474bc8f89 100644 --- a/man/stan_glm.Rd +++ b/man/stan_glm.Rd @@ -268,11 +268,11 @@ to the appropriate length.} \item{group}{A list, possibly of length zero (the default), but otherwise having the structure of that produced by \code{\link[lme4]{mkReTrms}} to indicate the group-specific part of the model. In addition, this list must -have elements for the \code{regularization}, \code{concentration} +have elements for the \code{regularization}, \code{concentration} \code{shape}, and \code{scale} components of a \code{\link{decov}} prior for the covariance matrices among the group-specific coefficients.} -\item{importance_resampling}{Logical scalar indicating whether to use +\item{importance_resampling}{Logical scalar indicating whether to use importance resampling when approximating the posterior distribution with a multivariate normal around the posterior mode, which only applies when \code{algorithm} is \code{"optimizing"} but defaults to \code{TRUE} diff --git a/man/truncate.numeric.Rd b/man/truncate.numeric.Rd index cc8153778..5c964256f 100644 --- a/man/truncate.numeric.Rd +++ b/man/truncate.numeric.Rd @@ -4,7 +4,7 @@ \alias{truncate.numeric} \title{Method to truncate a numeric vector at defined limits} \usage{ -\method{truncate}{numeric}(con, lower = NULL, upper = NULL) +\method{truncate}{numeric}(con, lower = NULL, upper = NULL, ...) } \arguments{ \item{con}{A numeric vector.} @@ -12,6 +12,8 @@ \item{lower}{Scalar, the lower limit for the returned vector.} \item{upper}{Scalar, the upper limit for the returned vector.} + +\item{...}{Additional arguments passed to the method.} } \value{ A numeric vector. diff --git a/man/unpad_reTrms.Rd b/man/unpad_reTrms.Rd index 1fe3033f7..dcdbb83f4 100644 --- a/man/unpad_reTrms.Rd +++ b/man/unpad_reTrms.Rd @@ -10,8 +10,7 @@ unpad_reTrms(x, ...) \item{x}{A matrix or array (e.g. the posterior sample or matrix of summary stats)} -\item{columns}{Do the columns (TRUE) or rows (FALSE) correspond to the -variables?} +\item{...}{Additional arguments (not used)} } \description{ Drop the extra reTrms from a matrix x diff --git a/man/unpad_reTrms.array.Rd b/man/unpad_reTrms.array.Rd index 001e96f4c..edb6e0463 100644 --- a/man/unpad_reTrms.array.Rd +++ b/man/unpad_reTrms.array.Rd @@ -10,8 +10,10 @@ \item{x}{An array (e.g. the posterior sample or matrix of summary stats)} -\item{columns}{Do the columns (TRUE) or rows (FALSE) correspond to the +\item{columns}{Do the columns (TRUE) or rows (FALSE) correspond to the variables?} + +\item{...}{Additional arguments (not used)} } \description{ Drop the extra reTrms from an array x diff --git a/man/unpad_reTrms.default.Rd b/man/unpad_reTrms.default.Rd index ae2acaea1..71f6d5229 100644 --- a/man/unpad_reTrms.default.Rd +++ b/man/unpad_reTrms.default.Rd @@ -9,6 +9,8 @@ \arguments{ \item{x}{A matrix (e.g. the posterior sample or matrix of summary stats)} + +\item{...}{Additional arguments (not used)} } \description{ Drop the extra reTrms from a matrix x