diff --git a/DESCRIPTION b/DESCRIPTION index 6efe8639..ce462430 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -56,5 +56,5 @@ LazyData: false URL: https://mc-stan.org/posterior/, https://discourse.mc-stan.org/ BugReports: https://github.com/stan-dev/posterior/issues Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.0 VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 824c5036..62cf616b 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -141,9 +141,15 @@ S3method(ess_tail,default) S3method(ess_tail,rvar) S3method(extract_variable,default) S3method(extract_variable,draws) +S3method(extract_variable,draws_df) +S3method(extract_variable,draws_list) S3method(extract_variable,draws_rvars) +S3method(extract_variable_array,default) +S3method(extract_variable_array,draws) S3method(extract_variable_matrix,default) S3method(extract_variable_matrix,draws) +S3method(extract_variable_matrix,draws_df) +S3method(extract_variable_matrix,draws_list) S3method(extract_variable_matrix,draws_rvars) S3method(format,rvar) S3method(format_glimpse,rvar) @@ -435,6 +441,7 @@ export(ess_sd) export(ess_tail) export(example_draws) export(extract_variable) +export(extract_variable_array) export(extract_variable_matrix) export(for_each_draw) export(is_draws) diff --git a/NEWS.md b/NEWS.md index a6909143..03f5a1a8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,15 @@ weights before adding to a draws object. * Matrix multiplication of `rvar`s can now be done with the base matrix multiplication operator (`%*%`) instead of `%**%` in R >= 4.3. +* `variables()`, `variables<-()`, `set_variables()`, and `nvariables()` now + support a `with_indices` argument, which determines whether variable names + are retrieved/set with (`"x[1]"`, `"x[2]"` ...) or without (`"x"`) indices + (#208). +* Add `extract_variable_array()` function to extract variables with indices + into arrays of iterations x chains x any remaining dimensions (#340). +* For types that support `factor` variables (`draws_df`, `draws_list`, and + `draws_rvars`), `extract_variable()` and `extract_variable_matrix()` can + now return `factor`s. # posterior 1.5.0 diff --git a/R/as_draws_rvars.R b/R/as_draws_rvars.R index 5ba88826..fd7a558a 100755 --- a/R/as_draws_rvars.R +++ b/R/as_draws_rvars.R @@ -54,36 +54,28 @@ as_draws_rvars.draws_matrix <- function(x, ...) { } #' Helper for as_draws_rvars.draws_matrix and as_draws_rvars.draws_df() -#' @param x_at A function taking a logical vector along variables(x) and returning a matrix of draws +#' @param x_at A function taking a numeric vector of indices along variables(x) and returning a matrix of draws #' @noRd .as_draws_rvars.draws_matrix <- function(x, ..., x_at = function(var_i) unclass(x[, var_i, drop = FALSE])) { .variables <- variables(x, reserved = TRUE) + .nchains <- nchains(x) if (ndraws(x) == 0) { return(empty_draws_rvars(.variables)) } # split x[y,z] names into base name and indices - # - # ----- base name -> vars_indices[[i]][[2]] - # ||||| lazy-matched (.*? not .*) so that indices match as much as they can - # ||||| - # ||||| ---- optional indices -> vars_indices[[i]][[3]] - # ||||| |||| - matches <- regexec("^(.*?)(?:\\[(.*)\\])?$", .variables) - vars_indices <- regmatches(.variables, matches) - vars <- vapply(vars_indices, `[[`, i = 2, character(1)) + vars <- split_variable_names(.variables) + vars$i <- seq_along(.variables) # pull out each var into its own rvar - var_names <- unique(vars) - rvars_list <- lapply(var_names, function(var) { - var_i <- vars == var - var_matrix <- x_at(var_i) + vars_by_base_name <- vctrs::vec_split(vars, vars$base_name) + rvars_list <- lapply(vars_by_base_name$val, function(var) { + var_matrix <- x_at(var$i) attr(var_matrix, "nchains") <- NULL - var_indices <- vars_indices[var_i] - if (ncol(var_matrix) == 1 && nchar(var_indices[[1]][[3]]) == 0) { + if (ncol(var_matrix) == 1 && nchar(var$indices[[1]]) == 0) { # single variable, no indices - out <- rvar(var_matrix) + out <- rvar(var_matrix, nchains = .nchains) dimnames(out) <- NULL } else { # variable with indices => we need to reshape the array @@ -92,8 +84,7 @@ as_draws_rvars.draws_matrix <- function(x, ...) { # first, pull out the list of indices into a data frame # where each column is an index variable - indices <- vapply(var_indices, `[[`, i = 3, character(1)) - indices <- as.data.frame(do.call(rbind, strsplit(indices, ",")), + indices <- as.data.frame(do.call(rbind, split_indices(var$indices)), stringsAsFactors = FALSE) unique_indices <- vector("list", length(indices)) .dimnames <- vector("list", length(indices)) @@ -131,35 +122,31 @@ as_draws_rvars.draws_matrix <- function(x, ...) { # (2) if some combination of indices is missing (say x[2,1] isn't # in the input) that cell in the array gets an NA - # Use expand.grid to get all cells in output array. We reverse indices - # here because it helps us do the sort after the merge, where - # we need to sort in reverse order of the indices (because - # the value of the last index should move slowest) - all_indices <- expand.grid(rev(unique_indices)) + # Use expand.grid to get all cells in output array in the appropriate + # order (value of the last index should move slowest), and save that order + # in $order so we can restore it after the merge + all_indices <- expand.grid(unique_indices, KEEP.OUT.ATTRS = FALSE, stringsAsFactors = FALSE) + all_indices$order <- seq_len(nrow(all_indices)) # merge with all.x = TRUE (left join) to fill in missing cells with NA indices <- merge(all_indices, cbind(indices, index = seq_len(nrow(indices))), all.x = TRUE, sort = FALSE) # need to do the sort manually after merge because when sort = TRUE, merge # sorts factors as if they were strings, and we need factors to be sorted as factors - indices <- indices[do.call(order, as.list(indices[, -ncol(indices), drop = FALSE])),] + # (and merge does not guarantee it keeps the original order in `x`) + indices <- indices[order(indices$order), ] # re-sort the array and fill in missing cells with NA var_matrix <- var_matrix[, indices$index, drop = FALSE] # convert to rvar and adjust dimensions - out <- rvar(var_matrix) + out <- rvar(var_matrix, nchains = .nchains) dim(out) <- unname(lengths(unique_indices)) dimnames(out) <- .dimnames } out }) - names(rvars_list) <- var_names - out <- .as_draws_rvars(rvars_list, ...) - .nchains <- nchains(x) - for (i in seq_along(out)) { - nchains_rvar(out[[i]]) <- .nchains - } - out + names(rvars_list) <- vars_by_base_name$key + .as_draws_rvars(rvars_list, ...) } #' @rdname draws_rvars @@ -175,10 +162,7 @@ as_draws_rvars.draws_df <- function(x, ...) { data_frame_to_matrix <- function(df) { if (any(vapply(df, is.factor, logical(1)))) { # as.matrix() does not convert factor columns correctly, must do this ourselves - while_preserving_dims( - function(df) do.call(function(...) vctrs::vec_c(..., .name_spec = rlang::zap()), df), - df - ) + copy_dims(df, vctrs::vec_c(!!!df, .name_spec = rlang::zap())) } else { as.matrix(df) } diff --git a/R/draws-index.R b/R/draws-index.R index 5f1109eb..beb83fa3 100644 --- a/R/draws-index.R +++ b/R/draws-index.R @@ -1,28 +1,19 @@ #' Index `draws` objects #' -#' Index variables, iterations, chains, and draws. +#' Index iterations, chains, and draws of [`draws`] objects. #' #' @name draws-index #' @template args-methods-x -#' @template args-methods-dots -#' @param value (character vector) For `variables(x) <- value`, the new variable -#' names to use. #' #' @details -#' The methods `variables()`, `iteration_ids()`, `chain_ids()`, and `draw_ids()` return -#' vectors of all variables, iterations, chains, and draws, respectively. In -#' contrast, the methods `nvariables()`, `niterations()`, `nchains()`, and +#' The methods `iteration_ids()`, `chain_ids()`, and `draw_ids()` return +#' vectors of all iterations, chains, and draws, respectively. In +#' contrast, the methods `niterations()`, `nchains()`, and #' `ndraws()` return the number of variables, iterations, chains, and draws, #' respectively. #' -#' `variables(x) <- value` allows you to modify the vector of variable names, -#' similar to how `names(x) <- value` works for vectors and lists. For renaming -#' specific variables, [set_variables()] works equivalently, but is more intuitive when using the pipe operator. [rename_variables()] may offer a more convenient approach. -#' #' @return #' -#' For `variables()`, a character vector. -#' #' For `iteration_ids()`, `chain_ids()`, and `draw_ids()`, an integer vector. #' #' For `niterations()`, `nchains()`, and `ndraws()`, a scalar integer. @@ -32,10 +23,6 @@ #' @examples #' x <- example_draws() #' -#' variables(x) -#' nvariables(x) -#' variables(x) <- letters[1:nvariables(x)] -#' #' iteration_ids(x) #' niterations(x) #' @@ -47,133 +34,6 @@ #' NULL -#' @rdname draws-index -#' @export -variables <- function(x, ...) { - UseMethod("variables") -} - -#' @export -variables.NULL <- function(x, ...) { - NULL -} - -#' @export -variables.draws_matrix <- function(x, reserved = FALSE, ...) { - remove_reserved_variable_names(colnames(x), reserved) -} - -#' @export -variables.draws_array <- function(x, reserved = FALSE, ...) { - remove_reserved_variable_names(dimnames(x)[[3L]], reserved) -} - -#' @export -variables.draws_df <- function(x, reserved = FALSE, ...) { - # reserved_df_variables are special data.frame columns - # which should never be included as variables - out <- names(x)[!names(x) %in% reserved_df_variables()] - remove_reserved_variable_names(out, reserved) -} - -#' @export -variables.draws_list <- function(x, reserved = FALSE, ...) { - if (!length(x)) { - return(character(0)) - } - remove_reserved_variable_names(names(x[[1]]), reserved) -} - -#' @export -variables.draws_rvars <- function(x, reserved = FALSE, ...) { - remove_reserved_variable_names(names(x), reserved) -} - -# remove reserved variable names -remove_reserved_variable_names <- function(variables, reserved) { - reserved <- as_one_logical(reserved) - if (!reserved && length(variables)) { - # can't use setdiff() here as in the edge case where someone - # manually creates duplicate columns it will give incorrect results - variables <- variables[!variables %in% reserved_variables()] - } - variables -} - -#' @rdname draws-index -#' @export -`variables<-` <- function(x, value) { - UseMethod("variables<-") -} - -#' @export -`variables<-.draws_matrix` <- function(x, value) { - check_new_variables(value) - colnames(x) <- value - x -} - -#' @export -`variables<-.draws_array` <- function(x, value) { - check_new_variables(value) - dimnames(x)[[3L]] <- value - x -} - -#' @export -`variables<-.draws_df` <- function(x, value) { - check_new_variables(value) - names(x)[!names(x) %in% reserved_df_variables()] <- value - x -} - -#' @export -`variables<-.draws_list` <- function(x, value) { - check_new_variables(value) - for (i in seq_along(x)) { - names(x[[i]]) <- value - } - x -} - -#' @export -`variables<-.draws_rvars` <- function(x, value) { - check_new_variables(value) - names(x) <- value - x -} - -#' Set variable names in `draws` objects -#' -#' Set variable names for all variables in a [`draws`] object. Useful -#' when using pipe operators. -#' -#' @param x (draws) A [`draws`] object. -#' @param variables (character) new variable names. -#' @template args-methods-dots -#' -#' @return Returns a [`draws`] object of the same format as `x`, with -#' variables named as specified. -#' -#' @seealso [`variables`] -#' @examples -#' x <- as_draws(matrix(rnorm(100), ncol = 2)) -#' variables(x) -#' -#' x <- set_variables(x, c("theta[1]", "theta[2]")) -#' variables(x) -#' -#' # this is equivalent to -#' variables(x) <- c("theta[1]", "theta[2]") -#' variables(x) -#' -#' @export -set_variables <- function(x, variables, ...) { - variables(x) <- variables - return(x) -} - - #' @rdname draws-index #' @export iteration_ids <- function(x) { @@ -318,21 +178,6 @@ draw_ids.rvar <- function(x) { as.integer(out) } -#' @rdname draws-index -#' @export -nvariables <- function(x, ...) { - UseMethod("nvariables") -} -#' @export -nvariables.NULL <- function(x, ...) { - 0 -} - -#' @export -nvariables.draws <- function(x, ...) { - length(variables(x, ...)) -} - #' @rdname draws-index #' @export niterations <- function(x) { @@ -471,106 +316,6 @@ ndraws.rvar <- function(x) { # internal ---------------------------------------------------------------- -# check validity of existing variable names: e.g., that -# all `variables` exist in `x` and that no `variables`are reserved words -# Additionally, this returns the cannonical name, so e.g. "theta" will get -# converted to c("theta[1]", "theta[2]", ...) if those variables exist. -# @param regex should 'variables' be treated as regular expressions? -# @param scalar_only should only scalar variables be matched? -check_existing_variables <- function(variables, x, regex = FALSE, - scalar_only = FALSE, exclude = FALSE) { - check_draws_object(x) - if (is.null(variables)) { - return(NULL) - } - - regex <- as_one_logical(regex) - scalar_only <- as_one_logical(scalar_only) - exclude <- as_one_logical(exclude) - variables <- unique(as.character(variables)) - all_variables <- variables(x, reserved = TRUE) - - if (regex) { - tmp <- named_list(variables) - for (i in seq_along(variables)) { - tmp[[i]] <- grep(variables[i], all_variables) - } - # regular expressions are not required to match anything - missing_variables <- NULL - variables <- as.character(all_variables[unique(unlist(tmp))]) - } else if (!scalar_only) { - # need to find variables that are matched by either a scalar or vector - # variable in x and what the matching variable is, while keeping original - # order of input `variables` - - # find scalar variables (1-to-1 match between all_variables and variables) - scalar_input_ixs <- match(all_variables, variables) - # find vector variable matches (match all_variables with the indexing stripped) - all_variables_base <- all_variables - # exclude already matched scalar variables - all_variables_base[!is.na(scalar_input_ixs)] <- NA_character_ - all_variables_base <- gsub("\\[.*\\]$", "", all_variables_base, perl = TRUE) - vector_input_ixs <- match(all_variables_base, variables) - # compose the vector of indices of matched input variables - input_ixs <- c(scalar_input_ixs[!is.na(scalar_input_ixs)], - vector_input_ixs[!is.na(vector_input_ixs)]) - # compose the vector of indices of matched all_variables - all_var_ixs <- seq_along(all_variables) - all_var_matched_ixs <- c(all_var_ixs[!is.na(scalar_input_ixs)], - all_var_ixs[!is.na(vector_input_ixs)]) - # select missed input variables - missing_vars_mask <- rep_len(TRUE, length(variables)) - missing_vars_mask[input_ixs] <- FALSE - missing_variables <- variables[missing_vars_mask] - # select matched all_variables maintaining the input variables order - variables <- all_variables[all_var_matched_ixs[order(input_ixs, all_var_matched_ixs)]] - } else { - missing_variables <- setdiff(variables, all_variables) - } - variables <- check_reserved_variables(variables) - if (length(missing_variables)) { - stop_no_call("The following variables are missing in the draws object: ", - comma(missing_variables)) - } - - # handle excluding variables for subset_draws - if (exclude) { - variables <- setdiff(all_variables, variables) - } - - invisible(variables) -} - -# check validity of new variables: e.g., that there are -# no duplicates in `variables` and that they do not use -# reserved words -check_new_variables <- function(variables) { - # use anyDuplicated() for the check since it is faster than any(duplicated(x)) and - # we shouldn't expect to take this branch often (since it is an error) - if (anyDuplicated(variables)) { - duplicates = unique(variables[duplicated(variables)]) - stop_no_call( - "Duplicate variable names are not allowed in draws objects.\n", - "The following variable names are duplicates:\n", - comma(duplicates) - ) - } - check_reserved_variables(variables) -} - -# check variables do not make use of reserved words -check_reserved_variables <- function(variables) { - assert_character(variables) - # for now only check reserved columns used in 'draws_df' objects - # other reserved variables such as '.log_weight' may be overwritten - # this has the advantage that power users can directly add such variables - used_reserved_variables <- intersect(reserved_df_variables(), variables) - if (length(used_reserved_variables)) { - stop_no_call("Variable names ", comma(used_reserved_variables), " are reserved.") - } - invisible(variables) -} - # check validity of iteration indices # @param unique should the returned IDs be unique? check_iteration_ids <- function(iteration_ids, x, unique = TRUE, exclude = FALSE) { @@ -599,7 +344,7 @@ check_iteration_ids <- function(iteration_ids, x, unique = TRUE, exclude = FALSE if (exclude) { iteration_ids <- setdiff(iteration_ids(x), iteration_ids) } - + invisible(iteration_ids) } @@ -630,7 +375,7 @@ check_chain_ids <- function(chain_ids, x, unique = TRUE, exclude = FALSE) { if (exclude) { chain_ids <- setdiff(chain_ids(x), chain_ids) } - + invisible(chain_ids) } @@ -661,6 +406,6 @@ check_draw_ids <- function(draw_ids, x, unique = TRUE, exclude = FALSE) { if (exclude) { draw_ids <- setdiff(draw_ids(x), draw_ids) } - + invisible(draw_ids) } diff --git a/R/extract_variable.R b/R/extract_variable.R index 3517fa1a..11c19143 100644 --- a/R/extract_variable.R +++ b/R/extract_variable.R @@ -3,10 +3,10 @@ #' Extract a vector of draws of a single variable. #' #' @template args-methods-x -#' @param variable (string) The name of the variable to extract. +#' @template args-extract-variable #' @template args-methods-dots -#' @return A numeric vector of length equal to the number of draws. -#' +#' @return A vector of length equal to the number of draws. +#' @family variable extraction methods #' @examples #' x <- example_draws() #' mu <- extract_variable(x, variable = "mu") @@ -33,17 +33,32 @@ extract_variable.draws <- function(x, variable, ...) { as.vector(out) } +#' @rdname extract_variable +#' @export +extract_variable.draws_df <- function(x, variable, ...) { + variable <- as_one_character(variable) + out <- .subset_draws(x, variable = variable, reserved = FALSE) + out[[variable]] +} + +#' @rdname extract_variable +#' @export +extract_variable.draws_list <- function(x, variable, ...) { + variable <- as_one_character(variable) + out <- .subset_draws(x, variable = variable, reserved = FALSE) + out <- as_draws_df(out) + out[[variable]] +} + #' @rdname extract_variable #' @export extract_variable.draws_rvars <- function(x, variable, ...) { variable <- as_one_character(variable) - variable_regex <- regexec("^(.*)\\[.*\\]$", variable) - if (!isTRUE(variable_regex[[1]] == -1)) { - # regex match => variable with indices in the name ("x[1]", etc), which - # can't be subset from draws_rvars directly, so we'll convert to a - # draws_array first. root_variable is "x" when variable is "x[...]" - root_variable <- regmatches(variable, variable_regex)[[1]][[2]] - out <- extract_variable(as_draws_array(x[root_variable]), variable, ...) + parts <- split_variable_names(variable) + if (isTRUE(nzchar(parts$indices))) { + # variable with indices in the name ("x[1]", etc), which can't be subset + # from draws_rvars directly, so we'll convert to a draws_df first. + out <- extract_variable(as_draws_df(x[parts$base_name]), variable = variable, ...) } else if (length(x[[variable]]) > 1) { stop_no_call( 'Cannot extract non-scalar value using extract_variable():\n', @@ -51,7 +66,8 @@ extract_variable.draws_rvars <- function(x, variable, ...) { ' Try including brackets ("[]") and indices in the variable name to extract a scalar value.' ) } else { - out <- NextMethod() + # scalar + out <- unname(drop(draws_of(x[[variable]]))) } out } diff --git a/R/extract_variable_array.R b/R/extract_variable_array.R new file mode 100644 index 00000000..b7baf6ec --- /dev/null +++ b/R/extract_variable_array.R @@ -0,0 +1,56 @@ +#' Extract array of a single (possibly indexed) variable +#' +#' Extract an array of draws of a single variable, including any dimensions of +#' variables with indices. +#' +#' @template args-methods-x +#' @param variable (string) The name of the variable to extract. To extract all +#' dimensions from variables with indices (e.g. `"x[1]"`), provide the base +#' variable name (e.g. `"x"`). +#' @template args-methods-dots +#' @returns +#' An `array` with dimension `niterations(x)` x `nchains(x)` x any remaining +#' dimensions determined by the indices of the variable `x`. +#' @family variable extraction methods +#' @examples +#' x <- example_draws(example = "multi_normal") +#' +#' mu <- extract_variable_array(x, variable = "mu") +#' str(mu) +#' +#' mu1 <- extract_variable_array(x, variable = "mu[1]") +#' str(mu1) +#' +#' Sigma <- extract_variable_array(x, variable = "Sigma") +#' str(Sigma) +#' +#' @export +extract_variable_array <- function(x, variable, ...) { + UseMethod("extract_variable_array") +} + +#' @rdname extract_variable_array +#' @export +extract_variable_array.default <- function(x, variable, ...) { + x <- as_draws(x) + extract_variable_array(x, variable, ...) +} + +#' @rdname extract_variable_array +#' @export +extract_variable_array.draws <- function(x, variable, ...) { + variable <- as_one_character(variable) + + if (isTRUE(nzchar(split_variable_names(variable)$indices))) { + # indices provided => scalar => equivalent to extract_variable_matrix + out <- extract_variable_matrix(x, variable, ...) + dim(out) <- c(dim(out), 1) + dimnames(out) <- list(NULL) + } else { + x <- subset_draws(x, variable = variable, reserved = FALSE) + x <- as_draws_rvars(x) + out <- draws_of(x[[variable]], with_chains = TRUE) + } + + out +} diff --git a/R/extract_variable_matrix.R b/R/extract_variable_matrix.R index a9e74fa4..393e569a 100644 --- a/R/extract_variable_matrix.R +++ b/R/extract_variable_matrix.R @@ -4,10 +4,10 @@ #' This is primarily used for convergence diagnostic functions such as [rhat()]. #' #' @template args-methods-x -#' @param variable (string) The name of the variable to extract. +#' @template args-extract-variable #' @template args-methods-dots #' @return A `matrix` with dimension iterations x chains. -#' +#' @family variable extraction methods #' @examples #' x <- example_draws() #' mu <- extract_variable_matrix(x, variable = "mu") @@ -37,17 +37,53 @@ extract_variable_matrix.draws <- function(x, variable, ...) { out } +#' @rdname extract_variable_matrix +#' @export +extract_variable_matrix.draws_df <- function(x, variable, ...) { + variable <- as_one_character(variable) + if (is.factor(x[[variable]])) { + x_variable_factor <- x[[variable]] + x[[variable]] <- unclass(x[[variable]]) + out <- copy_levels(x_variable_factor, NextMethod()) + } else { + out <- NextMethod() + } + out +} + +#' @rdname extract_variable_matrix +#' @export +extract_variable_matrix.draws_list <- function(x, variable, ...) { + variable <- as_one_character(variable) + if (is.factor(x[[1]][[variable]])) { + x_variable_factor <- x[[1]][[variable]] + for (i in seq_along(x)) { + x[[i]][[variable]] <- unclass(x[[i]][[variable]]) + } + out <- copy_levels(x_variable_factor, NextMethod()) + } else { + out <- NextMethod() + } + out +} + #' @rdname extract_variable_matrix #' @export extract_variable_matrix.draws_rvars <- function(x, variable, ...) { variable <- as_one_character(variable) - variable_regex <- regexec("^(.*)\\[.*\\]$", variable) - if (!isTRUE(variable_regex[[1]] == -1)) { - # regex match => variable with indices in the name ("x[1]", etc), which - # can't be subset from draws_rvars directly, so we'll convert to a - # draws_array first. root_variable is "x" when variable is "x[...]" - root_variable <- regmatches(variable, variable_regex)[[1]][[2]] - extract_variable_matrix(as_draws_array(x[root_variable]), variable, ...) + parts <- split_variable_names(variable) + + .draws <- draws_of(x[[parts$base_name]]) + if (is.factor(.draws)) { + # if x is a factor rvar, convert it to numeric before extracting and + # then we'll add levels back at the end + x[[parts$base_name]] <- as_rvar_integer(x[[parts$base_name]]) + } + + if (isTRUE(nzchar(parts$indices))) { + # variable with indices in the name ("x[1]", etc) can't be subset from + # draws_rvars directly, so we'll convert to a draws_array first + out <- extract_variable_matrix(as_draws_array(x[parts$base_name]), variable = variable, ...) } else if (length(x[[variable]]) > 1) { stop_no_call( 'Cannot extract non-scalar value using extract_variable_matrix():\n', @@ -55,6 +91,8 @@ extract_variable_matrix.draws_rvars <- function(x, variable, ...) { ' Try including brackets ("[]") and indices in the variable name to extract a scalar value.' ) } else { - NextMethod() + out <- NextMethod() } + + copy_levels(.draws, out) } diff --git a/R/misc.R b/R/misc.R index e5c681d8..c1bce76c 100644 --- a/R/misc.R +++ b/R/misc.R @@ -203,11 +203,8 @@ SW <- function(expr) { # escape all special characters in character strings escape_all <- function(x) { - specials <- c(".", "*", "+", "?", "^", "$", "(", ")", "[", "]", "|") - for (s in specials) { - x <- gsub(s, paste0("\\", s), x, fixed = TRUE) - } - x + specials <- "(\\.|\\*|\\+|\\?|\\^|\\$|\\(|\\)|\\[|\\]|\\|)" + gsub(specials, "\\\\\\1", x) } # numerically stable version of log(sum(exp(x))) diff --git a/R/posterior-package.R b/R/posterior-package.R index fe022273..0cb571ed 100644 --- a/R/posterior-package.R +++ b/R/posterior-package.R @@ -1,6 +1,5 @@ #' Tools for working with posterior (and prior) distributions #' -#' @docType package #' @name posterior-package #' @aliases posterior #' @@ -66,4 +65,4 @@ #' match between two objects involved in a binary operation. Whether this #' causes a warning can be controlled by this option. #' -NULL +"_PACKAGE" diff --git a/R/rename_variables.R b/R/rename_variables.R index 04566ac8..eb6c6e1f 100755 --- a/R/rename_variables.R +++ b/R/rename_variables.R @@ -13,7 +13,7 @@ #' Returns a [`draws`] object of the same format as `.x`, with variables renamed #' according to the expressions provided in `...`. #' -#' @seealso [`variables`], [`set_variables`], [`mutate_variables`] +#' @seealso [`variables`], [`variables<-`], [`mutate_variables`] #' #' @examples #' x <- as_draws_df(example_draws()) diff --git a/R/rvar-.R b/R/rvar-.R index 129f357c..33db28fd 100755 --- a/R/rvar-.R +++ b/R/rvar-.R @@ -370,7 +370,8 @@ match.default <- function(x, ...) base::match(x, ...) #' @rdname match #' @export match.rvar <- function(x, ...) { - draws_of(x) <- while_preserving_dims(base::match, draws_of(x), ...) + .draws <- draws_of(x) + draws_of(x) <- copy_dims(.draws, base::match(.draws, ...)) x } @@ -742,55 +743,6 @@ broadcast_draws <- function(x, .ndraws, keep_constants = FALSE) { } } -# flatten dimensions and names of an array -flatten_array = function(x, x_name = NULL) { - # determine new dimension names in the form x,y,z - # start with numeric names - dimname_lists <- lapply(dim(x), seq_len) - .dimnames <- dimnames(x) - if (!is.null(.dimnames)) { - # where character names are provided, use those instead of the numeric names - dimname_lists = lapply(seq_along(dimname_lists), function(i) .dimnames[[i]] %||% dimname_lists[[i]]) - } - # expand out the dimname lists into the appropriate combinations and assemble into new names - dimname_grid <- expand.grid(dimname_lists) - new_names <- apply(dimname_grid, 1, paste0, collapse = ",") - - .length <- length(x) - old_dim <- dim(x) - dim(x) <- .length - - # update variable names - if (is.null(x_name)) { - # no base name for x provided, just use index names - names(x) <- new_names - } else if (.length == 1 && (isTRUE(old_dim == 1) || length(old_dim) == 0)) { - # scalar, use the provided base name - names(x) <- x_name - } else if (.length >= 1) { - # rename the variables with their indices in brackets - names(x) <- paste0(x_name, "[", new_names %||% seq_along(x), "]") - } - - x -} - -#' Fast conversion of rvar draws to a flattened data frame. Equivalent to -#' as.data.frame(draws_of(flatten_array(x, name))) except it works with -#' factor rvars (as.data.frame does not work on array-like factors) -#' @noRd -flatten_rvar_draws_to_df <- function(x, x_name = NULL) { - if (length(x) == 0) { - out <- data.frame(row.names = draw_ids(x)) - } else { - draws <- draws_of(flatten_array(x, x_name)) - cols <- lapply(seq_len(ncol(draws)), function(i) unname(draws[, i])) - names(cols) <- colnames(draws) - out <- vctrs::new_data_frame(cols) - } - out -} - #' copy the dimension names (and name of the dimension) from dimension src_i #' in array src to dimension dst_i in array dst #' @noRd @@ -872,35 +824,35 @@ cleanup_rvar_draws <- function(x) { # if x is a character array, make it a factor if (is.character(x)) { - x <- while_preserving_dims(factor, x) + x <- copy_dims(x, factor(x)) } x } -#' Execute x <- f(x, ...) but preserve dimensions and dimension names of x. -#' Useful for functions that do not change the length of x but which drop -#' dimensions. +#' Copy dims and dimnames of `src` to `dst`. +#' Useful for functions that do not change the length of a variable but which +#' drop dimensions. +#' @param src a variable, possibly with dims and dimnames +#' @param dst a variable of same length as `src`. #' @noRd -while_preserving_dims <- function(f, x, ...) { - .dim <- dim(x) - .dimnames <- dimnames(x) - x <- f(x, ...) - dim(x) <- .dim - dimnames(x) <- .dimnames - x +copy_dims <- function(src, dst) { + dim(dst) <- dim(src) + dimnames(dst) <- dimnames(src) + dst } -#' Execute x <- f(x, ...) but preserve class and levels of x. -#' Useful for functions that do not change the length of x but which levels. +#' Copy class and levels of src to dst. Class is copied to ensure that the +#' status of the variable as a factor or ordered is maintained. +#' Useful for functions that do not change the shape of a variable but which +#' drop levels. +#' @param src a variable, possibly with levels +#' @param dst a variable of same shape as `src`. #' @noRd -while_preserving_levels <- function(f, x, ...) { - .class <- oldClass(x) - .levels <- levels(x) - x <- f(x, ...) - oldClass(x) <- .class - levels(x) <- .levels - x +copy_levels <- function(src, dst) { + oldClass(dst) <- oldClass(src) + levels(dst) <- levels(src) + dst } #' a version of apply() that works on factor-like arrays @@ -964,7 +916,7 @@ summarise_rvar_within_draws_via_matrix <- function(x, .name, .f, ..., .ordered_o if (.ordered_okay && is_rvar_ordered(x)) { .levels <- levels(x) .draws <- .f(draws_of(as_rvar_numeric(x)), ...) - .draws <- while_preserving_dims(function(.draws) ordered(.levels[round(.draws)], .levels), .draws) + .draws <- copy_dims(.draws, ordered(.levels[round(.draws)], .levels)) } else if (is_rvar_factor(x)) { stop_no_call("Cannot apply `", .name, "` function to rvar_factor objects.") } else { diff --git a/R/rvar-bind.R b/R/rvar-bind.R index 55c1a58a..68b38885 100755 --- a/R/rvar-bind.R +++ b/R/rvar-bind.R @@ -106,8 +106,8 @@ broadcast_and_bind_rvars.rvar <- function(x, y, axis = 1) { draws_y <- broadcast_array(draws_y, new_dim) # factors may not bind properly with abind, so convert them to characters first - if (is.factor(draws_x)) draws_x <- while_preserving_dims(as.character, draws_x) - if (is.factor(draws_y)) draws_y <- while_preserving_dims(as.character, draws_y) + if (is.factor(draws_x)) draws_x <- copy_dims(draws_x, as.character(draws_x)) + if (is.factor(draws_y)) draws_y <- copy_dims(draws_y, as.character(draws_y)) # bind along desired axis result <- new_rvar( diff --git a/R/rvar-cast.R b/R/rvar-cast.R index 11328947..bdcfb209 100755 --- a/R/rvar-cast.R +++ b/R/rvar-cast.R @@ -83,7 +83,8 @@ as_rvar <- function(x, dim = NULL, dimnames = NULL, nchains = NULL) { #' @export as_rvar_numeric <- function(x, dim = NULL, dimnames = NULL, nchains = NULL) { out <- as_rvar(x, dim = dim, dimnames = dimnames, nchains = nchains) - draws_of(out) <- while_preserving_dims(as.numeric, draws_of(out)) + .draws <- draws_of(out) + draws_of(out) <- copy_dims(.draws, as.numeric(.draws)) out } @@ -91,7 +92,8 @@ as_rvar_numeric <- function(x, dim = NULL, dimnames = NULL, nchains = NULL) { #' @export as_rvar_integer <- function(x, dim = NULL, dimnames = NULL, nchains = NULL) { out <- as_rvar(x, dim = dim, dimnames = dimnames, nchains = nchains) - draws_of(out) <- while_preserving_dims(as.integer, draws_of(out)) + .draws <- draws_of(out) + draws_of(out) <- copy_dims(.draws, as.integer(.draws)) out } @@ -99,7 +101,8 @@ as_rvar_integer <- function(x, dim = NULL, dimnames = NULL, nchains = NULL) { #' @export as_rvar_logical <- function(x, dim = NULL, dimnames = NULL, nchains = NULL) { out <- as_rvar(x, dim = dim, dimnames = dimnames, nchains = nchains) - draws_of(out) <- while_preserving_dims(as.logical, draws_of(out)) + .draws <- draws_of(out) + draws_of(out) <- copy_dims(.draws, as.logical(.draws)) out } @@ -378,11 +381,11 @@ vec_cast.rvar.double <- function(x, to, ...) new_constant_rvar(x) # double -> rvar_factor #' @export -vec_cast.rvar_factor.double <- function(x, to, ...) new_constant_rvar(while_preserving_dims(as.factor, x)) +vec_cast.rvar_factor.double <- function(x, to, ...) new_constant_rvar(copy_dims(x, as.factor(x))) # double -> rvar_ordered #' @export -vec_cast.rvar_ordered.double <- function(x, to, ...) new_constant_rvar(while_preserving_dims(as.ordered, x)) +vec_cast.rvar_ordered.double <- function(x, to, ...) new_constant_rvar(copy_dims(x, as.ordered(x))) # integer -> rvar #' @export @@ -394,11 +397,11 @@ vec_cast.rvar.integer <- function(x, to, ...) new_constant_rvar(x) # integer -> rvar_factor #' @export -vec_cast.rvar_factor.integer <- function(x, to, ...) new_constant_rvar(while_preserving_dims(as.factor, x)) +vec_cast.rvar_factor.integer <- function(x, to, ...) new_constant_rvar(copy_dims(x, as.factor(x))) # integer -> rvar_ordered #' @export -vec_cast.rvar_ordered.integer <- function(x, to, ...) new_constant_rvar(while_preserving_dims(as.ordered, x)) +vec_cast.rvar_ordered.integer <- function(x, to, ...) new_constant_rvar(copy_dims(x, as.ordered(x))) # logical -> rvar #' @export @@ -410,11 +413,11 @@ vec_cast.rvar.logical <- function(x, to, ...) new_constant_rvar(x) # logical -> rvar_factor #' @export -vec_cast.rvar_factor.logical <- function(x, to, ...) new_constant_rvar(while_preserving_dims(as.factor, x)) +vec_cast.rvar_factor.logical <- function(x, to, ...) new_constant_rvar(copy_dims(x, as.factor(x))) # logical -> rvar_ordered #' @export -vec_cast.rvar_ordered.logical <- function(x, to, ...) new_constant_rvar(while_preserving_dims(as.ordered, x)) +vec_cast.rvar_ordered.logical <- function(x, to, ...) new_constant_rvar(copy_dims(x, as.ordered(x))) # character casts --------------------------------------------------------- @@ -429,7 +432,7 @@ vec_cast.character.rvar_ordered <- function(x, to, ...) format(x) # character -> rvar #' @export -vec_cast.rvar.character <- function(x, to, ...) new_constant_rvar(while_preserving_dims(as.factor, x)) +vec_cast.rvar.character <- function(x, to, ...) new_constant_rvar(copy_dims(x, as.factor(x))) # character -> rvar_factor #' @export @@ -437,7 +440,7 @@ vec_ptype2.character.rvar_factor <- function(x, y, ...) new_rvar(factor()) #' @export vec_ptype2.rvar_factor.character <- function(x, y, ...) new_rvar(factor()) #' @export -vec_cast.rvar_factor.character <- function(x, to, ...) new_constant_rvar(while_preserving_dims(as.factor, x)) +vec_cast.rvar_factor.character <- function(x, to, ...) new_constant_rvar(copy_dims(x, as.factor(x))) # character -> rvar_ordered #' @export @@ -450,7 +453,7 @@ vec_cast.rvar_ordered.character <- function(x, to, ...) { new_levels <- sort(setdiff(x, levels(to))) levels <- c(old_levels, new_levels) ordered <- length(new_levels) == 0 - new_constant_rvar(while_preserving_dims(factor, x, levels = levels, ordered = ordered)) + new_constant_rvar(copy_dims(x, factor(x, levels = levels, ordered = ordered))) } @@ -521,7 +524,8 @@ vec_cast.rvar.rvar_factor <- function(x, to, ...) x return(x) } - draws_of(x) <- while_preserving_dims(factor, draws_of(x), ordered = ordered, ...) + .draws <- draws_of(x) + draws_of(x) <- copy_dims(.draws, factor(.draws, ordered = ordered, ...)) x } diff --git a/R/rvar-dist.R b/R/rvar-dist.R index ee57c162..2c390a2c 100755 --- a/R/rvar-dist.R +++ b/R/rvar-dist.R @@ -114,5 +114,5 @@ quantile.rvar_factor <- function(x, probs, ...) { quantile.rvar_ordered <- function(x, probs, ...) { # `type` must be in 1:3 because x is discrete out <- quantile(as_rvar_numeric(x), probs, type = 1, ...) - while_preserving_dims(function(out) levels(x)[out], out) + copy_dims(out, levels(x)[out]) } diff --git a/R/rvar-factor.R b/R/rvar-factor.R index 1b985a07..5f57f643 100644 --- a/R/rvar-factor.R +++ b/R/rvar-factor.R @@ -67,7 +67,7 @@ rvar_factor <- function( # to ensure we pick up levels already attached to x (if there are any), we # need to convert x to a factor here if it has levels if (!is.factor(x) && !is.null(attr(x, "levels"))) { - x <- while_preserving_dims(factor, x, labels = attr(x, "levels")) + x <- copy_dims(x, factor(x, labels = attr(x, "levels"))) } out <- rvar( @@ -219,25 +219,24 @@ anyDuplicated.rvar_factor <- function(x, incomparables = FALSE, MARGIN = 1, ...) #' @noRd combine_rvar_factor_levels <- function(x, list_of_levels, ordered = FALSE) { .draws <- draws_of(x) + new_levels <- levels(.draws) %||% unique(as.character(.draws)) unique_levels <- unique(list_of_levels) # zero-length levels lists don't count (since can only come from factors with only missing values) unique_levels <- unique_levels[lengths(unique_levels) > 0] - if (length(unique_levels) <= 1) { + if (length(unique_levels) <= 1 && all(new_levels %in% unique_levels[1][[1]])) { # levels are the same in all variables, so preserve level order when binding .levels <- unique_levels[1][[1]] # We only keep the "ordered" class when the levels were all the same (this # mimics base-R, which demotes to unordered factor when combining ordered # factors with different levels) - .draws <- while_preserving_dims(factor, .draws, .levels, ordered = ordered) + .draws <- copy_dims(.draws, factor(.draws, .levels, ordered = ordered)) } else { # levels are not the same in all variables, so preserve any old levels by # merging them together, but do not apply the "ordered" class - .levels <- unique(do.call(c, list_of_levels)) - .draws <- while_preserving_dims(factor, .draws, .levels) - } - if (!is.factor(.draws)) { - .draws <- while_preserving_dims(factor, .draws) + all_levels <- unlist(list_of_levels, recursive = FALSE, use.names = FALSE) + .levels <- unique(c(all_levels, new_levels)) + .draws <- copy_dims(.draws, factor(.draws, .levels)) } draws_of(x) <- .draws diff --git a/R/rvar-math.R b/R/rvar-math.R index 11a8ca3c..e704731d 100755 --- a/R/rvar-math.R +++ b/R/rvar-math.R @@ -44,7 +44,7 @@ Ops.rvar <- function(e1, e2) { } else { dim_source <- draws_x } - draws <- while_preserving_dims(function(...) draws, dim_source) + draws <- copy_dims(dim_source, draws) } new_rvar(draws, .nchains = nchains(e1)) @@ -336,7 +336,7 @@ t.rvar = function(x) { dimnames(.draws) = c(.dimnames[1], list(NULL), .dimnames[2]) result <- new_rvar(.draws, .nchains = nchains(x)) } else if (ndim == 3) { - .draws <- while_preserving_levels(aperm, .draws, c(1, 3, 2)) + .draws <- copy_levels(.draws, aperm(.draws, c(1, 3, 2))) result <- new_rvar(.draws, .nchains = nchains(x)) } else { stop_no_call("argument is not a random vector or matrix") @@ -346,6 +346,7 @@ t.rvar = function(x) { #' @export aperm.rvar = function(a, perm, ...) { - draws_of(a) <- while_preserving_levels(aperm, draws_of(a), c(1, perm + 1), ...) + .draws <- draws_of(a) + draws_of(a) <- copy_levels(.draws, aperm(.draws, c(1, perm + 1), ...)) a } diff --git a/R/variable-indices.R b/R/variable-indices.R new file mode 100644 index 00000000..7965044f --- /dev/null +++ b/R/variable-indices.R @@ -0,0 +1,172 @@ +# helpers for manipulating array indices embedded in variable names + + +# flattening dimensions into variable names with indices ----------------------- + +#' Flatten the indices of an array-like object. +#' @param x an array or array-like object (e.g. an rvar) +#' @param base_name a base name which, if provided, is prepended to the +#' resulting index names. +#' @returns a character vector of the same length as `x`, giving index names +#' names of the form `"base_name[i,j,k,...]"` where `i`, `j`, `k`, ... are indices +#' (either numeric indices or strings if x has `dimnames`) that correspond to +#' the indices of that element in the input `x`. +#' @noRd +flatten_indices <- function(x, base_name = "") { + # determine new dimension names in the form x,y,z + # start with numeric names + .length <- length(x) + .dim <- dim(x) %||% .length + .dimnames <- dimnames(x) + dimname_lists <- lapply(.dim, seq_len) + if (!is.null(.dimnames)) { + # where character names are provided, use those instead of the numeric names + dimname_lists = lapply(seq_along(dimname_lists), function(i) .dimnames[[i]] %||% dimname_lists[[i]]) + } + # expand out the dimname lists into the appropriate combinations and assemble into new names + dimname_grid <- expand.grid(dimname_lists, KEEP.OUT.ATTRS = FALSE, stringsAsFactors = FALSE) + new_names <- do.call(paste, c(list(sep = ","), dimname_grid)) + + # prepend the base_name + if (isTRUE(.dim == 1)) { + # scalar, just use the base name + new_names <- base_name + } else if (.length >= 1) { + # non-zero length, put the indices in brackets + new_names <- paste0(base_name, "[", new_names, "]") + } + + new_names +} + +#' Flatten an array so that it is one-dimensional, setting the name of each +#' element of the output array to match its indices in the input array. +#' @param x an array or array-like object (e.g. an rvar) +#' @param base_name a base name which, if provided, is prepended to the +#' resulting dimension names. +#' @returns a modified array-like object with `dim(out) == length(x)` and, so +#' long as the input is not scalar, names of the form `"i,j,k,..."` where +#' `i`, `j`, `k`, ... are indices from the input `x`. If `base_name` is provided, +#' names of `out` will have the form `"base_name[i,j,k,...]"`. +#' @noRd +flatten_array <- function(x, base_name = "") { + new_names <- flatten_indices(x, base_name) + dim(x) <- length(x) + names(x) <- new_names + x +} + +#' Fast conversion of rvar draws to a flattened data frame. Equivalent to +#' as.data.frame(draws_of(flatten_array(x, name))) except it works with +#' factor rvars (as.data.frame does not work on array-like factors) +#' @noRd +flatten_rvar_draws_to_df <- function(x, base_name = "") { + if (length(x) == 0) { + out <- data.frame(row.names = draw_ids(x)) + } else { + draws <- draws_of(flatten_array(x, base_name)) + cols <- lapply(seq_len(ncol(draws)), function(i) unname(draws[, i])) + names(cols) <- colnames(draws) + out <- vctrs::new_data_frame(cols) + } + out +} + + +# splitting flattened variable names into base names and indices --------------- + +#' Given a vector of variable names possibly containing indices, split them into +#' a structure representing the base name and the indices +#' @param x a character vector of names, possibly containing indices; +#' e.g. `c("x", "y[1,2]", NA)` +#' @returns a data frame with `length(x)` rows and these columns: +#' - `base_name`: character vector of base names; +#' e.g. `c("x", "y", NA)` +#' - `indices`: character vector of the index strings if present; +#' e.g. `c("", "[1,2]", NA)` +#' If an input element of `x` is `NA`, the corresponding entries in the returned +#' data frame will also be `NA`. +#' @noRd +split_variable_names <- function(x) { + # split x[y,z] names into base name and indices + # we do this with a regex matching just the indices at the end instead of a + # single regex matching the entire string with multiple capture groups + # --- something like regexec("^(.*?)(\\[(.*)\\])?$", x) --- because + # regexec() + regmatches() is slow on a very large number of variables + # (because it uses lists), but regexpr() + substr() is vectorized + indices_matches <- regexpr("(\\[.*\\])?$", x) + + vctrs::new_data_frame(list( + base_name = substr(x, 1L, indices_matches - 1L), + indices = substr(x, indices_matches, .Machine$integer.max) + )) +} + +#' Given a vector of index strings (such as returned by +#' `split_variable_names(x)$indices`), split each index string into a character +#' vector of indices. +#' @param x a character vector of index strings; +#' e.g. `c("", "[1,2]", NA)` +#' @returns a list of character vectors of indices; +#' e.g. list(character(), c("1", "2"), NA) +#' @noRd +split_indices <- function(x) { + strsplit(substr(x, 2, nchar(x) - 1), ",", fixed = TRUE) +} + + +# manipulating flattened variable names ----------------------------------- + +#' Given a vector of names possibly containing indices, return the base names +#' @param x a character vector of names, possibly containing indices (e.g. `"x[1,2]"`) +#' @returns a character vector of all unique base names in `x` +#' @noRd +base_names <- function(x) { + unique(split_variable_names(x)$base_name) +} + +#' Given a vector of names possibly containing indices, modify the base names +#' @param x a character vector of names, possibly containing indices (e.g. `"x[1,2]"`) +#' @param value a character vector of replacement base names of `length(base_names(x))`. +#' @returns a character vector of `length(x)` with the base names in the original +#' strings replaced with `value`. +#' @noRd +`base_names<-` <- function(x, value) { + vars <- split_variable_names(x) + base_names <- unique(vars$base_name) + if (length(value) != length(base_names)) { + stop_no_call( + "Attempting to replace variables with ", length(base_names), " unique base names", + " with ", length(value), " new base names. Lengths must match.\n", + " - Existing base names: ", toString(base_names, width = 40), "\n", + " - Replacements: ", toString(value, width = 40) + ) + } + base_name_i <- match(vars$base_name, base_names) + paste0(value[base_name_i], vars$indices) +} + +#' Given a vector of variable names, either return the variable names or the +#' base names of the variables, depending on the value of `with_indices` +#' @noRd +variable_names <- function(x, with_indices = TRUE) { + with_indices <- as_one_logical(with_indices) + if (with_indices) { + x + } else { + base_names(x) + } +} + +#' Given a vector of variable names, either replace the variable names or the +#' base names of the variables, depending on the value of `with_indices` +#' @noRd +`variable_names<-` <- function(x, with_indices = TRUE, value) { + with_indices <- as_one_logical(with_indices) + if (with_indices) { + value + } else { + base_names(x) <- value + x + } +} diff --git a/R/variables.R b/R/variables.R new file mode 100644 index 00000000..49911d15 --- /dev/null +++ b/R/variables.R @@ -0,0 +1,323 @@ +#' Get variable names from `draws` objects +#' +#' Get variable names from [`draws`] objects. +#' +#' @name variables +#' @template args-methods-x +#' @template args-methods-dots +#' @template args-methods-reserved +#' @template args-methods-with_indices +#' +#' @details +#' `variables()` returns a vector of all variable names, and `nvariables()` +#' returns the number of variables. +#' +#' @return +#' +#' For `variables()`, a character vector. +#' +#' For `nvariables()`, a scalar integer. +#' +#' @seealso [`variables<-`], [`rename_variables`], [`draws-index`] +#' +#' @examples +#' x <- example_draws() +#' +#' variables(x) +#' nvariables(x) +#' variables(x) <- letters[1:nvariables(x)] +#' @export +variables <- function(x, ...) { + UseMethod("variables") +} + +#' @export +variables.NULL <- function(x, ...) { + NULL +} + +#' @rdname variables +#' @export +variables.draws_matrix <- function(x, reserved = FALSE, with_indices = TRUE, ...) { + out <- remove_reserved_variable_names(colnames(x), reserved) + variable_names(out, with_indices) +} + +#' @rdname variables +#' @export +variables.draws_array <- function(x, reserved = FALSE, with_indices = TRUE, ...) { + out <- remove_reserved_variable_names(dimnames(x)[[3L]], reserved) + variable_names(out, with_indices) +} + +#' @rdname variables +#' @export +variables.draws_df <- function(x, reserved = FALSE, with_indices = TRUE, ...) { + # reserved_df_variables are special data.frame columns + # which should never be included as variables + out <- names(x)[!names(x) %in% reserved_df_variables()] + out <- remove_reserved_variable_names(out, reserved) + variable_names(out, with_indices) +} + +#' @rdname variables +#' @export +variables.draws_list <- function(x, reserved = FALSE, with_indices = TRUE, ...) { + if (!length(x)) { + return(character(0)) + } + out <- remove_reserved_variable_names(names(x[[1]]), reserved) + variable_names(out, with_indices) +} + +#' @rdname variables +#' @export +variables.draws_rvars <- function(x, reserved = FALSE, with_indices = FALSE, ...) { + with_indices <- as_one_logical(with_indices) + if (with_indices) { + out <- unlist(.mapply(flatten_indices, list(x, names(x)), NULL), recursive = FALSE, use.names = FALSE) + } else { + out <- names(x) + } + remove_reserved_variable_names(out, reserved) +} + +# remove reserved variable names +remove_reserved_variable_names <- function(variables, reserved) { + reserved <- as_one_logical(reserved) + if (!reserved && length(variables)) { + # can't use setdiff() here as in the edge case where someone + # manually creates duplicate columns it will give incorrect results + variables <- variables[!variables %in% reserved_variables()] + } + variables +} + + +#' Set variable names in `draws` objects +#' +#' Set variable names for all variables in a [`draws`] object. The +#' `set_variables()` form is useful when using pipe operators. +#' +#' @template args-methods-x +#' @template args-methods-dots +#' @template args-methods-with_indices +#' @param value,variables (character vector) new variable names. +#' +#' @details +#' `variables(x) <- value` allows you to modify the vector of variable names, +#' similar to how `names(x) <- value` works for vectors and lists. For renaming +#' specific variables, `set_variables(x, value)` works equivalently, but is more intuitive +#' when using the pipe operator. +#' +#' For renaming specific variables, [rename_variables()] may offer a more +#' convenient approach. +#' +#' @return Returns a [`draws`] object of the same format as `x`, with +#' variables named as specified. +#' +#' @seealso [`variables`], [`rename_variables`], [`draws-index`] +#' +#' @examples +#' x <- example_draws() +#' +#' variables(x) +#' nvariables(x) +#' variables(x) <- letters[1:nvariables(x)] +#' +#' # or equivalently... +#' x <- set_variables(x, letters[1:nvariables(x)]) +#' +#' @export +`variables<-` <- function(x, ..., value) { + UseMethod("variables<-") +} + +#' @rdname variables-set +#' @export +`variables<-.draws_matrix` <- function(x, with_indices = TRUE, ..., value) { + check_new_variables(value) + variable_names(colnames(x), with_indices) <- value + x +} + +#' @rdname variables-set +#' @export +`variables<-.draws_array` <- function(x, with_indices = TRUE, ..., value) { + check_new_variables(value) + variable_names(dimnames(x)[[3L]], with_indices) <- value + x +} + +#' @rdname variables-set +#' @export +`variables<-.draws_df` <- function(x, with_indices = TRUE, ..., value) { + check_new_variables(value) + names_i <- !names(x) %in% reserved_df_variables() + variable_names(names(x)[names_i], with_indices) <- value + x +} + +#' @rdname variables-set +#' @export +`variables<-.draws_list` <- function(x, with_indices = TRUE, ..., value) { + check_new_variables(value) + for (i in seq_along(x)) { + variable_names(names(x[[i]]), with_indices) <- value + } + x +} + +#' @rdname variables-set +#' @export +`variables<-.draws_rvars` <- function(x, with_indices = FALSE, ..., value) { + with_indices <- as_one_logical(with_indices) + check_new_variables(value) + if (with_indices) { + # need to make sure that the provided names only change the base names of + # the variables and that the indexes otherwise match + vars <- split_variable_names(value) + base_names <- unique(vars$base_name) + base_name_i <- match(vars$base_name, base_names) + + x_indices <- ulapply(x, flatten_indices, recursive = FALSE, use.names = FALSE) + x_base_name_i <- rep(seq_along(x), lengths(x)) + if (!identical(x_base_name_i, base_name_i) || !identical(x_indices, vars$indices)) { + stop_no_call( + "variables() <- value is only allowed when the indices in `value` match ", + "the indices in the original names. To modify the names of the indices, either modify ", + "the dims of the underlying rvars or convert to another draws format first." + ) + } + + names(x) <- base_names + } else { + names(x) <- value + } + x +} + +#' @rdname variables-set +#' @export +set_variables <- function(x, variables, ...) { + variables(x, ...) <- variables + x +} + +#' @rdname variables +#' @export +nvariables <- function(x, ...) { + UseMethod("nvariables") +} +#' @export +nvariables.NULL <- function(x, ...) { + 0 +} + +#' @export +nvariables.draws <- function(x, ...) { + length(variables(x, ...)) +} + + +# internal ---------------------------------------------------------------- + +# check validity of existing variable names: e.g., that +# all `variables` exist in `x` and that no `variables`are reserved words +# Additionally, this returns the cannonical name, so e.g. "theta" will get +# converted to c("theta[1]", "theta[2]", ...) if those variables exist. +# @param regex should 'variables' be treated as regular expressions? +# @param scalar_only should only scalar variables be matched? +check_existing_variables <- function(variables, x, regex = FALSE, + scalar_only = FALSE, exclude = FALSE) { + check_draws_object(x) + if (is.null(variables)) { + return(NULL) + } + + regex <- as_one_logical(regex) + scalar_only <- as_one_logical(scalar_only) + exclude <- as_one_logical(exclude) + variables <- unique(as.character(variables)) + all_variables <- variables(x, reserved = TRUE) + + if (regex) { + tmp <- named_list(variables) + for (i in seq_along(variables)) { + tmp[[i]] <- grep(variables[i], all_variables) + } + # regular expressions are not required to match anything + missing_variables <- NULL + variables <- as.character(all_variables[unique(unlist(tmp))]) + } else if (!scalar_only) { + # need to find variables that are matched by either a scalar or vector + # variable in x and what the matching variable is, while keeping original + # order of input `variables` + + # find scalar variables (1-to-1 match between all_variables and variables) + scalar_input_ixs <- match(all_variables, variables) + # find vector variable matches (match all_variables with the indexing stripped) + all_variables_base <- all_variables + # exclude already matched scalar variables + all_variables_base[!is.na(scalar_input_ixs)] <- NA_character_ + all_variables_base <- split_variable_names(all_variables_base)$base_name + vector_input_ixs <- match(all_variables_base, variables) + # compose the vector of indices of matched input variables + input_ixs <- c(scalar_input_ixs[!is.na(scalar_input_ixs)], + vector_input_ixs[!is.na(vector_input_ixs)]) + # compose the vector of indices of matched all_variables + all_var_ixs <- seq_along(all_variables) + all_var_matched_ixs <- c(all_var_ixs[!is.na(scalar_input_ixs)], + all_var_ixs[!is.na(vector_input_ixs)]) + # select missed input variables + missing_vars_mask <- rep_len(TRUE, length(variables)) + missing_vars_mask[input_ixs] <- FALSE + missing_variables <- variables[missing_vars_mask] + # select matched all_variables maintaining the input variables order + variables <- all_variables[all_var_matched_ixs[order(input_ixs, all_var_matched_ixs)]] + } else { + missing_variables <- setdiff(variables, all_variables) + } + variables <- check_reserved_variables(variables) + if (length(missing_variables)) { + stop_no_call("The following variables are missing in the draws object: ", + comma(missing_variables)) + } + + # handle excluding variables for subset_draws + if (exclude) { + variables <- setdiff(all_variables, variables) + } + + invisible(variables) +} + +# check validity of new variables: e.g., that there are +# no duplicates in `variables` and that they do not use +# reserved words +check_new_variables <- function(variables) { + # use anyDuplicated() for the check since it is faster than any(duplicated(x)) and + # we shouldn't expect to take this branch often (since it is an error) + if (anyDuplicated(variables)) { + duplicates = unique(variables[duplicated(variables)]) + stop_no_call( + "Duplicate variable names are not allowed in draws objects.\n", + "The following variable names are duplicates:\n", + comma(duplicates) + ) + } + check_reserved_variables(variables) +} + +# check variables do not make use of reserved words +check_reserved_variables <- function(variables) { + assert_character(variables) + # for now only check reserved columns used in 'draws_df' objects + # other reserved variables such as '.log_weight' may be overwritten + # this has the advantage that power users can directly add such variables + used_reserved_variables <- intersect(reserved_df_variables(), variables) + if (length(used_reserved_variables)) { + stop_no_call("Variable names ", comma(used_reserved_variables), " are reserved.") + } + invisible(variables) +} diff --git a/man-roxygen/args-extract-variable.R b/man-roxygen/args-extract-variable.R new file mode 100644 index 00000000..61713215 --- /dev/null +++ b/man-roxygen/args-extract-variable.R @@ -0,0 +1,3 @@ +#' @param variable (string) The name of the variable to extract. Must include +#' indices for array variables (e.g. `"x[1]"`, `"y[1,2]"`). To extract all +#' dimensions from variables with indices, use [extract_variable_array()]. diff --git a/man-roxygen/args-methods-with_indices.R b/man-roxygen/args-methods-with_indices.R new file mode 100644 index 00000000..f7607279 --- /dev/null +++ b/man-roxygen/args-methods-with_indices.R @@ -0,0 +1,4 @@ +#' @param with_indices (logical) Should indices be included in variable +#' names? For example, if the object includes variables named `"x[1]"` and +#' `"x[2]"`, if `TRUE`, `c("x[1]", "x[2]")` is returned; if `FALSE`, only `"x"` +#' is returned. Defaults to `TRUE` for all formats except [draws_rvars()]. diff --git a/man/draws-index.Rd b/man/draws-index.Rd index a1e21ec8..3533f306 100644 --- a/man/draws-index.Rd +++ b/man/draws-index.Rd @@ -2,29 +2,20 @@ % Please edit documentation in R/draws-index.R \name{draws-index} \alias{draws-index} -\alias{variables} -\alias{variables<-} \alias{iteration_ids} \alias{chain_ids} \alias{draw_ids} -\alias{nvariables} \alias{niterations} \alias{nchains} \alias{ndraws} \title{Index \code{draws} objects} \usage{ -variables(x, ...) - -variables(x) <- value - iteration_ids(x) chain_ids(x) draw_ids(x) -nvariables(x, ...) - niterations(x) nchains(x) @@ -34,40 +25,25 @@ ndraws(x) \arguments{ \item{x}{(draws) A \code{draws} object or another \R object for which the method is defined.} - -\item{...}{Arguments passed to individual methods (if applicable).} - -\item{value}{(character vector) For \code{variables(x) <- value}, the new variable -names to use.} } \value{ -For \code{variables()}, a character vector. - For \code{iteration_ids()}, \code{chain_ids()}, and \code{draw_ids()}, an integer vector. For \code{niterations()}, \code{nchains()}, and \code{ndraws()}, a scalar integer. } \description{ -Index variables, iterations, chains, and draws. +Index iterations, chains, and draws of \code{\link{draws}} objects. } \details{ -The methods \code{variables()}, \code{iteration_ids()}, \code{chain_ids()}, and \code{draw_ids()} return -vectors of all variables, iterations, chains, and draws, respectively. In -contrast, the methods \code{nvariables()}, \code{niterations()}, \code{nchains()}, and +The methods \code{iteration_ids()}, \code{chain_ids()}, and \code{draw_ids()} return +vectors of all iterations, chains, and draws, respectively. In +contrast, the methods \code{niterations()}, \code{nchains()}, and \code{ndraws()} return the number of variables, iterations, chains, and draws, respectively. - -\code{variables(x) <- value} allows you to modify the vector of variable names, -similar to how \code{names(x) <- value} works for vectors and lists. For renaming -specific variables, \code{\link[=set_variables]{set_variables()}} works equivalently, but is more intuitive when using the pipe operator. \code{\link[=rename_variables]{rename_variables()}} may offer a more convenient approach. } \examples{ x <- example_draws() -variables(x) -nvariables(x) -variables(x) <- letters[1:nvariables(x)] - iteration_ids(x) niterations(x) diff --git a/man/extract_variable.Rd b/man/extract_variable.Rd index daa46186..2379dfa3 100644 --- a/man/extract_variable.Rd +++ b/man/extract_variable.Rd @@ -4,6 +4,8 @@ \alias{extract_variable} \alias{extract_variable.default} \alias{extract_variable.draws} +\alias{extract_variable.draws_df} +\alias{extract_variable.draws_list} \alias{extract_variable.draws_rvars} \title{Extract draws of a single variable} \usage{ @@ -13,18 +15,24 @@ extract_variable(x, variable, ...) \method{extract_variable}{draws}(x, variable, ...) +\method{extract_variable}{draws_df}(x, variable, ...) + +\method{extract_variable}{draws_list}(x, variable, ...) + \method{extract_variable}{draws_rvars}(x, variable, ...) } \arguments{ \item{x}{(draws) A \code{draws} object or another \R object for which the method is defined.} -\item{variable}{(string) The name of the variable to extract.} +\item{variable}{(string) The name of the variable to extract. Must include +indices for array variables (e.g. \code{"x[1]"}, \code{"y[1,2]"}). To extract all +dimensions from variables with indices, use \code{\link[=extract_variable_array]{extract_variable_array()}}.} \item{...}{Arguments passed to individual methods (if applicable).} } \value{ -A numeric vector of length equal to the number of draws. +A vector of length equal to the number of draws. } \description{ Extract a vector of draws of a single variable. @@ -35,3 +43,9 @@ mu <- extract_variable(x, variable = "mu") str(mu) } +\seealso{ +Other variable extraction methods: +\code{\link{extract_variable_array}()}, +\code{\link{extract_variable_matrix}()} +} +\concept{variable extraction methods} diff --git a/man/extract_variable_array.Rd b/man/extract_variable_array.Rd new file mode 100644 index 00000000..348a1a74 --- /dev/null +++ b/man/extract_variable_array.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/extract_variable_array.R +\name{extract_variable_array} +\alias{extract_variable_array} +\alias{extract_variable_array.default} +\alias{extract_variable_array.draws} +\title{Extract array of a single (possibly indexed) variable} +\usage{ +extract_variable_array(x, variable, ...) + +\method{extract_variable_array}{default}(x, variable, ...) + +\method{extract_variable_array}{draws}(x, variable, ...) +} +\arguments{ +\item{x}{(draws) A \code{draws} object or another \R object for which the method +is defined.} + +\item{variable}{(string) The name of the variable to extract. To extract all +dimensions from variables with indices (e.g. \code{"x[1]"}), provide the base +variable name (e.g. \code{"x"}).} + +\item{...}{Arguments passed to individual methods (if applicable).} +} +\value{ +An \code{array} with dimension \code{niterations(x)} x \code{nchains(x)} x any remaining +dimensions determined by the indices of the variable \code{x}. +} +\description{ +Extract an array of draws of a single variable, including any dimensions of +variables with indices. +} +\examples{ +x <- example_draws(example = "multi_normal") + +mu <- extract_variable_array(x, variable = "mu") +str(mu) + +mu1 <- extract_variable_array(x, variable = "mu[1]") +str(mu1) + +Sigma <- extract_variable_array(x, variable = "Sigma") +str(Sigma) + +} +\seealso{ +Other variable extraction methods: +\code{\link{extract_variable_matrix}()}, +\code{\link{extract_variable}()} +} +\concept{variable extraction methods} diff --git a/man/extract_variable_matrix.Rd b/man/extract_variable_matrix.Rd index da19b8b0..1b9c97c1 100644 --- a/man/extract_variable_matrix.Rd +++ b/man/extract_variable_matrix.Rd @@ -4,6 +4,8 @@ \alias{extract_variable_matrix} \alias{extract_variable_matrix.default} \alias{extract_variable_matrix.draws} +\alias{extract_variable_matrix.draws_df} +\alias{extract_variable_matrix.draws_list} \alias{extract_variable_matrix.draws_rvars} \title{Extract matrix of a single variable} \usage{ @@ -13,13 +15,19 @@ extract_variable_matrix(x, variable, ...) \method{extract_variable_matrix}{draws}(x, variable, ...) +\method{extract_variable_matrix}{draws_df}(x, variable, ...) + +\method{extract_variable_matrix}{draws_list}(x, variable, ...) + \method{extract_variable_matrix}{draws_rvars}(x, variable, ...) } \arguments{ \item{x}{(draws) A \code{draws} object or another \R object for which the method is defined.} -\item{variable}{(string) The name of the variable to extract.} +\item{variable}{(string) The name of the variable to extract. Must include +indices for array variables (e.g. \code{"x[1]"}, \code{"y[1,2]"}). To extract all +dimensions from variables with indices, use \code{\link[=extract_variable_array]{extract_variable_array()}}.} \item{...}{Arguments passed to individual methods (if applicable).} } @@ -37,3 +45,9 @@ dim(mu) rhat(mu) } +\seealso{ +Other variable extraction methods: +\code{\link{extract_variable_array}()}, +\code{\link{extract_variable}()} +} +\concept{variable extraction methods} diff --git a/man/posterior-package.Rd b/man/posterior-package.Rd index 0f933f84..5396bf16 100644 --- a/man/posterior-package.Rd +++ b/man/posterior-package.Rd @@ -73,3 +73,33 @@ causes a warning can be controlled by this option. } } +\seealso{ +Useful links: +\itemize{ + \item \url{https://mc-stan.org/posterior/} + \item \url{https://discourse.mc-stan.org/} + \item Report bugs at \url{https://github.com/stan-dev/posterior/issues} +} + +} +\author{ +\strong{Maintainer}: Paul-Christian Bürkner \email{paul.buerkner@gmail.com} + +Authors: +\itemize{ + \item Jonah Gabry \email{jsg2201@columbia.edu} + \item Matthew Kay \email{mjskay@northwestern.edu} + \item Aki Vehtari \email{Aki.Vehtari@aalto.fi} +} + +Other contributors: +\itemize{ + \item Måns Magnusson [contributor] + \item Rok Češnovar [contributor] + \item Ben Lambert [contributor] + \item Ozan Adıgüzel [contributor] + \item Jacob Socolar [contributor] + \item Noa Kallioinen [contributor] +} + +} diff --git a/man/rename_variables.Rd b/man/rename_variables.Rd index 85e9df9c..d0a16505 100755 --- a/man/rename_variables.Rd +++ b/man/rename_variables.Rd @@ -41,5 +41,5 @@ variables(x) } \seealso{ -\code{\link{variables}}, \code{\link{set_variables}}, \code{\link{mutate_variables}} +\code{\link{variables}}, \code{\link{variables<-}}, \code{\link{mutate_variables}} } diff --git a/man/set_variables.Rd b/man/set_variables.Rd deleted file mode 100644 index ba67f440..00000000 --- a/man/set_variables.Rd +++ /dev/null @@ -1,38 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/draws-index.R -\name{set_variables} -\alias{set_variables} -\title{Set variable names in \code{draws} objects} -\usage{ -set_variables(x, variables, ...) -} -\arguments{ -\item{x}{(draws) A \code{\link{draws}} object.} - -\item{variables}{(character) new variable names.} - -\item{...}{Arguments passed to individual methods (if applicable).} -} -\value{ -Returns a \code{\link{draws}} object of the same format as \code{x}, with -variables named as specified. -} -\description{ -Set variable names for all variables in a \code{\link{draws}} object. Useful -when using pipe operators. -} -\examples{ -x <- as_draws(matrix(rnorm(100), ncol = 2)) -variables(x) - -x <- set_variables(x, c("theta[1]", "theta[2]")) -variables(x) - -# this is equivalent to -variables(x) <- c("theta[1]", "theta[2]") -variables(x) - -} -\seealso{ -\code{\link{variables}} -} diff --git a/man/subset_draws.Rd b/man/subset_draws.Rd index c5698e08..7d040bcf 100644 --- a/man/subset_draws.Rd +++ b/man/subset_draws.Rd @@ -105,8 +105,9 @@ unique chains, iterations, and draws are selected regardless of how often they appear in the respective selecting arguments.} \item{exclude}{(logical) Should the selected subset be excluded? -If \code{FALSE} (the default) the selection will be returned. If -\code{TRUE} all but the selected subset will be returned.} +If \code{FALSE} (the default) only the selected subset will be +returned. If \code{TRUE} everything but the selected subset will be +returned.} } \value{ A \code{draws} object of the same class as \code{x}. diff --git a/man/variables-set.Rd b/man/variables-set.Rd new file mode 100644 index 00000000..036707fd --- /dev/null +++ b/man/variables-set.Rd @@ -0,0 +1,70 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/variables.R +\name{variables<-} +\alias{variables<-} +\alias{variables<-.draws_matrix} +\alias{variables<-.draws_array} +\alias{variables<-.draws_df} +\alias{variables<-.draws_list} +\alias{variables<-.draws_rvars} +\alias{set_variables} +\title{Set variable names in \code{draws} objects} +\usage{ +variables(x, ...) <- value + +\method{variables}{draws_matrix}(x, with_indices = TRUE, ...) <- value + +\method{variables}{draws_array}(x, with_indices = TRUE, ...) <- value + +\method{variables}{draws_df}(x, with_indices = TRUE, ...) <- value + +\method{variables}{draws_list}(x, with_indices = TRUE, ...) <- value + +\method{variables}{draws_rvars}(x, with_indices = FALSE, ...) <- value + +set_variables(x, variables, ...) +} +\arguments{ +\item{x}{(draws) A \code{draws} object or another \R object for which the method +is defined.} + +\item{...}{Arguments passed to individual methods (if applicable).} + +\item{value, variables}{(character vector) new variable names.} + +\item{with_indices}{(logical) Should indices be included in variable +names? For example, if the object includes variables named \code{"x[1]"} and +\code{"x[2]"}, if \code{TRUE}, \code{c("x[1]", "x[2]")} is returned; if \code{FALSE}, only \code{"x"} +is returned. Defaults to \code{TRUE} for all formats except \code{\link[=draws_rvars]{draws_rvars()}}.} +} +\value{ +Returns a \code{\link{draws}} object of the same format as \code{x}, with +variables named as specified. +} +\description{ +Set variable names for all variables in a \code{\link{draws}} object. The +\code{set_variables()} form is useful when using pipe operators. +} +\details{ +\code{variables(x) <- value} allows you to modify the vector of variable names, +similar to how \code{names(x) <- value} works for vectors and lists. For renaming +specific variables, \code{set_variables(x, value)} works equivalently, but is more intuitive +when using the pipe operator. + +For renaming specific variables, \code{\link[=rename_variables]{rename_variables()}} may offer a more +convenient approach. +} +\examples{ +x <- example_draws() + +variables(x) +nvariables(x) +variables(x) <- letters[1:nvariables(x)] + +# or equivalently... +x <- set_variables(x, letters[1:nvariables(x)]) + +} +\seealso{ +\code{\link{variables}}, \code{\link{rename_variables}}, \code{\link{draws-index}} +} diff --git a/man/variables.Rd b/man/variables.Rd new file mode 100644 index 00000000..fb408ddb --- /dev/null +++ b/man/variables.Rd @@ -0,0 +1,63 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/variables.R +\name{variables} +\alias{variables} +\alias{variables.draws_matrix} +\alias{variables.draws_array} +\alias{variables.draws_df} +\alias{variables.draws_list} +\alias{variables.draws_rvars} +\alias{nvariables} +\title{Get variable names from \code{draws} objects} +\usage{ +variables(x, ...) + +\method{variables}{draws_matrix}(x, reserved = FALSE, with_indices = TRUE, ...) + +\method{variables}{draws_array}(x, reserved = FALSE, with_indices = TRUE, ...) + +\method{variables}{draws_df}(x, reserved = FALSE, with_indices = TRUE, ...) + +\method{variables}{draws_list}(x, reserved = FALSE, with_indices = TRUE, ...) + +\method{variables}{draws_rvars}(x, reserved = FALSE, with_indices = FALSE, ...) + +nvariables(x, ...) +} +\arguments{ +\item{x}{(draws) A \code{draws} object or another \R object for which the method +is defined.} + +\item{...}{Arguments passed to individual methods (if applicable).} + +\item{reserved}{(logical) Should reserved variables be included in the +output? Defaults to \code{FALSE}. See \code{\link{reserved_variables}} for an overview of +currently reserved variable names.} + +\item{with_indices}{(logical) Should indices be included in variable +names? For example, if the object includes variables named \code{"x[1]"} and +\code{"x[2]"}, if \code{TRUE}, \code{c("x[1]", "x[2]")} is returned; if \code{FALSE}, only \code{"x"} +is returned. Defaults to \code{TRUE} for all formats except \code{\link[=draws_rvars]{draws_rvars()}}.} +} +\value{ +For \code{variables()}, a character vector. + +For \code{nvariables()}, a scalar integer. +} +\description{ +Get variable names from \code{\link{draws}} objects. +} +\details{ +\code{variables()} returns a vector of all variable names, and \code{nvariables()} +returns the number of variables. +} +\examples{ +x <- example_draws() + +variables(x) +nvariables(x) +variables(x) <- letters[1:nvariables(x)] +} +\seealso{ +\code{\link{variables<-}}, \code{\link{rename_variables}}, \code{\link{draws-index}} +} diff --git a/tests/testthat/test-as_draws.R b/tests/testthat/test-as_draws.R index c016351d..f33720ae 100644 --- a/tests/testthat/test-as_draws.R +++ b/tests/testthat/test-as_draws.R @@ -409,7 +409,8 @@ test_that("0-length rvars can be cast to draws formats", { expect_equal(ndraws(draws_rvars), 10) expect_equal(niterations(draws_rvars), 5) expect_equal(nchains(draws_rvars), 2) - expect_equal(nvariables(draws_rvars), 1) + expect_equal(nvariables(draws_rvars, with_indices = FALSE), 1) + expect_equal(nvariables(draws_rvars, with_indices = TRUE), 0) draws_matrix <- as_draws_matrix(rvar0) expect_equal(ndraws(draws_matrix), 10) diff --git a/tests/testthat/test-extract_variable.R b/tests/testthat/test-extract_variable.R index 28958edc..3e22a898 100644 --- a/tests/testthat/test-extract_variable.R +++ b/tests/testthat/test-extract_variable.R @@ -30,6 +30,14 @@ test_that("extract_variable works for draws_rvars on an indexed variable", { expect_error(extract_variable(draws_rvars, "theta"), "Cannot extract non-scalar value") }) +test_that("extract_variable works for factor types", { + draws_rvars <- draws_rvars(x = rvar_factor(letters, nchains = 2)) + + expect_equal(extract_variable(draws_rvars, "x"), factor(letters)) + expect_equal(extract_variable(as_draws_df(draws_rvars), "x"), factor(letters)) + expect_equal(extract_variable(as_draws_list(draws_rvars), "x"), factor(letters)) +}) + test_that("extract_variable default method works", { # it should convert matrix to draws object x <- matrix(1:20, nrow = 10, ncol = 2) diff --git a/tests/testthat/test-extract_variable_array.R b/tests/testthat/test-extract_variable_array.R new file mode 100644 index 00000000..83d69132 --- /dev/null +++ b/tests/testthat/test-extract_variable_array.R @@ -0,0 +1,34 @@ +test_that("extract_variable_array works the same for different formats", { + draws <- list( + array = as_draws_array(example_draws()), + df = as_draws_df(example_draws()), + list = as_draws_list(example_draws()), + matrix = as_draws_matrix(example_draws()), + rvars = as_draws_rvars(example_draws()) + ) + + mu = draws_of(draws$rvars$mu, with_chains = TRUE) + theta1 = draws_of(draws$rvars$theta[1], with_chains = TRUE) + theta = draws_of(draws$rvars$theta, with_chains = TRUE) + + for (type in names(draws)) { + expect_equal(extract_variable_array(draws[[!!type]], "mu"), mu) + expect_equal(extract_variable_array(draws[[!!type]], "theta[1]"), theta1) + expect_equal(extract_variable_array(draws[[!!type]], "theta"), theta) + } + + # rvars are converted to draws on the way in, thus the variable name to + # use to extract the array is the generic "x" + expect_equal(extract_variable_array(draws$rvars$mu, "x"), mu) +}) + +test_that("extract_variable_array works for factor types", { + draws_rvars <- draws_rvars(y = rvar(1:26, nchains = 2), x = rvar_factor(letters, nchains = 2)) + x_array <- array(1:26, dim = c(13, 2, 1), dimnames = list(NULL)) + levels(x_array) <- letters + class(x_array) <- "factor" + + expect_equal(extract_variable_array(draws_rvars, "x"), x_array) + expect_equal(extract_variable_array(as_draws_df(draws_rvars), "x"), x_array) + expect_equal(extract_variable_array(as_draws_list(draws_rvars), "x"), x_array) +}) diff --git a/tests/testthat/test-extract_variable_matrix.R b/tests/testthat/test-extract_variable_matrix.R index aa4ae70b..be7f6bbe 100644 --- a/tests/testthat/test-extract_variable_matrix.R +++ b/tests/testthat/test-extract_variable_matrix.R @@ -30,6 +30,17 @@ test_that("extract_variable_matrix works for draws_rvars on an indexed variable" expect_error(extract_variable_matrix(draws_rvars, "theta"), "Cannot extract non-scalar value") }) +test_that("extract_variable_matrix works for factor types", { + draws_rvars <- draws_rvars(y = rvar(1:26, nchains = 2), x = rvar_factor(letters, nchains = 2)) + x_array <- matrix(1:26, ncol = 2, dimnames = list(iteration = 1:13, chain = 1:2)) + levels(x_array) <- letters + class(x_array) <- "factor" + + expect_equal(extract_variable_matrix(draws_rvars, "x"), x_array) + expect_equal(extract_variable_matrix(as_draws_df(draws_rvars), "x"), x_array) + expect_equal(extract_variable_matrix(as_draws_list(draws_rvars), "x"), x_array) +}) + test_that("extract_variable_matrix default method works", { # it should convert matrix to draws object x <- matrix(1:20, nrow = 10, ncol = 2) diff --git a/tests/testthat/test-remove_variables.R b/tests/testthat/test-remove_variables.R index f4ba3075..63ca321e 100644 --- a/tests/testthat/test-remove_variables.R +++ b/tests/testthat/test-remove_variables.R @@ -30,5 +30,5 @@ test_that("remove_variables works correctly for draws_rvars objects", { x <- as_draws_rvars(example_draws()) expect_equal(posterior:::remove_variables(x, NULL), x) x <- posterior:::remove_variables(x, c("mu", "tau")) - expect_equal(variables(x), "theta") + expect_equal(variables(x, with_indices = FALSE), "theta") }) diff --git a/tests/testthat/test-rstar.R b/tests/testthat/test-rstar.R index 6625b7a2..65a47517 100644 --- a/tests/testthat/test-rstar.R +++ b/tests/testthat/test-rstar.R @@ -89,6 +89,7 @@ test_that("rstar accepts different hyperparameters", { test_that("rstar accepts different training proportion", { skip_if_not_installed("caret") x <- example_draws() + set.seed(12345) val1 <- rstar(x, method = "knn") val2 <- rstar(x, method = "knn", training_proportion = 0.1) expect_true(val1 > val2) diff --git a/tests/testthat/test-rvar-cast.R b/tests/testthat/test-rvar-cast.R index f411c56a..384cb05e 100755 --- a/tests/testthat/test-rvar-cast.R +++ b/tests/testthat/test-rvar-cast.R @@ -375,6 +375,29 @@ test_that("proxy restore works", { ) }) +test_that("proxy restore works when combining factors with non-factors", { + ref_1a = as_rvar_factor(c("1", "a"), levels = c("1", "a")) + ref_a1 = as_rvar_factor(c("a", "1"), levels = c("a", "1")) + + expect_equal( + vec_restore(c(vec_proxy(rvar(1)), vec_proxy(rvar_factor("a"))), to = rvar()), + ref_1a + ) + expect_equal( + # even if the second element is ordered, because new levels are added by the + # first element, we expect order to be dropped and the result to be unordered + vec_restore(c(vec_proxy(rvar(1)), vec_proxy(rvar_ordered("a"))), to = rvar()), + ref_1a + ) + expect_equal( + vec_restore(c(vec_proxy(rvar_factor("a")), vec_proxy(rvar(1))), to = rvar()), + ref_a1 + ) + expect_equal( + vec_restore(c(vec_proxy(rvar_ordered("a")), vec_proxy(rvar(1))), to = rvar()), + ref_a1 + ) +}) # vctrs comparison proxies --------------------------------------------------- diff --git a/tests/testthat/test-subset_draws.R b/tests/testthat/test-subset_draws.R index bae8bc2f..66dfcfc2 100644 --- a/tests/testthat/test-subset_draws.R +++ b/tests/testthat/test-subset_draws.R @@ -191,6 +191,10 @@ test_that("variables can be subsetted via non-scalar selection", { x <- as_draws_df(example_draws()) x_sub <- subset_draws(x, variable = "theta") expect_equal(variables(x_sub), c(paste0("theta[", 1:8, "]"))) + + # can do scalar and non-scalar in the same selection + x_sub <- subset_draws(x, variable = c("mu", "theta")) + expect_equal(variables(x_sub), c("mu", paste0("theta[", 1:8, "]"))) }) test_that("subset_draws speed is tolerable with many variables", { diff --git a/tests/testthat/test-variables.R b/tests/testthat/test-variables.R index e773d49f..6d96e7bf 100755 --- a/tests/testthat/test-variables.R +++ b/tests/testthat/test-variables.R @@ -10,6 +10,7 @@ test_that('duplicate variable names are not allowed', { test_that("variables() work with NULL", { expect_equal(variables(NULL), NULL) + expect_equal(nvariables(NULL), 0) }) test_that("variables() and variables<-() work on draws_matrix", { @@ -109,3 +110,54 @@ test_that("cannot set duplicate variable names", { expect_error(set_variables(x, c("a", "a")), "Duplicate variable names are not allowed") }) +test_that("with_indices works", { + x <- example_draws() + draws <- list( + array = as_draws_array(x), + df = as_draws_df(x), + list = as_draws_list(x), + matrix = as_draws_matrix(x), + rvars = as_draws_rvars(x) + ) + + mu_tau_theta = c( + "mu", "tau", "theta[1]", "theta[2]", "theta[3]", "theta[4]", + "theta[5]", "theta[6]", "theta[7]", "theta[8]" + ) + a_b_c = c("a", "b", "c[1]", "c[2]", "c[3]", "c[4]", "c[5]", "c[6]", "c[7]", "c[8]") + for (type in names(draws)) { + expect_equal(variables(draws[[!!type]], with_indices = TRUE), mu_tau_theta) + expect_equal(variables(draws[[!!type]], with_indices = FALSE), c("mu", "tau", "theta")) + expect_equal(nvariables(draws[[!!type]], with_indices = TRUE), 10) + expect_equal(nvariables(draws[[!!type]], with_indices = FALSE), 3) + + expect_equal( + variables(set_variables(draws[[!!type]], a_b_c, with_indices = TRUE), with_indices = FALSE), + c("a", "b", "c") + ) + expect_equal( + variables(set_variables(draws[[!!type]], c("a","b","c"), with_indices = FALSE), with_indices = TRUE), + a_b_c + ) + + expect_error( + set_variables(draws[[!!type]], c("a","c","c"), with_indices = FALSE), + "[Dd]uplicate" + ) + } + + for (type in head(names(draws), -1)) { + expect_error( + set_variables(draws[[!!type]], c("a","b"), with_indices = FALSE), + "base name.*[Ll]engths must match" + ) + } + + expect_error( + set_variables( + draws$rvars, + c("a", "b", "XX[1]", "c[2]", "c[3]", "c[4]", "c[5]", "c[6]", "c[7]", "c[8]"), + with_indices = TRUE + ) + ) +}) diff --git a/touchstone/script.R b/touchstone/script.R index 53d9b6e6..39aae730 100644 --- a/touchstone/script.R +++ b/touchstone/script.R @@ -38,7 +38,7 @@ for (dest_type in draws_types) { as_draws_dest(x) } }, - n = 10 + n = 50 ) } @@ -54,7 +54,7 @@ for (n_variables in c(10, 100)) { "summarise_draws_{n_variables}_variables" := { posterior::summarise_draws(x) }, - n = 10 + n = 50 ) }