diff --git a/DESCRIPTION b/DESCRIPTION index 05d966e8cd..01a35b04ed 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -179,3 +179,4 @@ Collate: 'utils_grid.R' 'utils_rtables.R' 'utils_split_funs.R' + 'xutils_custom_stats_formats_varying_dp.R' diff --git a/R/analyze_variables.R b/R/analyze_variables.R index 392a253f18..051709e4a9 100644 --- a/R/analyze_variables.R +++ b/R/analyze_variables.R @@ -463,7 +463,53 @@ s_summary.logical <- function(x, #' #' a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla") #' a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .var = "bla", compare = TRUE) +#' a_summary( +#' rnorm(10, 5, 1), +#' .ref_group = rnorm(20, -5, 1), .var = "bla", compare = TRUE, +#' .stats = "mean", +#' fmts_df_var = "variant1", +#' d = 2 +#' ) #' +#' x1 <- rnorm(10, 5, 1) +#' xref <- rnorm(20, -5, 1) +#' +#' a_summary( +#' x1, +#' .ref_group = xref, .var = "bla", compare = TRUE, +#' .stats = c("mean", "sd"), +#' .formats = c("mean" = format_xx("xx.xxx"), "sd" = format_xx("xx.x")) +#' ) +#' a_summary( +#' x1, +#' .ref_group = xref, .var = "bla", compare = TRUE, +#' .stats = "mean_sd", +#' fmt_specs = list( +#' fmts_df_var = "variant2", +#' d = 1, +#' formatting_function = "format_xx" +#' ) +#' ) +#' a_summary( +#' x1, +#' .ref_group = xref, .var = "bla", compare = TRUE, +#' .stats = c("mean", "mean_sd", "mean_pval") +#' ) +#' +#' our_fmt_specs_variant <- list( +#' fmts_df = tern_formats_custom_df(), +#' fmts_df_var = "default", +#' formatting_function = "format_xx_fixed_dp", +#' d = 0 +#' ) + +#' a_summary( +#' x1, .ref_group = xref, .var = "bla", compare = TRUE, +#' .stats = c("mean", "mean_sd", "mean_pval"), +#' .formats = c("mean_sd" = "xx.d (xx.dxxxx)"), +#' fmt_specs = our_fmt_specs_variant +#' ) + #' @export a_summary <- function(x, .N_col, # nolint @@ -479,6 +525,7 @@ a_summary <- function(x, .indent_mods = NULL, na.rm = TRUE, # nolint na_str = default_na_str(), + fmt_specs = default_fmt_specs, ...) { extra_args <- list(...) if (is.numeric(x)) { @@ -507,7 +554,28 @@ a_summary <- function(x, # Fill in with formatting defaults if needed met_grp <- paste0(c("analyze_vars", type), collapse = "_") .stats <- get_stats(met_grp, stats_in = .stats, add_pval = compare) - .formats <- get_formats_from_stats(.stats, .formats) + + if (is.null(fmt_specs$fmts_df)) { + .formats <- get_formats_from_stats(.stats, .formats) + } else { + d_actual <- derive_d_from_fmt_specs(fmt_specs, .df_row) + + # update the spec with the actual derived d + fmt_specs$d <- d_actual + + # core function that does the conversion of the xx.d based formats to the actual format + # note that is it most safe to apply formatting functions, as many of the final formats will not belong to + # list_valid_format_labels() + .formats_all <- get_formats_from_stats_custom( + .stats, + formats_in = .formats, + ### variant specific arguments + fmts_specs = fmt_specs + ) + .formats <- .formats_all$fmt + .formats_char <- .formats_all$fmt_char + } + .indent_mods <- get_indents_from_stats(.stats, .indent_mods) lbls <- get_labels_from_stats(.stats, .labels) @@ -615,6 +683,59 @@ a_summary <- function(x, #' ) %>% #' build_table(dt) #' +#' # custom format +#' our_fmt_specs_variant <- list( +#' fmts_df = tern_formats_custom_df(), +#' fmts_df_var = "variant2", +#' formatting_function = "format_xx_fixed_dp", +#' d = 0 +#' ) +#' +#' dt <- data.frame("VAR" = c(0.001, 0.2, 0.0011000, 3, 4)) +#' basic_table() %>% +#' analyze_vars( +#' vars = "VAR", +#' .stats = c("n", "mean", "mean_sd", "range"), +#' .formats = c("mean" = "xx.dxx"), +#' fmt_specs = our_fmt_specs_variant, +#' ) %>% +#' build_table(dt) +#' +#' # custom format +#' our_fmt_specs_variant2 <- list( +#' fmts_df = tern_formats_custom_df(), +#' fmts_df_var = "variant2", +#' formatting_function = "format_xx_fixed_dp", +#' d = "decimal" +#' ) +#' dt <- data.frame("VAR" = c(0.001, 0.2, 0.0011000, 3, 4), decimal = 2) +#' basic_table() %>% +#' analyze_vars( +#' vars = "VAR", +#' .stats = c("n", "mean", "mean_sd", "range"), +#' .formats = c("mean" = "xx.dxxxxxx"), +#' fmt_specs = our_fmt_specs_variant2, +#' ) %>% +#' build_table(dt) +#' +#' # custom format +#' dt2 <- data.frame("VAR" = c(0.001, 0.2, 0.0011000, 3, 4, 0.002, 0.004, 0.006), decimal = c(rep(2, 4), rep(1, 4)), by = c(rep("by1", 4), rep("by2", 4))) +#' our_fmt_specs_variant2 <- list( +#' fmts_df = tern_formats_custom_df(), +#' fmts_df_var = "variant2", +#' formatting_function = "format_xx_fixed_dp", +#' d = "decimal", +#' d_cap = 3 +#' ) +#' basic_table() %>% +#' split_rows_by("by") %>% +#' analyze_vars( +#' vars = "VAR", +#' .stats = c("n", "mean", "mean_sd", "range"), +#' fmt_specs = our_fmt_specs_variant2 +#' ) %>% +#' build_table(dt2) +#' #' @export #' @order 2 analyze_vars <- function(lyt, @@ -630,8 +751,15 @@ analyze_vars <- function(lyt, .stats = c("n", "mean_sd", "median", "range", "count_fraction"), .formats = NULL, .labels = NULL, - .indent_mods = NULL) { - extra_args <- list(.stats = .stats, na.rm = na.rm, na_str = na_str, ...) + .indent_mods = NULL, + # varying precision arguments + fmt_specs = default_fmt_specs) { + extra_args <- list( + .stats = .stats, na.rm = na.rm, na_str = na_str, + fmt_specs = fmt_specs, + ... + ) + if (!is.null(.formats)) extra_args[[".formats"]] <- .formats if (!is.null(.labels)) extra_args[[".labels"]] <- .labels if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods diff --git a/R/formatting_functions.R b/R/formatting_functions.R index f18409810c..b90ed54db1 100644 --- a/R/formatting_functions.R +++ b/R/formatting_functions.R @@ -242,6 +242,90 @@ format_xx <- function(str) { return(rtable_format) } +#' Alternative format XX function :a formatting function with fixed decimal precision +#' +#' Translate a string where x and dots are interpreted as number place +#' holders, and others as formatting elements. +#' +#' @param str (`string`)\cr template. +#' +#' @return An `rtables` formatting function. +#' +#' @examples +#' test <- list(c(1.658, 0.5761), c(1e1, 785.6)) +#' +#' z <- format_xx_fixed_dp("xx (xx.x)") +#' sapply(test, z) +#' +#' z <- format_xx_fixed_dp("xx.x - xx.x") +#' sapply(test, z) +#' +#' z <- format_xx_fixed_dp("xx.x, incl. xx.x% NE") +#' sapply(test, z) +#' @seealso [format_xx] +#' @export +format_xx_fixed_dp <- function(str, na_str) { + # Find position in the string. + if (grepl(pattern = "xxx.", x = str, fixed = TRUE)) { + stop("Error: format_xx_fixed_dp do not use xxx. in input str, replace by xx. instead") + } + if (!grepl(pattern = "xx", x = str, fixed = TRUE)) { + stop("Error: format_xx_fixed_dp: input str should contain xx") + } + positions <- gregexpr( + pattern = "xx\\.?x*", text = str, + perl = TRUE + ) + x_positions <- regmatches(x = str, m = positions)[[1]] + ### str is splitted into pieces as xx. xx xx.xxx + ### xx is no rounding + ### xx. rounding to integer (is treated same as rounding to 0 decimal) + ### xx.x rounding to 1 decimal, etc + + no_round <- function(x) { + if (is.na(x)) { + return(na_str) + } else { + return(x) + } + } + roundfunc <- round + + # Roundings depends on the number of x behind [.]. + roundings <- lapply( + X = x_positions, + function(x) { + if (x == "xx") { + rounding <- no_round + } else { + y <- strsplit(split = "\\.", x = x)[[1]] + digits <- ifelse(length(y) > 1, nchar(y[2]), 0) + + rounding <- function(x) { + if (is.na(x)) { + return(na_str) + } else { + format(roundfunc(x, digits = digits), + nsmall = digits + ) + } + } + } + + return(rounding) + } + ) + + rtable_format <- function(x, output) { + values <- Map(y = x, fun = roundings, function(y, fun) fun(y)) + regmatches(x = str, m = positions)[[1]] <- values + return(str) + } + + return(rtable_format) +} + + #' Format numeric values by significant figures #' #' Format numeric values to print with a specified number of significant figures. diff --git a/R/xutils_custom_stats_formats_varying_dp.R b/R/xutils_custom_stats_formats_varying_dp.R new file mode 100644 index 0000000000..15e3e5b6f2 --- /dev/null +++ b/R/xutils_custom_stats_formats_varying_dp.R @@ -0,0 +1,396 @@ +#' Get statistical methods and formats for custom variants +#' +#' @description `r lifecycle::badge("experimental")` +#' +#' +#' @param stats (`character`)\cr statistical methods to get defaults for. +#' +#' @details +#' Currently only available for usage within `a_summary`, `analyze_vars`, and hope to extend to at least `a_ancova` and `summarize_ancova`. +#' Question to Roche: is there an intention to refactor a_ancova and summarize_ancova to not use make_afun, but same approach of in_rows as in a_summary? +#' +#' @note +#' These defaults are experimental because we use the names of functions to retrieve the default +#' statistics. This should be generalized in groups of methods according to more reasonable groupings. +#' +#' @name custom_stats_formats +#' @include utils_default_stats_formats_labels.R +#' @include formatting_functions.R +#' @order 1 +NULL + +#' @describeIn custom_stats_formats Get formats corresponding to a list of statistics. +#' +#' @param formats_in (named `vector`)\cr inserted formats to replace defaults. It can be a +#' character vector from [formatters::list_valid_format_labels()] or a custom format function. +#' +#' @param fmts_specs (named `list`) with specifications ( +#' TO EXPAND) +#' +#' @return +#' * `get_formats_from_stats_custom()` returns a 3 component list. The primary one (named .fmt) is a named vector of formats (if present in either +#' `fmts_df` or `formats_in`, otherwise `NULL`). Values can be taken from +#' [formatters::list_valid_format_labels()] or a custom function (e.g. [formatting_functions]). +#' .fmt_char is the character representation. +#' .fmt_fun is the name of the formatting function applied to the character representation +#' +#' @note Formats in `tern` and `rtables` can be functions that take in the table cell value and +#' return a string. This is well documented in `vignette("custom_appearance", package = "rtables")`. +#' +#' @family formatting functions +#' @seealso [default_stats_formats_labels] +#' +#' @export +#' +#' @examples +#' # Defaults formats +#' get_formats_from_stats_custom( +#' stats = c("mean", "sd"), +#' fmts_df = tern_formats_custom_df(), +#' fmts_df_var = "variant1", +#' d = 1) +#' +#' get_formats_from_stats_custom(stats = c("mean", "sd"))$fmt_char +#' +#' get_formats_from_stats(stats = c("mean", "sd")) +#' +#' get_formats_from_stats_custom( +#' stats = c("mean", "sd"), +#' fmts_df = tern_formats_custom_df(), +#' fmts_df_var = "variant2", +#' d = 2, +#' formatting_function = format_xx)$fmt_char +#' +#' get_formats_from_stats_custom( +#' stats = c("mean", "sd"), +#' fmts_df = tern_formats_custom_df(), +#' fmts_df_var = "variant2", +#' d = 2, +#' formatting_function = format_xx)$fmt_char +#' +#' +#' # Addition of customs including xx.d style notation +#' get_formats_from_stats_custom( +#' stats = c("mean", "sd"), +#' formats_in = c("mean" = "xx.dxxxx"), +#' fmts_df = tern_formats_custom_df(), +#' fmts_df_var = "variant2", +#' d = 0)$fmt +#' +#' get_formats_from_stats_custom( +#' stats = c("mean_pval", "mean", "sd" ), +#' formats_in = c("mean" = "xx.dxxxx"), +#' fmts_specs = list( +#' fmts_df = tern_formats_custom_df(), +#' fmts_df_var = "variant2", +#' d = 0, +#' formatting_function = format_xx_fixed_dp))$fmt +#' +#' get_formats_from_stats_custom( +#' stats = c("mean_pval", "mean", "sd" ), +#' formats_in = c("mean" = "xx.xxxx"), +#' fmts_specs = list( +#' fmts_df = tern_formats_custom_df(), +#' fmts_df_var = "default" +#' ))$fmt + +#' get_formats_from_stats_custom( +#' stats = c("mean_pval", "mean", "sd" ), +#' formats_in = c("mean" = "xx.xxxx"), +#' fmts_specs = list( +#' fmts_df = tern_formats_custom_df(), +#' fmts_df_var = "default" +#' ))$fmt_fun +#' +#' get_formats_from_stats_custom( +#' stats = c("mean_pval", "mean", "sd" ), +#' formats_in = c("mean" = "xx.xxxx"), +#' fmts_specs = list( +#' fmts_df = tern_formats_custom_df(), +#' fmts_df_var = "default" +#' ))$fmt_fun +#' +#' # example using analyze_vars on continuous data +#' dt2 <- data.frame("VAR" = c(0.001, 0.2, 0.0011000, 3, 4, 0.002, 0.004, 0.006), decimal = c(rep(2, 4), rep(1, 4)), by = c(rep("by1", 4), rep("by2", 4))) +#' our_fmt_specs_variant2 <- list( +#' fmts_df = tern_formats_custom_df(), +#' fmts_df_var = "variant2", +#' formatting_function = "format_xx_fixed_dp", +#' d = "decimal", +#' d_cap = 0) +#' basic_table() %>% +#' split_rows_by("by") %>% +#' analyze_vars( +#' vars = "VAR", +#' .stats = c("n", "mean", "mean_sd", "range"), +#' fmt_specs = our_fmt_specs_variant2 +#' ) %>% +#' build_table(dt2) +#' +get_formats_from_stats_custom <- function(stats, + formats_in = NULL, + fmts_specs = list( + fmts_df = tern_formats_custom_df(), + fmts_df_var = "default", + d = 0) + ) + { + + # It may be a list if there is a function in the formats + if (checkmate::test_list(formats_in, null.ok = TRUE)) { + checkmate::assert_list(formats_in, null.ok = TRUE) + # Or it may be a vector of characters + } else { + checkmate::assert_character(formats_in, null.ok = TRUE) + } + + checkmate::assert_list(fmts_specs) + + checkmate::assert_subset(names(fmts_specs), c("fmts_df", "fmts_df_var", "d", "d_cap", "formatting_function", "formatting_function_exclude")) + + list2env(fmts_specs, environment()) + reqvars <- c(fmts_df_var, "stat") + reqvars <- list(reqvars) + names(reqvars) <- reqvars + assert_df_with_variables(fmts_df, variables = reqvars) + + ls <- ls(environment()) + if (!("formatting_function_exclude" %in% ls)) {formatting_function_exclude <- NULL} + if (!("formatting_function" %in% ls)) {formatting_function <- NULL} + if ("fmts_df_var" %in% ls && fmts_df_var != "default" &&!("fmts_df" %in% ls)) { + stop("fmts_df should be added to fmts_specs")} + if (!("fmts_df" %in% ls)) { fmts_df <- NULL} + if (!("d" %in% ls)) { d <- NULL} + + if (is.null(stats)){ + stats <- fmts_df$stat + } else { + checkmate::assert_character(stats, min.len = 1) + } + + if (!is.null(formatting_function)){ + checkmate::assert_character(formatting_function) + a_formatting_function <- get(formatting_function) + checkmate::assert_function(a_formatting_function) + } + + checkmate::assert_character(formatting_function_exclude, null.ok = TRUE) + + + # Extract defaults from the fmts_df dataframe + which_fmt <- match(stats, fmts_df$stat) + + + + def_formats <- get_formats_from_df(fmts_df = fmts_df, + fmts_df_var = fmts_df_var, + stats = stats) + + # Select only needed formats from stats + ret <- vector("list", length = length(stats)) # Returning a list is simpler + ret[!is.na(which_fmt)] <- def_formats[!is.na(which_fmt)] + + out <- setNames(ret, stats) + + # Modify some with custom formats + if (!is.null(formats_in)) { + # Stats is the main + common_names <- intersect(names(out), names(formats_in)) + out[common_names] <- formats_in[common_names] + } + # convert the xx.d notation into full xx.x notation + out <- xxd_to_xx(out, d) + + out_char <- out + + if (!is.null(formatting_function)){ + + # split format vector into ones that should not utilize formatting function and ones that do need + non_fmt_function_stats <- c("n", "mean_pval", "pval", formatting_function_exclude) + out1 <- out[names(out) %in% non_fmt_function_stats] + out2 <- out[!(names(out) %in% non_fmt_function_stats)] + out2 <- sapply(out2, function(x) { + a_formatting_function(x) + }) + out <- c(out1, out2) + + # reset the formats in the proper order + out <- out[stats] + + } + + return(list(fmt = out, + fmt_char = out_char, + fmt_fun = formatting_function)) +} + + + +#' @describeIn custom_stats_formats Approach for custom formats using d-style formats where d is a parameter for the decimal precision. +#' The construction of the dataframe can be customized and used as input dataframe in further processing (eg afun = `a_summary`) +#' +#' @return +#' * The result of `tern_formats_custom_df` is a dataframe of available default formats, with each element +#' named for their corresponding statistic. +#' xx.d will be translated into xx. for d=0, xx.x for d=1, and xx.xx for d=2 +#' xx.dx will be translated into xx.x for d=0, xx.xx for d=1, and xx.xxx for d=2 +#' +#' @include utils_default_stats_formats_labels.R +#' +#' @export +#' +#' @examples +#' +#' our_custom_fmts <- tern_formats_custom_df() +#' +#' +tern_formats_custom_df <- function(){ + + start_stats <- get_stats(method_groups = c("analyze_vars_numeric", "summarize_ancova")) + + start_fmts <- get_formats_from_stats(start_stats) + + start_fmts <- sapply(start_fmts, function(x){ + if (is.null(x)){ + NA_character_ + } else x + }) + + start_fmts[c("lsmean", "lsmean_diff")] <- "xx.xx" + start_fmts[c("lsmean_diff_ci")] <- "(xx.xx, xx.xx)" + + fmts_df <- data.frame( stat = start_stats, default = start_fmts) + + fmts_df$variant1 <- fmts_df$default + + selstats <- !(start_stats %in% c("n", "pval", "mean_pval")) + fmts_df$variant1[selstats] <- gsub("xx.x", "xx.dx", fmts_df$variant1[selstats], fixed = TRUE) + + + + + fmts_df$variant2 <- fmts_df$variant1 + + ## modify precision for some + fmts_df$variant2[start_stats %in% c("sd", "se")] <- "xx.dxx" + + fmts_df$variant2[start_stats %in% c("mean_sd", "mean_se")] <- "xx.dx (xx.dxx)" + + return(fmts_df) +} + + +xxd_to_xx <- function(str, d = 0){ + + checkmate::assert_integerish(d, null.ok = TRUE) + if (checkmate::test_list(str, null.ok = FALSE)) { + checkmate::assert_list(str, null.ok = FALSE) + # Or it may be a vector of characters + } else { + checkmate::assert_character(str, null.ok = FALSE) + } + + nmstr <- names(str) + + if (any(grepl("xx.d", str, fixed = TRUE))){ + checkmate::assert_integerish(d) + str <- + gsub("xx.d", paste0("xx.", paste0(rep("x", times = d), collapse = "")), str, fixed = TRUE) + + } + str <- setNames(str, nmstr) + return(str) +} + + +#' @describeIn custom_stats_formats Approach for custom formats using d-style formats where d is a parameter for the decimal precision. +#' @param fmts_df dataframe Any dataframe can be used. Expect to have the following columns: `stat`, `default`, and any other column with custom formatting definitions. +#' @param fmts_df_var Column name from the `fmts_df` that should be used for the format definitions. +#' @param stats Character vector with the names of the stats to define the custom format. +#' @examples +#' # Defaults formats +#' +#' get_formats_from_df( +#' stats = c("mean", "sd"), +#' fmts_df = tern_formats_custom_df(), +#' fmts_df_var = "variant1") +#' #' get_formats_from_df( +#' stats = c("mean", "sd"), +#' fmts_df = tern_formats_custom_df(), +#' fmts_df_var = "variant2") +#' +#' get_formats_from_df( +#' fmts_df = tern_formats_custom_df(), +#' fmts_df_var = "variant2") +#' +get_formats_from_df <- function(fmts_df = tern_formats_custom_df(), + fmts_df_var = "default", + stats = NULL){ + + checkmate::assert_data_frame(fmts_df) + + check <- all(c("stat", fmts_df_var) %in% names(fmts_df)) + if (!check) { + stop(paste0("The dataframe fmts_df should have at least variables stat and ", fmts_df_var)) + } + + + if (is.null(stats)) { + fmts <- fmts_df[, fmts_df_var] + names(fmts) <- fmts_df[, "stat"] + } else { + fmts <- fmts_df[fmts_df$stat %in% stats, fmts_df_var] + names(fmts) <- fmts_df[fmts_df$stat %in% stats, "stat"] + } + + if (!is.null(stats)){ + fmts <- fmts[stats] + } + + return(fmts) + +} + + +default_fmt_specs <- list(fmts_df = NULL) + +default_fmt_specs_variant <- list( + fmts_df = tern_formats_custom_df(), + fmts_df_var = "default", + formatting_function = "format_xx_fixed_dp", + d = 0) + + +derive_d_from_fmt_specs <- function(fmt_specs, .df_row){ + ## core code on varying formats + ## d is the requested precision of the incoming data (d = 0 will lead to showing mean at 1 decimal precision) + ## this is the reason to set d <- 0 when not available + if (exists("fmt_specs") && "d" %in% names(fmt_specs)) { + d <- fmt_specs$d + } else { + d <- 0 + } + ## set a cap to it, to avoid when precision of data is too high, too many digits will be included in the format + if (exists("fmt_specs") && "d_cap" %in% names(fmt_specs)) { + d_cap <- fmt_specs$d_cap + } else { + # if not available, set default to 3 (ie still showing 4 digits) + d_cap <- 3 + } + + # when d has been defined as a character string + # we assume this is a variable name available on the input dataset + if (is.character(d)){ + # d is expected to be a variable name on the input dataframe .df_row + check <- d %in% names(.df_row) + if (!check){ + stop(paste0("variable ", d, " is not a variable name on the input dataset")) + } + d <- max(.df_row[[d]]) + } + + # now apply the cap + d <- min(d, d_cap) + + return(d) +} diff --git a/man/analyze_variables.Rd b/man/analyze_variables.Rd index ba567bd1ae..8244d9e951 100644 --- a/man/analyze_variables.Rd +++ b/man/analyze_variables.Rd @@ -25,7 +25,8 @@ analyze_vars( .stats = c("n", "mean_sd", "median", "range", "count_fraction"), .formats = NULL, .labels = NULL, - .indent_mods = NULL + .indent_mods = NULL, + fmt_specs = default_fmt_specs ) s_summary(x, na.rm = TRUE, denom, .N_row, .N_col, .var, ...) @@ -85,6 +86,7 @@ a_summary( .indent_mods = NULL, na.rm = TRUE, na_str = default_na_str(), + fmt_specs = default_fmt_specs, ... ) } @@ -356,6 +358,59 @@ basic_table() \%>\% ) \%>\% build_table(dt) +# custom format +our_fmt_specs_variant <- list( + fmts_df = tern_formats_custom_df(), + fmts_df_var = "variant2", + formatting_function = "format_xx_fixed_dp", + d = 0 +) + +dt <- data.frame("VAR" = c(0.001, 0.2, 0.0011000, 3, 4)) +basic_table() \%>\% + analyze_vars( + vars = "VAR", + .stats = c("n", "mean", "mean_sd", "range"), + .formats = c("mean" = "xx.dxx"), + fmt_specs = our_fmt_specs_variant, + ) \%>\% + build_table(dt) + +# custom format +our_fmt_specs_variant2 <- list( + fmts_df = tern_formats_custom_df(), + fmts_df_var = "variant2", + formatting_function = "format_xx_fixed_dp", + d = "decimal" +) +dt <- data.frame("VAR" = c(0.001, 0.2, 0.0011000, 3, 4), decimal = 2) +basic_table() \%>\% + analyze_vars( + vars = "VAR", + .stats = c("n", "mean", "mean_sd", "range"), + .formats = c("mean" = "xx.dxxxxxx"), + fmt_specs = our_fmt_specs_variant2, + ) \%>\% + build_table(dt) + +# custom format +dt2 <- data.frame("VAR" = c(0.001, 0.2, 0.0011000, 3, 4, 0.002, 0.004, 0.006), decimal = c(rep(2, 4), rep(1, 4)), by = c(rep("by1", 4), rep("by2", 4))) +our_fmt_specs_variant2 <- list( + fmts_df = tern_formats_custom_df(), + fmts_df_var = "variant2", + formatting_function = "format_xx_fixed_dp", + d = "decimal", + d_cap = 3 +) +basic_table() \%>\% + split_rows_by("by") \%>\% + analyze_vars( + vars = "VAR", + .stats = c("n", "mean", "mean_sd", "range"), + fmt_specs = our_fmt_specs_variant2 + ) \%>\% + build_table(dt2) + # `s_summary.numeric` ## Basic usage: empty numeric returns NA-filled items. @@ -450,5 +505,49 @@ a_summary( a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla") a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .var = "bla", compare = TRUE) +a_summary( + rnorm(10, 5, 1), + .ref_group = rnorm(20, -5, 1), .var = "bla", compare = TRUE, + .stats = "mean", + fmts_df_var = "variant1", + d = 2 +) + +x1 <- rnorm(10, 5, 1) +xref <- rnorm(20, -5, 1) + +a_summary( + x1, + .ref_group = xref, .var = "bla", compare = TRUE, + .stats = c("mean", "sd"), + .formats = c("mean" = format_xx("xx.xxx"), "sd" = format_xx("xx.x")) +) +a_summary( + x1, + .ref_group = xref, .var = "bla", compare = TRUE, + .stats = "mean_sd", + fmt_specs = list( + fmts_df_var = "variant2", + d = 1, + formatting_function = "format_xx" + ) +) +a_summary( + x1, + .ref_group = xref, .var = "bla", compare = TRUE, + .stats = c("mean", "mean_sd", "mean_pval") +) +our_fmt_specs_variant <- list( + fmts_df = tern_formats_custom_df(), + fmts_df_var = "default", + formatting_function = "format_xx_fixed_dp", + d = 0 +) +a_summary( + x1, .ref_group = xref, .var = "bla", compare = TRUE, + .stats = c("mean", "mean_sd", "mean_pval"), + .formats = c("mean_sd" = "xx.d (xx.dxxxx)"), + fmt_specs = our_fmt_specs_variant +) } diff --git a/man/custom_stats_formats.Rd b/man/custom_stats_formats.Rd new file mode 100644 index 0000000000..150cfe4b99 --- /dev/null +++ b/man/custom_stats_formats.Rd @@ -0,0 +1,199 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/xutils_custom_stats_formats_varying_dp.R +\name{custom_stats_formats} +\alias{custom_stats_formats} +\alias{get_formats_from_stats_custom} +\alias{tern_formats_custom_df} +\alias{get_formats_from_df} +\title{Get statistical methods and formats for custom variants} +\usage{ +get_formats_from_stats_custom( + stats, + formats_in = NULL, + fmts_specs = list(fmts_df = tern_formats_custom_df(), fmts_df_var = "default", d = 0) +) + +tern_formats_custom_df() + +get_formats_from_df( + fmts_df = tern_formats_custom_df(), + fmts_df_var = "default", + stats = NULL +) +} +\arguments{ +\item{stats}{Character vector with the names of the stats to define the custom format.} + +\item{formats_in}{(named \code{vector})\cr inserted formats to replace defaults. It can be a +character vector from \code{\link[formatters:list_formats]{formatters::list_valid_format_labels()}} or a custom format function.} + +\item{fmts_specs}{(named \code{list}) with specifications ( +TO EXPAND)} + +\item{fmts_df}{dataframe Any dataframe can be used. Expect to have the following columns: \code{stat}, \code{default}, and any other column with custom formatting definitions.} + +\item{fmts_df_var}{Column name from the \code{fmts_df} that should be used for the format definitions.} +} +\value{ +\itemize{ +\item \code{get_formats_from_stats_custom()} returns a 3 component list. The primary one (named .fmt) is a named vector of formats (if present in either +\code{fmts_df} or \code{formats_in}, otherwise \code{NULL}). Values can be taken from +\code{\link[formatters:list_formats]{formatters::list_valid_format_labels()}} or a custom function (e.g. \link{formatting_functions}). +.fmt_char is the character representation. +.fmt_fun is the name of the formatting function applied to the character representation +} + +\itemize{ +\item The result of \code{tern_formats_custom_df} is a dataframe of available default formats, with each element +named for their corresponding statistic. +xx.d will be translated into xx. for d=0, xx.x for d=1, and xx.xx for d=2 +xx.dx will be translated into xx.x for d=0, xx.xx for d=1, and xx.xxx for d=2 +} +} +\description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} +} +\details{ +Currently only available for usage within \code{a_summary}, \code{analyze_vars}, and hope to extend to at least \code{a_ancova} and \code{summarize_ancova}. +Question to Roche: is there an intention to refactor a_ancova and summarize_ancova to not use make_afun, but same approach of in_rows as in a_summary? +} +\section{Functions}{ +\itemize{ +\item \code{get_formats_from_stats_custom()}: Get formats corresponding to a list of statistics. + +\item \code{tern_formats_custom_df()}: Approach for custom formats using d-style formats where d is a parameter for the decimal precision. +The construction of the dataframe can be customized and used as input dataframe in further processing (eg afun = \code{a_summary}) + +\item \code{get_formats_from_df()}: Approach for custom formats using d-style formats where d is a parameter for the decimal precision. + +}} +\note{ +These defaults are experimental because we use the names of functions to retrieve the default +statistics. This should be generalized in groups of methods according to more reasonable groupings. + +Formats in \code{tern} and \code{rtables} can be functions that take in the table cell value and +return a string. This is well documented in \code{vignette("custom_appearance", package = "rtables")}. +} +\examples{ +# Defaults formats +get_formats_from_stats_custom( +stats = c("mean", "sd"), +fmts_df = tern_formats_custom_df(), +fmts_df_var = "variant1", +d = 1) + +get_formats_from_stats_custom(stats = c("mean", "sd"))$fmt_char + +get_formats_from_stats(stats = c("mean", "sd")) + +get_formats_from_stats_custom( +stats = c("mean", "sd"), +fmts_df = tern_formats_custom_df(), +fmts_df_var = "variant2", +d = 2, +formatting_function = format_xx)$fmt_char + +get_formats_from_stats_custom( +stats = c("mean", "sd"), +fmts_df = tern_formats_custom_df(), +fmts_df_var = "variant2", +d = 2, +formatting_function = format_xx)$fmt_char + + +# Addition of customs including xx.d style notation +get_formats_from_stats_custom( +stats = c("mean", "sd"), +formats_in = c("mean" = "xx.dxxxx"), +fmts_df = tern_formats_custom_df(), +fmts_df_var = "variant2", +d = 0)$fmt + +get_formats_from_stats_custom( +stats = c("mean_pval", "mean", "sd" ), +formats_in = c("mean" = "xx.dxxxx"), +fmts_specs = list( +fmts_df = tern_formats_custom_df(), +fmts_df_var = "variant2", +d = 0, +formatting_function = format_xx_fixed_dp))$fmt + +get_formats_from_stats_custom( +stats = c("mean_pval", "mean", "sd" ), +formats_in = c("mean" = "xx.xxxx"), +fmts_specs = list( +fmts_df = tern_formats_custom_df(), +fmts_df_var = "default" +))$fmt +get_formats_from_stats_custom( +stats = c("mean_pval", "mean", "sd" ), +formats_in = c("mean" = "xx.xxxx"), +fmts_specs = list( +fmts_df = tern_formats_custom_df(), +fmts_df_var = "default" +))$fmt_fun + +get_formats_from_stats_custom( +stats = c("mean_pval", "mean", "sd" ), +formats_in = c("mean" = "xx.xxxx"), +fmts_specs = list( +fmts_df = tern_formats_custom_df(), +fmts_df_var = "default" +))$fmt_fun + +# example using analyze_vars on continuous data +dt2 <- data.frame("VAR" = c(0.001, 0.2, 0.0011000, 3, 4, 0.002, 0.004, 0.006), decimal = c(rep(2, 4), rep(1, 4)), by = c(rep("by1", 4), rep("by2", 4))) +our_fmt_specs_variant2 <- list( +fmts_df = tern_formats_custom_df(), +fmts_df_var = "variant2", +formatting_function = "format_xx_fixed_dp", +d = "decimal", +d_cap = 0) +basic_table() \%>\% + split_rows_by("by") \%>\% + analyze_vars( + vars = "VAR", + .stats = c("n", "mean", "mean_sd", "range"), + fmt_specs = our_fmt_specs_variant2 + ) \%>\% + build_table(dt2) + + +our_custom_fmts <- tern_formats_custom_df() + + +# Defaults formats + +get_formats_from_df( +stats = c("mean", "sd"), +fmts_df = tern_formats_custom_df(), +fmts_df_var = "variant1") +#' get_formats_from_df( +stats = c("mean", "sd"), +fmts_df = tern_formats_custom_df(), +fmts_df_var = "variant2") + +get_formats_from_df( +fmts_df = tern_formats_custom_df(), +fmts_df_var = "variant2") + +} +\seealso{ +\link{default_stats_formats_labels} + +Other formatting functions: +\code{\link{extreme_format}}, +\code{\link{format_auto}()}, +\code{\link{format_count_fraction}()}, +\code{\link{format_count_fraction_fixed_dp}()}, +\code{\link{format_count_fraction_lt10}()}, +\code{\link{format_extreme_values}()}, +\code{\link{format_extreme_values_ci}()}, +\code{\link{format_fraction}()}, +\code{\link{format_fraction_fixed_dp}()}, +\code{\link{format_fraction_threshold}()}, +\code{\link{format_sigfig}()}, +\code{\link{format_xx}()}, +\code{\link{formatting_functions}} +} +\concept{formatting functions} diff --git a/man/extreme_format.Rd b/man/extreme_format.Rd index 12f7dde55d..d3e33374c0 100644 --- a/man/extreme_format.Rd +++ b/man/extreme_format.Rd @@ -55,6 +55,7 @@ h_format_threshold(1000) } \seealso{ Other formatting functions: +\code{\link{custom_stats_formats}}, \code{\link{format_auto}()}, \code{\link{format_count_fraction}()}, \code{\link{format_count_fraction_fixed_dp}()}, diff --git a/man/format_auto.Rd b/man/format_auto.Rd index 89f7e6b3e7..b90c49308f 100644 --- a/man/format_auto.Rd +++ b/man/format_auto.Rd @@ -44,6 +44,7 @@ format_auto(no_sc_x, "range")(x = no_sc_x) } \seealso{ Other formatting functions: +\code{\link{custom_stats_formats}}, \code{\link{extreme_format}}, \code{\link{format_count_fraction}()}, \code{\link{format_count_fraction_fixed_dp}()}, diff --git a/man/format_count_fraction.Rd b/man/format_count_fraction.Rd index f326e10b41..3c553f9f20 100644 --- a/man/format_count_fraction.Rd +++ b/man/format_count_fraction.Rd @@ -26,6 +26,7 @@ format_count_fraction(x = c(0, 0)) } \seealso{ Other formatting functions: +\code{\link{custom_stats_formats}}, \code{\link{extreme_format}}, \code{\link{format_auto}()}, \code{\link{format_count_fraction_fixed_dp}()}, diff --git a/man/format_count_fraction_fixed_dp.Rd b/man/format_count_fraction_fixed_dp.Rd index dae4457113..d32b20b749 100644 --- a/man/format_count_fraction_fixed_dp.Rd +++ b/man/format_count_fraction_fixed_dp.Rd @@ -27,6 +27,7 @@ format_count_fraction_fixed_dp(x = c(0, 0)) } \seealso{ Other formatting functions: +\code{\link{custom_stats_formats}}, \code{\link{extreme_format}}, \code{\link{format_auto}()}, \code{\link{format_count_fraction}()}, diff --git a/man/format_count_fraction_lt10.Rd b/man/format_count_fraction_lt10.Rd index b1b74bfbc5..9faead8c34 100644 --- a/man/format_count_fraction_lt10.Rd +++ b/man/format_count_fraction_lt10.Rd @@ -27,6 +27,7 @@ format_count_fraction_lt10(x = c(9, 1)) } \seealso{ Other formatting functions: +\code{\link{custom_stats_formats}}, \code{\link{extreme_format}}, \code{\link{format_auto}()}, \code{\link{format_count_fraction}()}, diff --git a/man/format_extreme_values.Rd b/man/format_extreme_values.Rd index 25a806fa6a..a7d501aba4 100644 --- a/man/format_extreme_values.Rd +++ b/man/format_extreme_values.Rd @@ -27,6 +27,7 @@ format_fun(x = 0.009) } \seealso{ Other formatting functions: +\code{\link{custom_stats_formats}}, \code{\link{extreme_format}}, \code{\link{format_auto}()}, \code{\link{format_count_fraction}()}, diff --git a/man/format_extreme_values_ci.Rd b/man/format_extreme_values_ci.Rd index da307d6755..296765f84b 100644 --- a/man/format_extreme_values_ci.Rd +++ b/man/format_extreme_values_ci.Rd @@ -27,6 +27,7 @@ format_fun(x = c(0, 0.009)) } \seealso{ Other formatting functions: +\code{\link{custom_stats_formats}}, \code{\link{extreme_format}}, \code{\link{format_auto}()}, \code{\link{format_count_fraction}()}, diff --git a/man/format_fraction.Rd b/man/format_fraction.Rd index d5855b6c75..d448d4cb42 100644 --- a/man/format_fraction.Rd +++ b/man/format_fraction.Rd @@ -26,6 +26,7 @@ format_fraction(x = c(num = 0L, denom = 3L)) } \seealso{ Other formatting functions: +\code{\link{custom_stats_formats}}, \code{\link{extreme_format}}, \code{\link{format_auto}()}, \code{\link{format_count_fraction}()}, diff --git a/man/format_fraction_fixed_dp.Rd b/man/format_fraction_fixed_dp.Rd index ae6275c396..e27810b6d6 100644 --- a/man/format_fraction_fixed_dp.Rd +++ b/man/format_fraction_fixed_dp.Rd @@ -28,6 +28,7 @@ format_fraction_fixed_dp(x = c(num = 0L, denom = 3L)) } \seealso{ Other formatting functions: +\code{\link{custom_stats_formats}}, \code{\link{extreme_format}}, \code{\link{format_auto}()}, \code{\link{format_count_fraction}()}, diff --git a/man/format_fraction_threshold.Rd b/man/format_fraction_threshold.Rd index 7f77c62991..551d1d5997 100644 --- a/man/format_fraction_threshold.Rd +++ b/man/format_fraction_threshold.Rd @@ -30,6 +30,7 @@ format_fun(x = c(0, 0)) } \seealso{ Other formatting functions: +\code{\link{custom_stats_formats}}, \code{\link{extreme_format}}, \code{\link{format_auto}()}, \code{\link{format_count_fraction}()}, diff --git a/man/format_sigfig.Rd b/man/format_sigfig.Rd index fef9ab6608..eb0825c223 100644 --- a/man/format_sigfig.Rd +++ b/man/format_sigfig.Rd @@ -36,6 +36,7 @@ fmt_5sf(0.000025645) } \seealso{ Other formatting functions: +\code{\link{custom_stats_formats}}, \code{\link{extreme_format}}, \code{\link{format_auto}()}, \code{\link{format_count_fraction}()}, diff --git a/man/format_xx.Rd b/man/format_xx.Rd index a9dce61ddb..fdd2998b91 100644 --- a/man/format_xx.Rd +++ b/man/format_xx.Rd @@ -31,6 +31,7 @@ sapply(test, z) } \seealso{ Other formatting functions: +\code{\link{custom_stats_formats}}, \code{\link{extreme_format}}, \code{\link{format_auto}()}, \code{\link{format_count_fraction}()}, diff --git a/man/format_xx_fixed_dp.Rd b/man/format_xx_fixed_dp.Rd new file mode 100644 index 0000000000..9ea880201f --- /dev/null +++ b/man/format_xx_fixed_dp.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/formatting_functions.R +\name{format_xx_fixed_dp} +\alias{format_xx_fixed_dp} +\title{Alternative format XX function :a formatting function with fixed decimal precision} +\usage{ +format_xx_fixed_dp(str, na_str) +} +\arguments{ +\item{str}{(\code{string})\cr template.} +} +\value{ +An \code{rtables} formatting function. +} +\description{ +Translate a string where x and dots are interpreted as number place +holders, and others as formatting elements. +} +\examples{ +test <- list(c(1.658, 0.5761), c(1e1, 785.6)) + +z <- format_xx_fixed_dp("xx (xx.x)") +sapply(test, z) + +z <- format_xx_fixed_dp("xx.x - xx.x") +sapply(test, z) + +z <- format_xx_fixed_dp("xx.x, incl. xx.x\% NE") +sapply(test, z) +} +\seealso{ +\link{format_xx} +} diff --git a/man/formatting_functions.Rd b/man/formatting_functions.Rd index b7f7e8d584..f2a4be9410 100644 --- a/man/formatting_functions.Rd +++ b/man/formatting_functions.Rd @@ -12,6 +12,7 @@ custom formats can be created via the \code{\link[formatters:sprintf_format]{for } \seealso{ Other formatting functions: +\code{\link{custom_stats_formats}}, \code{\link{extreme_format}}, \code{\link{format_auto}()}, \code{\link{format_count_fraction}()},