From b9f45047614cce806e45e1162fa68dab8718fe58 Mon Sep 17 00:00:00 2001 From: oliviaAB Date: Wed, 21 Feb 2024 18:49:57 +1300 Subject: [PATCH] Finished adding logx transformation --- NAMESPACE | 2 + R/transformation.R | 459 ++++++++++++++++--------- man/get_table_transformations.Rd | 17 +- man/transform_dataset.Rd | 20 +- man/transform_logx.Rd | 41 +++ man/transformation_datasets_factory.Rd | 152 +++++--- man/zero_to_half_min.Rd | 18 + tests/testthat/helper.R | 7 + tests/testthat/test-transformation.R | 161 ++++++++- 9 files changed, 646 insertions(+), 231 deletions(-) create mode 100644 man/transform_logx.Rd create mode 100644 man/zero_to_half_min.Rd diff --git a/NAMESPACE b/NAMESPACE index 98c54c1..5b5464d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -163,9 +163,11 @@ export(subset_features) export(transform_bestNormalise_auto) export(transform_bestNormalise_manual) export(transform_dataset) +export(transform_logx) export(transform_vsn) export(transform_vst) export(transformation_datasets_factory) +export(zero_to_half_min) exportClasses(MetabolomeSet) exportClasses(PhenotypeSet) exportMethods(add_metabo) diff --git a/R/transformation.R b/R/transformation.R index 03bc1d2..9aede2e 100644 --- a/R/transformation.R +++ b/R/transformation.R @@ -163,16 +163,19 @@ transform_bestNormalise_manual <- function(mat, method, return_matrix_only = FAL ) } - poss_methods <- c("arcsinh_x", "boxcox", "log_x", "sqrt_x", "yeojohnson", "center_scale", "exp_x", "orderNorm") - .check_names(method, poss_methods, "'method' argument: '_W_' not a valid method. Possible methods are: '_C_'.") + poss_methods <- c("arcsinh_x", "boxcox", "log_x", "sqrt_x", + "yeojohnson", "center_scale", "exp_x", "orderNorm") + .check_names( + method, + poss_methods, + "'method' argument: '_W_' not a valid method. Possible methods are: '_C_'." 
+ ) res <- apply(mat, 1, function(x) { eval(str2expression(paste0("bestNormalize::", method, "(x, ...)"))) }) - res_mat <- t(sapply(res, function(x) { - x$x.t - })) + res_mat <- t(sapply(res, function(x) {x$x.t})) if (return_matrix_only) { return(res_mat) @@ -195,11 +198,11 @@ transform_bestNormalise_manual <- function(mat, method, return_matrix_only = FAL #' returned? If `TRUE`, the function will return a matrix. If `FALSE`, the #' function instead returns a list with the transformed data and potentially #' other information relevant to the transformation. Default value is `FALSE`. -#' @param base Integer, the base with respect to which logarithms are computed. +#' @param log_base Numeric, the base with respect to which logarithms are +#' computed. #' @param pre_log_function Function that will be applied to the matrix before #' the log transformation (e.g. to apply an offset to the values to avoid -#' issues with zeros). If `NULL`, no function will be applied. Default value -#' is the [offset_half_min()] function. +#' issues with zeros). Default value is the [zero_to_half_min()] function. 
#' @returns Depending on the `return_matrix_only`, either a matrix of #' transformed data, or a list with the following elements: #' * `transformed_data`: matrix of the transformed data; @@ -208,47 +211,55 @@ transform_bestNormalise_manual <- function(mat, method, return_matrix_only = FAL #' @export transform_logx <- function(mat, return_matrix_only = FALSE, - base = 2, - pre_log_function = offset_half_min) { + log_base = 2, + pre_log_function = zero_to_half_min) { + + if (is.null(log_base)) { + stop("`log_base` argument cannot be `NULL`.") + } + if (is.null(pre_log_function)) { + stop("`pre_log_function` argument cannot be `NULL`.") + } if (!is.null(pre_log_function)) mat <- pre_log_function(mat) if (any(mat == 0)) warning("The matrix contains zero values; log-transformation will yield `-Inf`.") - res_mat <- log(mat, base = base) + res_mat <- log(mat, base = log_base) if (return_matrix_only) { return(res_mat) } res <- list( - log_base = base, + log_base = log_base, pre_log_function = pre_log_function ) return(list( transformed_data = res_mat, info_transformation = res, - transformation = paste0("log", base) + transformation = paste0("log", log_base) )) } -#' Add half-min offset to matrix +#' Replace zeros with half-min in matrix #' -#' If there are any zero values, adds a small offset to all values in a matrix. -#' The offset is computed as half the minimum non-null value in the matrix. If -#' there are no zeros in the matrix, no modifications will be made. +#' Replace zero values in a matrix by half of the minimum non-null value in the +#' matrix. #' #' @param mat Numeric matrix. -#' @returns The matrix in the offset has been added to all values. +#' @returns The matrix with zero values replaced. 
#' @export -offset_half_min <- function(mat) { +zero_to_half_min <- function(mat) { if (!any(mat == 0)) { return(mat) } min_val <- min(mat[mat != 0]) - mat + min_val / 2 + mat[mat == 0] <- min_val / 2 + + return(mat) } #' Applies a transformation to a dataset from a MultiDataSet object @@ -271,7 +282,8 @@ offset_half_min <- function(mat) { #' package. This method is applicable to count data only. This transformation is #' recommended for RNAseq or similar count-based datasets. In practice, applies #' the [transform_vst()] function. -#' * `logx`: +#' * `logx`: log-transformation (default to log2, but base can be specified). +#' In practice, applies the [transform_logx()] function. #' * `best-normalize-auto`: most appropriate normalisation method automatically #' selected from a number of options, implemented in the #' [bestNormalize::bestNormalize()] function from the `bestNormalize` package. @@ -290,10 +302,10 @@ offset_half_min <- function(mat) { #' * `"center_scale"`: data is centered and scaled; #' * `"exp_x"`: data is transformed as `exp(x)`; #' * `"log_x"`: data is transformed as `log_b(x+a)` (`a` and `b` either -#' selected automatically or passed as arguments); +#' selected automatically per variable or passed as arguments); #' * `"orderNorm"`: Ordered Quantile technique; #' * `"sqrt_x"`: data transformed as `sqrt(x + a)` (`a` selected automatically -#' or passed as argument), +#' per variable or passed as argument), #' * `"yeojohnson"`: Yeo-Johnson transformation. #' #' @param mo_data A \code{\link[MultiDataSet]{MultiDataSet-class}} object. @@ -312,6 +324,12 @@ offset_half_min <- function(mat) { #' `return_multidataset` is `TRUE`. Default value is `FALSE`. #' @param verbose Logical, should information about the transformation be #' printed? Default value is `TRUE`. +#' @param log_base Numeric, the base with respect to which logarithms are +#' computed. Default value is `2`. Only used if `transformation = 'logx'`. 
+#' @param pre_log_function Function that will be applied to the matrix before +#' the log transformation (e.g. to apply an offset to the values to avoid +#' issues with zeros). Default value is the [zero_to_half_min()] function. +#' Only used if `transformation = 'logx'`. #' @param method Character, if `transformation = 'best-normalize-manual'`, which #' normalisation method should be applied. See possible values in #' [transform_bestNormalise_manual()]. Ignored for other transformations. @@ -334,6 +352,8 @@ transform_dataset <- function(mo_data, return_multidataset = FALSE, return_matrix_only = FALSE, verbose = TRUE, + log_base = 2, + pre_log_function = zero_to_half_min, method, ...) { ## We don't want to subset the dataset @@ -377,11 +397,31 @@ transform_dataset <- function(mo_data, res <- switch( transformation, - "vsn" = transform_vsn(mat, return_matrix_only = return_matrix_only), - "vst-deseq2" = transform_vst(mat, return_matrix_only = return_matrix_only), - "logx" = transform_logx(mat, return_matrix_only = return_matrix_only, ...), - "best-normalize-auto" = transform_bestNormalise_auto(mat, return_matrix_only = return_matrix_only, ...), - "best-normalize-manual" = transform_bestNormalise_manual(mat, method, return_matrix_only = return_matrix_only, ...) + "vsn" = transform_vsn( + mat, + return_matrix_only = return_matrix_only + ), + "vst-deseq2" = transform_vst( + mat, + return_matrix_only = return_matrix_only + ), + "logx" = transform_logx( + mat, + return_matrix_only = return_matrix_only, + log_base = log_base, + pre_log_function = pre_log_function + ), + "best-normalize-auto" = transform_bestNormalise_auto( + mat, + return_matrix_only = return_matrix_only, + ... + ), + "best-normalize-manual" = transform_bestNormalise_manual( + mat, + method, + return_matrix_only = return_matrix_only, + ... 
+ ) ) ## Option to return a MultiDataSet object in which the original dataset is replaced by the transformed dataset @@ -446,63 +486,91 @@ get_transformed_data <- function(mo_data, transformation_result) { #' Create a list of targets to apply some transformation methods to one or more #' datasets in a `MultiDataSet` object. #' -#' #' Currently implemented transformations and recommendations based on dataset type: -#' \itemize{ -#' \item `vsn`: Variance Stabilising normalisation, implemented in the \code{\link[vsn]{justvsn}} -#' function from the `vsn` package. This method was originally developed for microarray intensities. -#' In practice, applies the \code{\link{transform_vsn}} function. This transformation is recommended -#' for microarray, metabolome, chemical or other intensity-based datasets. -#' \item `vst-deseq2`: Variance Stabilising Transformation, implemented in the -#' \code{\link[DESeq2]{varianceStabilizingTransformation}} function from the `DESeq2` package. -#' This method is applicable to count data only. In practice, applies the \code{\link{transform_vst}} function. -#' This transformation is recommended for RNAseq or similar count-based datasets. -#' \item `best-normalize-auto`: most appropriate normalisation method automatically selected from a number -#' of options, implemented in the \code{\link[bestNormalize]{bestNormalize}} function from the -#' `bestNormalize` package. In practice, applies the \code{\link{transform_bestNormalise_auto}} function. -#' This transformation is recommended for phenotypes that are each measured on different scales (since the -#' transformation method selected will potentially be different across the phenotypes), preferably with a -#' reasonable number of features (less than 100) to avoid large computation times. -#' \item `best-normalize-manual`: performs the same transformation (specified with the `method` argument) -#' to each feature of a dataset. 
This transformation is recommended for phenotypes data in which the -#' different phenotypes are measured on the same scale. The different normalisation methods are: -#' * `"arcsinh_x"`: data is transformed as `log(x + sqrt(x^2 + 1))`; -#' * `"boxcox"`: Box Cox transformation; -#' * `"center_scale"`: data is centered and scaled; -#' * `"exp_x"`: data is transformed as `exp(x)`; -#' * `"log_x"`: data is transformed as `log_b(x+a)` (`a` and `b` either selected automatically or passed as -#' arguments); -#' * `"orderNorm"`: Ordered Quantile technique; -#' * `"sqrt_x"`: data transformed as `sqrt(x + a)` (`a` selected automatically or passed as argument), -#' * `"yeojohnson"`: Yeo-Johnson transformation. -#' } +#' Currently implemented transformations and recommendations based on dataset +#' type: +#' * `vsn`: Variance Stabilising normalisation, implemented in the +#' [vsn::justvsn()] function from the `vsn` package. This method was originally +#' developed for microarray intensities. This transformation is recommended for +#' microarray, metabolome, chemical or other intensity-based datasets. In +#' practice, applies the [transform_vsn()] function. +#' * `vst-deseq2`: Variance Stabilising Transformation, implemented in the +#' [DESeq2::varianceStabilizingTransformation()] function from the `DESeq2` +#' package. This method is applicable to count data only. This transformation is +#' recommended for RNAseq or similar count-based datasets. In practice, applies +#' the [transform_vst()] function. +#' * `logx`: log-transformation (default to log2, but base can be specified). +#' In practice, applies the [transform_logx()] function. +#' * `best-normalize-auto`: most appropriate normalisation method automatically +#' selected from a number of options, implemented in the +#' [bestNormalize::bestNormalize()] function from the `bestNormalize` package. 
+#' This transformation is recommended for phenotypes that are each measured on +#' different scales (since the transformation method selected will potentially +#' be different across the features), preferably with a reasonable number of +#' features (less than 100) to avoid large computation times. In practice, +#' applies the [transform_bestNormalise_auto()] function. +#' * `best-normalize-manual`: performs the same transformation (specified +#' through the `method` argument) to each feature of a dataset. This +#' transformation is recommended for phenotypes data in which the different +#' phenotypes are measured on the same scale. The different normalisation +#' methods are: +#' * `"arcsinh_x"`: data is transformed as `log(x + sqrt(x^2 + 1))`; +#' * `"boxcox"`: Box Cox transformation; +#' * `"center_scale"`: data is centered and scaled; +#' * `"exp_x"`: data is transformed as `exp(x)`; +#' * `"log_x"`: data is transformed as `log_b(x+a)` (`a` and `b` either +#' selected automatically per variable or passed as arguments); +#' * `"orderNorm"`: Ordered Quantile technique; +#' * `"sqrt_x"`: data transformed as `sqrt(x + a)` (`a` selected automatically +#' per variable or passed as argument), +#' * `"yeojohnson"`: Yeo-Johnson transformation. #' -#' @param mo_data_target Symbol, the name of the target containing the `MultiDataSet` object. -#' @param transformations Named character vector, name of each element is the name of a dataset -#' to transform, corresponding element gives the type of transformation to apply to the dataset -#' (e.g. `c(rnaseq = 'vst-deseq2', phenotypes = 'best-normalize-auto')`). See Details -#' for a list of available transformations. If `'best-normalize-auto'` is selected, need to -#' provide the `methods` argument as well. -#' @param return_matrix_only Logical, should only the transformed matrix be returned for each -#' transformation? If `TRUE`, only transformed matrices will be stored. 
If `FALSE`, -#' instead for each transformation, a list with the transformed data and potentially other -#' information relevant to the transformation will be saved. Default value is `FALSE`. -#' @param target_name_prefix Character, a prefix to add to the name of the targets created by this target factory. -#' Default value is `""`. -#' @param transformed_data_name Character, the name of the target containing the `MultiDataSet` with -#' transformed data to be created. If `NULL`, will be selected automatically. Default value is `NULL`. -#' @param methods Named character vector, gives for each dataset for which the `'best-normalize-manual'` -#' transformation is selected which normalisation method should be applied. See possible values in -#' Details. -#' @param ... Further arguments passed to the \code{\link{transform_dataset}} function or the -#' `method` function from the `bestNormalize` package. Only relevant for `'best-normalize-XX'` transformations. -#' @return A list of target objects. With `target_name_prefix = ""` and `transformed_data_name = NULL`, -#' the following targets are created: -#' * `transformations_spec`: generates a grouped tibble where each row corresponds to one dataset to be tranformed, -#' with the columns specifying each dataset name and the transformation to apply. -#' * `transformations_runs_list`: a dynamic branching target that runs the \code{\link{transform_dataset}} function -#' on each dataset. Returns a list. -#' * `transformed_set`: a target that returns the `MultiDataSet` object with the original data replaced by the -#' transformed data. +#' @param mo_data_target Symbol, the name of the target containing the +#' `MultiDataSet` object. +#' @param transformations Named character vector, name of each element is the +#' name of a dataset to transform, corresponding element gives the type of +#' transformation to apply to the dataset (e.g. `c(rnaseq = 'vst-deseq2', +#' phenotypes = 'best-normalize-auto')`). 
See Details for a list of available +#' transformations. If `'best-normalize-manual'` is selected, need to provide +#' the `methods` argument as well. +#' @param return_matrix_only Logical, should only the transformed matrix be +#' returned for each transformation? If `TRUE`, only transformed matrices will +#' be stored. If `FALSE`, instead for each transformation, a list with the +#' transformed data and potentially other information relevant to the +#' transformation will be saved. Default value is `FALSE`. +#' @param target_name_prefix Character, a prefix to add to the name of the +#' targets created by this target factory. Default value is `""`. +#' @param transformed_data_name Character, the name of the target containing the +#' `MultiDataSet` with transformed data to be created. If `NULL`, will be +#' selected automatically. Default value is `NULL`. +#' @param methods Character or named character list, gives for each dataset for +#' which the `'best-normalize-manual'` transformation is selected the +#' normalisation method that should be applied. See possible values in +#' Details. If one value, will be used for all concerned datasets. Otherwise, +#' can specify a different method for each concerned dataset by passing a +#' named list. +#' @param log_bases Numeric or named numeric list, gives for each dataset for +#' which the `'logx'` transformation is selected the log base to use. If one +#' value, will be used for all concerned datasets. Otherwise, can specify a +#' different log-base for each concerned dataset by passing a named list. +#' @param pre_log_functions Function or named list of functions, gives for each +#' dataset for which the `'logx'` transformation is selected the function that +#' will be applied to the matrix before the log transformation (e.g. to apply +#' an offset to the values to avoid issues with zeros). Default value is the +#' [zero_to_half_min()] function. If one value, will be used for all concerned +#' datasets.
Otherwise, can specify a different function for each concerned +#' dataset by passing a named list. +#' @param ... Further arguments passed to the \code{\link{transform_dataset}} +#' function or the `method` function from the `bestNormalize` package. Only +#' relevant for `'best-normalize-XX'` transformations. +#' @returns A list of target objects. With `target_name_prefix = ""` and +#' `transformed_data_name = NULL`, the following targets are created: +#' * `transformations_spec`: generates a grouped tibble where each row +#' corresponds to one dataset to be transformed, with the columns specifying each +#' dataset name and the transformation to apply. +#' * `transformations_runs_list`: a dynamic branching target that runs the +#' [transform_dataset()] function on each dataset. Returns a list. +#' * `transformed_set`: a target that returns the `MultiDataSet` object with the +#' original data replaced by the transformed data. #' @examples #' \dontrun{ #' ## in the _targets.R @@ -529,15 +597,30 @@ get_transformed_data <- function(mo_data, transformation_result) { #' transformed_data_name = "mo_set_transformed" #' ), #' -#' ## Example 2 - with a log2 transformation for the metabolome dataset +#' ## Example 2 - with a log2 transformation for both datasets +#' transformation_datasets_factory( +#' mo_set_complete, +#' c( +#' "rnaseq" = "logx", +#' "metabolome" = "logx" +#' ), +#' log_bases = 2, +#' pre_log_functions = zero_to_half_min +#' ), +#' +#' ## Example 3 - with different log bases for each dataset and a different +#' ## preprocessing function to be run before applying the log #' transformation_datasets_factory( #' mo_set_complete, #' c( -#' "rnaseq" = "vst-deseq2", -#' "metabolome" = "best-normalize-manual" +#' "rnaseq" = "logx", +#' "metabolome" = "logx" #' ), -#' methods = c("metabolome" = "log_x"), -#' b = 2 +#' log_bases = list(rnaseq = 10, metabolome = 2), +#' pre_log_functions = list( +#' rnaseq = \(x) x + 0.5, +#' metabolome = zero_to_half_min +#' ) #' )
#' ) #' } @@ -547,42 +630,85 @@ transformation_datasets_factory <- function(mo_data_target, return_matrix_only = FALSE, target_name_prefix = "", transformed_data_name = NULL, + log_bases = 2, + pre_log_functions = zero_to_half_min, methods, ...) { - if (is.null(names(transformations))) stop("'transformations' vector should be named (see `?transformation_datasets_factory`).") + if (is.null(names(transformations))) { + stop("'transformations' vector should be named.") + } if (missing(methods)) methods <- NULL + if (any(transformations == "best-normalize-manual")) { + methods <- .make_var_list( + methods, + names(transformations)[transformations == "best-normalize-manual"] + ) + } + + if (any(transformations == "logx")) { + log_bases <- .make_var_list( + log_bases, + names(transformations)[transformations == "logx"] + ) + + # if (is.list(pre_log_functions)) { + # pre_log_functions <- purrr::map_chr(pre_log_functions, \(x) deparse(substitute(x))) + # } else { + # pre_log_functions <- deparse(substitute(pre_log_functions)) + # } + + pre_log_functions <- .make_var_list( + pre_log_functions, + names(transformations)[transformations == "logx"] + ) + } else { + log_bases <- pre_log_functions <- NULL + } + ## Target names transf_spec_name <- paste0(target_name_prefix, "transformations_spec") transf_run_name <- paste0(target_name_prefix, "transformations_runs_list") - if (is.null(transformed_data_name)) transformed_data_name <- paste0(target_name_prefix, "transformed_set") + if (is.null(transformed_data_name)) { + transformed_data_name <- paste0(target_name_prefix, "transformed_set") + } ## Target symbols trans_spec_target <- as.symbol(transf_spec_name) transf_run_target <- as.symbol(transf_run_name) list( - ## store the MAD specifications (arguments) as a tibble (one row per dataset to prefilter) - ## and group it by dataset name so that following targets will be applied to each row in turn targets::tar_target_raw( transf_spec_name, - substitute(tibble::tibble(dsn = 
names(transformations), transf = transformations, meth = methods[dsn]) |> - dplyr::group_by(dsn) |> - tar_group()), + substitute( + tibble::tibble( + dsn = names(transformations), + transf = transformations, + meth = methods[[dsn]], + log_b = log_bases[[dsn]], + prelog_f = pre_log_functions[[dsn]] + ) |> + dplyr::group_by(dsn) |> + targets::tar_group()), iteration = "group" ), ## Apply the transformation to each dataset targets::tar_target_raw( transf_run_name, - substitute(transform_dataset(mo_data_target, - dataset = trans_spec_target$dsn, - transformation = trans_spec_target$transf, - return_matrix_only = return_matrix_only, - method = trans_spec_target$meth, - ... - )), + substitute( + transform_dataset( + mo_data_target, + dataset = trans_spec_target$dsn, + transformation = trans_spec_target$transf, + return_matrix_only = return_matrix_only, + method = trans_spec_target$meth, + log_base = trans_spec_target$log_b, + pre_log_function = trans_spec_target$prelog_f, + ... + ) + ), pattern = substitute(map(trans_spec_target)), iteration = "list" ), @@ -630,66 +756,91 @@ transformation_datasets_factory <- function(mo_data_target, #' From the results of transformations on datasets, generates a table giving for #' each dataset the transformation that was applied to it. #' -#' @param transformation_result A list in which each element is the result of -#' a transformation applied to a different dataset, computed with the -#' \code{\link{transform_dataset}} function. -#' @param best_normalize_details Logical, should information about the transformations -#' selected by bestNormalize for each feature be displayed? Default value is `FALSE`. -#' @return A tibble with columns `'Dataset'` and `'Transformation'`. If -#' `best_normalize_details = TRUE`, an additional column `'Details'` lists the chsoen -#' transformation applied to each feature of the corresponding dataset for a bestNormalize -#' transformation. 
+#' @param transformation_result A list in which each element is the result of a +#' transformation applied to a different dataset, computed with the +#' [transform_dataset] function. +#' @param best_normalize_details Logical, should information about the +#' transformations selected by bestNormalize for each feature be displayed? +#' Default value is `FALSE`. +#' @returns A tibble with columns `'Dataset'` and `'Transformation'`. If +#' `best_normalize_details = TRUE`, an additional column `'Details'` lists the +#' chsoen transformation applied to each feature of the corresponding dataset +#' for a bestNormalize transformation. #' @export -get_table_transformations <- function(transformation_result, best_normalize_details = FALSE) { - ## for devtools::check - Features <- Feature <- NULL - - transf_name <- c( - "vsn" = "Variance Stabilising Normalisation (vsn)", - "vst-deseq2" = "Variance Stabilising Transformation (DESeq2)", - "best-normalize-auto" = "automatic normalisation selection (bestNormalize)", - "best-normalize-manual" = " transformation (bestNormalize)" - ) - - ## for devtools::check() - transf <- Transformation <- Chosen_transformation <- NULL - - names(transformation_result) <- sapply(transformation_result, attr, "dataset_name") +get_table_transformations <- function(transformation_result, + best_normalize_details = FALSE) { + ## for devtools::check + Features <- Feature <- NULL - res <- tibble::tibble( - Dataset = sapply(transformation_result, attr, "dataset_name"), - transf = sapply(transformation_result, attr, "transformation") - ) |> - dplyr::mutate(Transformation = transf_name[transf]) + transf_name <- c( + "vsn" = "Variance Stabilising Normalisation (vsn)", + "vst-deseq2" = "Variance Stabilising Transformation (DESeq2)", + "logx" = "Log-X transformation", + "best-normalize-auto" = "automatic normalisation selection (bestNormalize)", + "best-normalize-manual" = " transformation (bestNormalize)" + ) - for (i in which(res$transf == 
"best-normalize-manual")) { - res$Transformation[i] <- paste0(.bestNormalize_get_transfo_name(transformation_result[[res$Dataset[i]]][["info_transformation"]][[1]]), res$Transformation[i]) - } + ## for devtools::check() + transf <- Transformation <- Chosen_transformation <- NULL - if (best_normalize_details & any(res$transf == "best-normalize-auto")) { - res$Details <- sapply(transformation_result, function(x) { - if (attr(x, "transformation") == "best-normalize-auto") { - info_list <- x$info_transformation + names(transformation_result) <- sapply( + transformation_result, + attr, + "dataset_name" + ) - df <- tibble::tibble( - Feature = names(info_list), - Chosen_transformation = sapply(info_list, .bestNormalize_get_transfo_name) - ) |> - dplyr::group_by(Chosen_transformation) |> - dplyr::summarise(Features = paste0(Feature, collapse = ", ")) + res <- tibble::tibble( + Dataset = sapply(transformation_result, attr, "dataset_name"), + transf = sapply(transformation_result, attr, "transformation") + ) |> + dplyr::mutate(Transformation = transf_name[transf]) + + for (i in which(res$transf == "logx")) { + res$Transformation[i] <- stringr::str_replace( + res$Transformation[i], + "X", + transformation_result$info_transformation$log_base + ) + } - return(paste0("- ", df$Chosen_transformation, ": ", df$Features, collapse = "\n")) - } - return("") - }) - } + for (i in which(res$transf == "best-normalize-manual")) { + res$Transformation[i] <- paste0( + .bestNormalize_get_transfo_name( + transformation_result[[res$Dataset[i]]][["info_transformation"]][[1]] + ), + res$Transformation[i] + ) + } - res <- res |> - dplyr::select(-transf) + if (best_normalize_details & any(res$transf == "best-normalize-auto")) { + res$Details <- sapply(transformation_result, function(x) { + if (attr(x, "transformation") == "best-normalize-auto") { + info_list <- x$info_transformation + + df <- tibble::tibble( + Feature = names(info_list), + Chosen_transformation = sapply(info_list, 
.bestNormalize_get_transfo_name) + ) |> + dplyr::group_by(Chosen_transformation) |> + dplyr::summarise(Features = paste0(Feature, collapse = ", ")) + + return(paste0( + "- ", + df$Chosen_transformation, + ": ", + df$Features, + collapse = "\n" + )) + } + return("") + }) + } - return(res) -} + res <- res |> + dplyr::select(-transf) + return(res) + } .get_transformed_matrix <- function(res, return_matrix_only) { if (return_matrix_only) { diff --git a/man/get_table_transformations.Rd b/man/get_table_transformations.Rd index 11904f4..d1fd6c2 100644 --- a/man/get_table_transformations.Rd +++ b/man/get_table_transformations.Rd @@ -10,18 +10,19 @@ get_table_transformations( ) } \arguments{ -\item{transformation_result}{A list in which each element is the result of -a transformation applied to a different dataset, computed with the -\code{\link{transform_dataset}} function.} +\item{transformation_result}{A list in which each element is the result of a +transformation applied to a different dataset, computed with the +\link{transform_dataset} function.} -\item{best_normalize_details}{Logical, should information about the transformations -selected by bestNormalize for each feature be displayed? Default value is \code{FALSE}.} +\item{best_normalize_details}{Logical, should information about the +transformations selected by bestNormalize for each feature be displayed? +Default value is \code{FALSE}.} } \value{ A tibble with columns \code{'Dataset'} and \code{'Transformation'}. If -\code{best_normalize_details = TRUE}, an additional column \code{'Details'} lists the chsoen -transformation applied to each feature of the corresponding dataset for a bestNormalize -transformation. +\code{best_normalize_details = TRUE}, an additional column \code{'Details'} lists the +chsoen transformation applied to each feature of the corresponding dataset +for a bestNormalize transformation. 
} \description{ From the results of transformations on datasets, generates a table giving for diff --git a/man/transform_dataset.Rd b/man/transform_dataset.Rd index 73e576c..55d8971 100644 --- a/man/transform_dataset.Rd +++ b/man/transform_dataset.Rd @@ -11,6 +11,8 @@ transform_dataset( return_multidataset = FALSE, return_matrix_only = FALSE, verbose = TRUE, + log_base = 2, + pre_log_function = zero_to_half_min, method, ... ) @@ -21,8 +23,8 @@ transform_dataset( \item{dataset}{Character, name of the dataset to transform.} \item{transformation}{Character, transformation to be applied. Possible -values are: \code{vsn}, \code{vst-deseq2}, \code{best-normalize-auto} or -\code{best-normalize-manual}. See \code{Details}.} +values are: \code{vsn}, \code{vst-deseq2}, \code{logx} \code{best-normalize-auto} +or \code{best-normalize-manual}. See \code{Details}.} \item{return_multidataset}{Logical, should a \code{MultiDataSet} object with the original data replaced by the transformed data returned? If \code{FALSE}, the @@ -38,6 +40,14 @@ other information relevant to the transformation. Ignored if \item{verbose}{Logical, should information about the transformation be printed? Default value is \code{TRUE}.} +\item{log_base}{Numeric, the base with respect to which logarithms are +computed. Default value is \code{2}. Only used if \code{transformation = 'logx'}.} + +\item{pre_log_function}{Function that will be applied to the matrix before +the log transformation (e.g. to apply an offset to the values to avoid +issues with zeros). Default value is the \code{\link[=zero_to_half_min]{zero_to_half_min()}} function. +Only used if \code{transformation = 'logx'}.} + \item{method}{Character, if \code{transformation = 'best-normalize-manual'}, which normalisation method should be applied. See possible values in \code{\link[=transform_bestNormalise_manual]{transform_bestNormalise_manual()}}. 
Ignored for other transformations.} @@ -79,6 +89,8 @@ practice, applies the \code{\link[=transform_vsn]{transform_vsn()}} function. package. This method is applicable to count data only. This transformation is recommended for RNAseq or similar count-based datasets. In practice, applies the \code{\link[=transform_vst]{transform_vst()}} function. +\item \code{logx}: log-transformation (default to log2, but base can be specified). +In practice, applies the \code{\link[=transform_logx]{transform_logx()}} function. \item \code{best-normalize-auto}: most appropriate normalisation method automatically selected from a number of options, implemented in the \code{\link[bestNormalize:bestNormalize]{bestNormalize::bestNormalize()}} function from the \code{bestNormalize} package. @@ -98,10 +110,10 @@ methods are: \item \code{"center_scale"}: data is centered and scaled; \item \code{"exp_x"}: data is transformed as \code{exp(x)}; \item \code{"log_x"}: data is transformed as \code{log_b(x+a)} (\code{a} and \code{b} either -selected automatically or passed as arguments); +selected automatically per variable or passed as arguments); \item \code{"orderNorm"}: Ordered Quantile technique; \item \code{"sqrt_x"}: data transformed as \code{sqrt(x + a)} (\code{a} selected automatically -or passed as argument), +per variable or passed as argument), \item \code{"yeojohnson"}: Yeo-Johnson transformation. 
} } diff --git a/man/transform_logx.Rd b/man/transform_logx.Rd new file mode 100644 index 0000000..ba9c5d9 --- /dev/null +++ b/man/transform_logx.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/transformation.R +\name{transform_logx} +\alias{transform_logx} +\title{Applies a log-x transformation to matrix} +\usage{ +transform_logx( + mat, + return_matrix_only = FALSE, + log_base = 2, + pre_log_function = zero_to_half_min +) +} +\arguments{ +\item{mat}{Numeric matrix.} + +\item{return_matrix_only}{Logical, should only the transformed matrix be +returned? If \code{TRUE}, the function will return a matrix. If \code{FALSE}, the +function instead returns a list with the transformed data and potentially +other information relevant to the transformation. Default value is \code{FALSE}.} + +\item{log_base}{Numeric, the base with respect to which logarithms are +computed.} + +\item{pre_log_function}{Function that will be applied to the matrix before +the log transformation (e.g. to apply an offset to the values to avoid +issues with zeros). Default value is the \code{\link[=zero_to_half_min]{zero_to_half_min()}} function.} +} +\value{ +Depending on the \code{return_matrix_only}, either a matrix of +transformed data, or a list with the following elements: +\itemize{ +\item \code{transformed_data}: matrix of the transformed data; +\item \code{info_transformation}: a list with the log base used and the function +applied prior to log-transformation. +} +} +\description{ +Applies a log-x transformation (by default log2) through the \code{\link[=log]{log()}} +function. 
+} diff --git a/man/transformation_datasets_factory.Rd b/man/transformation_datasets_factory.Rd index e21ac40..4a8b129 100644 --- a/man/transformation_datasets_factory.Rd +++ b/man/transformation_datasets_factory.Rd @@ -10,47 +10,70 @@ transformation_datasets_factory( return_matrix_only = FALSE, target_name_prefix = "", transformed_data_name = NULL, + log_bases = 2, + pre_log_functions = zero_to_half_min, methods, ... ) } \arguments{ -\item{mo_data_target}{Symbol, the name of the target containing the \code{MultiDataSet} object.} +\item{mo_data_target}{Symbol, the name of the target containing the +\code{MultiDataSet} object.} -\item{transformations}{Named character vector, name of each element is the name of a dataset -to transform, corresponding element gives the type of transformation to apply to the dataset -(e.g. \code{c(rnaseq = 'vst-deseq2', phenotypes = 'best-normalize-auto')}). See Details -for a list of available transformations. If \code{'best-normalize-auto'} is selected, need to -provide the \code{methods} argument as well.} +\item{transformations}{Named character vector, name of each element is the +name of a dataset to transform, corresponding element gives the type of +transformation to apply to the dataset (e.g. \code{c(rnaseq = 'vst-deseq2', phenotypes = 'best-normalize-auto')}). See Details for a list of available +transformations. If \code{'best-normalize-manual'} is selected, need to provide +the \code{methods} argument as well.} -\item{return_matrix_only}{Logical, should only the transformed matrix be returned for each -transformation? If \code{TRUE}, only transformed matrices will be stored. If \code{FALSE}, -instead for each transformation, a list with the transformed data and potentially other -information relevant to the transformation will be saved. Default value is \code{FALSE}.} +\item{return_matrix_only}{Logical, should only the transformed matrix be +returned for each transformation? 
If \code{TRUE}, only transformed matrices will +be stored. If \code{FALSE}, instead for each transformation, a list with the +transformed data and potentially other information relevant to the +transformation will be saved. Default value is \code{FALSE}.} -\item{target_name_prefix}{Character, a prefix to add to the name of the targets created by this target factory. -Default value is \code{""}.} +\item{target_name_prefix}{Character, a prefix to add to the name of the +targets created by this target factory. Default value is \code{""}.} -\item{transformed_data_name}{Character, the name of the target containing the \code{MultiDataSet} with -transformed data to be created. If \code{NULL}, will be selected automatically. Default value is \code{NULL}.} +\item{transformed_data_name}{Character, the name of the target containing the +\code{MultiDataSet} with transformed data to be created. If \code{NULL}, will be +selected automatically. Default value is \code{NULL}.} -\item{methods}{Named character vector, gives for each dataset for which the \code{'best-normalize-manual'} -transformation is selected which normalisation method should be applied. See possible values in -Details.} +\item{log_bases}{Numeric or named numeric list, gives for each dataset for +which the \code{'logx'} transformation is selected the log base to use. If one +value, will be used for all concerned datasets. Otherwise, can specify a +different log-base for each concerned dataset by passing a named list.} -\item{...}{Further arguments passed to the \code{\link{transform_dataset}} function or the -\code{method} function from the \code{bestNormalize} package. 
Only relevant for \code{'best-normalize-XX'} transformations.} +\item{pre_log_functions}{Function or named list of functions, gives for each +dataset for which the \code{'logx'} transformation is selected the function that +will be applied to the matrix before the log transformation (e.g. to apply +an offset to the values to avoid issues with zeros). 
Default value is the +\code{\link[=zero_to_half_min]{zero_to_half_min()}} function. If one value, will be used for all concerned +datasets. Otherwise, can specify a different function for each concerned +dataset by passing a named list.} + +\item{methods}{Character or named character list, gives for each dataset for +which the \code{'best-normalize-manual'} transformation is selected the +normalisation method that should be applied. See possible values in +Details. If one value, will be used for all concerned datasets. Otherwise, +can specify a different method for each concerned dataset by passing a +named list.} + +\item{...}{Further arguments passed to the \code{\link{transform_dataset}} +function or the \code{method} function from the \code{bestNormalize} package. Only +relevant for \code{'best-normalize-XX'} transformations.} } \value{ -A list of target objects. With \code{target_name_prefix = ""} and \code{transformed_data_name = NULL}, -the following targets are created: +A list of target objects. With \code{target_name_prefix = ""} and +\code{transformed_data_name = NULL}, the following targets are created: \itemize{ -\item \code{transformations_spec}: generates a grouped tibble where each row corresponds to one dataset to be tranformed, -with the columns specifying each dataset name and the transformation to apply. -\item \code{transformations_runs_list}: a dynamic branching target that runs the \code{\link{transform_dataset}} function -on each dataset. Returns a list. -\item \code{transformed_set}: a target that returns the \code{MultiDataSet} object with the original data replaced by the -transformed data. +\item \code{transformations_spec}: generates a grouped tibble where each row +corresponds to one dataset to be transformed, with the columns specifying each +dataset name and the transformation to apply. 
+\item \code{transformations_runs_list}: a dynamic branching target that runs the +\code{\link[=transform_dataset]{transform_dataset()}} function on each dataset. Returns a list. +\item \code{transformed_set}: a target that returns the \code{MultiDataSet} object with the +original data replaced by the transformed data. } } \description{ @@ -58,34 +81,44 @@ Create a list of targets to apply some transformation methods to one or more datasets in a \code{MultiDataSet} object. } \details{ -#' Currently implemented transformations and recommendations based on dataset type: +Currently implemented transformations and recommendations based on dataset +type: \itemize{ -\item \code{vsn}: Variance Stabilising normalisation, implemented in the \code{\link[vsn]{justvsn}} -function from the \code{vsn} package. This method was originally developed for microarray intensities. -In practice, applies the \code{\link{transform_vsn}} function. This transformation is recommended -for microarray, metabolome, chemical or other intensity-based datasets. +\item \code{vsn}: Variance Stabilising normalisation, implemented in the +\code{\link[vsn:justvsn]{vsn::justvsn()}} function from the \code{vsn} package. This method was originally +developed for microarray intensities. This transformation is recommended for +microarray, metabolome, chemical or other intensity-based datasets. In +practice, applies the \code{\link[=transform_vsn]{transform_vsn()}} function. \item \code{vst-deseq2}: Variance Stabilising Transformation, implemented in the -\code{\link[DESeq2]{varianceStabilizingTransformation}} function from the \code{DESeq2} package. -This method is applicable to count data only. In practice, applies the \code{\link{transform_vst}} function. -This transformation is recommended for RNAseq or similar count-based datasets. 
-\item \code{best-normalize-auto}: most appropriate normalisation method automatically selected from a number -of options, implemented in the \code{\link[bestNormalize]{bestNormalize}} function from the -\code{bestNormalize} package. In practice, applies the \code{\link{transform_bestNormalise_auto}} function. -This transformation is recommended for phenotypes that are each measured on different scales (since the -transformation method selected will potentially be different across the phenotypes), preferably with a -reasonable number of features (less than 100) to avoid large computation times. -\item \code{best-normalize-manual}: performs the same transformation (specified with the \code{method} argument) -to each feature of a dataset. This transformation is recommended for phenotypes data in which the -different phenotypes are measured on the same scale. The different normalisation methods are: +\code{\link[DESeq2:varianceStabilizingTransformation]{DESeq2::varianceStabilizingTransformation()}} function from the \code{DESeq2} +package. This method is applicable to count data only. This transformation is +recommended for RNAseq or similar count-based datasets. In practice, applies +the \code{\link[=transform_vst]{transform_vst()}} function. +\item \code{logx}: log-transformation (default to log2, but base can be specified). +In practice, applies the \code{\link[=transform_logx]{transform_logx()}} function. +\item \code{best-normalize-auto}: most appropriate normalisation method automatically +selected from a number of options, implemented in the +\code{\link[bestNormalize:bestNormalize]{bestNormalize::bestNormalize()}} function from the \code{bestNormalize} package. +This transformation is recommended for phenotypes that are each measured on +different scales (since the transformation method selected will potentially +be different across the features), preferably with a reasonable number of +features (less than 100) to avoid large computation times. 
In practice, +applies the \code{\link[=transform_bestNormalise_auto]{transform_bestNormalise_auto()}} function. +\item \code{best-normalize-manual}: performs the same transformation (specified +through the \code{method} argument) to each feature of a dataset. This +transformation is recommended for phenotypes data in which the different +phenotypes are measured on the same scale. The different normalisation +methods are: \itemize{ \item \code{"arcsinh_x"}: data is transformed as \code{log(x + sqrt(x^2 + 1))}; \item \code{"boxcox"}: Box Cox transformation; \item \code{"center_scale"}: data is centered and scaled; \item \code{"exp_x"}: data is transformed as \code{exp(x)}; -\item \code{"log_x"}: data is transformed as \code{log_b(x+a)} (\code{a} and \code{b} either selected automatically or passed as -arguments); +\item \code{"log_x"}: data is transformed as \code{log_b(x+a)} (\code{a} and \code{b} either +selected automatically per variable or passed as arguments); \item \code{"orderNorm"}: Ordered Quantile technique; -\item \code{"sqrt_x"}: data transformed as \code{sqrt(x + a)} (\code{a} selected automatically or passed as argument), +\item \code{"sqrt_x"}: data transformed as \code{sqrt(x + a)} (\code{a} selected automatically +per variable or passed as argument), \item \code{"yeojohnson"}: Yeo-Johnson transformation. 
} } @@ -116,15 +149,30 @@ list( transformed_data_name = "mo_set_transformed" ), - ## Example 2 - with a log2 transformation for the metabolome dataset + ## Example 2 - with a log2 transformation for both datasets + transformation_datasets_factory( + mo_set_complete, + c( + "rnaseq" = "logx", + "metabolome" = "logx" + ), + log_bases = 2, + pre_log_functions = zero_to_half_min + ), + + ## Example 3 - with different log bases for each dataset and a different + ## preprocessing function to be run before applying the log transformation_datasets_factory( mo_set_complete, c( - "rnaseq" = "vst-deseq2", - "metabolome" = "best-normalize-manual" + "rnaseq" = "logx", + "metabolome" = "logx" ), - methods = c("metabolome" = "log_x"), - b = 2 + log_bases = list(rnaseq = 10, metabolome = 2), + pre_log_functions = list( + rnaseq = \(x) x + 0.5, + metabolome = zero_to_half_min + ) ) ) } diff --git a/man/zero_to_half_min.Rd b/man/zero_to_half_min.Rd new file mode 100644 index 0000000..c9b4a82 --- /dev/null +++ b/man/zero_to_half_min.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/transformation.R +\name{zero_to_half_min} +\alias{zero_to_half_min} +\title{Replace zeros with half-min in matrix} +\usage{ +zero_to_half_min(mat) +} +\arguments{ +\item{mat}{Numeric matrix.} +} +\value{ +The matrix with zero values replaced. +} +\description{ +Replace zero values in a matrix by half of the minimum non-zero value in the +matrix. 
+} diff --git a/tests/testthat/helper.R b/tests/testthat/helper.R index 039069c..88bef40 100644 --- a/tests/testthat/helper.R +++ b/tests/testthat/helper.R @@ -198,3 +198,10 @@ test_extract_samples_score <- function(x, ld) { dplyr::select(sample_id, score) |> tibble::deframe() } + +test_clean_expression <- function(x) { + as.character(x) |> + stringr::str_remove_all("\n") |> + stringr::str_replace_all("\\s+", " ") |> + stringr::str_remove_all("(?<=function) (?=\\()") +} diff --git a/tests/testthat/test-transformation.R b/tests/testthat/test-transformation.R index f13b8c0..b970b7c 100644 --- a/tests/testthat/test-transformation.R +++ b/tests/testthat/test-transformation.R @@ -117,28 +117,28 @@ test_that("transform_logx works", { mat1 <- matrix(c(0, 1, 2, 3), nrow = 2) expect_equal( - transform_logx(mat1, return_matrix_only = TRUE, base = 2), - log2(mat1 + 0.5) + transform_logx(mat1, return_matrix_only = TRUE, log_base = 2), + log2(zero_to_half_min(mat1)) ) expect_equal( - transform_logx(mat1, return_matrix_only = TRUE, base = 10, pre_log_function = \(x){x + 1}), + transform_logx(mat1, return_matrix_only = TRUE, log_base = 10, pre_log_function = \(x){x + 1}), log10(mat1 + 1) ) expect_warning( - transform_logx(mat1, return_matrix_only = TRUE, base = 10, pre_log_function = NULL), + transform_logx(mat1, return_matrix_only = TRUE, log_base = 10, pre_log_function = \(x){x}), "The matrix contains zero values; log-transformation will yield `-Inf`." 
) expect_equal( - transform_logx(mat1, return_matrix_only = FALSE, base = 2), + transform_logx(mat1, return_matrix_only = FALSE, log_base = 2), list( - transformed_data = log2(mat1 + 0.5), - info_transformation = list(log_base = 2, pre_log_function = offset_half_min), + transformed_data = log2(zero_to_half_min(mat1)), + info_transformation = list(log_base = 2, pre_log_function = zero_to_half_min), transformation = "log2" ) ) expect_equal( - transform_logx(mat1, return_matrix_only = FALSE, base = 10, pre_log_function = \(x){x + 1}), + transform_logx(mat1, return_matrix_only = FALSE, log_base = 10, pre_log_function = \(x){x + 1}), list( transformed_data = log10(mat1 + 1), info_transformation = list(log_base = 10, pre_log_function = \(x){x + 1}), @@ -147,17 +147,17 @@ test_that("transform_logx works", { ) }) -test_that("offset_half_min works", { +test_that("zero_to_half_min works", { mat1 <- matrix(c(0, 1, 2, 3), nrow = 2) mat2 <- matrix(c(2, 1, 2, 3), nrow = 2) expect_equal( - offset_half_min(mat1), - matrix(c(0.5, 1.5, 2.5, 3.5), nrow = 2) + zero_to_half_min(mat1), + matrix(c(0.5, 1, 2, 3), nrow = 2) ) expect_equal( - offset_half_min(mat2), + zero_to_half_min(mat2), mat2 ) }) @@ -187,7 +187,7 @@ test_that("transform_dataset works", { ) expect_error( transform_dataset(multiomics_set, "snps+A", "TEST"), - "'transformation' argument: 'TEST' is not a recognised transformation. Possible values are: 'vsn', 'vst-deseq2', 'best-normalize-auto', 'best-normalize-manual'." + "'transformation' argument: 'TEST' is not a recognised transformation. Possible values are: 'vsn', 'vst-deseq2', 'logx', 'best-normalize-auto', 'best-normalize-manual'." 
) expect_error(transform_dataset(multiomics_set, "snps+A", "best-normalize-manual"), "'method' argument should be provided for 'best-normalize-manual' transformation.") @@ -203,3 +203,138 @@ test_that("transform_dataset works", { # expect_message(transform_dataset(multiomics_set, dataset = "snps+A", transformation = "best-normalize-auto", method = "center_scale")) }) + +test_that("transformation_datasets_factory works - default", { + tar_res <- transformation_datasets_factory( + mo_set, + c("rnaseq" = "vst", "metabolome" = "vsn") + ) + + expect_type(tar_res, "list") + expect_equal( + purrr::map_chr(tar_res, \(x) x$settings$name), + c("transformations_spec", "transformations_runs_list", "transformed_set") + ) + expect_s3_class(tar_res[[1]], "tar_stem") + expect_s3_class(tar_res[[2]], "tar_pattern") + expect_s3_class(tar_res[[3]], "tar_stem") + + expect_equal( + tar_res[[1]]$command$expr, + str2expression("tibble::tibble( + dsn = names(c(\"rnaseq\" = \"vst\", \"metabolome\" = \"vsn\")), + transf = c(\"rnaseq\" = \"vst\", \"metabolome\" = \"vsn\"), + meth = NULL[[dsn]], + log_b = NULL[[dsn]], + prelog_f = NULL[[dsn]] + ) |> + dplyr::group_by(dsn) |> + targets::tar_group()") + ) + expect_equal( + tar_res[[2]]$command$expr, + str2expression("transform_dataset( + mo_set, + dataset = transformations_spec$dsn, + transformation = transformations_spec$transf, + return_matrix_only = FALSE, + method = transformations_spec$meth, + log_base = transformations_spec$log_b, + pre_log_function = transformations_spec$prelog_f + )") + ) + expect_equal( + tar_res[[3]]$command$expr, + str2expression("get_transformed_data(mo_set, transformations_runs_list)") + ) + + ## Adding prefix + tar_res <- transformation_datasets_factory( + mo_set, + c("rnaseq" = "vst", "metabolome" = "vsn"), + target_name_prefix = "TEST_" + ) + expect_type(tar_res, "list") + expect_equal( + purrr::map_chr(tar_res, \(x) x$settings$name), + c("TEST_transformations_spec", "TEST_transformations_runs_list", 
"TEST_transformed_set") + ) + expect_s3_class(tar_res[[1]], "tar_stem") + expect_s3_class(tar_res[[2]], "tar_pattern") + expect_s3_class(tar_res[[3]], "tar_stem") + + ## Changing final set name + tar_res <- transformation_datasets_factory( + mo_set, + c("rnaseq" = "vst", "metabolome" = "vsn"), + transformed_data_name = "TEST" + ) + expect_type(tar_res, "list") + expect_equal( + purrr::map_chr(tar_res, \(x) x$settings$name), + c("transformations_spec", "transformations_runs_list", "TEST") + ) + expect_s3_class(tar_res[[1]], "tar_stem") + expect_s3_class(tar_res[[2]], "tar_pattern") + expect_s3_class(tar_res[[3]], "tar_stem") +}) + +test_that("transformation_datasets_factory works - logx", { + tar_res <- transformation_datasets_factory( + mo_set, + c("rnaseq" = "vst", "metabolome" = "logx") + ) + + expect_type(tar_res, "list") + expect_equal(tar_res[[1]]$settings$name, "transformations_spec") + expect_s3_class(tar_res[[1]], "tar_stem") + + expect_equal( + tar_res[[1]]$command$expr |> test_clean_expression(), + expression( + tibble::tibble( + dsn = names(c(rnaseq = "vst", metabolome = "logx")), + transf = c(rnaseq = "vst", metabolome = "logx"), + meth = NULL[[dsn]], + log_b = list(metabolome = 2)[[dsn]], + prelog_f = list(metabolome = function(mat) { + if (!any(mat == 0)) { + return(mat) + } + + min_val <- min(mat[mat != 0]) + mat[mat == 0] <- min_val / 2 + + return(mat) + })[[dsn]] + ) |> + dplyr::group_by(dsn) |> + targets::tar_group()) |> test_clean_expression() + ) + + tar_res <- transformation_datasets_factory( + mo_set, + c("rnaseq" = "logx", "metabolome" = "logx"), + log_bases = list(rnaseq = 10, metabolome = 2), + pre_log_functions = list( + rnaseq = \(x) x + 0.5, + metabolome = \(x) x + 1 + ) + ) + + expect_equal( + tar_res[[1]]$command$expr |> test_clean_expression(), + expression(tibble::tibble( + dsn = names(c(rnaseq = "logx", metabolome = "logx")), + transf = c(rnaseq = "logx", metabolome = "logx"), + meth = NULL[[dsn]], + log_b = list(rnaseq = 10, 
metabolome = 2)[[dsn]], + prelog_f = list( + rnaseq = \(x) x + 0.5, + metabolome = \(x) x + 1 + )[[dsn]] + ) |> + dplyr::group_by(dsn) |> + targets::tar_group()) |> test_clean_expression() + ) +})