From f79bce66d2ddc9c9a7467462d7c574407900a676 Mon Sep 17 00:00:00 2001 From: TuomasBorman Date: Fri, 24 Jan 2025 17:03:07 +0200 Subject: [PATCH] up --- R/getShortTermChange.R | 225 ++++++++++++++++++++++------------------- 1 file changed, 122 insertions(+), 103 deletions(-) diff --git a/R/getShortTermChange.R b/R/getShortTermChange.R index 72aab05..9a331d6 100644 --- a/R/getShortTermChange.R +++ b/R/getShortTermChange.R @@ -1,128 +1,147 @@ -#' @title Short term Changes in Abundance +#' @name +#' getShortTermChange +#' +#' @export +#' +#' @title +#' Short term changes in abundance +#' +#' @description +#' Calculates short term changes in abundance of taxa using temporal +#' abundance data. +#' +#' @details +#' This approach is used by Wisnoski NI and colleagues +#' \url{https://github.com/nwisnoski/ul-seedbank}. Their approach is based on +#' the following calculation log(present abundance/past abundance). +#' Also a compositional version using relative abundance similar to +#' Brian WJi, Sheth R et al +#' \url{https://www.nature.com/articles/s41564-020-0685-1} can be used. +#' This approach is useful for identifying short term growth behaviors of taxa. +#' +#' @references +#' +#' Ji, B.W., et al. (2020) Macroecological dynamics of gut microbiota. +#' Nat Microbiol 5, 768–775 . doi: https://doi.org/10.1038/s41564-020-0685-1 +#' +#' @return +#' \code{getShortTermChange} returns \code{DataFrame} object containing +#' the short term change in abundance over time for a microbe. +#' \code{addShortTermChange}, on the other hand, returns a +#' \code{\link[SummarizedExperiment:SummarizedExperiment]{SummarizedExperiment}} +#' object with these results in its \code{metadata}. +#' +#' @inheritParams addBaselineDivergence #' -#' @description Calculates short term changes in abundance of taxa -#' using temporal Abundance data. -#' -#' @param x a \code{\link{SummarizedExperiment}} object. -#' @param assay.type \code{Character scalar}. Specifies the name of assay -#' used in calculation. (Default: \code{"counts"}) #' @param name \code{Character scalar}. Specifies a name for storing #' short term results. (Default: \code{"short_term_change"}) +#' #' @param ... additional arguments. -#' -#' -#' @return \code{getShortTermChange} returns \code{DataFrame} object containing -#' the short term change in abundance over time for a microbe. -#' \code{addShortTermChange}, on the other hand, returns a -#' \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} -#' object with these results in its \code{metadata}. -#' -#' @details This approach is used by Wisnoski NI and colleagues -#' \url{https://github.com/nwisnoski/ul-seedbank}. Their approach is based on -#' the following calculation log(present abundance/past abundance). -#' Also a compositional version using relative abundance similar to -#' Brian WJi, Sheth R et al -#' \url{https://www.nature.com/articles/s41564-020-0685-1} can be used. -#' This approach is useful for identifying short term growth behaviors of taxa. -#' -#' @name addShortTermChange -#' -#' +# #' @examples -#' +#' library(miaTime) +#' #' # Load time series data #' data(minimalgut) #' tse <- minimalgut -#' -#' short_time_labels <- c("74.5h", "173h", "438h", "434h", "390h") -#' -#' # Subset samples by Time_label and StudyIdentifier -#' tse <- tse[, !(tse$Time_label %in% short_time_labels)] -#' tse <- tse[, (tse$StudyIdentifier == "Bioreactor A")] -#' -#' # Get short term change -#' # Case of rarefying counts +#' +#' # Get relative abundances #' tse <- transformAssay(tse, method = "relabundance") -#' getShortTermChange(tse, assay.type = "relabundance", time.col = "Time.hr") -#' -#' # Case of transforming counts -#' tse <- rarefyAssay(tse, assay.type = "counts") -#' getShortTermChange(tse, assay.type = "subsampled", time.col = "Time.hr") +#' # Calculate short term changes +#' df <- getShortTermChange(tse, assay.type = "relabundance", time.col = "Time.hr", group = "StudyIdentifier") +#' +#' # Select certain bacteria +#' select <- df[["FeatureID"]] %in% c( +#' "Ruminococcus_bromii", "Coprococcus_catus", "Akkermansia_muciniphila") +#' df <- df[ select, ] +#' +#' # Plot results +#' library(ggplot2) +#' p <- ggplot(df, aes(x = Time.hr, y = growth_rate, colour = FeatureID)) + +#' geom_smooth() + +#' facet_grid(. ~ StudyIdentifier, scales = "free") + +#' scale_y_log10() +#' p +#' +#' @seealso +#' \code{\link[mia:getStepwiseDivergence]{mia::getStepwiseDivergence()}} NULL -#' @rdname addShortTermChange +#' @rdname getShortTermChange +#' @export +setMethod("addShortTermChange", signature = c(x = "SummarizedExperiment"), + function(x, name = "short_term_change", ...){ + x <- .check_and_get_altExp(x, ...) + args <- c(list(x = x), list(...)[!names(list(...)) %in% c("altexp")]) + # Calculate short term change + res <- do.call(getShortTermChange, args) + # Add to metadata + x <- .add_values_to_metadata(x, name, res, ...) + return(x) + } +) + +#' @rdname getShortTermChange #' @export -#' @importFrom dplyr arrange as_tibble summarize "%>%" -#' @importFrom ggplot2 ggplot aes -#' @importFrom ggrepel geom_text_repel -#' @importFrom mia rarefyAssay transformAssay -#' @importFrom SummarizedExperiment colData setMethod("getShortTermChange", signature = c(x = "SummarizedExperiment"), - function(x, assay.type = "counts", ...){ + function( + x, time.col, assay.type = "counts", time.interval = 1L, group = NULL, + ...){ ############################## Input check ############################# - # Check validity of object - if (nrow(x) == 0L){ - stop("No data available in `x` ('x' has nrow(x) == 0L.)", + x <- .check_and_get_altExp(x, ...) + temp <- .check_input( + time.col, list("character scalar"), colnames(colData(x))) + if( !is.numeric(x[[time.col]]) ){ + stop("'time.col' must specify numeric column from colData(x)", call. = FALSE) } - # Check assay.type + # .check_assay_present(assay.type, x) - ########################### Growth Metrics ############################ - grwt <- .calculate_growth_metrics(x, assay.type = assay.type, ...) - # Clean and format growth metrics - grs.all <- .clean_growth_metrics(grwt, ...) - return(grs.all) + # + temp <- .check_input( + group, list(NULL, "character scalar"), colnames(colData(x))) + ########################### Input check end ############################ + res <- .calculate_growth_metrics( + x, assay.type, time.col, group, time.interval, ...) + return(res) } ) -#' @rdname addShortTermChange -#' @export -setMethod("addShortTermChange", signature = c(x = "SummarizedExperiment"), - function(x, assay.type = "counts", name = "short_term_change", ...){ - # Calculate short term change - res <- getShortTermChange(x, ...) - # Add to metadata - x <- .add_values_to_metadata(x, name, res, ...) - return(x) - } -) - ################################ HELP FUNCTIONS ################################ # wrapper to calculate growth matrix -.calculate_growth_metrics <- function(x, assay.type, time.col = NULL, ...) { - - # Reshape data and calculate growth metrics - assay_data <- meltSE(x, assay.type = assay.type, - add.col = time.col, row.name = "Feature_ID") - assay_data <- assay_data %>% - arrange( !!sym(time.col) ) %>% - group_by(Feature_ID) %>% +#' @importFrom dplyr arrange group_by summarise mutate select +.calculate_growth_metrics <- function( + x, assay.type, time.col, group, time.interval, ...) { + # Get data in long format + df <- meltSE(x, assay.type, add.col = c(time.col, group)) + # If there are replicated samples, give warning that average is calculated + if( anyDuplicated(df[, c("FeatureID", group, time.col)]) ){ + warning("The dataset contains replicated samples. The average is ", + "calculated for each time point.", call. = FALSE) + } + # Sort data based on time + df <- df |> arrange( !!sym(time.col) ) + # Group based on features and time points. If group was specified, take that + # also into account + if( !is.null(group) ){ + df <- df |> group_by(!!sym(group), FeatureID, !!sym(time.col)) + } else{ + df <- df |> group_by(FeatureID, !!sym(time.col)) + } + df <- df |> + # Summarize duplicated samples by taking an average + summarise(value = mean(.data[[assay.type]], na.rm = TRUE)) |> + # For each feature in a sample group, calculate growth metrics mutate( - time_lag = !!sym(time.col) - lag( !!sym(time.col) ), - growth_diff =!!sym(assay.type) - lag(!!sym(assay.type)), - growth_rate = (!!sym(assay.type) - lag(!!sym(assay.type))) / lag(!!sym(assay.type)), - var_abund = (!!sym(assay.type) - lag(!!sym(assay.type))) / time_lag - ) - return(assay_data) -} - -.clean_growth_metrics <- function(grwt, time.col = NULL, ...) { - # Calculate max growth - maxgrs <- grwt %>% - summarize(max_growth = max(growth_diff, na.rm = TRUE)) - # Merge growth data with max growth - grs.all <- merge(grwt, maxgrs, by = "Feature_ID") - # Add 'ismax' column indicating if the growth is the maximum - grs.all <- grs.all %>% - mutate(ismax = ifelse(growth_diff == max_growth, TRUE, FALSE)) - # Clean and abbreviate FeatureID names - grs.all$Feature_IDabb <- toupper(abbreviate(grs.all$Feature_ID, - minlength = 3, - method = "both.sides")) - # Create 'Feature.time' column combining abbreviation and time information - grs.all$Feature_time <- paste0(grs.all$Feature_IDabb, " ", - grs.all[[time.col]], "h") - - return(grs.all) + time_lag = !!sym(time.col) - + lag(!!sym(time.col), n = time.interval), + growth_diff = value - lag(value, n = time.interval), + growth_rate = (value - lag(value, n = time.interval)) / + lag(value, n = time.interval), + var_abund = (value - lag(value, n = time.interval)) / time_lag + ) |> + # Remove value column that includes average abundances + select(-value) + return(df) }