From f79bce66d2ddc9c9a7467462d7c574407900a676 Mon Sep 17 00:00:00 2001
From: TuomasBorman <tvborm@utu.fi>
Date: Fri, 24 Jan 2025 17:03:07 +0200
Subject: [PATCH] up

---
 R/getShortTermChange.R | 225 ++++++++++++++++++++++-------------------
 1 file changed, 122 insertions(+), 103 deletions(-)

diff --git a/R/getShortTermChange.R b/R/getShortTermChange.R
index 72aab05..9a331d6 100644
--- a/R/getShortTermChange.R
+++ b/R/getShortTermChange.R
@@ -1,128 +1,147 @@
-#' @title Short term Changes in Abundance
+#' @name
+#' getShortTermChange
+#'
+#' @export
+#'
+#' @title
+#' Short term changes in abundance
+#'
+#' @description
+#' Calculates short term changes in abundance of taxa using temporal
+#' abundance data.
+#'
+#' @details
+#' This approach is used by Wisnoski NI and colleagues
+#' \url{https://github.com/nwisnoski/ul-seedbank}. Their approach is based on
+#' the following calculation log(present abundance/past abundance).
+#' Also a compositional version using relative abundance similar to
+#' Brian WJi, Sheth R et al
+#' \url{https://www.nature.com/articles/s41564-020-0685-1} can be used.
+#' This approach is useful for identifying short term growth behaviors of taxa.
+#'
+#' @references
+#'
+#' Ji, B.W., et al. (2020) Macroecological dynamics of gut microbiota.
+#' Nat Microbiol 5, 768–775 . doi: https://doi.org/10.1038/s41564-020-0685-1
+#'
+#' @return
+#' \code{getShortTermChange} returns \code{DataFrame} object containing
+#' the short term change in abundance over time for a microbe.
+#' \code{addShortTermChange}, on the other hand, returns a
+#' \code{\link[SummarizedExperiment:SummarizedExperiment]{SummarizedExperiment}}
+#' object with these results in its \code{metadata}.
+#'
+#' @inheritParams addBaselineDivergence
 #'
-#' @description Calculates short term changes in abundance of taxa
-#'              using temporal Abundance data.
-#'  
-#' @param x a \code{\link{SummarizedExperiment}} object.
-#' @param assay.type \code{Character scalar}. Specifies the name of assay 
-#'   used in calculation. (Default: \code{"counts"})
 #' @param name \code{Character scalar}. Specifies a name for storing
 #' short term results. (Default: \code{"short_term_change"})
+#'
 #' @param ... additional arguments.
-#' 
-#' 
-#' @return \code{getShortTermChange} returns \code{DataFrame} object containing 
-#' the short term change in abundance over time for a microbe. 
-#' \code{addShortTermChange}, on the other hand, returns a
-#' \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}}
-#' object with these results in its \code{metadata}.
-#' 
-#' @details This approach is used by Wisnoski NI and colleagues
-#'          \url{https://github.com/nwisnoski/ul-seedbank}. Their approach is based on
-#'          the following calculation log(present abundance/past abundance).
-#'          Also a compositional version using relative abundance similar to
-#'          Brian WJi, Sheth R et al
-#'          \url{https://www.nature.com/articles/s41564-020-0685-1} can be used.
-#'          This approach is useful for identifying short term growth behaviors of taxa.
-#'          
-#' @name addShortTermChange
-#' 
-#' 
+#
 #' @examples
-#' 
+#' library(miaTime)
+#'
 #' # Load time series data
 #' data(minimalgut)
 #' tse <- minimalgut
-#' 
-#' short_time_labels <- c("74.5h", "173h", "438h", "434h", "390h")
-#' 
-#' # Subset samples by Time_label and StudyIdentifier
-#' tse <- tse[, !(tse$Time_label %in% short_time_labels)]
-#' tse <- tse[, (tse$StudyIdentifier == "Bioreactor A")]
-#' 
-#' # Get short term change
-#' # Case of rarefying counts
+#'
+#' # Get relative abundances
 #' tse <- transformAssay(tse, method = "relabundance")
-#' getShortTermChange(tse, assay.type = "relabundance", time.col = "Time.hr")
-#' 
-#' # Case of transforming counts
-#' tse <- rarefyAssay(tse, assay.type = "counts")
-#' getShortTermChange(tse, assay.type = "subsampled", time.col = "Time.hr")
+#' # Calculate short term changes
+#' df <- getShortTermChange(tse, assay.type = "relabundance", time.col = "Time.hr", group = "StudyIdentifier")
+#'
+#' # Select certain bacteria
+#' select <- df[["FeatureID"]] %in% c(
+#'     "Ruminococcus_bromii", "Coprococcus_catus", "Akkermansia_muciniphila")
+#' df <- df[ select, ]
+#'
+#' # Plot results
+#' library(ggplot2)
+#' p <- ggplot(df, aes(x = Time.hr, y = growth_rate, colour = FeatureID)) +
+#'     geom_smooth() +
+#'     facet_grid(. ~ StudyIdentifier, scales = "free") +
+#'     scale_y_log10()
+#' p
+#'
+#' @seealso
+#' \code{\link[mia:getStepwiseDivergence]{mia::getStepwiseDivergence()}}
 NULL
 
-#' @rdname addShortTermChange
+#' @rdname getShortTermChange
+#' @export
+setMethod("addShortTermChange", signature = c(x = "SummarizedExperiment"),
+    function(x, name = "short_term_change", ...){
+        x <- .check_and_get_altExp(x, ...)
+        args <- c(list(x = x), list(...)[!names(list(...)) %in% c("altexp")])
+        # Calculate short term change
+        res <- do.call(getShortTermChange, args)
+        # Add to metadata
+        x <- .add_values_to_metadata(x, name, res, ...)
+        return(x)
+    }
+)
+
+#' @rdname getShortTermChange
 #' @export
-#' @importFrom dplyr arrange as_tibble summarize "%>%"
-#' @importFrom ggplot2 ggplot aes
-#' @importFrom ggrepel geom_text_repel
-#' @importFrom mia rarefyAssay transformAssay
-#' @importFrom SummarizedExperiment colData
 setMethod("getShortTermChange", signature = c(x = "SummarizedExperiment"),
-    function(x, assay.type = "counts", ...){
+    function(
+        x, time.col, assay.type = "counts", time.interval = 1L, group = NULL,
+        ...){
         ############################## Input check #############################
-        # Check validity of object
-        if (nrow(x) == 0L){
-            stop("No data available in `x` ('x' has nrow(x) == 0L.)",
+        x <- .check_and_get_altExp(x, ...)
+        temp <- .check_input(
+            time.col, list("character scalar"), colnames(colData(x)))
+        if( !is.numeric(x[[time.col]]) ){
+            stop("'time.col' must specify numeric column from colData(x)",
                  call. = FALSE)
         }
-        # Check assay.type
+        #
         .check_assay_present(assay.type, x)
-        ########################### Growth Metrics ############################
-        grwt <- .calculate_growth_metrics(x, assay.type = assay.type, ...)
-        # Clean and format growth metrics
-        grs.all <- .clean_growth_metrics(grwt, ...)
-        return(grs.all)
+        #
+        temp <- .check_input(
+            group, list(NULL, "character scalar"), colnames(colData(x)))
+        ########################### Input check end ############################
+        res <- .calculate_growth_metrics(
+            x, assay.type, time.col, group, time.interval, ...)
+        return(res)
     }
 )
 
-#' @rdname addShortTermChange
-#' @export
-setMethod("addShortTermChange", signature = c(x = "SummarizedExperiment"),
-          function(x, assay.type = "counts", name = "short_term_change", ...){
-              # Calculate short term change
-              res <- getShortTermChange(x, ...)
-              # Add to metadata
-              x <- .add_values_to_metadata(x, name, res, ...)
-              return(x)
-          }
-)
-
 ################################ HELP FUNCTIONS ################################
 
 # wrapper to calculate growth matrix
-.calculate_growth_metrics <- function(x, assay.type, time.col = NULL, ...) {
-
-    # Reshape data and calculate growth metrics
-    assay_data <- meltSE(x, assay.type = assay.type, 
-                         add.col = time.col, row.name = "Feature_ID")
-    assay_data <- assay_data %>%
-        arrange( !!sym(time.col) ) %>%
-        group_by(Feature_ID) %>%
+#' @importFrom dplyr arrange group_by summarise mutate select
+.calculate_growth_metrics <- function(
+        x, assay.type, time.col, group, time.interval, ...) {
+    # Get data in long format
+    df <- meltSE(x, assay.type, add.col = c(time.col, group))
+    # If there are replicated samples, give warning that average is calculated
+    if( anyDuplicated(df[, c("FeatureID", group, time.col)]) ){
+        warning("The dataset contains replicated samples. The average is ",
+                "calculated for each time point.", call. = FALSE)
+    }
+    # Sort data based on time
+    df <- df |> arrange( !!sym(time.col) )
+    # Group based on features and time points. If group was specified, take that
+    # also into account
+    if( !is.null(group) ){
+        df <- df |> group_by(!!sym(group), FeatureID, !!sym(time.col))
+    } else{
+        df <- df |> group_by(FeatureID, !!sym(time.col))
+    }
+    df <- df |>
+        # Summarize duplicated samples by taking an average
+        summarise(value = mean(.data[[assay.type]], na.rm = TRUE)) |>
+        # For each feature in a sample group, calculate growth metrics
         mutate(
-            time_lag = !!sym(time.col) - lag( !!sym(time.col) ), 
-            growth_diff =!!sym(assay.type) - lag(!!sym(assay.type)),
-            growth_rate = (!!sym(assay.type) - lag(!!sym(assay.type))) / lag(!!sym(assay.type)),
-            var_abund = (!!sym(assay.type) - lag(!!sym(assay.type))) / time_lag
-        )
-    return(assay_data)
-}
-
-.clean_growth_metrics <- function(grwt, time.col = NULL, ...) {
-    # Calculate max growth
-    maxgrs <- grwt %>%
-        summarize(max_growth = max(growth_diff, na.rm = TRUE))
-    # Merge growth data with max growth
-    grs.all <- merge(grwt, maxgrs, by = "Feature_ID")
-    # Add 'ismax' column indicating if the growth is the maximum
-    grs.all <- grs.all %>%
-        mutate(ismax = ifelse(growth_diff == max_growth, TRUE, FALSE))
-    # Clean and abbreviate FeatureID names
-    grs.all$Feature_IDabb <- toupper(abbreviate(grs.all$Feature_ID, 
-                                               minlength = 3, 
-                                               method = "both.sides"))
-    # Create 'Feature.time' column combining abbreviation and time information
-    grs.all$Feature_time <- paste0(grs.all$Feature_IDabb, " ", 
-                                   grs.all[[time.col]], "h")
-    
-    return(grs.all)
+            time_lag = !!sym(time.col) -
+                lag(!!sym(time.col), n = time.interval),
+            growth_diff = value - lag(value, n = time.interval),
+            growth_rate = (value - lag(value, n = time.interval)) /
+                lag(value, n = time.interval),
+            var_abund = (value - lag(value, n = time.interval)) / time_lag
+        ) |>
+        # Remove value column that includes average abundances
+        select(-value)
+    return(df)
 }