Skip to content

Commit

Permalink
up
Browse files Browse the repository at this point in the history
  • Loading branch information
TuomasBorman committed Jan 24, 2025
1 parent 17fb518 commit f79bce6
Showing 1 changed file with 122 additions and 103 deletions.
225 changes: 122 additions & 103 deletions R/getShortTermChange.R
Original file line number Diff line number Diff line change
@@ -1,128 +1,147 @@
#' @title Short term Changes in Abundance
#' @name
#' getShortTermChange
#'
#' @export
#'
#' @title
#' Short term changes in abundance
#'
#' @description
#' Calculates short term changes in abundance of taxa using temporal
#' abundance data.
#'
#' @details
#' This approach is used by Wisnoski NI and colleagues
#' \url{https://github.com/nwisnoski/ul-seedbank}. Their approach is based on
#' the following calculation log(present abundance/past abundance).
#' Also a compositional version using relative abundance similar to
#' Brian WJi, Sheth R et al
#' \url{https://www.nature.com/articles/s41564-020-0685-1} can be used.
#' This approach is useful for identifying short term growth behaviors of taxa.
#'
#' @references
#'
#' Ji, B.W., et al. (2020) Macroecological dynamics of gut microbiota.
#' Nat Microbiol 5, 768–775 . doi: https://doi.org/10.1038/s41564-020-0685-1
#'
#' @return
#' \code{getShortTermChange} returns \code{DataFrame} object containing
#' the short term change in abundance over time for a microbe.
#' \code{addShortTermChange}, on the other hand, returns a
#' \code{\link[SummarizedExperiment:SummarizedExperiment]{SummarizedExperiment}}
#' object with these results in its \code{metadata}.
#'
#' @inheritParams addBaselineDivergence
#'
#' @description Calculates short term changes in abundance of taxa
#' using temporal Abundance data.
#'
#' @param x a \code{\link{SummarizedExperiment}} object.
#' @param assay.type \code{Character scalar}. Specifies the name of assay
#' used in calculation. (Default: \code{"counts"})
#' @param name \code{Character scalar}. Specifies a name for storing
#' short term results. (Default: \code{"short_term_change"})
#'
#' @param ... additional arguments.
#'
#'
#' @return \code{getShortTermChange} returns \code{DataFrame} object containing
#' the short term change in abundance over time for a microbe.
#' \code{addShortTermChange}, on the other hand, returns a
#' \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}}
#' object with these results in its \code{metadata}.
#'
#' @details This approach is used by Wisnoski NI and colleagues
#' \url{https://github.com/nwisnoski/ul-seedbank}. Their approach is based on
#' the following calculation log(present abundance/past abundance).
#' Also a compositional version using relative abundance similar to
#' Brian WJi, Sheth R et al
#' \url{https://www.nature.com/articles/s41564-020-0685-1} can be used.
#' This approach is useful for identifying short term growth behaviors of taxa.
#'
#' @name addShortTermChange
#'
#'
#
#' @examples
#'
#' library(miaTime)
#'
#' # Load time series data
#' data(minimalgut)
#' tse <- minimalgut
#'
#' short_time_labels <- c("74.5h", "173h", "438h", "434h", "390h")
#'
#' # Subset samples by Time_label and StudyIdentifier
#' tse <- tse[, !(tse$Time_label %in% short_time_labels)]
#' tse <- tse[, (tse$StudyIdentifier == "Bioreactor A")]
#'
#' # Get short term change
#' # Case of rarefying counts
#'
#' # Get relative abundances
#' tse <- transformAssay(tse, method = "relabundance")
#' getShortTermChange(tse, assay.type = "relabundance", time.col = "Time.hr")
#'
#' # Case of transforming counts
#' tse <- rarefyAssay(tse, assay.type = "counts")
#' getShortTermChange(tse, assay.type = "subsampled", time.col = "Time.hr")
#' # Calculate short term changes
#' df <- getShortTermChange(tse, assay.type = "relabundance", time.col = "Time.hr", group = "StudyIdentifier")
#'
#' # Select certain bacteria
#' select <- df[["FeatureID"]] %in% c(
#' "Ruminococcus_bromii", "Coprococcus_catus", "Akkermansia_muciniphila")
#' df <- df[ select, ]
#'
#' # Plot results
#' library(ggplot2)
#' p <- ggplot(df, aes(x = Time.hr, y = growth_rate, colour = FeatureID)) +
#' geom_smooth() +
#' facet_grid(. ~ StudyIdentifier, scales = "free") +
#' scale_y_log10()
#' p
#'
#' @seealso
#' \code{\link[mia:getStepwiseDivergence]{mia::getStepwiseDivergence()}}
NULL

#' @rdname addShortTermChange
#' @rdname getShortTermChange
#' @export
setMethod("addShortTermChange", signature = c(x = "SummarizedExperiment"),
function(x, name = "short_term_change", ...){
x <- .check_and_get_altExp(x, ...)
args <- c(list(x = x), list(...)[!names(list(...)) %in% c("altexp")])
# Calculate short term change
res <- do.call(getShortTermChange, args)
# Add to metadata
x <- .add_values_to_metadata(x, name, res, ...)
return(x)
}
)

#' @rdname getShortTermChange
#' @export
#' @importFrom dplyr arrange as_tibble summarize "%>%"
#' @importFrom ggplot2 ggplot aes
#' @importFrom ggrepel geom_text_repel
#' @importFrom mia rarefyAssay transformAssay
#' @importFrom SummarizedExperiment colData
setMethod("getShortTermChange", signature = c(x = "SummarizedExperiment"),
function(x, assay.type = "counts", ...){
function(
x, time.col, assay.type = "counts", time.interval = 1L, group = NULL,
...){
############################## Input check #############################
# Check validity of object
if (nrow(x) == 0L){
stop("No data available in `x` ('x' has nrow(x) == 0L.)",
x <- .check_and_get_altExp(x, ...)
temp <- .check_input(
time.col, list("character scalar"), colnames(colData(x)))
if( !is.numeric(x[[time.col]]) ){
stop("'time.col' must specify numeric column from colData(x)",
call. = FALSE)
}
# Check assay.type
#
.check_assay_present(assay.type, x)
########################### Growth Metrics ############################
grwt <- .calculate_growth_metrics(x, assay.type = assay.type, ...)
# Clean and format growth metrics
grs.all <- .clean_growth_metrics(grwt, ...)
return(grs.all)
#
temp <- .check_input(
group, list(NULL, "character scalar"), colnames(colData(x)))
########################### Input check end ############################
res <- .calculate_growth_metrics(
x, assay.type, time.col, group, time.interval, ...)
return(res)
}
)

#' @rdname addShortTermChange
#' @export
setMethod("addShortTermChange", signature = c(x = "SummarizedExperiment"),
function(x, assay.type = "counts", name = "short_term_change", ...){
# Calculate short term change
res <- getShortTermChange(x, ...)
# Add to metadata
x <- .add_values_to_metadata(x, name, res, ...)
return(x)
}
)

################################ HELP FUNCTIONS ################################

# wrapper to calculate growth matrix
.calculate_growth_metrics <- function(x, assay.type, time.col = NULL, ...) {

# Reshape data and calculate growth metrics
assay_data <- meltSE(x, assay.type = assay.type,
add.col = time.col, row.name = "Feature_ID")
assay_data <- assay_data %>%
arrange( !!sym(time.col) ) %>%
group_by(Feature_ID) %>%
#' @importFrom dplyr arrange group_by summarise mutate select
.calculate_growth_metrics <- function(
x, assay.type, time.col, group, time.interval, ...) {
# Get data in long format
df <- meltSE(x, assay.type, add.col = c(time.col, group))
# If there are replicated samples, give warning that average is calculated
if( anyDuplicated(df[, c("FeatureID", group, time.col)]) ){
warning("The dataset contains replicated samples. The average is ",
"calculated for each time point.", call. = FALSE)
}
# Sort data based on time
df <- df |> arrange( !!sym(time.col) )
# Group based on features and time points. If group was specified, take that
# also into account
if( !is.null(group) ){
df <- df |> group_by(!!sym(group), FeatureID, !!sym(time.col))
} else{
df <- df |> group_by(FeatureID, !!sym(time.col))
}
df <- df |>
# Summarize duplicated samples by taking an average
summarise(value = mean(.data[[assay.type]], na.rm = TRUE)) |>
# For each feature in a sample group, calculate growth metrics
mutate(
time_lag = !!sym(time.col) - lag( !!sym(time.col) ),
growth_diff =!!sym(assay.type) - lag(!!sym(assay.type)),
growth_rate = (!!sym(assay.type) - lag(!!sym(assay.type))) / lag(!!sym(assay.type)),
var_abund = (!!sym(assay.type) - lag(!!sym(assay.type))) / time_lag
)
return(assay_data)
}

.clean_growth_metrics <- function(grwt, time.col = NULL, ...) {
# Calculate max growth
maxgrs <- grwt %>%
summarize(max_growth = max(growth_diff, na.rm = TRUE))
# Merge growth data with max growth
grs.all <- merge(grwt, maxgrs, by = "Feature_ID")
# Add 'ismax' column indicating if the growth is the maximum
grs.all <- grs.all %>%
mutate(ismax = ifelse(growth_diff == max_growth, TRUE, FALSE))
# Clean and abbreviate FeatureID names
grs.all$Feature_IDabb <- toupper(abbreviate(grs.all$Feature_ID,
minlength = 3,
method = "both.sides"))
# Create 'Feature.time' column combining abbreviation and time information
grs.all$Feature_time <- paste0(grs.all$Feature_IDabb, " ",
grs.all[[time.col]], "h")

return(grs.all)
time_lag = !!sym(time.col) -
lag(!!sym(time.col), n = time.interval),
growth_diff = value - lag(value, n = time.interval),
growth_rate = (value - lag(value, n = time.interval)) /
lag(value, n = time.interval),
var_abund = (value - lag(value, n = time.interval)) / time_lag
) |>
# Remove value column that includes average abundances
select(-value)
return(df)
}

0 comments on commit f79bce6

Please sign in to comment.