-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
17fb518
commit f79bce6
Showing
1 changed file
with
122 additions
and
103 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,128 +1,147 @@ | ||
#' @title Short term Changes in Abundance | ||
#' @name | ||
#' getShortTermChange | ||
#' | ||
#' @export | ||
#' | ||
#' @title | ||
#' Short term changes in abundance | ||
#' | ||
#' @description | ||
#' Calculates short term changes in abundance of taxa using temporal | ||
#' abundance data. | ||
#' | ||
#' @details | ||
#' This approach is used by Wisnoski NI and colleagues | ||
#' \url{https://github.com/nwisnoski/ul-seedbank}. Their approach is based on | ||
#' the following calculation log(present abundance/past abundance). | ||
#' Also a compositional version using relative abundance similar to | ||
#' Brian WJi, Sheth R et al | ||
#' \url{https://www.nature.com/articles/s41564-020-0685-1} can be used. | ||
#' This approach is useful for identifying short term growth behaviors of taxa. | ||
#' | ||
#' @references | ||
#' | ||
#' Ji, B.W., et al. (2020) Macroecological dynamics of gut microbiota. | ||
#' Nat Microbiol 5, 768–775 . doi: https://doi.org/10.1038/s41564-020-0685-1 | ||
#' | ||
#' @return | ||
#' \code{getShortTermChange} returns \code{DataFrame} object containing | ||
#' the short term change in abundance over time for a microbe. | ||
#' \code{addShortTermChange}, on the other hand, returns a | ||
#' \code{\link[SummarizedExperiment:SummarizedExperiment]{SummarizedExperiment}} | ||
#' object with these results in its \code{metadata}. | ||
#' | ||
#' @inheritParams addBaselineDivergence | ||
#' | ||
#' @description Calculates short term changes in abundance of taxa | ||
#' using temporal Abundance data. | ||
#' | ||
#' @param x a \code{\link{SummarizedExperiment}} object. | ||
#' @param assay.type \code{Character scalar}. Specifies the name of assay | ||
#' used in calculation. (Default: \code{"counts"}) | ||
#' @param name \code{Character scalar}. Specifies a name for storing | ||
#' short term results. (Default: \code{"short_term_change"}) | ||
#' | ||
#' @param ... additional arguments. | ||
#' | ||
#' | ||
#' @return \code{getShortTermChange} returns \code{DataFrame} object containing | ||
#' the short term change in abundance over time for a microbe. | ||
#' \code{addShortTermChange}, on the other hand, returns a | ||
#' \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} | ||
#' object with these results in its \code{metadata}. | ||
#' | ||
#' @details This approach is used by Wisnoski NI and colleagues | ||
#' \url{https://github.com/nwisnoski/ul-seedbank}. Their approach is based on | ||
#' the following calculation log(present abundance/past abundance). | ||
#' Also a compositional version using relative abundance similar to | ||
#' Brian WJi, Sheth R et al | ||
#' \url{https://www.nature.com/articles/s41564-020-0685-1} can be used. | ||
#' This approach is useful for identifying short term growth behaviors of taxa. | ||
#' | ||
#' @name addShortTermChange | ||
#' | ||
#' | ||
# | ||
#' @examples | ||
#' | ||
#' library(miaTime) | ||
#' | ||
#' # Load time series data | ||
#' data(minimalgut) | ||
#' tse <- minimalgut | ||
#' | ||
#' short_time_labels <- c("74.5h", "173h", "438h", "434h", "390h") | ||
#' | ||
#' # Subset samples by Time_label and StudyIdentifier | ||
#' tse <- tse[, !(tse$Time_label %in% short_time_labels)] | ||
#' tse <- tse[, (tse$StudyIdentifier == "Bioreactor A")] | ||
#' | ||
#' # Get short term change | ||
#' # Case of rarefying counts | ||
#' | ||
#' # Get relative abundances | ||
#' tse <- transformAssay(tse, method = "relabundance") | ||
#' getShortTermChange(tse, assay.type = "relabundance", time.col = "Time.hr") | ||
#' | ||
#' # Case of transforming counts | ||
#' tse <- rarefyAssay(tse, assay.type = "counts") | ||
#' getShortTermChange(tse, assay.type = "subsampled", time.col = "Time.hr") | ||
#' # Calculate short term changes | ||
#' df <- getShortTermChange(tse, assay.type = "relabundance", time.col = "Time.hr", group = "StudyIdentifier") | ||
#' | ||
#' # Select certain bacteria | ||
#' select <- df[["FeatureID"]] %in% c( | ||
#' "Ruminococcus_bromii", "Coprococcus_catus", "Akkermansia_muciniphila") | ||
#' df <- df[ select, ] | ||
#' | ||
#' # Plot results | ||
#' library(ggplot2) | ||
#' p <- ggplot(df, aes(x = Time.hr, y = growth_rate, colour = FeatureID)) + | ||
#' geom_smooth() + | ||
#' facet_grid(. ~ StudyIdentifier, scales = "free") + | ||
#' scale_y_log10() | ||
#' p | ||
#' | ||
#' @seealso | ||
#' \code{\link[mia:getStepwiseDivergence]{mia::getStepwiseDivergence()}} | ||
NULL | ||
|
||
#' @rdname addShortTermChange | ||
#' @rdname getShortTermChange | ||
#' @export | ||
setMethod("addShortTermChange", signature = c(x = "SummarizedExperiment"), | ||
function(x, name = "short_term_change", ...){ | ||
x <- .check_and_get_altExp(x, ...) | ||
args <- c(list(x = x), list(...)[!names(list(...)) %in% c("altexp")]) | ||
# Calculate short term change | ||
res <- do.call(getShortTermChange, args) | ||
# Add to metadata | ||
x <- .add_values_to_metadata(x, name, res, ...) | ||
return(x) | ||
} | ||
) | ||
|
||
#' @rdname getShortTermChange | ||
#' @export | ||
#' @importFrom dplyr arrange as_tibble summarize "%>%" | ||
#' @importFrom ggplot2 ggplot aes | ||
#' @importFrom ggrepel geom_text_repel | ||
#' @importFrom mia rarefyAssay transformAssay | ||
#' @importFrom SummarizedExperiment colData | ||
setMethod("getShortTermChange", signature = c(x = "SummarizedExperiment"), | ||
function(x, assay.type = "counts", ...){ | ||
function( | ||
x, time.col, assay.type = "counts", time.interval = 1L, group = NULL, | ||
...){ | ||
############################## Input check ############################# | ||
# Check validity of object | ||
if (nrow(x) == 0L){ | ||
stop("No data available in `x` ('x' has nrow(x) == 0L.)", | ||
x <- .check_and_get_altExp(x, ...) | ||
temp <- .check_input( | ||
time.col, list("character scalar"), colnames(colData(x))) | ||
if( !is.numeric(x[[time.col]]) ){ | ||
stop("'time.col' must specify numeric column from colData(x)", | ||
call. = FALSE) | ||
} | ||
# Check assay.type | ||
# | ||
.check_assay_present(assay.type, x) | ||
########################### Growth Metrics ############################ | ||
grwt <- .calculate_growth_metrics(x, assay.type = assay.type, ...) | ||
# Clean and format growth metrics | ||
grs.all <- .clean_growth_metrics(grwt, ...) | ||
return(grs.all) | ||
# | ||
temp <- .check_input( | ||
group, list(NULL, "character scalar"), colnames(colData(x))) | ||
########################### Input check end ############################ | ||
res <- .calculate_growth_metrics( | ||
x, assay.type, time.col, group, time.interval, ...) | ||
return(res) | ||
} | ||
) | ||
|
||
#' @rdname addShortTermChange | ||
#' @export | ||
setMethod("addShortTermChange", signature = c(x = "SummarizedExperiment"), | ||
function(x, assay.type = "counts", name = "short_term_change", ...){ | ||
# Calculate short term change | ||
res <- getShortTermChange(x, ...) | ||
# Add to metadata | ||
x <- .add_values_to_metadata(x, name, res, ...) | ||
return(x) | ||
} | ||
) | ||
|
||
################################ HELP FUNCTIONS ################################ | ||
|
||
# wrapper to calculate growth matrix | ||
.calculate_growth_metrics <- function(x, assay.type, time.col = NULL, ...) { | ||
|
||
# Reshape data and calculate growth metrics | ||
assay_data <- meltSE(x, assay.type = assay.type, | ||
add.col = time.col, row.name = "Feature_ID") | ||
assay_data <- assay_data %>% | ||
arrange( !!sym(time.col) ) %>% | ||
group_by(Feature_ID) %>% | ||
#' @importFrom dplyr arrange group_by summarise mutate select | ||
.calculate_growth_metrics <- function( | ||
x, assay.type, time.col, group, time.interval, ...) { | ||
# Get data in long format | ||
df <- meltSE(x, assay.type, add.col = c(time.col, group)) | ||
# If there are replicated samples, give warning that average is calculated | ||
if( anyDuplicated(df[, c("FeatureID", group, time.col)]) ){ | ||
warning("The dataset contains replicated samples. The average is ", | ||
"calculated for each time point.", call. = FALSE) | ||
} | ||
# Sort data based on time | ||
df <- df |> arrange( !!sym(time.col) ) | ||
# Group based on features and time points. If group was specified, take that | ||
# also into account | ||
if( !is.null(group) ){ | ||
df <- df |> group_by(!!sym(group), FeatureID, !!sym(time.col)) | ||
} else{ | ||
df <- df |> group_by(FeatureID, !!sym(time.col)) | ||
} | ||
df <- df |> | ||
# Summarize duplicated samples by taking an average | ||
summarise(value = mean(.data[[assay.type]], na.rm = TRUE)) |> | ||
# For each feature in a sample group, calculate growth metrics | ||
mutate( | ||
time_lag = !!sym(time.col) - lag( !!sym(time.col) ), | ||
growth_diff =!!sym(assay.type) - lag(!!sym(assay.type)), | ||
growth_rate = (!!sym(assay.type) - lag(!!sym(assay.type))) / lag(!!sym(assay.type)), | ||
var_abund = (!!sym(assay.type) - lag(!!sym(assay.type))) / time_lag | ||
) | ||
return(assay_data) | ||
} | ||
|
||
.clean_growth_metrics <- function(grwt, time.col = NULL, ...) { | ||
# Calculate max growth | ||
maxgrs <- grwt %>% | ||
summarize(max_growth = max(growth_diff, na.rm = TRUE)) | ||
# Merge growth data with max growth | ||
grs.all <- merge(grwt, maxgrs, by = "Feature_ID") | ||
# Add 'ismax' column indicating if the growth is the maximum | ||
grs.all <- grs.all %>% | ||
mutate(ismax = ifelse(growth_diff == max_growth, TRUE, FALSE)) | ||
# Clean and abbreviate FeatureID names | ||
grs.all$Feature_IDabb <- toupper(abbreviate(grs.all$Feature_ID, | ||
minlength = 3, | ||
method = "both.sides")) | ||
# Create 'Feature.time' column combining abbreviation and time information | ||
grs.all$Feature_time <- paste0(grs.all$Feature_IDabb, " ", | ||
grs.all[[time.col]], "h") | ||
|
||
return(grs.all) | ||
time_lag = !!sym(time.col) - | ||
lag(!!sym(time.col), n = time.interval), | ||
growth_diff = value - lag(value, n = time.interval), | ||
growth_rate = (value - lag(value, n = time.interval)) / | ||
lag(value, n = time.interval), | ||
var_abund = (value - lag(value, n = time.interval)) / time_lag | ||
) |> | ||
# Remove value column that includes average abundances | ||
select(-value) | ||
return(df) | ||
} |