Skip to content

Commit

Permalink
bug fix for example datasets and secom functions
Browse files Browse the repository at this point in the history
  • Loading branch information
linh11 committed Mar 25, 2023
1 parent 4d089f2 commit 891f10f
Show file tree
Hide file tree
Showing 29 changed files with 602 additions and 543 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Package: ANCOMBC
Type: Package
Title: Microbiome differential abudance and correlation analyses with bias
correction
Version: 2.1.2
Version: 2.1.3
Description: ANCOMBC is a package containing differential abundance (DA) and
correlation analyses for microbiome data. Specifically, the package includes
Analysis of Compositions of Microbiomes with Bias Correction 2 (ANCOM-BC2),
Expand All @@ -14,7 +14,7 @@ Description: ANCOMBC is a package containing differential abundance (DA) and
sequencing efficiencies (taxon-specific biases). Methodologies included in
the ANCOMBC package are designed to correct these biases and construct
statistically consistent estimators.
Date: 2022-12-17
Date: 2023-03-25
Authors@R: c(
person(given = "Huang",
family = "Lin",
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ importFrom(energy,dcor.test)
importFrom(foreach,"%:%")
importFrom(foreach,"%dopar%")
importFrom(foreach,foreach)
importFrom(foreach,registerDoSEQ)
importFrom(lme4,lmerControl)
importFrom(lmerTest,lmer)
importFrom(magrittr,"%>%")
Expand Down
2 changes: 1 addition & 1 deletion NEWS
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
version 2.0.xx (last update 2022-12-17)
+ add ancombc2 function
+ Add ancombc2 function

version 1.6.xx (last update 2021-08-13)
+ Fixed bugs
Expand Down
27 changes: 16 additions & 11 deletions R/ancom.R
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,11 @@
#' @examples
#' library(ANCOMBC)
#' library(mia)
#' data(hitchip1006)
#' data(atlas1006, package = "microbiome")
#' tse = mia::makeTreeSummarizedExperimentFromPhyloseq(atlas1006)
#'
#' # subset to baseline
#' tse = hitchip1006[, hitchip1006$time == 0]
#' tse = tse[, tse$time == 0]
#'
#' # run ancom function
#' set.seed(123)
Expand Down Expand Up @@ -153,7 +154,7 @@
#' @importFrom lme4 lmerControl
#' @importFrom dplyr mutate
#' @importFrom parallel makeCluster stopCluster
#' @importFrom foreach foreach %dopar%
#' @importFrom foreach foreach %dopar% registerDoSEQ
#' @importFrom doParallel registerDoParallel
#' @importFrom magrittr %>%
#' @importFrom rlang .data
Expand Down Expand Up @@ -292,10 +293,15 @@ ancom = function(data = NULL, assay_name = "counts", tax_level = NULL,
}

idx1 = NULL

if (n_cl > 1) {
cl = parallel::makeCluster(n_cl)
doParallel::registerDoParallel(cl)
} else {
foreach::registerDoSEQ()
}

if (main_cat == 0) {
cl = makeCluster(n_cl)
registerDoParallel(cl)

result = foreach(idx1 = seq_len(n_tax), .combine = comb, .multicombine = TRUE) %dopar% {
alr_data = apply(comp_table, 1, function(x) x - comp_table[idx1, ])
alr_data = cbind(alr_data, meta_data)
Expand Down Expand Up @@ -338,11 +344,7 @@ ancom = function(data = NULL, assay_name = "counts", tax_level = NULL,

list(p_vec, beta_vec)
}
stopCluster(cl)
} else {
cl = makeCluster(n_cl)
registerDoParallel(cl)

result = foreach(idx1 = seq_len(n_tax), .combine = comb, .multicombine = TRUE) %dopar% {
alr_data = apply(comp_table, 1, function(x) x - comp_table[idx1, ])
alr_data = cbind(alr_data, meta_data)
Expand Down Expand Up @@ -382,7 +384,10 @@ ancom = function(data = NULL, assay_name = "counts", tax_level = NULL,

list(p_vec, beta_vec)
}
stopCluster(cl)
}

if (n_cl > 1) {
parallel::stopCluster(cl)
}

p_data = result[[1]]
Expand Down
22 changes: 15 additions & 7 deletions R/ancombc.R
Original file line number Diff line number Diff line change
Expand Up @@ -164,10 +164,11 @@
#'
#' #========================Run ANCOMBC Using a Real Data=======================
#' library(ANCOMBC)
#' data(hitchip1006)
#' data(atlas1006, package = "microbiome")
#' tse = mia::makeTreeSummarizedExperimentFromPhyloseq(atlas1006)
#'
#' # subset to baseline
#' tse = hitchip1006[, hitchip1006$time == 0]
#' tse = tse[, tse$time == 0]
#'
#' # run ancombc function
#' set.seed(123)
Expand Down Expand Up @@ -208,7 +209,7 @@
#' @importFrom TreeSummarizedExperiment TreeSummarizedExperiment
#' @importFrom S4Vectors DataFrame SimpleList
#' @importFrom parallel makeCluster stopCluster
#' @importFrom foreach foreach %dopar%
#' @importFrom foreach foreach %dopar% registerDoSEQ
#' @importFrom doParallel registerDoParallel
#' @importFrom doRNG %dorng%
#' @importFrom MASS ginv
Expand All @@ -224,9 +225,13 @@ ancombc = function(data = NULL, assay_name = "counts",
conserve = FALSE, alpha = 0.05, global = FALSE,
n_cl = 1, verbose = FALSE){
message("'ancombc' is deprecated \n", "Use 'ancombc2' instead")

cl = parallel::makeCluster(n_cl)
doParallel::registerDoParallel(cl)

if (n_cl > 1) {
cl = parallel::makeCluster(n_cl)
doParallel::registerDoParallel(cl)
} else {
foreach::registerDoSEQ()
}

# 1. Data pre-processing
# TSE data construction
Expand Down Expand Up @@ -415,7 +420,10 @@ ancombc = function(data = NULL, assay_name = "counts",
samp_frac = theta_hat, delta_em = delta_em,
delta_wls = delta_wls, res = res, res_global = res_global)

parallel::stopCluster(cl)
if (n_cl > 1) {
parallel::stopCluster(cl)
}

return(out)
}

Expand Down
126 changes: 71 additions & 55 deletions R/ancombc2.R
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,11 @@
#' Default is "counts".
#' See \code{?SummarizedExperiment::assay} for more details.
#' @param tax_level character. The taxonomic level of interest. The input data
#' can be agglomerated at different taxonomic levels based on your research
#' interest. Default is NULL, i.e., do not perform agglomeration, and the
#' can be analyzed at any taxonomic level without prior agglomeration.
#' Note that \code{tax_level} must be a value from \code{taxonomyRanks}, which
#' includes "Kingdom", "Phylum" "Class", "Order", "Family" "Genus" or "Species".
#' See \code{?mia::taxonomyRanks} for more details.
#' Default is NULL, i.e., do not perform agglomeration, and the
#' ANCOM-BC2 anlysis will be performed at the lowest taxonomic level of the
#' input \code{data}.
#' @param fix_formula the character string expresses how the microbial absolute
Expand Down Expand Up @@ -276,19 +279,20 @@
#'
#' #=======================Run ANCOMBC2 Using a Real Data=======================
#' library(ANCOMBC)
#' data(dietswap)
#' data(dietswap, package = "microbiome")
#' tse = mia::makeTreeSummarizedExperimentFromPhyloseq(dietswap)
#'
#' colData(dietswap)$bmi_group = factor(colData(dietswap)$bmi_group,
#' levels = c("obese",
#' "overweight",
#' "lean"))
#' colData(tse)$bmi_group = factor(colData(tse)$bmi_group,
#' levels = c("obese",
#' "overweight",
#' "lean"))
#'
#' set.seed(123)
#' # Note that setting pseudo_sens = FALSE, max_iter = 1, and B = 1 is
#' # only for the sake of speed
#' # Set pseudo_sens = TRUE, and use default or larger values for max_iter and B
#' # for better performance
#' out = ancombc2(data = dietswap, assay_name = "counts", tax_level = "Phylum",
#' out = ancombc2(data = tse, assay_name = "counts", tax_level = "Phylum",
#' fix_formula = "nationality + timepoint + bmi_group",
#' rand_formula = "(timepoint | subject)",
#' p_adj_method = "holm", pseudo = 0, pseudo_sens = FALSE,
Expand Down Expand Up @@ -329,7 +333,7 @@
#' \insertRef{guo2010controlling}{ANCOMBC}
#'
#' \insertRef{grandhi2016multiple}{ANCOMBC}
#'
#'
#' @rawNamespace import(stats, except = filter)
#' @importFrom utils combn
#' @importFrom mia makeTreeSummarizedExperimentFromPhyloseq taxonomyRanks agglomerateByRank
Expand All @@ -342,7 +346,7 @@
#' @importFrom emmeans emmeans contrast
#' @importFrom CVXR Variable Minimize Problem solve matrix_frac
#' @importFrom parallel makeCluster stopCluster
#' @importFrom foreach foreach %dopar% %:%
#' @importFrom foreach foreach %dopar% %:% registerDoSEQ
#' @importFrom doParallel registerDoParallel
#' @importFrom doRNG %dorng%
#' @importFrom rngtools setRNG
Expand All @@ -369,8 +373,12 @@ ancombc2 = function(data, assay_name = "counts", tax_level = NULL,
node = NULL,
solver = "ECOS",
B = 100)){
cl = parallel::makeCluster(n_cl)
doParallel::registerDoParallel(cl)
if (n_cl > 1) {
cl = parallel::makeCluster(n_cl)
doParallel::registerDoParallel(cl)
} else {
foreach::registerDoSEQ()
}

# 1. Data pre-processing
# TSE data construction
Expand Down Expand Up @@ -474,13 +482,16 @@ ancombc2 = function(data, assay_name = "counts", tax_level = NULL,
message("Estimating sample-specific biases ...")
}
fun_list = list(bias_em)

bias1 = foreach(i = seq_len(ncol(beta1)), .combine = rbind) %dorng% {
output = fun_list[[1]](beta = beta1[, i],
var_hat = var_hat1[, i],
tol = em_control$tol,
max_iter = em_control$max_iter)
output = fun_list[[1]](beta = beta1[, i],
var_hat = var_hat1[, i],
tol = em_control$tol,
max_iter = em_control$max_iter)
}
bias1 = data.frame(bias1, row.names = fix_eff, check.names = FALSE)
colnames(bias1) = c("delta_em", "delta_wls", "var_delta")

delta_em = bias1$delta_em
delta_wls = bias1$delta_wls
var_delta = bias1$var_delta
Expand Down Expand Up @@ -564,43 +575,45 @@ ancombc2 = function(data, assay_name = "counts", tax_level = NULL,
}
pseudo_list = seq(0, 1, 0.01)
tformula = paste0("~ ", fix_formula)

pseudo_sens_tab = foreach(i = seq_len(n_tax), .combine = rbind) %dorng% {
count1 = core2$feature_table[i, ]
nlp_tab = vector(mode = "numeric")
for (j in seq_along(pseudo_list)) {
pseudo = pseudo_list[j]
count2 = count1 + pseudo
log_count = log(count2)
log_count[is.infinite(log_count)] = 0
log_resid = log_count - mean(log_count)
log_resid_crt = log_resid - theta_hat
df = data.frame(y = log_resid_crt, meta_data)
lm_fit = stats::lm(formula(paste("y", tformula)), data = df)
# Negative log p-value
nlp = -log(summary(lm_fit)$coef[, "Pr(>|t|)"])
if (global) {
anova_fit = anova(lm_fit)
nlp_global = -log(anova_fit$`Pr(>F)`[grep(group, rownames(anova_fit))])
nlp = c(nlp, global = nlp_global)
}
if (pairwise) {
emm_fit = emmeans::emmeans(lm_fit, specs = group)
emm_pair = emmeans::contrast(emm_fit, "pairwise",
adjust = "none")
nlp_pair = -log(summary(emm_pair)[, "p.value"])
names(nlp_pair) = summary(emm_pair)[, "contrast"]
nlp = c(nlp, nlp_pair)
}
nlp_tab = cbind(nlp_tab, nlp)
count1 = core2$feature_table[i, ]
nlp_tab = vector(mode = "numeric")
for (j in seq_along(pseudo_list)) {
pseudo = pseudo_list[j]
count2 = count1 + pseudo
log_count = log(count2)
log_count[is.infinite(log_count)] = 0
log_resid = log_count - mean(log_count)
log_resid_crt = log_resid - theta_hat
df = data.frame(y = log_resid_crt, meta_data)
lm_fit = stats::lm(formula(paste("y", tformula)), data = df)
# Negative log p-value
nlp = -log(summary(lm_fit)$coef[, "Pr(>|t|)"])
if (global) {
anova_fit = anova(lm_fit)
nlp_global = -log(anova_fit$`Pr(>F)`[grep(group, rownames(anova_fit))])
nlp = c(nlp, global = nlp_global)
}
if (pairwise | trend) {
emm_fit = emmeans::emmeans(lm_fit, specs = group)
emm_pair = emmeans::contrast(emm_fit, "pairwise",
adjust = "none")
nlp_pair = -log(summary(emm_pair)[, "p.value"])
names(nlp_pair) = summary(emm_pair)[, "contrast"]
nlp = c(nlp, nlp_pair)
}
apply(nlp_tab, 1, function(x) {
ifelse(all(x > -log(alpha)), 0, sd(x))
})
nlp_tab = cbind(nlp_tab, nlp)
}
apply(nlp_tab, 1, function(x) {
ifelse(all(x > -log(alpha)), 0, sd(x))
})
}
pseudo_sens_tab = as.data.frame(pseudo_sens_tab)
pseudo_sens_tab = cbind(data.frame(taxon = tax_name),
pseudo_sens_tab)
pseudo_sens_tab = data.frame(taxon = tax_name,
pseudo_sens_tab,
check.names = FALSE)
rownames(pseudo_sens_tab) = NULL

} else {
pseudo_sens_tab = NULL
}
Expand All @@ -617,10 +630,10 @@ ancombc2 = function(data, assay_name = "counts", tax_level = NULL,
alpha = alpha)
} else { res_global = NULL }

# 6. Results of pairwise test
# 6. Results of multiple pairwise comparisons
if (pairwise) {
if (verbose) {
message("ANCOM-BC2 pairwise directional test ...")
message("ANCOM-BC2 multiple pairwise comparisons ...")
}
res_pair = ancombc_pair(x = x, group = group,
beta_hat = beta_hat,
Expand Down Expand Up @@ -652,7 +665,7 @@ ancombc2 = function(data, assay_name = "counts", tax_level = NULL,
# 7. Results of Dunnet's type of test
if (dunnet) {
if (verbose) {
message("ANCOM-BC2 Dunnet's type of test ...")
message("ANCOM-BC2 multiple pairwise comparisons against the reference group ...")
}
res_dunn = ancombc_dunn(x = x, group = group,
beta_hat = beta_hat, var_hat = var_hat,
Expand Down Expand Up @@ -680,10 +693,10 @@ ancombc2 = function(data, assay_name = "counts", tax_level = NULL,
res_dunn = NULL
}

# 8. Results of trend test
# 8. Results of pattern analysis
if (trend) {
if (verbose) {
message("ANCOM-BC2 trend test ...")
message("ANCOM-BC2 pattern analysis ...")
}
res_trend = ancombc_trend(
x = x, group = group, beta_hat = beta_hat,
Expand Down Expand Up @@ -722,7 +735,10 @@ ancombc2 = function(data, assay_name = "counts", tax_level = NULL,
res_dunn = res_dunn,
res_trend = res_trend)

parallel::stopCluster(cl)
if (n_cl > 1) {
parallel::stopCluster(cl)
}

return(out)
}

Expand Down
Loading

0 comments on commit 891f10f

Please sign in to comment.