Skip to content

Commit

Permalink
add lm method in trans_alpha and trans_diff
Browse files Browse the repository at this point in the history
  • Loading branch information
ChiLiubio committed Feb 8, 2024
1 parent ae828d4 commit 8c1050a
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 45 deletions.
92 changes: 56 additions & 36 deletions R/trans_alpha.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ trans_alpha <- R6Class(classname = "trans_alpha",
#' For multi-way anova, Please use \code{formula} parameter to specify the model and see \code{\link{aov}} for more details}
#' \item{\strong{'scheirerRayHare'}}{Scheirer Ray Hare test (nonparametric test) for a two-way factorial experiment;
#' see \code{scheirerRayHare} function of \code{rcompanion} package}
#' \item{\strong{'lm'}}{Linear Model based on the \code{lm} function}
#' \item{\strong{'lme'}}{Linear Mixed Effect Model based on the \code{lmerTest} package}
#' \item{\strong{'betareg'}}{Beta Regression for Rates and Proportions based on the \code{betareg} package}
#' \item{\strong{'glmm'}}{Generalized linear mixed model (GLMM) based on the \code{glmmTMB} package}
Expand Down Expand Up @@ -115,7 +116,7 @@ trans_alpha <- R6Class(classname = "trans_alpha",
#' t1$cal_diff(method = "anova")
#' }
cal_diff = function(
method = c("KW", "KW_dunn", "wilcox", "t.test", "anova", "scheirerRayHare", "lme", "betareg", "glmm", "glmm_beta")[1],
method = c("KW", "KW_dunn", "wilcox", "t.test", "anova", "scheirerRayHare", "lm", "lme", "betareg", "glmm", "glmm_beta")[1],
measure = NULL,
p_adjust_method = "fdr",
formula = NULL,
Expand All @@ -125,15 +126,15 @@ trans_alpha <- R6Class(classname = "trans_alpha",
return_model = FALSE,
...
){
method <- match.arg(method, c("KW", "KW_dunn", "wilcox", "t.test", "anova", "scheirerRayHare", "lme", "betareg", "glmm", "glmm_beta"))
method <- match.arg(method, c("KW", "KW_dunn", "wilcox", "t.test", "anova", "scheirerRayHare", "lm", "lme", "betareg", "glmm", "glmm_beta"))
group <- self$group

if(method %in% c("scheirerRayHare", "lme", "betareg", "glmm", "glmm_beta") & is.null(formula)){
if(method %in% c("scheirerRayHare", "lm", "lme", "betareg", "glmm", "glmm_beta") & is.null(formula)){
if(is.null(formula)){
stop("formula is necessary! Please provide formula parameter!")
}
}
if(!method %in% c("anova", "scheirerRayHare", "lme", "betareg", "glmm", "glmm_beta")){
if(!method %in% c("anova", "scheirerRayHare", "lm", "lme", "betareg", "glmm", "glmm_beta")){
if(is.null(group)){
stop("For the method: ", method, " , group is necessary! Please recreate the object!")
}
Expand Down Expand Up @@ -321,58 +322,77 @@ trans_alpha <- R6Class(classname = "trans_alpha",
compare_result$Significance <- generate_p_siglabel(compare_result$P.unadj, nonsig = "ns")
method <- paste0(method, "-formula")
}
if(method %in% c("lme", "glmm", "glmm_beta")){
if(method %in% c("lm", "lme", "glmm", "glmm_beta")){
compare_result <- data.frame()
if(return_model){
res_model <- list()
}
for(k in measure){
div_table <- data_alpha[data_alpha$Measure == k, ]
tmp_res <- data.frame()
if(method == "lme"){
tmp <- lmerTest::lmer(reformulate(formula, "Value"), data = div_table, ...)
if(method == "lm"){
tmp <- stats::lm(reformulate(formula, "Value"), data = div_table, ...)
if(return_model){
res_model[[k]] <- tmp
}
tmp_summary <- summary(tmp)
tmp_coefficients <- as.data.frame(tmp_summary$coefficients, check.names = FALSE)
tmp_model_R2 <- performance::r2(tmp)
tmp_model_p <- anova(tmp)
tmp_random_p <- lmerTest::ranova(tmp)
tmp_res <- data.frame(method = paste0(method, " formula for ", formula),
Measure = k,
Factors = c("Model", rownames(tmp_model_p), rownames(tmp_random_p), rownames(tmp_coefficients)),
Conditional_R2 = c(tmp_model_R2$R2_conditional, rep(NA, nrow(tmp_model_p) + nrow(tmp_random_p) + length(rownames(tmp_coefficients)))),
Marginal_R2 = c(tmp_model_R2$R2_marginal, rep(NA, nrow(tmp_model_p) + nrow(tmp_random_p) + length(rownames(tmp_coefficients)))),
Estimate = c(NA, rep(NA, nrow(tmp_model_p) + nrow(tmp_random_p)), tmp_coefficients$Estimate),
P.unadj = c(NA, tmp_model_p$`Pr(>F)`, tmp_random_p$`Pr(>Chisq)`, tmp_coefficients$`Pr(>|t|)`)
Factors = c("Model", rownames(tmp_model_p), rownames(tmp_coefficients)),
R2 = c(tmp_model_R2$R2, rep(NA, nrow(tmp_model_p) + length(rownames(tmp_coefficients)))),
R2_adjusted = c(tmp_model_R2$R2_adjusted, rep(NA, nrow(tmp_model_p) + length(rownames(tmp_coefficients)))),
Estimate = c(NA, rep(NA, nrow(tmp_model_p)), tmp_coefficients$Estimate),
P.unadj = c(NA, tmp_model_p$`Pr(>F)`, tmp_coefficients$`Pr(>|t|)`)
)
}else{
if(method == "glmm"){
tmp <- glmmTMB::glmmTMB(reformulate(formula, "Value"), data = div_table, ...)
if(method == "lme"){
tmp <- lmerTest::lmer(reformulate(formula, "Value"), data = div_table, ...)
if(return_model){
res_model[[k]] <- tmp
}
tmp_summary <- summary(tmp)
tmp_coefficients <- as.data.frame(tmp_summary$coefficients, check.names = FALSE)
tmp_model_R2 <- performance::r2(tmp)
tmp_model_p <- anova(tmp)
tmp_random_p <- lmerTest::ranova(tmp)
tmp_res <- data.frame(method = paste0(method, " formula for ", formula),
Measure = k,
Factors = c("Model", rownames(tmp_model_p), rownames(tmp_random_p), rownames(tmp_coefficients)),
Conditional_R2 = c(tmp_model_R2$R2_conditional, rep(NA, nrow(tmp_model_p) + nrow(tmp_random_p) + length(rownames(tmp_coefficients)))),
Marginal_R2 = c(tmp_model_R2$R2_marginal, rep(NA, nrow(tmp_model_p) + nrow(tmp_random_p) + length(rownames(tmp_coefficients)))),
Estimate = c(NA, rep(NA, nrow(tmp_model_p) + nrow(tmp_random_p)), tmp_coefficients$Estimate),
P.unadj = c(NA, tmp_model_p$`Pr(>F)`, tmp_random_p$`Pr(>Chisq)`, tmp_coefficients$`Pr(>|t|)`)
)
}else{
tmp <- glmmTMB::glmmTMB(reformulate(formula, "Value"), data = div_table, family = glmmTMB::beta_family(link = "logit"), ...)
}
if(return_model){
res_model[[k]] <- tmp
}
tmp_summary <- summary(tmp)
tmp_coefficients <- as.data.frame(tmp_summary$coefficients$cond, check.names = FALSE)
tmp_model_p <- car::Anova(tmp)
tmp_model_R2 <- performance::r2(tmp)
test <- try(tmp_model_R2$R2_conditional, silent = TRUE)
if(inherits(test, "try-error")) {
message("R2 unavailable for ", k, " !")
tmp_model_R2 <- list(R2_conditional = NA, R2_marginal = NA)
if(method == "glmm"){
tmp <- glmmTMB::glmmTMB(reformulate(formula, "Value"), data = div_table, ...)
}else{
tmp <- glmmTMB::glmmTMB(reformulate(formula, "Value"), data = div_table, family = glmmTMB::beta_family(link = "logit"), ...)
}
if(return_model){
res_model[[k]] <- tmp
}
tmp_summary <- summary(tmp)
tmp_coefficients <- as.data.frame(tmp_summary$coefficients$cond, check.names = FALSE)
tmp_model_p <- car::Anova(tmp)
tmp_model_R2 <- performance::r2(tmp)
test <- try(tmp_model_R2$R2_conditional, silent = TRUE)
if(inherits(test, "try-error")) {
message("R2 unavailable for ", k, " !")
tmp_model_R2 <- list(R2_conditional = NA, R2_marginal = NA)
}
tmp_res <- data.frame(method = paste0(method, " formula for ", formula),
Measure = k,
Factors = c("Model", rownames(tmp_model_p), rownames(tmp_coefficients)),
Conditional_R2 = c(tmp_model_R2$R2_conditional, rep(NA, nrow(tmp_model_p) + length(rownames(tmp_coefficients)))),
Marginal_R2 = c(tmp_model_R2$R2_marginal, rep(NA, nrow(tmp_model_p) + length(rownames(tmp_coefficients)))),
Estimate = c(NA, rep(NA, nrow(tmp_model_p)), tmp_coefficients$Estimate),
P.unadj = c(NA, tmp_model_p$`Pr(>Chisq)`, tmp_coefficients$`Pr(>|z|)`)
)
}
tmp_res <- data.frame(method = paste0(method, " formula for ", formula),
Measure = k,
Factors = c("Model", rownames(tmp_model_p), rownames(tmp_coefficients)),
Conditional_R2 = c(tmp_model_R2$R2_conditional, rep(NA, nrow(tmp_model_p) + length(rownames(tmp_coefficients)))),
Marginal_R2 = c(tmp_model_R2$R2_marginal, rep(NA, nrow(tmp_model_p) + length(rownames(tmp_coefficients)))),
Estimate = c(NA, rep(NA, nrow(tmp_model_p)), tmp_coefficients$Estimate),
P.unadj = c(NA, tmp_model_p$`Pr(>Chisq)`, tmp_coefficients$`Pr(>|z|)`)
)
}
compare_result %<>% rbind(., tmp_res)
}
Expand Down
11 changes: 6 additions & 5 deletions R/trans_diff.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ trans_diff <- R6Class(classname = "trans_diff",
#' \item{\strong{'anova'}}{ANOVA for one-way or multi-factor analysis; see \code{cal_diff} function of \code{trans_alpha} class}
#' \item{\strong{'scheirerRayHare'}}{Scheirer Ray Hare test for nonparametric test used for a two-way factorial experiment;
#' see \code{scheirerRayHare} function of \code{rcompanion} package}
#' \item{\strong{'lm'}}{Linear Model based on the \code{lm} function}
#' \item{\strong{'ancombc2'}}{Analysis of Compositions of Microbiomes with Bias Correction (ANCOM-BC)
#' based on the \code{ancombc2} function from \code{ANCOMBC} package;
#' only support the case that \code{group} is same with \code{fix_formula} parameter in order to get well-organized output table;
Expand Down Expand Up @@ -138,7 +139,7 @@ trans_diff <- R6Class(classname = "trans_diff",
#' }
initialize = function(
dataset = NULL,
method = c("lefse", "rf", "metastat", "metagenomeSeq", "KW", "KW_dunn", "wilcox", "t.test", "anova", "scheirerRayHare",
method = c("lefse", "rf", "metastat", "metagenomeSeq", "KW", "KW_dunn", "wilcox", "t.test", "anova", "scheirerRayHare", "lm",
"ancombc2", "ALDEx2_t", "ALDEx2_kw", "DESeq2", "linda", "maaslin2", "betareg", "lme", "glmm", "glmm_beta")[1],
group = NULL,
taxa_level = "all",
Expand Down Expand Up @@ -170,11 +171,11 @@ trans_diff <- R6Class(classname = "trans_diff",
message("Input dataset is NULL. Please run the functions with customized data ...")
}else{
method <- match.arg(method, c("lefse", "rf", "metastat", "metagenomeSeq", "KW", "KW_dunn", "wilcox", "t.test",
"anova", "scheirerRayHare", "ancombc2", "ALDEx2_t", "ALDEx2_kw", "DESeq2", "linda", "maaslin2", "betareg", "lme", "glmm", "glmm_beta"))
"anova", "scheirerRayHare", "lm", "ancombc2", "ALDEx2_t", "ALDEx2_kw", "DESeq2", "linda", "maaslin2", "betareg", "lme", "glmm", "glmm_beta"))

tmp_dataset <- clone(dataset)
sampleinfo <- tmp_dataset$sample_table
if(is.null(group) & ! method %in% c("anova", "scheirerRayHare", "betareg", "lme", "glmm", "glmm_beta", "maaslin2")){
if(is.null(group) & ! method %in% c("anova", "scheirerRayHare", "lm", "betareg", "lme", "glmm", "glmm_beta", "maaslin2")){
stop("The group parameter is necessary for differential test method: ", method, " !")
}
if(!is.null(group)){
Expand Down Expand Up @@ -211,7 +212,7 @@ trans_diff <- R6Class(classname = "trans_diff",
abund_table <- filter_output$abund_table
filter_features <- filter_output$filter_features

if(method %in% c("lefse", "rf", "KW", "KW_dunn", "wilcox", "t.test", "anova", "scheirerRayHare", "betareg", "lme", "glmm", "glmm_beta")){
if(method %in% c("lefse", "rf", "KW", "KW_dunn", "wilcox", "t.test", "anova", "scheirerRayHare", "lm", "betareg", "lme", "glmm", "glmm_beta")){
if(remove_unknown){
abund_table %<>% {.[!grepl("__$|uncultured$|Incertae..edis$|_sp$", rownames(.), ignore.case = TRUE), ]}
message(nrow(abund_table), " features are remained after removing unknown features ...")
Expand All @@ -224,7 +225,7 @@ trans_diff <- R6Class(classname = "trans_diff",
self$lefse_norm <- lefse_norm
}

if(method %in% c("KW", "KW_dunn", "wilcox", "t.test", "anova", "scheirerRayHare", "betareg", "lme", "glmm", "glmm_beta")){
if(method %in% c("KW", "KW_dunn", "wilcox", "t.test", "anova", "scheirerRayHare", "lm", "betareg", "lme", "glmm", "glmm_beta")){
abund_foralpha <- abund_table
if(!is.null(transformation)){
message("Perform the transformation with method: ", transformation, " ...")
Expand Down
5 changes: 3 additions & 2 deletions man/trans_alpha.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions man/trans_diff.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 8c1050a

Please sign in to comment.