Skip to content

Commit

Permalink
Merge pull request #305 from stemangiola/improve-random-effect-disper…
Browse files Browse the repository at this point in the history
…sion

Improve random effect dispersion
  • Loading branch information
stemangiola authored Dec 7, 2023
2 parents 2ae63e6 + 0693ee1 commit 8c17496
Show file tree
Hide file tree
Showing 8 changed files with 111 additions and 11 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: tidybulk
Title: Brings transcriptomics to the tidyverse
Version: 1.15.2
Version: 1.15.3
Authors@R: c(person("Stefano", "Mangiola", email = "[email protected]",
role = c("aut", "cre")),
person("Maria", "Doyle", email = "[email protected]",
Expand Down
9 changes: 8 additions & 1 deletion R/functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -758,10 +758,17 @@ get_differential_transcript_abundance_glmmSeq <- function(.data,
# Reorder counts
counts = counts[,rownames(metadata),drop=FALSE]

# Create design matrix for dispersion, removing random effects
design =
model.matrix(
object = .formula |> eliminate_random_effects(),
data = metadata
)

if(quo_is_symbolic(.dispersion))
dispersion = .data |> pivot_transcript(!!.transcript) |> select(!!.transcript, !!.dispersion) |> deframe()
else
dispersion = setNames(edgeR::estimateDisp(counts)$tagwise.dispersion, rownames(counts))
dispersion = counts |> edgeR::estimateDisp(design = design) %$% tagwise.dispersion |> setNames(rownames(counts))

# # Check dispersion
# if(!names(dispersion) |> sort() |> identical(
Expand Down
19 changes: 13 additions & 6 deletions R/functions_SE.R
Original file line number Diff line number Diff line change
Expand Up @@ -1133,10 +1133,17 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data,
.data %>%
assay(my_assay)

# Create design matrix for dispersion, removing random effects
design =
model.matrix(
object = .formula |> eliminate_random_effects(),
data = metadata
)

if(quo_is_symbolic(.dispersion))
dispersion = rowData(.data)[,quo_name(.dispersion),drop=FALSE] |> as_tibble(rownames = feature__$name) |> deframe()
else
dispersion = setNames(edgeR::estimateDisp(counts)$tagwise.dispersion, rownames(counts))
dispersion = counts |> edgeR::estimateDisp(design = design) %$% tagwise.dispersion |> setNames(rownames(counts))

# # Check dispersion
# if(!names(dispersion) |> sort() |> identical(
Expand Down Expand Up @@ -1171,11 +1178,11 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data,
# # Add raw object
# attach_to_internals(glmmSeq_object, "glmmSeq") %>%

# Communicate the attribute added
{
rlang::inform("tidybulk says: to access the raw results (fitted GLM) do `attr(..., \"internals\")$glmmSeq`", .frequency_id = "Access DE results glmmSeq", .frequency = "once")
(.)
} %>%
# # Communicate the attribute added
# {
# rlang::inform("tidybulk says: to access the raw results (fitted GLM) do `attr(..., \"internals\")$glmmSeq`", .frequency_id = "Access DE results glmmSeq", .frequency = "once")
# (.)
# } %>%

# Attach prefix
setNames(c(
Expand Down
29 changes: 29 additions & 0 deletions R/methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -2600,6 +2600,7 @@ setMethod("ensembl_to_symbol", "tidybulk", .ensembl_to_symbol)
#' All methods use raw counts, irrespective of if scale_abundance or adjust_abundance have been calculated, therefore it is essential to add covariates such as batch effects (if applicable) in the formula.
#'
#' Underlying method for edgeR framework:
#'
#' .data |>
#'
#' # Filter
Expand All @@ -2623,7 +2624,10 @@ setMethod("ensembl_to_symbol", "tidybulk", .ensembl_to_symbol)
#' edgeR::glmQLFit(design) |> // or glmFit according to choice
#' edgeR::glmQLFTest(coef = 2, contrast = my_contrasts) // or glmLRT according to choice
#'
#'
#'
#' Underlying method for DESeq2 framework:
#'
#' keep_abundant(
#' factor_of_interest = !!as.symbol(parse_formula(.formula)[[1]]),
#' minimum_counts = minimum_counts,
Expand All @@ -2636,6 +2640,31 @@ setMethod("ensembl_to_symbol", "tidybulk", .ensembl_to_symbol)
#' DESeq2::results()
#'
#'
#'
#' Underlying method for glmmSeq framework:
#'
#' counts =
#' .data %>%
#' assay(my_assay)
#'
#' # Create design matrix for dispersion, removing random effects
#' design =
#' model.matrix(
#' object = .formula |> eliminate_random_effects(),
#' data = metadata
#' )
#'
#' dispersion = counts |> edgeR::estimateDisp(design = design) %$% tagwise.dispersion |> setNames(rownames(counts))
#'
#' glmmSeq( .formula,
#' countdata = counts ,
#' metadata = metadata |> as.data.frame(),
#' dispersion = dispersion,
#' progress = TRUE,
#' method = method |> str_remove("(?i)^glmmSeq_" ),
#' )
#'
#'
#' @return A consistent object (to the input) with additional columns for the statistics from the test (e.g., log fold change, p-value and false discovery rate).
#'
#'
Expand Down
30 changes: 29 additions & 1 deletion R/utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -1524,4 +1524,32 @@ feature__ = get_special_column_name_symbol(".feature")
sample__ = get_special_column_name_symbol(".sample")



#' Produced using ChatGPT - Eliminate Random Effects from a Formula
#'
#' This function takes a mixed-effects model formula and returns a modified
#' formula with all random effects removed, leaving only the fixed effects.
#'
#' @param formula An object of class \code{formula}, representing a mixed-effects model formula.
#' @return A formula object with random effects parts removed.
#' @examples
#' eliminate_random_effects(~ age_days * sex + (1 | file_id) + ethnicity_simplified + assay_simplified + .aggregated_cells + (1 + age_days * sex | tissue))
#' @noRd
#' @importFrom stats as.formula
eliminate_random_effects <- function(formula) {
# Convert the formula to a string
formula_str <- deparse(formula)

# Split the string by '+' while keeping the brackets content together
split_str <- unlist(strsplit(formula_str, "(?<=\\))\\s*\\+\\s*|\\+\\s*(?=\\()", perl = TRUE))

# Filter out the random effects parts
fixed_effects <- grep("\\|", split_str, value = TRUE, invert = TRUE)

# Combine the fixed effects parts back into a single string
modified_str <- paste(fixed_effects, collapse = " + ")

# Convert the modified string back to a formula
modified_formula <- as.formula(modified_str)

return(modified_formula)
}
29 changes: 29 additions & 0 deletions man/test_differential_abundance-methods.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/testthat/test-bulk_methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -855,7 +855,7 @@ test_that("differential trancript abundance - random effects",{
pull(P_condition_adjusted) |>
head(4) |>
expect_equal(
c(0.02381167, 0.01097209, 0.01056741, 0.02381167),
c(0.03643414, 0.02938584, 0.02938584, 0.03643414),
tolerance=1e-3
)

Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-bulk_methods_SummarizedExperiment.R
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,7 @@ test_that("differential trancript abundance - random effects SE",{
rowData(res)[,"P_condition_adjusted"] |>
head(4) |>
expect_equal(
c(0.1065866, 0.1109067, 0.1116562 , NA),
c(0.03394914, 0.03394914, 0.03394914, NA),
tolerance=1e-2
)

Expand Down

0 comments on commit 8c17496

Please sign in to comment.