Skip to content

Commit

Permalink
Rework default UCell sets (#112)
Browse files Browse the repository at this point in the history
* Rework default UCell sets
  • Loading branch information
bbimber authored Feb 6, 2024
1 parent fb64839 commit fb98952
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 90 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ Imports:
BiocParallel,
ggcorrplot,
magrittr,
Matrix
mlr3verse
Suggests:
devtools,
testthat (>= 2.1.0),
Expand All @@ -52,7 +52,7 @@ Remotes:
mojaveazure/seurat-disk,
carmonalab/UCell,
carmonalab/scGate
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
Collate:
'Utils.R'
'CellTypist.R'
Expand Down
2 changes: 1 addition & 1 deletion R/CellTypist.R
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ RunCellTypist <- function(seuratObj, modelName = "Immune_All_Low.pkl", pThreshol

# Cell typist expects a single column:
tbl <- utils::read.table(geneFile, sep = '\t')
write.table(tbl$V1, file = geneFile, row.names = FALSE, col.names = FALSE)
utils::write.table(tbl$V1, file = geneFile, row.names = FALSE, col.names = FALSE)

# Ensure models present:
if (updateModels) {
Expand Down
44 changes: 22 additions & 22 deletions R/Classification.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ TrainModel <- function(training_matrix, celltype, hyperparameter_tuning = F, lea
} else {
#Set model-independent values for the autotuner
measure <- msr("classif.ce")
terminator <- trm("evals", n_evals = n_models)
terminator <- mlr3verse::trm("evals", n_evals = n_models)

#Define a tuning space 25% as large as the number of models
#In the case of sensitive hyperparameters, resolution = 5 allows for a low/medium-low/medium/medium-high/high type parameter space
Expand Down Expand Up @@ -101,34 +101,34 @@ TrainModel <- function(training_matrix, celltype, hyperparameter_tuning = F, lea
learner <- mlr3::lrn("classif.ranger", importance = "permutation", predict_type = "prob")

#Define Ranger Hyperparameter Space (RandomBotv2)
tune_ps <- ps(
num.trees = p_int(lower = 10, upper = 2000),
sample.fraction = p_dbl(lower = 0.1, upper = 1),
respect.unordered.factors = p_fct(levels = c("ignore", "order", "partition")),
min.node.size = p_int(lower = 1, upper = 100),
splitrule = p_fct(levels = c("gini", "extratrees")),
num.random.splits = p_int(lower = 1, upper = 100, depends = splitrule == "extratrees")
tune_ps <- mlr3verse::ps(
num.trees = mlr3verse::p_int(lower = 10, upper = 2000),
sample.fraction = mlr3verse::p_dbl(lower = 0.1, upper = 1),
respect.unordered.factors = mlr3verse::p_fct(levels = c("ignore", "order", "partition")),
min.node.size = mlr3verse::p_int(lower = 1, upper = 100),
splitrule = mlr3verse::p_fct(levels = c("gini", "extratrees")),
num.random.splits = mlr3verse::p_int(lower = 1, upper = 100, depends = splitrule == "extratrees")
)
} else if (learner == "classif.xgboost"){
#Update task
task <- mlr3::TaskClassif$new(classification.data, id = "CellTypeBinaryClassifier", target = "celltype_binary")
#Define learner
learner <- mlr3::lrn("classif.xgboost", predict_type = "prob")
#Define XGBoost model's Hyperparameter Space (RandomBotv2)
tune_ps <- ps(
booster = p_fct(levels = c("gblinear", "gbtree", "dart")),
nrounds = p_int(lower = 2, upper = 8, trafo = function(x) as.integer(round(exp(x)))),
eta = p_dbl(lower = -4, upper = 0, trafo = function(x) 10^x),
gamma = p_dbl(lower = -5, upper = 1, trafo = function(x) 10^x),
lambda = p_dbl(lower = -4, upper = 3, trafo = function(x) 10^x),
alpha = p_dbl(lower = -4, upper = 3, trafo = function(x) 10^x),
subsample = p_dbl(lower = 0.1, upper = 1),
max_depth = p_int(lower = 1, upper = 15),
min_child_weight = p_dbl(lower = -1, upper = 0, trafo = function(x) 10^x),
colsample_bytree = p_dbl(lower = 0.1, upper = 1),
colsample_bylevel = p_dbl(lower = 0.1, upper = 1),
rate_drop = p_int(lower = 0, upper = 1, depends = booster == 'dart'),
skip_drop = p_int(lower = 0, upper = 1, depends = booster == 'dart')
tune_ps <- mlr3verse::ps(
booster = mlr3verse::p_fct(levels = c("gblinear", "gbtree", "dart")),
nrounds = mlr3verse::p_int(lower = 2, upper = 8, trafo = function(x) as.integer(round(exp(x)))),
eta = mlr3verse::p_dbl(lower = -4, upper = 0, trafo = function(x) 10^x),
gamma = mlr3verse::p_dbl(lower = -5, upper = 1, trafo = function(x) 10^x),
lambda = mlr3verse::p_dbl(lower = -4, upper = 3, trafo = function(x) 10^x),
alpha = mlr3verse::p_dbl(lower = -4, upper = 3, trafo = function(x) 10^x),
subsample = mlr3verse::p_dbl(lower = 0.1, upper = 1),
max_depth = mlr3verse::p_int(lower = 1, upper = 15),
min_child_weight = mlr3verse::p_dbl(lower = -1, upper = 0, trafo = function(x) 10^x),
colsample_bytree = mlr3verse::p_dbl(lower = 0.1, upper = 1),
colsample_bylevel = mlr3verse::p_dbl(lower = 0.1, upper = 1),
rate_drop = mlr3verse::p_int(lower = 0, upper = 1, depends = booster == 'dart'),
skip_drop = mlr3verse::p_int(lower = 0, upper = 1, depends = booster == 'dart')
)
}
}
Expand Down
78 changes: 15 additions & 63 deletions R/GeneModules.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#' @param storeRanks Passed directly to UCell::AddModuleScore_UCell. Increases object size but makes future calculations quicker.
#' @param plotCor If true, a plot of correlations between the UCell score and each component gene will be shown
#' @export
CalculateUCellScores <- function(seuratObj, forceRecalculate = FALSE, seed = GetSeed(), ncores = 1, assayName = 'RNA', storeRanks = FALSE, plotCor = TRUE) {
CalculateUCellScores <- function(seuratObj, forceRecalculate = FALSE, seed = GetSeed(), ncores = 1, assayName = 'RNA', storeRanks = TRUE, plotCor = TRUE) {
toCalculate <- list(
TandNK_Activation = GetGeneSet('TandNK_Activation.1'),
TandNK_ActivationCore = GetGeneSet('TandNK_Activation.Core'),
Expand All @@ -22,7 +22,12 @@ CalculateUCellScores <- function(seuratObj, forceRecalculate = FALSE, seed = Get
NaiveT = GetGeneSet('NaiveT'),
Glycolysis = GetGeneSet('Glycolysis'),
Interferon_Response = GetGeneSet('Interferon_Response'),
Interferon_Response_IFI6 = GetGeneSet('Interferon_Response_IFI6_correlated')
Interferon_Response_IFI6 = GetGeneSet('Interferon_Response_IFI6_correlated'),
Ribosomal = GetGeneSet('MMul10_Ribosomal'),
Mitochondrial = GetGeneSet('MMul10_Mitochondrial'),
EffectorCytokines = GetGeneSet('EffectorCytokines'),
ExhaustionOrInhibitory = GetGeneSet('ExhaustionOrInhibitory'),
MAIT_Markers = GetGeneSet('MAIT_Markers')
)

needsRecalc <- forceRecalculate || any(!paste0(names(toCalculate), '_UCell') %in% names(seuratObj@meta.data))
Expand Down Expand Up @@ -91,67 +96,14 @@ CalculateUCellScores <- function(seuratObj, forceRecalculate = FALSE, seed = Get
print('No reductions calculated, cannot plot tSNE/UMAP')
}

if (any(is.na(seuratObj[['TandNK_Activation_UCell']]))) {
print('Data has NAs, cannot make feature plot: TandNK_Activation_UCell')
} else {
if (hasReductions) {
print(Seurat::FeaturePlot(seuratObj, features = 'TandNK_Activation_UCell', min.cutoff = 'q02', max.cutoff = 'q98') + ggtitle('T Cell Activation Score'))
}
}

if (any(is.na(seuratObj[['TandNK_ActivationCore_UCell']]))) {
print('Data has NAs, cannot make feature plot: TandNK_ActivationCore_UCell')
} else {
if (hasReductions) {
print(Seurat::FeaturePlot(seuratObj, features = 'TandNK_ActivationCore_UCell', min.cutoff = 'q02', max.cutoff = 'q98') + ggtitle('T Cell Activation Score (Core Genes)'))
}
}

if (any(is.na(seuratObj[['Cytotoxicity_UCell']]))) {
print('Data has NAs, cannot make feature plot: Cytotoxicity_UCell')
} else {
if (hasReductions) {
print(Seurat::FeaturePlot(seuratObj, features = 'Cytotoxicity_UCell', min.cutoff = 'q05', max.cutoff = 'q95') + ggtitle('Cytotoxicity Score'))
}
}

if (any(is.na(seuratObj[['EffectorT_UCell']]))) {
print('Data has NAs, cannot make feature plot: EffectorT_UCell')
} else {
if (hasReductions) {
print(Seurat::FeaturePlot(seuratObj, features = 'EffectorT_UCell', min.cutoff = 'q05', max.cutoff = 'q95') + ggtitle('Effector T Score'))
}
}

if (any(is.na(seuratObj[['NaiveT_UCell']]))) {
print('Data has NAs, cannot make feature plot: NaiveT_UCell')
} else {
if (hasReductions) {
print(Seurat::FeaturePlot(seuratObj, features = 'NaiveT_UCell', min.cutoff = 'q05', max.cutoff = 'q95') + ggtitle('Naive T Score'))
}
}

if (any(is.na(seuratObj[['CentralMemT_UCell']]))) {
print('Data has NAs, cannot make feature plot: CentralMemT_UCell')
} else {
if (hasReductions) {
print(Seurat::FeaturePlot(seuratObj, features = 'CentralMemT_UCell', min.cutoff = 'q05', max.cutoff = 'q95') + ggtitle('Central Mem T Score'))
}
}

if (any(is.na(seuratObj[['Glycolysis_UCell']]))) {
print('Data has NAs, cannot make feature plot: Glycolysis_UCell')
} else {
if (hasReductions) {
print(Seurat::FeaturePlot(seuratObj, features = 'Glycolysis_UCell', min.cutoff = 'q05', max.cutoff = 'q95') + ggtitle('Glycolysis'))
}
}

if (any(is.na(seuratObj[['Interferon_Response_UCell']]))) {
print('Data has NAs, cannot make feature plot: Interferon_Response_UCell')
} else {
if (hasReductions) {
print(Seurat::FeaturePlot(seuratObj, features = 'Interferon_Response_UCell', min.cutoff = 'q05', max.cutoff = 'q95') + ggtitle('Interferon_Response'))
# Draw one feature plot per gene module. Scores are stored in metadata columns
# named '<module>_UCell' (the same suffix used by the needsRecalc check earlier
# in this function), so the column name must be built with '_UCell'. The
# previous '_Cell' suffix pointed at columns that do not exist.
for (geneModule in names(toCalculate)) {
  ucell <- paste0(geneModule, '_UCell')
  if (any(is.na(seuratObj[[ucell]]))) {
    # FeaturePlot quantile cutoffs cannot be computed when scores contain NAs
    print(paste0('Data has NAs, cannot make feature plot: ', ucell))
  } else if (hasReductions) {
    # Only plot when a tSNE/UMAP reduction is available on the object
    print(Seurat::FeaturePlot(seuratObj, features = ucell, min.cutoff = 'q02', max.cutoff = 'q98') + ggtitle(geneModule))
  }
}

Expand Down
4 changes: 3 additions & 1 deletion R/Phenotyping.R
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ PlotImmuneMarkers <- function(seuratObj, reductions = c('tsne', 'umap')) {
PlotMarkerSeries(seuratObj, reductions = reductions, features = c('HAVCR2'), 'Th1')

# ZBTB16 = PLZF
PlotMarkerSeries(seuratObj, reductions = reductions, features = c('KLRB1', 'CEPBD', 'NCR3', 'ZBTB16', 'RORC', 'SLC4A10', 'DPP4'), 'MAIT')
PlotMarkerSeries(seuratObj, reductions = reductions, features = GetGeneSet('MAIT_Markers'), 'MAIT')

# ZNF683 = HOBIT
# LOC100423131 = XCL1, ENSMMUG00000013779, Lymphotactin
Expand Down Expand Up @@ -329,6 +329,8 @@ GetGeneSet <- function(name) {

.RegisterGeneSet('MMul10TcrConstantRegion', c('LOC710951', 'LOC114677140', 'LOC711031', 'LOC720538', 'LOC705095'))

# MAIT cell marker genes. Note: the symbol is CEBPD (CCAAT enhancer binding protein delta);
# it was previously misspelled 'CEPBD', which cannot match any feature in the data.
.RegisterGeneSet('MAIT_Markers', c('KLRB1', 'CEBPD', 'NCR3', 'ZBTB16', 'RORC', 'SLC4A10', 'DPP4'))

# Dysfunction?
# 'MT1A', 'MT2A', 'MT1M'
.RegisterGeneSet('MMul10TcrGenes', c('LOC703029','LOC696306','LOC106999340','LOC106996262','LOC106999345','LOC106997707','LOC106997706','LOC710149','LOC700771','LOC699427','LOC711871','LOC709081','LOC698785','LOC114677052','LOC114676933','LOC106999353','LOC106999351','LOC106999350','LOC106999349','LOC106999348','LOC106999347','LOC106999346','LOC106999343','LOC106999341','LOC106999339','LOC106999337','LOC106999336','LOC106999335','LOC106999312','LOC106997705','LOC106997704','LOC106997703','LOC106997702','LOC106997697','LOC106997453','LOC106997452','LOC106997451','LOC106995765','LOC106992460','LOC106992446','LOC106992434','LOC106992433','LOC720456','LOC716949','LOC716866','LOC711537','LOC711386','LOC711194','LOC711141','LOC711066','LOC710821','LOC710627','LOC710455','LOC710361','LOC710183','LOC710093','LOC709531','LOC708581','LOC708328','LOC704883','LOC703153','LOC702904','LOC702550','LOC702113','LOC701992','LOC701875','LOC701745','LOC701395','LOC701262','LOC701152','LOC700224','LOC700154','LOC700105','LOC699912','LOC699790','LOC699543','LOC699298','LOC699162','LOC698913','LOC698543','LOC698289','LOC698161','LOC697792','LOC697466','LOC697234','LOC697054','LOC696752','LOC696684','LOC696557','LOC696075','LOC695943','LOC114679533','LOC114679531','LOC114677139','LOC114677137','LOC114677136','LOC114677055','LOC114677054','LOC114677050','LOC114677049','LOC114677047','LOC114675766'))
Expand Down
2 changes: 1 addition & 1 deletion man/CalculateUCellScores.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit fb98952

Please sign in to comment.