From 980ee33d03ea0a756cb1dbbe190fb6e40d9d9328 Mon Sep 17 00:00:00 2001 From: brgew Date: Thu, 16 May 2024 15:53:08 -0700 Subject: [PATCH] Improve matrix_control handling. --- R/apply_transforms.R | 96 -------------------------------- R/io.R | 21 +++++-- R/matrix.R | 84 +++++++++++++++++++++++----- R/pca.R | 127 +++++++++++++++++++++++++++++-------------- R/preprocess_cds.R | 14 ++++- R/projection.R | 55 +++++++++---------- R/utils.R | 9 ++- R/zzz.R | 13 ++--- 8 files changed, 223 insertions(+), 196 deletions(-) delete mode 100644 R/apply_transforms.R diff --git a/R/apply_transforms.R b/R/apply_transforms.R deleted file mode 100644 index e80176c..0000000 --- a/R/apply_transforms.R +++ /dev/null @@ -1,96 +0,0 @@ -sparse_apply_transform <- function(FM, rotation_matrix, vcenter=vcenter, vscale=vscale, block_size=NULL, cores=1, verbose=FALSE) { - if(verbose) { - message('sparse_apply_transform: start') - } - - if(!is.null(block_size)) { - block_size0 <- DelayedArray::getAutoBlockSize() - DelayedArray::setAutoBlockSize(block_size) - } - - # Thank you Maddy. - intersect_genes <- intersect(rownames(rotation_matrix), rownames(FM)) - intersect_indices <- match(intersect_genes, rownames(rotation_matrix)) - - if(any(is.na(intersect_indices))) { - stop('gene sets differ: genes in the subject matrix are not in the reference matrix') - } - - if((length(intersect_indices)/length(rownames(FM))) < 0.5) { - warning('fewer than half the genes in the subject matrix are also in the reference matrix: are the matrices prepared using the same gene set?') - } - - # [intersect_genes,] orders FM rows by intersect_genes - FM <- FM[intersect_genes,] - - xt <- Matrix::t(FM) - xtda <- DelayedArray::DelayedArray(xt) - - vcenter <- vcenter[intersect_indices] - vscale <- vscale[intersect_indices] - - xtdasc <- t(xtda) - vcenter - xtdasc <- t(xtdasc / vscale) - - irlba_res <- list() -# irlba_res$x <- xtdasc %*% rotation_matrix[intersect_indices,] - irlba_res$x <- matrix_multiply_multicore(mat_a=xtdasc, - mat_b=rotation_matrix[intersect_indices,], - cores) - irlba_res$x <- as.matrix(irlba_res$x) - class(irlba_res) <- c('irlba_prcomp', 'prcomp') - - if(verbose) { - message('sparse_apply_transform: finish') - } - - return(irlba_res) -} - - -bpcells_apply_transform <- function(FM, rotation_matrix, vcenter=vcenter, vscale=vscale, pca_control=list(), verbose=FALSE) { - if(verbose) { - message('bpcells_apply_transform: start') - } - - # Thank you Maddy. - intersect_genes <- intersect(rownames(rotation_matrix), rownames(FM)) - intersect_indices <- match(intersect_genes, rownames(rotation_matrix)) - - if(any(is.na(intersect_indices))) { - stop('gene sets differ: genes in the subject matrix are not in the reference matrix') - } - - if((length(intersect_indices)/length(rownames(FM))) < 0.5) { - warning('fewer than half the genes in the subject matrix are also in the reference matrix: are the matrices prepared using the same gene set?') - } - - # bge - # [intersect_genes,] orders FM rows by intersect_genes - # This almost certainly requires rewriting the matrix. - FM <- FM[intersect_genes,] - - xt <- BPCells::t(FM) - - vcenter <- vcenter[intersect_indices] - vscale <- vscale[intersect_indices] - - xtsc <- BPCells::t(xt) - vcenter - xtsc <- BPCells::t(xtsc / vscale) - - # make intermediate matrix. - - irlba_res <- list() - irlba_res$x <- xtsc %*% rotation_matrix[intersect_indices,] - - irlba_res$x <- as.matrix(irlba_res$x) - class(irlba_res) <- c('irlba_prcomp', 'prcomp') - - if(verbose) { - message('bpcells_apply_transform: finish') - } - - return(irlba_res) -} - - diff --git a/R/io.R b/R/io.R index a0802f5..bc1dd05 100644 --- a/R/io.R +++ b/R/io.R @@ -822,7 +822,6 @@ load_hnsw_index <- function(nn_index, file_name, metric, ndim) { new_index <- tryCatch( methods::new(RcppHNSW::HnswL2, ndim, file_name), error = function(c) { stop(paste0(trimws(c), - '\n error reading file ', file_name, '\n', dbar40, '\n', report_path_status(file_name, dirname(file_name)), @@ -1512,7 +1511,7 @@ save_transform_models <- function( cds, directory_path, comment="", verbose=TRUE # Make a tar file of output directory, if requested. if(archive_control[['archive_type']] == 'tar') { - tryCatch(make_tar_of_dir(directory_path, archive_control), + tryCatch(make_tar_of_dir(directory_path=directory_path, archive_control=archive_control), error = function(c) { stop(paste0(trimws(c), '\n* error in save_transform_models')) }) } } @@ -2208,7 +2207,7 @@ save_monocle_objects <- function(cds, directory_path, hdf5_assays=FALSE, comment # Make a tar file of output directory, if requested. if(archive_control[['archive_type']] == 'tar') { - tryCatch(make_tar_of_dir(directory_path, archive_control), + tryCatch(make_tar_of_dir(directory_path=directory_path, archive_control=archive_control), error = function(c) { stop(paste0(trimws(c), '\n* error in save_monocle_objects')) }) } } @@ -2224,7 +2223,7 @@ save_monocle_objects <- function(cds, directory_path, hdf5_assays=FALSE, comment #' @param directory_path a string giving the name of the directory #' from which to read the saved cell_data_set files. #' @param matrix_control a list that is used only to set the -#' PBCells matrix path when the saved cell_data_set has the +#' BPCells matrix path when the saved cell_data_set has the #' counts matrix stored as a BPCells on-disk matrix. By default, #' the BPCells matrix directory path is set to the current #' working directory. @@ -2240,9 +2239,19 @@ save_monocle_objects <- function(cds, directory_path, hdf5_assays=FALSE, comment #' @export # Bioconductor forbids writing to user directories so examples # is not run. -load_monocle_objects <- function(directory_path, matrix_control=list(matrix_path='.')) { +load_monocle_objects <- function(directory_path, matrix_control=list()) { appendLF <- FALSE + # + # Prepare matrix_control_res. + # Notes: + # o we use only the matrix_control[['matrix_path']] value at this time for + # making the BPCells temporary matrix directory used while Monocle runs. + # All other matrix_control values are ignored. + # + matrix_control_default <- get_global_variable('matrix_control_bpcells_unrestricted') + matrix_control_res <- set_matrix_control(matrix_control=matrix_control, matrix_control_default=matrix_control_default, control_type='unrestricted') + # Make a 'normalized' path string. The annoy index save function does not # recognize tildes. directory_path <- normalizePath(directory_path, mustWork=FALSE) @@ -2397,7 +2406,7 @@ load_monocle_objects <- function(directory_path, matrix_control=list(matrix_path assay(cds, 'counts_row_order') <- NULL } counts(cds, bpcells_warn=FALSE ) <- tryCatch( - load_bpcells_matrix_dir(file_path, md5sum, matrix_control=matrix_control), + load_bpcells_matrix_dir(file_path, md5sum, matrix_control=matrix_control_res), error = function(c) { stop(paste0(trimws(c), '\n* error in load_monocle_objects')) }) # Rebuild the BPCells row-major order counts matrix. cds <- set_cds_row_order_matrix(cds=cds) diff --git a/R/matrix.R b/R/matrix.R index a51c03e..38eb616 100644 --- a/R/matrix.R +++ b/R/matrix.R @@ -106,6 +106,15 @@ select_matrix_parameter_value <- function(parameter, matrix_control, matrix_cont # matrix_compress: TRUE, FALSE default: FALSE # matrix_buffer_size: default: 8192L # matrix_bpcells_copy: TRUE, FALSE default: TRUE +# +# parameter check_conditional is boolean: conditional=TRUE is more stringent, requiring and checking certain +# values conditioned on other values, for example, 'matrix_buffer_size' is used only when +# matrix_class is 'BPCells' and matrix_mode is 'dir'. check_matrix_control(..., check_conditional=FALSE) is +# called at the start of set_matrix_control() and check_matrix_control(..., check_conditional=TRUE) is +# called at the end of set_matrix_control(). More generally, check_conditional=FALSE is used before trying +# to set consistent matrix_control values and check_conditional=TRUE is used after trying to set consistent +# matrix_control values. +# check_matrix_control <- function(matrix_control=list(), control_type=c('unrestricted', 'pca'), check_conditional=FALSE) { control_type <- match.arg(control_type) assertthat::assert_that(is.list(matrix_control)) @@ -141,6 +150,10 @@ check_matrix_control <- function(matrix_control=list(), control_type=c('unrestri allowed_matrix_type[['unrestricted']] <- c('float', 'double') allowed_matrix_type[['pca']] <- c('float', 'double') + allowed_matrix_compress <- list() + allowed_matrix_compress[['unrestricted']] <- c(FALSE, TRUE) + allowed_matrix_compress[['pca']] <- c(FALSE) + error_string <- '' if(!all(names(matrix_control) %in% allowed_control_parameters)) { @@ -148,12 +161,16 @@ check_matrix_control <- function(matrix_control=list(), control_type=c('unrestri } if(check_conditional == FALSE) { + # Is matrix_class set and a valid value? if(!(is.null(matrix_control[['matrix_class']])) && !(matrix_control[['matrix_class']] %in% allowed_matrix_class)) { error_string <- paste0('\ninvalid matrix_class "', matrix_control[['matrix_class']], '"') } + # BPCells matrix class tests. if(matrix_control[['matrix_class']] == 'BPCells') { + + # Is matrix_mode set and a valid value? if(control_type == 'unrestricted') allowed_values <- allowed_matrix_mode[['unrestricted']] else @@ -165,7 +182,8 @@ check_matrix_control <- function(matrix_control=list(), control_type=c('unrestri !(matrix_control[['matrix_mode']] %in% allowed_values)) { error_string <- paste0('\ninvalid matrix_mode "', matrix_control[['matrix_mode']], '"') } - + + # Is matrix_type set and a valid value? if(control_type == 'unrestricted') allowed_values <- allowed_matrix_type[['unrestricted']] else @@ -178,21 +196,25 @@ check_matrix_control <- function(matrix_control=list(), control_type=c('unrestri error_string <- paste0('\ninvalid matrix_type "', matrix_control[['matrix_type']], '"') } + # Is matrix_compress set and a data valid type? if(!(is.null(matrix_control[['matrix_compress']])) && !(is.logical(matrix_control[['matrix_compress']]))) { error_string <- paste0('\nmatrix_compress value must be a logical type') } + # Is matrix_path set and a data valid type? if(!(is.null(matrix_control[['matrix_path']])) && !(is.character(matrix_control[['matrix_path']]))) { error_string <- paste0('\nmatrix_path value must be a character type') } + # Is matrix_buffer_size set and a data valid type? if(!(is.null(matrix_control[['matrix_buffer_size']])) && !(is.integer(matrix_control[['matrix_buffer_size']]))) { error_string <- paste0('\nmatrix_buffer_size value must be an integer type') } + # Is matrix_bpcells_copy set and a valid data type? if(!(is.null(matrix_control[['matrix_bpcells_copy']])) && !(is.logical(matrix_control[['matrix_bpcells_copy']]))) { error_string <- paste0('\nmatrix_bpcells_copy value must be a logical type') @@ -200,7 +222,7 @@ check_matrix_control <- function(matrix_control=list(), control_type=c('unrestri } } else { - # Check matrix_class value. + # Is matrix_class set and a valid value? if(is.null(matrix_control[['matrix_class']])) { error_string <- '\nmatrix_class not set' } @@ -209,8 +231,10 @@ check_matrix_control <- function(matrix_control=list(), control_type=c('unrestri error_string <- paste0('\ninvalid matrix_class "', matrix_control[['matrix_class']], '\n') } + # BPCells matrix class tests. if(matrix_control[['matrix_class']] == 'BPCells') { - # Check matrix_type value. + + # Is matrix_type a valid value? if(control_type == 'unrestricted') allowed_values <- allowed_matrix_type[['unrestricted']] else @@ -219,15 +243,25 @@ check_matrix_control <- function(matrix_control=list(), control_type=c('unrestri else stop('check_matrix_control: unknown control type \'', control_type, '\'') if(!(matrix_control[['matrix_type']] %in% allowed_values)) { - error_string <- paste0('\nbad matrix_type "', matrix_control[['matrix_type']], '"\n') + error_string <- paste0('\nbad matrix_type "', matrix_control[['matrix_type']], '"\n') } # Check matrix_compress value. if(!is.logical(matrix_control[['matrix_compress']])) { - error_string <- '\nmatrix_compress must be as logical type' + error_string <- '\nmatrix_compress must be a logical type' } - - # Check matrix_mode value. + if(control_type == 'unrestricted') + allowed_values <- allowed_matrix_compress[['pca']] + else + if(control_type == 'pca') + allowed_values <- allowed_matrix_compress[['pca']] + else + stop('check_matrix_control: unknown control type \'', control_type, '\'') + if(!(matrix_control[['matrix_compress']] %in% allowed_values)) { + error_string <- paste0('\nbad matrix_compress "', matrix_control[['matrix_compress']], '"\n') + } + + # Is matrix_mode a valid value? if(control_type == 'unrestricted') allowed_values <- allowed_matrix_mode[['unrestricted']] else @@ -239,17 +273,21 @@ check_matrix_control <- function(matrix_control=list(), control_type=c('unrestri error_string <- paste0('\ninvalid matrix_mode "', matrix_control[['matrix_mode']], '"') } + # Check values related to matrix_mode = 'dir'. if(matrix_control[['matrix_mode']] == 'dir') { - # Check matrix_path value. + + # Is matrix_path a valid data type? if(!(is.character(matrix_control[['matrix_path']]))) { error_string <- paste0('\nbad matrix_path "', matrix_control[['matrix_path']], '"') } - # Check matrix_buffer_size. + # Is matrix_buffer_size a valid data type? if(!(is.integer(matrix_control[['matrix_buffer_size']]))) { error_string <- paste0('\nmatrix_buffer_size must be an integer') } } + + # Is matrix_bpcells_copy a valid data type? if(!is.logical(matrix_control[['matrix_bpcells_copy']])) { error_string <- paste0('\nmatrix_bpcells_copy value must be a logical type') } @@ -368,7 +406,6 @@ set_matrix_control <- function(matrix_control=list(), matrix_control_default=lis if(matrix_control_out[['matrix_class']] == 'BPCells') { matrix_control_out[['matrix_mode']] <- select_matrix_parameter_value(parameter='matrix_mode', matrix_control=matrix_control, matrix_control_default=matrix_control_default, default_value=default_matrix_mode) - if(matrix_control_out[['matrix_mode']] == 'mem') { matrix_control_out[['matrix_type']] <- select_matrix_parameter_value(parameter='matrix_type', matrix_control=matrix_control, matrix_control_default=matrix_control_default, default_value=default_matrix_type) matrix_control_out[['matrix_compress']] <- select_matrix_parameter_value(parameter='matrix_compress', matrix_control=matrix_control, matrix_control_default=matrix_control_default, default_value=default_matrix_compress) @@ -382,6 +419,26 @@ set_matrix_control <- function(matrix_control=list(), matrix_control_default=lis matrix_control_out[['matrix_buffer_size']] <- select_matrix_parameter_value(parameter='matrix_buffer_size', matrix_control=matrix_control, matrix_control_default=matrix_control_default, default_value=default_matrix_buffer_size) matrix_control_out[['matrix_bpcells_copy']] <- select_matrix_parameter_value(parameter='matrix_bpcells_copy', matrix_control=matrix_control, matrix_control_default=matrix_control_default, default_value=default_matrix_bpcells_copy) } + + # Restrict matrix_control values for matrices used in + # intensive PCA calculations so set matrix_type to double for + # precision, set matrix_compress to FALSE for speed, and + # matrix_bpcells_copy to TRUE because we want a temporary + # matrix for the calculation, after which we remove it. + if(control_type == 'pca') { + if(matrix_control_out[['matrix_type']] == 'uint32_t') { + message('set_matrix_control: forcing matrix_type to \'double\' for PCA.') + matrix_control_out[['matrix_type']] <- 'double' + } + if(matrix_control_out[['matrix_compress']] == TRUE) { + message('set_matrix_control: forcing matrix_compress to \'FALSE\' for PCA.') + matrix_control_out[['matrix_compress']] <- FALSE + } + if(matrix_control_out[['matrix_bpcells_copy']] == FALSE) { + message('set_matrix_control: forcing matrix_bpcells_copy to \'TRUE\' for PCA.') + matrix_control_out[['matrix_bpcells_copy']] <- TRUE + } + } } check_matrix_control(matrix_control=matrix_control_out, control_type=control_type, check_conditional=TRUE) @@ -526,11 +583,12 @@ get_matrix_class <- function(mat) { # Get/infer the matrix information. get_matrix_info <- function(mat) { - matrix_info <- get_matrix_class(mat=mat) + matrix_info <- tryCatch(get_matrix_class(mat=mat), + error=function(c) {stop(paste0(trimws(c), + '\n* error in get_matrix_info')) }) if(is.null(matrix_info[['matrix_class']])) { - message('bad matrix info -- dropping into browser') - browser() + stop('get_matrix_info: unable to infer matrix_class') } if(matrix_info[['matrix_class']] != 'BPCells') { diff --git a/R/pca.R b/R/pca.R index 68e2a50..665c8be 100644 --- a/R/pca.R +++ b/R/pca.R @@ -6,52 +6,98 @@ svd_rebuild_matrix <- function(u, s, v, filename) { # -# This is probably replaced by set_matrix_contol with control_type='pca'. Consider -# removing this function. +# Notes on setting the PCA matrix control using a relatively +# complex approach: do not require input_matrix_control[['matrix_class']] but allow edits +# Possible conditions: +# 1 input_matrix_control has no values set (OK!) +# o want input_matrix_info with missing values from default for matrix_class/pca (probably no missing values) +# +# 2 input_matrix_control has matrix_class and other values set and input_matrix_control[['matrix_class']] == input_matrix_info[['matrix_class']] (same as 4) +# o want input_matrix_info with edits by input_matrix_control +# example: input_matrix_control[['matrix_path'] replaces input_matrix_info[['matrix_path'] +# +# 3 input_matrix_control has matrix_class and other values set and input_matrix_control[['matrix_class']] != input_matrix_info[['matrix_class']] (OK? is there any reason to use input_matrix_info values?) +# o want input_matrix_control with missing values from default for matrix_class/pca +# +# 4 input_matrix_control does not have matrix_class set but other values are set (same as 2) +# o want input_matrix_info with edits from input_matrix_control +# example: input_matrix_control[['matrix_path'] replaces input_matrix_info[['matrix_path'] +# +# +# Is any matrix_control command line parameter value set? (Check the following summary carefully +# before proceeding with it.) +# o yes: is input_matrix_control[['matrix_class']] set? +# o is input_matrix_control[['matrix_class']] the same as the input matrix matrix_class +# o yes: use matrix_control: input_matrix_control (2) (*** wrong ***) +# matrix_control_default: input matrix info (edit matrix_path as required) +# o no: use matrix_control: input_matrix_control (3) +# matrix_control_default: default matrix_control for input_matrix_control[['matrix_class']] (global environment value) +# o no: use matrix_control: input_matrix_control (4) (*** wrong ***) +# matrix_control_default: default matrix_control for input_matrix_control[['matrix_class']] (global environment value) +# o no: use matrix_control: input matrix info (edit matrix_path as required) (1) +# matrix_control_default: default matrix_control for input_matrix_control[['matrix_class']] (global environment value) +# +# Notes: +# o use the simple approach because it's substantially more provable, maintainable, and documentable. +# Also, users cannot set matrix_control in preprocess_cds() and preprocess_transform(). +# o report the resulting matrix_control after calling set_matrix_control_pca(), when verbose=TRUE # -set_pca_matrix_control <- function(mat, matrix_control=list()) { - check_matrix_control(matrix_control=matrix_control, control_type='pca', check_conditional=FALSE) - matrix_info <- get_matrix_info(mat=mat) - if(!is.null(matrix_control[['matrix_class']])) { - if(matrix_control[['matrix_class']] == 'dgCMatrix') { - matrix_control_default <- get_global_variable('matrix_control_csparsematrix_pca') - } +# +# Use the simple approach (relatively simple): require either input_matrix_control +# is zero length list or input_matrix_control[['matrix_class']] is set with all +# desired non-default values. +# +# Notes: +# o if input_matrix_control is not set, use input_matrix_info with modifications required for pca and matrix_path +# o if input_matrix_control is set, use matrix_control=input_matrix_control and matrix_control_default for matrix_class/pca. +# In this case input_matrix_control[['matrix_class']] must be set; otherwise, it's invalid and an error results. Missing +# values in input_matrix_control are taken from the default. +# o watch for issues with matrix_path +# +set_matrix_control_pca <- function(mat, matrix_control=list(), verbose=FALSE) { + + assertthat::assert_that(monocle3:::is_matrix(mat), + msg=paste0('set_matrix_control_pca: input matrix parameter object is not a matrix')) + assertthat::assert_that(is.list(matrix_control), + msg=paste0('set_matrix_control_pca: input matrix_control parameter object is not a list')) + + # Get matrix_control values of input matrix. + matrix_info <- tryCatch(monocle3:::get_matrix_info(mat), + error = function(c) { stop(paste0(trimws(c), + '\n* error in set_matrix_control_pca')) }) + if(length(matrix_control) > 0) { + # Use matrix_control=matrix_control + assertthat::assert_that(!is.null(matrix_control[['matrix_class']]), + msg=paste0('set_matrix_control_pca: matrix_control[[\'matrix_class\']] is not set')) + if(matrix_control[['matrix_class']] == 'BPCells') + matrix_control_default <- get_global_variable('matrix_control_bpcells_pca') else - if(matrix_control[['matrix_class']] == 'BPCells') { + matrix_control_default <- get_global_variable('matrix_control_csparsematrix_pca') + + matrix_control_res <- set_matrix_control(matrix_control=matrix_control, matrix_control_default=matrix_control_default, control_type='pca') + } + else { + # Use matrix_control=matrix_info + if(matrix_info[['matrix_class']] == 'BPCells') { matrix_control_default <- get_global_variable('matrix_control_bpcells_pca') + tmp_matrix_info <- matrix_info + # Trim off name of input matrix directory. + tmp_matrix_info[['matrix_path']] <- dirname(tmp_matrix_info[['matrix_path']]) } else { - stop('unrecognized matrix_control[[\'matrix_class\']] value') - } - matrix_control_res <- set_matrix_control(matrix_control=matrix_control, - matrix_control_default=matrix_control_default, - control_type='pca') - } - else - if(matrix_info[['matrix_class']] == 'r_dense_matrix' || - matrix_info[['matrix_class']] == 'dgCMatrix' || - matrix_info[['matrix_class']] == 'dgTMatrix') { - matrix_control_res = list() - matrix_control_res[['matrix_class']] <- 'dgCMatrix' - } - else - if(matrix_info[['matrix_class']] == 'BPCells') { - matrix_control_res <- matrix_info - if(matrix_control_res[['matrix_type']] == 'uint32_t') { - matrix_control_res[['matrix_type']] <- 'double' + tmp_matrix_info <- matrix_info + matrix_control_default <- get_global_variable('matrix_control_csparsematrix_pca') } - if(matrix_control_res[['matrix_compress']] == TRUE) { - matrix_control_res[['matrix_compress']] <- FALSE - } - if(matrix_control_res[['matrix_mode']] == 'dir') { - matrix_control_res[['matrix_path']] <- dirname(matrix_control_res[['matrix_path']]) - } - matrix_control_res[['matrix_bpcells_copy']] <- TRUE + + matrix_control_res <- set_matrix_control(matrix_control=tmp_matrix_info, matrix_control_default=matrix_control_default, control_type='pca') } - check_matrix_control(matrix_control=matrix_control_res, control_type='pca', check_conditional=TRUE) + if(verbose) { + message('set_matrix_control_pca: matrix_control_res:') + show_matrix_control(matrix_control_res) + } return(matrix_control_res) } @@ -134,6 +180,7 @@ sparse_prcomp_irlba <- function(x, n = 3, retx = TRUE, center = TRUE, { if(verbose) { message('pca: sparse_prcomp_irlba: matrix class: ', class(x)) + message(paste0('matrix_info:\n', show_matrix_info(matrix_info=get_matrix_info(mat=x), indent=' ')), appendLF=FALSE) } a <- names(as.list(match.call())) @@ -301,7 +348,7 @@ bpcells_prcomp_irlba <- function(x, n = 3, retx = TRUE, center = TRUE, { if(verbose) { message('pca: bpcells_prcomp_irlba: matrix class: ', class(x)) - message(show_matrix_info(matrix_info=get_matrix_info(mat=x), indent=' '), appendLF=FALSE) + message(paste0('matrix_info:\n', show_matrix_info(matrix_info=get_matrix_info(mat=x), indent=' ')), appendLF=FALSE) } a <- names(as.list(match.call())) @@ -312,9 +359,9 @@ bpcells_prcomp_irlba <- function(x, n = 3, retx = TRUE, center = TRUE, function to control that algorithm's convergence tolerance. See `?prcomp_irlba` for help.") - matrix_control <- list(matrix_class='BPCells') - matrix_control_default <- get_global_variable('matrix_control_bpcells_pca') - matrix_control_res <- set_matrix_control(matrix_control=matrix_control, matrix_control_default=matrix_control_default, control_type='pca') + # Use the same matrix_control for the 'x_commit' matrix as used for the + # input matrix 'x'. + matrix_control_res <- set_matrix_control_pca(mat=x, verbose=verbose) x_commit <- set_matrix_class(mat=x, matrix_control=matrix_control_res) stats <- BPCells::matrix_stats(matrix = x_commit, row_stats = 'none', col_stats = 'variance') diff --git a/R/preprocess_cds.R b/R/preprocess_cds.R index edcfcd7..f669c0a 100644 --- a/R/preprocess_cds.R +++ b/R/preprocess_cds.R @@ -245,9 +245,9 @@ preprocess_cds <- function(cds, nv = min(num_dim,min(dim(FM)) - 1)) } else { - matrix_control <- list(matrix_class='BPCells') - matrix_control_default <- get_global_variable('matrix_control_bpcells_pca') - matrix_control_res <- set_matrix_control(matrix_control=matrix_control, matrix_control_default=matrix_control_default, control_type='pca') + # Use the same matrix_control for the 'x_commit' matrix as used for the + # input matrix 'FM'. + matrix_control_res <- set_matrix_control_pca(mat=FM, verbose=verbose) preproc_res_commit <- set_matrix_class(mat=BPCells::t(preproc_res), matrix_control=matrix_control_res) irlba_res <- irlba::irlba(A=BPCells:::linear_operator(preproc_res_commit), @@ -256,6 +256,14 @@ preprocess_cds <- function(cds, rm_bpcells_dir(mat=preproc_res_commit) } + if(verbose) { + message('singular values (head)') + message(paste(head(irlba_res$d), collapse=' ')) + message('') + message("umat: ", paste(dim(irlba_res$u), collapse=" ")) + message("vtmat: ", paste(dim(irlba_res$v), collapse=" ")) + } + preproc_res <- irlba_res$u %*% diag(irlba_res$d) row.names(preproc_res) <- colnames(cds) SingleCellExperiment::reducedDims(cds)[[method]] <- as.matrix(preproc_res) diff --git a/R/projection.R b/R/projection.R index b2a37f8..f19a29f 100644 --- a/R/projection.R +++ b/R/projection.R @@ -48,9 +48,9 @@ sparse_apply_transform <- function(FM, rotation_matrix, vcenter=vcenter, vscale= } -bpcells_apply_transform <- function(FM, rotation_matrix, vcenter=vcenter, vscale=vscale, pca_control=list(), verbose=FALSE) { +bpcells_apply_transform <- function(FM, rotation_matrix, vcenter=vcenter, vscale=vscale, verbose=FALSE) { if(verbose) { - message('bpcells_apply_transform: start') + message('bpcells_apply_transform: start (', get_time_stamp(), ')') } # Thank you Maddy. @@ -78,14 +78,23 @@ bpcells_apply_transform <- function(FM, rotation_matrix, vcenter=vcenter, vscale xtsc <- BPCells::t(xt) - vcenter xtsc <- BPCells::t(xtsc / vscale) + # Use the same matrix_control for the 'xtsc_commit' matrix as used for the + # input matrix 'FM'. + matrix_control_res <- set_matrix_control_pca(mat=FM, verbose=verbose) + xtsc_commit <- set_matrix_class(mat=xtsc, matrix_control=matrix_control_res) + # make intermediate matrix. irlba_res <- list() - irlba_res$x <- xtsc %*% rotation_matrix[intersect_indices,] + irlba_res$x <- BPCells:::linear_operator(xtsc_commit) %*% rotation_matrix[intersect_indices,] + + # Remove BPCells MatrixDir, if it is defined. + rm_bpcells_dir(mat=xtsc_commit) + irlba_res$x <- as.matrix(irlba_res$x) class(irlba_res) <- c('irlba_prcomp', 'prcomp') if(verbose) { - message('bpcells_apply_transform: finish') + message('bpcells_apply_transform: finish (', get_time_stamp(), ')') } return(irlba_res) @@ -108,12 +117,6 @@ bpcells_apply_transform <- function(FM, rotation_matrix, vcenter=vcenter, vscale #' NULL, which does not affect the current block size. #' @param cores the number of cores to use for the matrix #' multiplication. The default is 1. -#' @param matrix_control A list used to control how the temporary -#' version of the counts matrix used by preprocess_transform is -#' stored. By default the matrix is stored in memory as a -#' sparse matrix. Setting -#' 'matrix_control=list(matrix_class="BPCells")' stores the matrix -#' on-disk as a sparse matrix. #' @param verbose logical Whether to emit verbose output during dimensionality #' reduction. #' @return a cell_data_set with a preprocess reduced count @@ -170,7 +173,7 @@ bpcells_apply_transform <- function(FM, rotation_matrix, vcenter=vcenter, vscale #' @export # Bioconductor forbids writing to user directories so examples # is not run. -preprocess_transform <- function(cds, reduction_method=c('PCA', 'LSI'), block_size=NULL, cores=1, matrix_control=list(), verbose=FALSE) { +preprocess_transform <- function(cds, reduction_method=c('PCA', 'LSI'), block_size=NULL, cores=1, verbose=FALSE) { # # Need to add processing for LSI. TF-IDF transform etc. # @@ -192,6 +195,10 @@ preprocess_transform <- function(cds, reduction_method=c('PCA', 'LSI'), block_si msg=paste0("Reduction method '", reduction_method, "' is not in the model", " object.")) + if(reduction_method == 'LSI') { + stop('** preprocess_transform() for LSI has not been tested because I have no suitable data sets **') + } + set.seed(2016) iterable_matrix_flag <- is(counts(cds), 'IterableMatrix') @@ -203,9 +210,7 @@ preprocess_transform <- function(cds, reduction_method=c('PCA', 'LSI'), block_si vcenter <- cds@reduce_dim_aux[[reduction_method]][['model']]$svd_center vscale <- cds@reduce_dim_aux[[reduction_method]][['model']]$svd_scale - mat_counts <- SingleCellExperiment::counts(cds) - matrix_control_res <- set_pca_matrix_control(mat=mat_counts, matrix_control=matrix_control) - FM <- set_matrix_class(mat=mat_counts, matrix_control=matrix_control_res) + FM <- SingleCellExperiment::counts(cds) FM <- normalize_expr_data(FM=FM, size_factors=size_factors(cds), norm_method=norm_method, pseudo_count=pseudo_count) # OK if (nrow(FM) == 0) { stop("all rows have standard deviation zero") @@ -213,7 +218,6 @@ preprocess_transform <- function(cds, reduction_method=c('PCA', 'LSI'), block_si # Don't select matrix rows by use_genes because intersect() does # it implicitly through the rotation matrix. - if(!iterable_matrix_flag) { fm_rowsums = Matrix::rowSums(FM) FM <- FM[is.finite(fm_rowsums) & fm_rowsums != 0, ] @@ -224,10 +228,7 @@ preprocess_transform <- function(cds, reduction_method=c('PCA', 'LSI'), block_si fm_rowsums = BPCells::rowSums(FM) FM <- FM[is.finite(fm_rowsums) & fm_rowsums != 0, ] - irlba_res <- bpcells_apply_transform(FM=FM, rotation_matrix=rotation_matrix, vcenter=vcenter, vscale=vscale, pca_control=pca_control, verbose=verbose) - - # Remove BPCells MatrixDir, if it is defined. - rm_bpcells_dir(mat=FM) + irlba_res <- bpcells_apply_transform(FM=FM, rotation_matrix=rotation_matrix, vcenter=vcenter, vscale=vscale, verbose=verbose) } # 'reference' gene names are in the cds@preproc @@ -249,12 +250,8 @@ preprocess_transform <- function(cds, reduction_method=c('PCA', 'LSI'), block_si row_sums <- cds@reduce_dim_aux[[reduction_method]][['model']][['row_sums']] num_cols <- cds@reduce_dim_aux[[reduction_method]][['model']][['num_cols']] - mat_counts <- SingleCellExperiment::counts(cds) - -# matrix_control_res <- set_pca_matrix_control(mat=mat_counts, matrix_control=matrix_control) -# FM <- set_matrix_class(mat=mat_counts, matrix_control=matrix_control_res) - - FM <- normalize_expr_data(FM=mat_counts, size_factors=size_factors(cds), norm_method=norm_method, pseudo_count=pseudo_count) + FM <- SingleCellExperiment::counts(cds) + FM <- normalize_expr_data(FM=FM, size_factors=size_factors(cds), norm_method=norm_method, pseudo_count=pseudo_count) if (nrow(FM) == 0) { stop("all rows have standard deviation zero") } @@ -363,12 +360,12 @@ preprocess_transform <- function(cds, reduction_method=c('PCA', 'LSI'), block_si xt <- BPCells::t(tf_idf_counts) - matrix_control_res <- set_pca_matrix_control(mat=mat_counts, matrix_control=matrix_control) - xt <- set_matrix_class(mat=xt, matrix_control=matrix_control_res) + matrix_control_res <- set_matrix_control_pca(mat=FM, verbose=verbose) + xt_commit <- set_matrix_class(mat=xt, matrix_control=matrix_control_res) - irlba_res$x <- BPCells:::linear_operator(xt) %*% rotation_matrix[intersect_indices,] + irlba_res$x <- BPCells:::linear_operator(xt_commit) %*% rotation_matrix[intersect_indices,] - rm_bpcells_dir(mat=xt) + rm_bpcells_dir(mat=xt_commit) irlba_res$x <- as.matrix(irlba_res$x) class(irlba_res) <- c('irlba_prcomp', 'prcomp') diff --git a/R/utils.R b/R/utils.R index 2307ca7..a45da22 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,8 +1,15 @@ -# Test whether a matrix is one of our supported sparse matrices +# Test whether a matrix is one of our supported in-memory sparse matrices is_sparse_matrix <- function(x){ any(class(x) %in% c("dgCMatrix", "dgTMatrix", "lgCMatrix", "CsparseMatrix")) } + +# Test whether an object is a matrix. +is_matrix <- function(x) { + return(is(x, 'matrix') || is_sparse_matrix(x) || is(x, 'IterableMatrix')) +} + +# Test whether #' Function to calculate size factors for single-cell RNA-seq data #' #' @param cds The cell_data_set diff --git a/R/zzz.R b/R/zzz.R index ee72b21..4a35eee 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -13,6 +13,7 @@ dbar40 <- paste(replicate(40,'-'),collapse='') # # Set up a global-variable-like environment. # +#! @export set_global_variable <- function(variable_name, value) { assign(variable_name, value, envir=._._global_variable_env_._.) } @@ -20,6 +21,7 @@ set_global_variable <- function(variable_name, value) { # Return value of variable_name. If variable_name is NULL, return a list # of all global variables. +#! @export get_global_variable <- function(variable_name=NULL) { value <- tryCatch({ v <- get('guard_element', envir=._._global_variable_env_._.) @@ -73,11 +75,6 @@ get_global_variable <- function(variable_name=NULL) { } -# -# TODO: Read $HOME/.monoclerc file. -# - - # Define some global variables. .onLoad <- function(libname, pkgname) { # A value used to ensure that this is the Monocle3 @@ -157,9 +154,9 @@ get_global_variable <- function(variable_name=NULL) { # If ~/.monoclerc exists, read it and execute its contents. dot_monoclerc <- base::path.expand('~/.monoclerc') if(file.exists(dot_monoclerc)){ - message(paste('Read ~/.monoclerc next. The parsed expressions are', - 'read into the user\'s global\nenvironment. Objects', - 'in .monoclerc mask monocle objects with the same names.')) + packageStartupMessage(paste('Read ~/.monoclerc next. The parsed expressions are', + 'read into the user\'s global\nenvironment. Objects', + 'in .monoclerc may mask monocle objects with the same names.')) source(file=dot_monoclerc, local=FALSE, echo=TRUE) } }