Skip to content

Commit

Permalink
Improve matrix_control handling.
Browse files Browse the repository at this point in the history
  • Loading branch information
brgew committed May 16, 2024
1 parent fd49de1 commit 980ee33
Show file tree
Hide file tree
Showing 8 changed files with 223 additions and 196 deletions.
96 changes: 0 additions & 96 deletions R/apply_transforms.R

This file was deleted.

21 changes: 15 additions & 6 deletions R/io.R
Original file line number Diff line number Diff line change
Expand Up @@ -822,7 +822,6 @@ load_hnsw_index <- function(nn_index, file_name, metric, ndim) {
new_index <- tryCatch(
methods::new(RcppHNSW::HnswL2, ndim, file_name),
error = function(c) { stop(paste0(trimws(c),

'\n error reading file ', file_name,
'\n', dbar40,
'\n', report_path_status(file_name, dirname(file_name)),
Expand Down Expand Up @@ -1512,7 +1511,7 @@ save_transform_models <- function( cds, directory_path, comment="", verbose=TRUE

# Make a tar file of output directory, if requested.
if(archive_control[['archive_type']] == 'tar') {
tryCatch(make_tar_of_dir(directory_path, archive_control),
tryCatch(make_tar_of_dir(directory_path=directory_path, archive_control=archive_control),
error = function(c) { stop(paste0(trimws(c), '\n* error in save_transform_models')) })
}
}
Expand Down Expand Up @@ -2208,7 +2207,7 @@ save_monocle_objects <- function(cds, directory_path, hdf5_assays=FALSE, comment

# Make a tar file of output directory, if requested.
if(archive_control[['archive_type']] == 'tar') {
tryCatch(make_tar_of_dir(directory_path, archive_control),
tryCatch(make_tar_of_dir(directory_path=directory_path, archive_control=archive_control),
error = function(c) { stop(paste0(trimws(c), '\n* error in save_monocle_objects')) })
}
}
Expand All @@ -2224,7 +2223,7 @@ save_monocle_objects <- function(cds, directory_path, hdf5_assays=FALSE, comment
#' @param directory_path a string giving the name of the directory
#' from which to read the saved cell_data_set files.
#' @param matrix_control a list that is used only to set the
#' PBCells matrix path when the saved cell_data_set has the
#' BPCells matrix path when the saved cell_data_set has the
#' counts matrix stored as a BPCells on-disk matrix. By default,
#' the BPCells matrix directory path is set to the current
#' working directory.
Expand All @@ -2240,9 +2239,19 @@ save_monocle_objects <- function(cds, directory_path, hdf5_assays=FALSE, comment
#' @export
# Bioconductor forbids writing to user directories so examples
# is not run.
load_monocle_objects <- function(directory_path, matrix_control=list(matrix_path='.')) {
load_monocle_objects <- function(directory_path, matrix_control=list()) {
appendLF <- FALSE

#
# Prepare matrix_control_res.
# Notes:
# o we use only the matrix_control[['matrix_path']] value at this time for
# making the BPCells temporary matrix directory used while Monocle runs.
# All other matrix_control values are ignored.
#
matrix_control_default <- get_global_variable('matrix_control_bpcells_unrestricted')
matrix_control_res <- set_matrix_control(matrix_control=matrix_control, matrix_control_default=matrix_control_default, control_type='unrestricted')

# Make a 'normalized' path string. The annoy index save function does not
# recognize tildes.
directory_path <- normalizePath(directory_path, mustWork=FALSE)
Expand Down Expand Up @@ -2397,7 +2406,7 @@ load_monocle_objects <- function(directory_path, matrix_control=list(matrix_path
assay(cds, 'counts_row_order') <- NULL
}
counts(cds, bpcells_warn=FALSE ) <- tryCatch(
load_bpcells_matrix_dir(file_path, md5sum, matrix_control=matrix_control),
load_bpcells_matrix_dir(file_path, md5sum, matrix_control=matrix_control_res),
error = function(c) { stop(paste0(trimws(c), '\n* error in load_monocle_objects')) })
# Rebuild the BPCells row-major order counts matrix.
cds <- set_cds_row_order_matrix(cds=cds)
Expand Down
84 changes: 71 additions & 13 deletions R/matrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,15 @@ select_matrix_parameter_value <- function(parameter, matrix_control, matrix_cont
# matrix_compress: TRUE, FALSE default: FALSE
# matrix_buffer_size: <integer> default: 8192L
# matrix_bpcells_copy: TRUE, FALSE default: TRUE
#
# parameter check_conditional is boolean: conditional=TRUE is more stringent, requiring and checking certain
# values conditioned on other values, for example, 'matrix_buffer_size' is used only when
# matrix_class is 'BPCells' and matrix_mode is 'dir'. check_matrix_control(..., check_conditional=FALSE) is
# called at the start of set_matrix_control() and check_matrix_control(..., check_conditional=TRUE) is
# called at the end of set_matrix_control(). More generally, check_conditional=FALSE is used before trying
# to set consistent matrix_control values and check_conditional=TRUE is used after trying to set consistent
# matrix_control values.
#
check_matrix_control <- function(matrix_control=list(), control_type=c('unrestricted', 'pca'), check_conditional=FALSE) {
control_type <- match.arg(control_type)
assertthat::assert_that(is.list(matrix_control))
Expand Down Expand Up @@ -141,19 +150,27 @@ check_matrix_control <- function(matrix_control=list(), control_type=c('unrestri
allowed_matrix_type[['unrestricted']] <- c('float', 'double')
allowed_matrix_type[['pca']] <- c('float', 'double')

allowed_matrix_compress <- list()
allowed_matrix_compress[['unrestricted']] <- c(FALSE, TRUE)
allowed_matrix_compress[['pca']] <- c(FALSE)

error_string <- ''

if(!all(names(matrix_control) %in% allowed_control_parameters)) {
error_string <- paste0('invalid control parameter')
}

if(check_conditional == FALSE) {
# Is matrix_class set and a valid value?
if(!(is.null(matrix_control[['matrix_class']])) &&
!(matrix_control[['matrix_class']] %in% allowed_matrix_class)) {
error_string <- paste0('\ninvalid matrix_class "', matrix_control[['matrix_class']], '"')
}

# BPCells matrix class tests.
if(matrix_control[['matrix_class']] == 'BPCells') {

# Is matrix_mode set and a valid value?
if(control_type == 'unrestricted')
allowed_values <- allowed_matrix_mode[['unrestricted']]
else
Expand All @@ -165,7 +182,8 @@ check_matrix_control <- function(matrix_control=list(), control_type=c('unrestri
!(matrix_control[['matrix_mode']] %in% allowed_values)) {
error_string <- paste0('\ninvalid matrix_mode "', matrix_control[['matrix_mode']], '"')
}


# Is matrix_type set and a valid value?
if(control_type == 'unrestricted')
allowed_values <- allowed_matrix_type[['unrestricted']]
else
Expand All @@ -178,29 +196,33 @@ check_matrix_control <- function(matrix_control=list(), control_type=c('unrestri
error_string <- paste0('\ninvalid matrix_type "', matrix_control[['matrix_type']], '"')
}

# Is matrix_compress set and a data valid type?
if(!(is.null(matrix_control[['matrix_compress']])) &&
!(is.logical(matrix_control[['matrix_compress']]))) {
error_string <- paste0('\nmatrix_compress value must be a logical type')
}

# Is matrix_path set and a data valid type?
if(!(is.null(matrix_control[['matrix_path']])) &&
!(is.character(matrix_control[['matrix_path']]))) {
error_string <- paste0('\nmatrix_path value must be a character type')
}

# Is matrix_buffer_size set and a data valid type?
if(!(is.null(matrix_control[['matrix_buffer_size']])) &&
!(is.integer(matrix_control[['matrix_buffer_size']]))) {
error_string <- paste0('\nmatrix_buffer_size value must be an integer type')
}

# Is matrix_bpcells_copy set and a valid data type?
if(!(is.null(matrix_control[['matrix_bpcells_copy']])) &&
!(is.logical(matrix_control[['matrix_bpcells_copy']]))) {
error_string <- paste0('\nmatrix_bpcells_copy value must be a logical type')
}
}
}
else {
# Check matrix_class value.
# Is matrix_class set and a valid value?
if(is.null(matrix_control[['matrix_class']])) {
error_string <- '\nmatrix_class not set'
}
Expand All @@ -209,8 +231,10 @@ check_matrix_control <- function(matrix_control=list(), control_type=c('unrestri
error_string <- paste0('\ninvalid matrix_class "', matrix_control[['matrix_class']], '\n')
}

# BPCells matrix class tests.
if(matrix_control[['matrix_class']] == 'BPCells') {
# Check matrix_type value.

# Is matrix_type a valid value?
if(control_type == 'unrestricted')
allowed_values <- allowed_matrix_type[['unrestricted']]
else
Expand All @@ -219,15 +243,25 @@ check_matrix_control <- function(matrix_control=list(), control_type=c('unrestri
else
stop('check_matrix_control: unknown control type \'', control_type, '\'')
if(!(matrix_control[['matrix_type']] %in% allowed_values)) {
error_string <- paste0('\nbad matrix_type "', matrix_control[['matrix_type']], '"\n')
error_string <- paste0('\nbad matrix_type "', matrix_control[['matrix_type']], '"\n')
}

# Check matrix_compress value.
if(!is.logical(matrix_control[['matrix_compress']])) {
error_string <- '\nmatrix_compress must be as logical type'
error_string <- '\nmatrix_compress must be a logical type'
}

# Check matrix_mode value.
if(control_type == 'unrestricted')
allowed_values <- allowed_matrix_compress[['pca']]
else
if(control_type == 'pca')
allowed_values <- allowed_matrix_compress[['pca']]
else
stop('check_matrix_control: unknown control type \'', control_type, '\'')
if(!(matrix_control[['matrix_compress']] %in% allowed_values)) {
error_string <- paste0('\nbad matrix_compress "', matrix_control[['matrix_compress']], '"\n')
}

# Is matrix_mode a valid value?
if(control_type == 'unrestricted')
allowed_values <- allowed_matrix_mode[['unrestricted']]
else
Expand All @@ -239,17 +273,21 @@ check_matrix_control <- function(matrix_control=list(), control_type=c('unrestri
error_string <- paste0('\ninvalid matrix_mode "', matrix_control[['matrix_mode']], '"')
}

# Check values related to matrix_mode = 'dir'.
if(matrix_control[['matrix_mode']] == 'dir') {
# Check matrix_path value.

# Is matrix_path a valid data type?
if(!(is.character(matrix_control[['matrix_path']]))) {
error_string <- paste0('\nbad matrix_path "', matrix_control[['matrix_path']], '"')
}

# Check matrix_buffer_size.
# Is matrix_buffer_size a valid data type?
if(!(is.integer(matrix_control[['matrix_buffer_size']]))) {
error_string <- paste0('\nmatrix_buffer_size must be an integer')
}
}

# Is matrix_bpcells_copy a valid data type?
if(!is.logical(matrix_control[['matrix_bpcells_copy']])) {
error_string <- paste0('\nmatrix_bpcells_copy value must be a logical type')
}
Expand Down Expand Up @@ -368,7 +406,6 @@ set_matrix_control <- function(matrix_control=list(), matrix_control_default=lis

if(matrix_control_out[['matrix_class']] == 'BPCells') {
matrix_control_out[['matrix_mode']] <- select_matrix_parameter_value(parameter='matrix_mode', matrix_control=matrix_control, matrix_control_default=matrix_control_default, default_value=default_matrix_mode)

if(matrix_control_out[['matrix_mode']] == 'mem') {
matrix_control_out[['matrix_type']] <- select_matrix_parameter_value(parameter='matrix_type', matrix_control=matrix_control, matrix_control_default=matrix_control_default, default_value=default_matrix_type)
matrix_control_out[['matrix_compress']] <- select_matrix_parameter_value(parameter='matrix_compress', matrix_control=matrix_control, matrix_control_default=matrix_control_default, default_value=default_matrix_compress)
Expand All @@ -382,6 +419,26 @@ set_matrix_control <- function(matrix_control=list(), matrix_control_default=lis
matrix_control_out[['matrix_buffer_size']] <- select_matrix_parameter_value(parameter='matrix_buffer_size', matrix_control=matrix_control, matrix_control_default=matrix_control_default, default_value=default_matrix_buffer_size)
matrix_control_out[['matrix_bpcells_copy']] <- select_matrix_parameter_value(parameter='matrix_bpcells_copy', matrix_control=matrix_control, matrix_control_default=matrix_control_default, default_value=default_matrix_bpcells_copy)
}

# Restrict matrix_control values for matrices used in
# intensive PCA calculations so set matrix_type to double for
# precision, set matrix_compress to FALSE for speed, and
# matrix_bpcells_copy to TRUE because we want a temporary
# matrix for the calculation, after which we remove it.
if(control_type == 'pca') {
if(matrix_control_out[['matrix_type']] == 'uint32_t') {
message('set_matrix_control: forcing matrix_type to \'double\' for PCA.')
matrix_control_out[['matrix_type']] <- 'double'
}
if(matrix_control_out[['matrix_compress']] == TRUE) {
message('set_matrix_control: forcing matrix_compress to \'FALSE\' for PCA.')
matrix_control_out[['matrix_compress']] <- FALSE
}
if(matrix_control_out[['matrix_bpcells_copy']] == FALSE) {
message('set_matrix_control: forcing matrix_bpcells_copy to \'TRUE\' for PCA.')
matrix_control_out[['matrix_bpcells_copy']] <- TRUE
}
}
}

check_matrix_control(matrix_control=matrix_control_out, control_type=control_type, check_conditional=TRUE)
Expand Down Expand Up @@ -526,11 +583,12 @@ get_matrix_class <- function(mat) {

# Get/infer the matrix information.
get_matrix_info <- function(mat) {
matrix_info <- get_matrix_class(mat=mat)
matrix_info <- tryCatch(get_matrix_class(mat=mat),
error=function(c) {stop(paste0(trimws(c),
'\n* error in get_matrix_info')) })

if(is.null(matrix_info[['matrix_class']])) {
message('bad matrix info -- dropping into browser')
browser()
stop('get_matrix_info: unable to infer matrix_class')
}

if(matrix_info[['matrix_class']] != 'BPCells') {
Expand Down
Loading

0 comments on commit 980ee33

Please sign in to comment.