From 05ea0964b1ea370ea3163c75f56808baede22910 Mon Sep 17 00:00:00 2001 From: jasenfinch Date: Tue, 2 Jul 2024 13:37:46 +0100 Subject: [PATCH] Reformat source files using styler. --- R/access.R | 126 ++++---- R/allClasses.R | 142 ++++----- R/allGenerics.R | 131 +++++---- R/binMetrics.R | 17 +- R/binParameters.R | 50 ++-- R/binneR.R | 70 ++--- R/binneRlyse.R | 135 +++++---- R/calc.R | 72 +++-- R/detect.R | 105 ++++--- R/get.R | 169 ++++++----- R/internals.R | 175 +++++------ R/plot.R | 575 +++++++++++++++++++------------------ R/readBinningParameters.R | 55 ++-- R/readFiles.R | 68 ++--- R/show-method.R | 103 ++++--- R/singleSample.R | 151 +++++----- R/spectralBinning-method.R | 380 ++++++++++++------------ 17 files changed, 1331 insertions(+), 1193 deletions(-) diff --git a/R/access.R b/R/access.R index 1f62926..e41c2d0 100644 --- a/R/access.R +++ b/R/access.R @@ -2,7 +2,7 @@ #' @rdname results #' @description Methods for accessing spectral binning results from the Binalysis S4 class. #' @param x S4 object of class Binalysis -#' @details +#' @details #' \itemize{ #' \item{version}{ - Extract package version used to create the object.} #' \item{creationDate}{ - Extract the creation date of the object.} @@ -15,78 +15,100 @@ #' @seealso \code{\link{Binalysis-class}}, \code{\link{binneRlyse}} #' @export -setMethod('version',signature = 'Binalysis', - function(x){ - x@version - }) +setMethod("version", + signature = "Binalysis", + function(x) { + x@version + } +) #' @rdname results #' @export -setMethod('creationDate',signature = 'Binalysis', - function(x){ - x@creation_date - }) +setMethod("creationDate", + signature = "Binalysis", + function(x) { + x@creation_date + } +) #' @rdname results #' @export -setMethod('filePaths',signature = 'Binalysis', - function(x){ - x@file_paths - }) +setMethod("filePaths", + signature = "Binalysis", + function(x) { + x@file_paths + } +) #' @rdname results #' @export -setMethod('sampleInfo',signature = 'Binalysis', - function(x){ - x@sample_info - }) +setMethod("sampleInfo", + signature = "Binalysis", + function(x) { + x@sample_info + } +) #' @rdname results #' @export -setMethod('binnedData',signature = 'Binalysis', - function(x){ - x@binned_data - }) - -setMethod('binnedData<-',signature = 'Binalysis', - function(x,value){ - x@binned_data <- value - return(x) - }) +setMethod("binnedData", + signature = "Binalysis", + function(x) { + x@binned_data + } +) + +setMethod("binnedData<-", + signature = "Binalysis", + function(x, value) { + x@binned_data <- value + return(x) + } +) #' @rdname results #' @export -setMethod('accurateData',signature = 'Binalysis', - function(x){ - x@accurate_mz - }) - -setMethod('accurateData<-',signature = 'Binalysis', - function(x,value){ - x@accurate_mz <- value - return(x) - }) +setMethod("accurateData", + signature = "Binalysis", + function(x) { + x@accurate_mz + } +) + +setMethod("accurateData<-", + signature = "Binalysis", + function(x, value) { + x@accurate_mz <- value + return(x) + } +) #' @rdname results #' @export -setMethod('binningParameters',signature = 'Binalysis', - function(x){ - as(x,'BinParameters') - }) - -setMethod('spectra',signature = 'Binalysis', - function(x){ - x@spectra - }) - -setMethod('spectra<-',signature = 'Binalysis', - function(x,value){ - x@spectra <- value - return(x) - }) \ No newline at end of file +setMethod("binningParameters", + signature = "Binalysis", + function(x) { + as(x, "BinParameters") + } +) + +setMethod("spectra", + signature = "Binalysis", + function(x) { + x@spectra + } +) + +setMethod("spectra<-", + signature = "Binalysis", + function(x, value) { + x@spectra <- value + return(x) + } +) diff --git a/R/allClasses.R b/R/allClasses.R index 4c917fd..c56d06a 100644 --- a/R/allClasses.R +++ b/R/allClasses.R @@ -1,96 +1,96 @@ #' Spectral binning parameters class #' @description An S4 class to store spectral binning parameters. #' @slot scans numeric vector containing the scan indexes to use for binning -#' @slot cls the column of class labels to use for aggregating accurate -#' mass data. Defaults to NULL where accurate mass data will be averaged +#' @slot cls the column of class labels to use for aggregating accurate +#' mass data. Defaults to NULL where accurate mass data will be averaged #' across all samples #' @seealso \code{\link{binParameters}} #' @export -setClass('BinParameters', - slots = list( - scans = 'numeric', - cls = 'character' - ), +setClass("BinParameters", + slots = list( + scans = "numeric", + cls = "character" + ), ) #' Spectral binning analysis class -#' @description An S4 class to store spectrally binned data and accurate +#' @description An S4 class to store spectrally binned data and accurate #' mass information. #' @slot version package version #' @slot creation_date creation date #' @slot file_paths file paths for raw data #' @slot sample_info tibble containing runinfo data -#' @slot binned_data list containing tibbles of spectrally binned data +#' @slot binned_data list containing tibbles of spectrally binned data #' for each acquisition mode #' @slot accurate_mz tibble containin accurate mass information -#' @slot spectra list containing tibbles of headers and class master +#' @slot spectra list containing tibbles of headers and class master #' mix fingerprints -#' @seealso \code{\link{binneRlyse}} +#' @seealso \code{\link{binneRlyse}} #' @export -setClass('Binalysis', - slots = list( - version = 'character', - creation_date = 'character', - file_paths = 'character', - sample_info = 'tbl_df', - binned_data = 'list', - accurate_mz = 'tbl_df', - spectra = 'list' - ), - contains = 'BinParameters', - prototype = list( - version = packageVersion('binneR') %>% - as.character(), - creation_date = date(), - sample_info = tibble( - fileOrder = character(), - injOrder = numeric(), - fileName = character(), - batch = numeric(), - block = numeric(), - name = character(), - class = character() - ), - accurate_mz = tibble() - ) +setClass("Binalysis", + slots = list( + version = "character", + creation_date = "character", + file_paths = "character", + sample_info = "tbl_df", + binned_data = "list", + accurate_mz = "tbl_df", + spectra = "list" + ), + contains = "BinParameters", + prototype = list( + version = packageVersion("binneR") %>% + as.character(), + creation_date = date(), + sample_info = tibble( + fileOrder = character(), + injOrder = numeric(), + fileName = character(), + batch = numeric(), + block = numeric(), + name = character(), + class = character() + ), + accurate_mz = tibble() + ) ) -setValidity('Binalysis',function(object){ - necessary_names <- c('fileOrder','injOrder','fileName','batch','block','name','class') - - info_names <- object %>% - sampleInfo() %>% - colnames() - - presence <- necessary_names %in% info_names - - if (FALSE %in% presence) { - str_c('Sample information should contain the following column names: ', - str_c(necessary_names,collapse = ', '), - '.') - } else { - TRUE - } +setValidity("Binalysis", function(object) { + necessary_names <- c("fileOrder", "injOrder", "fileName", "batch", "block", "name", "class") + + info_names <- object %>% + sampleInfo() %>% + colnames() + + presence <- necessary_names %in% info_names + + if (FALSE %in% presence) { + str_c( + "Sample information should contain the following column names: ", + str_c(necessary_names, collapse = ", "), + "." + ) + } else { + TRUE + } }) -setValidity('Binalysis',function(object){ - file_path_names <- object %>% - filePaths() %>% - basename() - - info_file_names <- object %>% - sampleInfo() %>% - .$fileName - - matching <- file_path_names == info_file_names - - if (FALSE %in% matching) { - 'File names in paths do not match file names in the sample information.' - } else { - TRUE - } - - +setValidity("Binalysis", function(object) { + file_path_names <- object %>% + filePaths() %>% + basename() + + info_file_names <- object %>% + sampleInfo() %>% + .$fileName + + matching <- file_path_names == info_file_names + + if (FALSE %in% matching) { + "File names in paths do not match file names in the sample information." + } else { + TRUE + } }) diff --git a/R/allGenerics.R b/R/allGenerics.R index b4e2612..acc80aa 100644 --- a/R/allGenerics.R +++ b/R/allGenerics.R @@ -1,92 +1,119 @@ +setGeneric("spectralBinning", function(x, verbose = TRUE) { + standardGeneric("spectralBinning") +}) -setGeneric("spectralBinning", function(x,verbose = TRUE) - standardGeneric("spectralBinning")) - -setGeneric("ss", function(x, verbose = TRUE) - standardGeneric("ss")) +setGeneric("ss", function(x, verbose = TRUE) { + standardGeneric("ss") +}) #' @rdname results -setGeneric("version", function(x) - standardGeneric("version")) +setGeneric("version", function(x) { + standardGeneric("version") +}) #' @rdname results -setGeneric("creationDate", function(x) - standardGeneric("creationDate")) +setGeneric("creationDate", function(x) { + standardGeneric("creationDate") +}) #' @rdname results -setGeneric("filePaths", function(x) - standardGeneric("filePaths")) +setGeneric("filePaths", function(x) { + standardGeneric("filePaths") +}) #' @rdname results -setGeneric("sampleInfo", function(x) - standardGeneric("sampleInfo")) +setGeneric("sampleInfo", function(x) { + standardGeneric("sampleInfo") +}) #' @rdname results -setGeneric("binnedData", function(x) - standardGeneric("binnedData")) +setGeneric("binnedData", function(x) { + standardGeneric("binnedData") +}) -setGeneric("binnedData<-", function(x,value) - standardGeneric("binnedData<-")) +setGeneric("binnedData<-", function(x, value) { + standardGeneric("binnedData<-") +}) #' @rdname results -setGeneric("accurateData", function(x) - standardGeneric("accurateData")) +setGeneric("accurateData", function(x) { + standardGeneric("accurateData") +}) -setGeneric("accurateData<-", function(x,value) - standardGeneric("accurateData<-")) +setGeneric("accurateData<-", function(x, value) { + standardGeneric("accurateData<-") +}) #' @rdname results -setGeneric("binningParameters", function(x) - standardGeneric("binningParameters")) +setGeneric("binningParameters", function(x) { + standardGeneric("binningParameters") +}) -setGeneric("spectra", function(x) - standardGeneric("spectra")) +setGeneric("spectra", function(x) { + standardGeneric("spectra") +}) -setGeneric("spectra<-", function(x,value) - standardGeneric("spectra<-")) +setGeneric("spectra<-", function(x, value) { + standardGeneric("spectra<-") +}) #' @rdname plotBin -setGeneric('plotBin', - function(x,bin, - type = c('all','cls','sample')) - standardGeneric('plotBin')) +setGeneric( + "plotBin", + function(x, bin, + type = c("all", "cls", "sample")) { + standardGeneric("plotBin") + } +) #' @rdname plotChromatogram -setGeneric('plotChromatogram',function(x) - standardGeneric('plotChromatogram')) +setGeneric("plotChromatogram", function(x) { + standardGeneric("plotChromatogram") +}) #' @rdname plotTIC -setGeneric('plotTIC', - function(x, by = 'injOrder', colour = 'block') - standardGeneric('plotTIC')) +setGeneric( + "plotTIC", + function(x, by = "injOrder", colour = "block") { + standardGeneric("plotTIC") + } +) #' @rdname plotFingerprint -setGeneric('plotFingerprint',function(x) - standardGeneric('plotFingerprint')) +setGeneric("plotFingerprint", function(x) { + standardGeneric("plotFingerprint") +}) #' @rdname parameters -setGeneric('scans',function(x) - standardGeneric('scans')) +setGeneric("scans", function(x) { + standardGeneric("scans") +}) #' @rdname parameters -setGeneric('scans<-',function(x,value) - standardGeneric('scans<-')) +setGeneric("scans<-", function(x, value) { + standardGeneric("scans<-") +}) #' @rdname parameters -setGeneric('cls',function(x) - standardGeneric('cls')) +setGeneric("cls", function(x) { + standardGeneric("cls") +}) #' @rdname parameters -setGeneric('cls<-',function(x,value) - standardGeneric('cls<-')) +setGeneric("cls<-", function(x, value) { + standardGeneric("cls<-") +}) #' @rdname plotPurity -setGeneric('plotPurity',function(x,histBins = 30) - standardGeneric('plotPurity')) +setGeneric("plotPurity", function(x, histBins = 30) { + standardGeneric("plotPurity") +}) #' @rdname plotCentrality -setGeneric('plotCentrality',function(x,histBins = 30) - standardGeneric('plotCentrality')) +setGeneric("plotCentrality", function(x, histBins = 30) { + standardGeneric("plotCentrality") +}) -setGeneric('nScans',function(x) - standardGeneric('nScans')) +setGeneric("nScans", function(x) { + standardGeneric("nScans") +}) diff --git a/R/binMetrics.R b/R/binMetrics.R index 7dfdf77..3fb8f88 100644 --- a/R/binMetrics.R +++ b/R/binMetrics.R @@ -1,16 +1,15 @@ - -binMean <- function(mz,intensity) { - sum(mz * intensity)/sum(intensity) +binMean <- function(mz, intensity) { + sum(mz * intensity) / sum(intensity) } -binMAE <- function(mz,intensity) { - sum(abs(mz - binMean(mz,intensity)) * intensity) / sum(intensity) +binMAE <- function(mz, intensity) { + sum(abs(mz - binMean(mz, intensity)) * intensity) / sum(intensity) } -binPurity <- function(mz,intensity,dp = 2){ - 1 - binMAE(mz,intensity) / (1*10^-dp/2) +binPurity <- function(mz, intensity, dp = 2) { + 1 - binMAE(mz, intensity) / (1 * 10^-dp / 2) } -binCentrality <- function(mz,intensity,dp = 2){ - 1 - abs(binMean(mz,intensity) - round(mz[1],dp))/(1*10^-dp/2) +binCentrality <- function(mz, intensity, dp = 2) { + 1 - abs(binMean(mz, intensity) - round(mz[1], dp)) / (1 * 10^-dp / 2) } diff --git a/R/binParameters.R b/R/binParameters.R index c90d609..776e9be 100644 --- a/R/binParameters.R +++ b/R/binParameters.R @@ -1,25 +1,25 @@ #' Set spectral binning parameters #' @description Selection of parameters to use for spectral binning. #' @param scans numeric vector containing the scan indexes to use for binning -#' @param cls the column of class labels to use for aggregating accurate -#' mass data. Defaults to NULL where accurate mass data will be averaged +#' @param cls the column of class labels to use for aggregating accurate +#' mass data. Defaults to NULL where accurate mass data will be averaged #' accross all samples #' @return S4 object of class BinParameters -#' @examples +#' @examples #' p <- binParameters(scans = 6:17) -#' @seealso \code{\link{BinParameters-class}}, \code{\link{scans}}, +#' @seealso \code{\link{BinParameters-class}}, \code{\link{scans}}, #' \code{\link{cls}} #' @importFrom parallel detectCores #' @export -binParameters <- function(scans = 5:12, - cls = character()){ - p <- new('BinParameters', - scans = scans, - cls = cls - ) - - return(p) +binParameters <- function(scans = 5:12, + cls = character()) { + p <- new("BinParameters", + scans = scans, + cls = cls + ) + + return(p) } @@ -31,30 +31,32 @@ binParameters <- function(scans = 5:12, #' @seealso \code{\link{BinParameters-class}}, \code{\link{binParameters}} #' @export -setMethod('scans',signature = 'BinParameters',function(x){ - x@scans +setMethod("scans", signature = "BinParameters", function(x) { + x@scans }) #' @rdname parameters #' @export -setMethod('scans<-',signature = 'BinParameters', - function(x,value){ - x@scans <- value - return(x) -}) +setMethod("scans<-", + signature = "BinParameters", + function(x, value) { + x@scans <- value + return(x) + } +) #' @rdname parameters #' @export -setMethod('cls',signature = 'BinParameters',function(x){ - x@cls +setMethod("cls", signature = "BinParameters", function(x) { + x@cls }) #' @rdname parameters #' @export -setMethod('cls<-',signature = 'BinParameters',function(x,value){ - x@cls <- value - return(x) +setMethod("cls<-", signature = "BinParameters", function(x, value) { + x@cls <- value + return(x) }) diff --git a/R/binneR.R b/R/binneR.R index 8064e40..7a1fa1e 100644 --- a/R/binneR.R +++ b/R/binneR.R @@ -1,36 +1,36 @@ - -globalVariables(c('.', - 'bin', - 'Class', - 'count', - 'File', - 'Mode', - 'Scan', - 'intensity', - 'mz', - 'n', - 'Sample', - 'seqNum', - 'acquisitionNum', - 'polarity', - 'totIonCurrent', - 'fileName', - 'Colour', - 'Index', - 'LowerOut', - 'Median', - 'Q1', - 'Q3', - 'UpperOut', - 'TIC', - 'Feature', - 'Intensity', - 'm/z', - 'filterString', - 'purity', - 'centrality', - 'Purity', - 'Centrality', - 'FileName', - 'idx' +globalVariables(c( + ".", + "bin", + "Class", + "count", + "File", + "Mode", + "Scan", + "intensity", + "mz", + "n", + "Sample", + "seqNum", + "acquisitionNum", + "polarity", + "totIonCurrent", + "fileName", + "Colour", + "Index", + "LowerOut", + "Median", + "Q1", + "Q3", + "UpperOut", + "TIC", + "Feature", + "Intensity", + "m/z", + "filterString", + "purity", + "centrality", + "Purity", + "Centrality", + "FileName", + "idx" )) diff --git a/R/binneRlyse.R b/R/binneRlyse.R index 40265fb..8b15602 100644 --- a/R/binneRlyse.R +++ b/R/binneRlyse.R @@ -2,36 +2,37 @@ #' @description perform spectral binning. #' @param files character vector of file paths to use for spectral binning #' @param info tibble containing sample information -#' @param parameters object of class BinParameters containing parameters +#' @param parameters object of class BinParameters containing parameters #' for spectral binning #' @param verbose show console output #' @return S4 object of class Binalysis. -#' @details -#' Parallel processing is managed by the \code{future} package. This can -#' be specified using the \code{plan() function}. See the example below +#' @details +#' Parallel processing is managed by the \code{future} package. This can +#' be specified using the \code{plan() function}. See the example below #' and \code{?future::plan} for details on how this can be specified. -#' -#' By default, spectral binning is performed at the recommended 2 decimal -#' places. This can be altered by setting either the global option +#' +#' By default, spectral binning is performed at the recommended 2 decimal +#' places. This can be altered by setting either the global option #' \code{binner_dp} or the environment variable \code{BINNER_DP}. -#' -#' @seealso \code{\link{Binalysis-class}}, \code{\link{binParameters}}, +#' +#' @seealso \code{\link{Binalysis-class}}, \code{\link{binParameters}}, #' \code{\link{sampleInfo}}, \code{\link{binnedData}}, \code{\link{accurateData}} -#' @examples +#' @examples #' \dontrun{ -#' files <- metaboData::filePaths('FIE-HRMS','BdistachyonTechnical') -#' -#' info <- metaboData::runinfo('FIE-HRMS','BdistachyonTechnical') -#' +#' files <- metaboData::filePaths("FIE-HRMS", "BdistachyonTechnical") +#' +#' info <- metaboData::runinfo("FIE-HRMS", "BdistachyonTechnical") +#' #' parameters <- detectParameters(files) -#' cls(parameters) <- 'class' -#' +#' cls(parameters) <- "class" +#' #' ## Optionally declare parallel processing backend #' # plan(future::multisession,workers = 2) -#' -#' analysis <- binneRlyse(files, -#' info, -#' parameters = parameters) +#' +#' analysis <- binneRlyse(files, +#' info, +#' parameters = parameters +#' ) #' } #' @importFrom dplyr ungroup n #' @importFrom magrittr %>% @@ -41,46 +42,56 @@ #' @importFrom lubridate seconds_to_period #' @export -binneRlyse <- function(files, - info, - parameters = binParameters(), - verbose = TRUE){ - - x <- new('Binalysis', - parameters, - creation_date = date(), - file_paths = files, - sample_info = info) - - if (verbose == TRUE) { - startTime <- proc.time() - message(str_c(blue('binneR'), - red(str_c('v', - version(x))), - creationDate(x), - sep = ' ')) - message(str_c(str_c(rep('_',console_width()),collapse = ''),sep = '')) - params <- parameters %>% - {capture.output(print(.))} %>% - {.[-1]} %>% - str_c(collapse = '\n') - message(params) - message(str_c(str_c(rep('_',console_width()),collapse = ''),'\n',sep = '')) - } - - x <- x %>% - spectralBinning(verbose = verbose) - - if (verbose == TRUE) { - message() - endTime <- proc.time() - ellapsed <- {endTime - startTime} %>% - .[3] %>% - round(1) %>% - seconds_to_period() %>% - str_c('[',.,']') - message(str_c(green('Completed! '),ellapsed,sep = '')) - } - - return(x) +binneRlyse <- function(files, + info, + parameters = binParameters(), + verbose = TRUE) { + x <- new("Binalysis", + parameters, + creation_date = date(), + file_paths = files, + sample_info = info + ) + + if (verbose == TRUE) { + startTime <- proc.time() + message(str_c(blue("binneR"), + red(str_c( + "v", + version(x) + )), + creationDate(x), + sep = " " + )) + message(str_c(str_c(rep("_", console_width()), collapse = ""), sep = "")) + params <- parameters %>% + { + capture.output(print(.)) + } %>% + { + .[-1] + } %>% + str_c(collapse = "\n") + message(params) + message(str_c(str_c(rep("_", console_width()), collapse = ""), "\n", sep = "")) + } + + x <- x %>% + spectralBinning(verbose = verbose) + + if (verbose == TRUE) { + message() + endTime <- proc.time() + ellapsed <- + { + endTime - startTime + } %>% + .[3] %>% + round(1) %>% + seconds_to_period() %>% + str_c("[", ., "]") + message(str_c(green("Completed! "), ellapsed, sep = "")) + } + + return(x) } diff --git a/R/calc.R b/R/calc.R index 30cf3ec..0eacbce 100644 --- a/R/calc.R +++ b/R/calc.R @@ -1,41 +1,51 @@ - -calcBinList <- function(pks){ +calcBinList <- function(pks) { bins <- pks %>% - group_by(idx,fileName,polarity,scan,bin) %>% - summarise(intensity = sum(intensity), - .groups = 'drop') %>% - group_by(polarity,bin) %>% - summarise(count = n(), - .groups = 'drop') %>% + group_by(idx, fileName, polarity, scan, bin) %>% + summarise( + intensity = sum(intensity), + .groups = "drop" + ) %>% + group_by(polarity, bin) %>% + summarise( + count = n(), + .groups = "drop" + ) %>% filter(count > 1) %>% select(-count) } -calcBinMeasures <- function(pks,cls){ - +calcBinMeasures <- function(pks, cls) { dp <- binnerDP() - + binMeasures <- pks %>% - group_by_at(vars(all_of(c('idx', - 'fileName', - cls, - 'polarity', - 'bin')))) %>% - summarise(purity = binPurity(mz, - intensity, - dp = dp), - centrality = binCentrality(mz, - intensity, - dp = dp), - .groups = 'drop') - + group_by_at(vars(all_of(c( + "idx", + "fileName", + cls, + "polarity", + "bin" + )))) %>% + summarise( + purity = binPurity(mz, + intensity, + dp = dp + ), + centrality = binCentrality(mz, + intensity, + dp = dp + ), + .groups = "drop" + ) + return(binMeasures) } -setMethod('nScans',signature = 'Binalysis', - function(x){ - x %>% - scans() %>% - unique() %>% - length() - }) +setMethod("nScans", + signature = "Binalysis", + function(x) { + x %>% + scans() %>% + unique() %>% + length() + } +) diff --git a/R/detect.R b/R/detect.R index 49a3204..6b35b91 100644 --- a/R/detect.R +++ b/R/detect.R @@ -1,75 +1,74 @@ #' Detect infusion profile scan range #' @description Detect infusion scans for a set of FIE-MS infusion profiles. #' @param files character vector of file paths to use -#' @param thresh detection threshold as a proportion of the peak of the +#' @param thresh detection threshold as a proportion of the peak of the #' infusion profile #' @return Numeric vector of detected infusion scans. #' @seealso \code{\link{detectParameters}} -#' @examples -#' file_paths <- system.file('example-data/1.mzML.gz',package = 'binneR') -#' +#' @examples +#' file_paths <- system.file("example-data/1.mzML.gz", package = "binneR") +#' #' detectInfusionScans(file_paths) #' @importFrom mzR openMSfile header #' @importFrom dplyr group_by summarise #' @export -detectInfusionScans <- function(files, - thresh = 0.5){ - - idx <- tibble( - Sample = files - ) %>% - rowid_to_column(var = 'idx') - - ms <- files %>% - future_map(~{ - ms <- .x %>% - openMSfile() - - file_header <- ms %>% - header() %>% - as_tibble() - - return(file_header) - }) %>% - set_names(idx$idx) %>% - bind_rows(.id = 'idx') %>% - mutate( - idx = as.numeric(idx) - ) %>% - left_join(idx, - by = 'idx', - relationship = 'many-to-many') - - hd <- ms %>% - select(idx,seqNum,acquisitionNum,polarity,totIonCurrent,filterString) %>% - group_by(idx,polarity,filterString) %>% - mutate(acquisitionNum = seq_len(n())) %>% - group_by(acquisitionNum) %>% - summarise(totIonCurrent = mean(totIonCurrent)) - - mTIC <- hd$totIonCurrent %>% - max() - - TICthresh <- mTIC * thresh - scans <- hd$acquisitionNum[hd$totIonCurrent > TICthresh] - return(min(scans):max(scans)) +detectInfusionScans <- function(files, + thresh = 0.5) { + idx <- tibble( + Sample = files + ) %>% + rowid_to_column(var = "idx") + + ms <- files %>% + future_map(~ { + ms <- .x %>% + openMSfile() + + file_header <- ms %>% + header() %>% + as_tibble() + + return(file_header) + }) %>% + set_names(idx$idx) %>% + bind_rows(.id = "idx") %>% + mutate( + idx = as.numeric(idx) + ) %>% + left_join(idx, + by = "idx", + relationship = "many-to-many" + ) + + hd <- ms %>% + select(idx, seqNum, acquisitionNum, polarity, totIonCurrent, filterString) %>% + group_by(idx, polarity, filterString) %>% + mutate(acquisitionNum = seq_len(n())) %>% + group_by(acquisitionNum) %>% + summarise(totIonCurrent = mean(totIonCurrent)) + + mTIC <- hd$totIonCurrent %>% + max() + + TICthresh <- mTIC * thresh + scans <- hd$acquisitionNum[hd$totIonCurrent > TICthresh] + return(min(scans):max(scans)) } #' Detect suitable spectral binning parameters #' @description Detect binning parameters from a given list of file paths. #' @param files character vector of file paths #' @return S4 object of class BinParameters -#' @examples -#' file_paths <-system.file('example-data/1.mzML.gz',package = 'binneR') +#' @examples +#' file_paths <- system.file("example-data/1.mzML.gz", package = "binneR") #' parameters <- detectParameters(file_paths) #' @seealso \code{\link{BinParameters-class}}, \code{\link{binParameters}} #' @export -detectParameters <- function(files){ - - scans <- detectInfusionScans(files) - - bp <- binParameters(scans = scans) - return(bp) +detectParameters <- function(files) { + scans <- detectInfusionScans(files) + + bp <- binParameters(scans = scans) + return(bp) } diff --git a/R/get.R b/R/get.R index 100f234..2ac7d1d 100644 --- a/R/get.R +++ b/R/get.R @@ -2,14 +2,13 @@ #' @importFrom dplyr bind_rows group_by summarise #' @importFrom magrittr %>% -sampProcess <- function(file,scans,dp){ - - pl <- getFile(file,scans) %>% - mutate(mz = round(mz,dp)) %>% - group_by(polarity,mz) %>% - summarise(intensity = sum(intensity)/length(scans)) - - return(pl) +sampProcess <- function(file, scans, dp) { + pl <- getFile(file, scans) %>% + mutate(mz = round(mz, dp)) %>% + group_by(polarity, mz) %>% + summarise(intensity = sum(intensity) / length(scans)) + + return(pl) } #' @importFrom mzR openMSfile peaks close @@ -18,90 +17,90 @@ sampProcess <- function(file,scans,dp){ #' @importFrom tibble as_tibble #' @importFrom magrittr set_colnames set_names -getFile <- function(file,scans){ - - ms <- openMSfile(file,backend = 'pwiz') - - hd <- header(ms) - - headerTemp(file,hd) - - hd <- hd %>% - select(seqNum,polarity,filterString) %>% - group_by(polarity,filterString) %>% - mutate(scan = 1:dplyr::n()) %>% - filter(scan %in% scans) - - hd$polarity[hd$polarity == 0] <- 'n' - hd$polarity[hd$polarity == 1] <- 'p' - - file_peaks <- ms %>% - peaks() %>% - .[hd$seqNum] %>% - map(~{ - d <- . - d %>% - set_colnames(c('mz','intensity')) %>% - as_tibble() %>% - filter(intensity > 0) - }) %>% - set_names(hd$seqNum) %>% - bind_rows(.id = 'seqNum') %>% - mutate(seqNum = as.numeric(seqNum)) %>% - left_join(hd, by = "seqNum") %>% - select(-filterString,-seqNum) - - return(file_peaks) +getFile <- function(file, scans) { + ms <- openMSfile(file, backend = "pwiz") + + hd <- header(ms) + + headerTemp(file, hd) + + hd <- hd %>% + select(seqNum, polarity, filterString) %>% + group_by(polarity, filterString) %>% + mutate(scan = 1:dplyr::n()) %>% + filter(scan %in% scans) + + hd$polarity[hd$polarity == 0] <- "n" + hd$polarity[hd$polarity == 1] <- "p" + + file_peaks <- ms %>% + peaks() %>% + .[hd$seqNum] %>% + map(~ { + d <- . + d %>% + set_colnames(c("mz", "intensity")) %>% + as_tibble() %>% + filter(intensity > 0) + }) %>% + set_names(hd$seqNum) %>% + bind_rows(.id = "seqNum") %>% + mutate(seqNum = as.numeric(seqNum)) %>% + left_join(hd, by = "seqNum") %>% + select(-filterString, -seqNum) + + return(file_peaks) } #' @importFrom parallel makeCluster stopCluster parLapply clusterExport #' @importFrom dplyr mutate -getPeaks <- function(files,scans){ - - idx <- tibble( - fileName = files - ) %>% - rowid_to_column(var = 'idx') - - dp <- binnerDP() - - pks <- future_map(files,getFile,scans = scans) %>% - set_names(idx$idx) %>% - bind_rows(.id = 'idx') %>% - mutate(idx = as.numeric(idx)) %>% - left_join(idx, - by = 'idx') %>% - mutate(fileName = basename(fileName), - mz = round(mz,5),bin = round(mz,dp)) - return(pks) +getPeaks <- function(files, scans) { + idx <- tibble( + fileName = files + ) %>% + rowid_to_column(var = "idx") + + dp <- binnerDP() + + pks <- future_map(files, getFile, scans = scans) %>% + set_names(idx$idx) %>% + bind_rows(.id = "idx") %>% + mutate(idx = as.numeric(idx)) %>% + left_join(idx, + by = "idx" + ) %>% + mutate( + fileName = basename(fileName), + mz = round(mz, 5), bin = round(mz, dp) + ) + return(pks) } #' @importFrom mzR header -getHeaders <- function(files){ - - idx <- tibble( - FileName = files - ) %>% - rowid_to_column(var = 'idx') - - available_header_temps <- availableHeaderTemps(files) - - file_headers <- available_header_temps %>% - future_map(readRDS) - - file_headers <- file_headers %>% - set_names(idx$idx) %>% - bind_rows(.id = 'idx') %>% - mutate( - idx = as.numeric(idx) - ) %>% - left_join(idx, - by = 'idx') %>% - select(idx,FileName,acquisitionNum,totIonCurrent,polarity,filterString) %>% - as_tibble() - - return(file_headers) -} +getHeaders <- function(files) { + idx <- tibble( + FileName = files + ) %>% + rowid_to_column(var = "idx") + + available_header_temps <- availableHeaderTemps(files) + file_headers <- available_header_temps %>% + future_map(readRDS) + + file_headers <- file_headers %>% + set_names(idx$idx) %>% + bind_rows(.id = "idx") %>% + mutate( + idx = as.numeric(idx) + ) %>% + left_join(idx, + by = "idx" + ) %>% + select(idx, FileName, acquisitionNum, totIonCurrent, polarity, filterString) %>% + as_tibble() + + return(file_headers) +} diff --git a/R/internals.R b/R/internals.R index 2ffa04c..e0bb002 100644 --- a/R/internals.R +++ b/R/internals.R @@ -1,96 +1,103 @@ +binnerDPenv <- function() { + dp <- Sys.getenv("BINNER_DP") -binnerDPenv <- function(){ - dp <- Sys.getenv('BINNER_DP') - - if (dp != '') { - dp <- suppressWarnings(as.numeric(dp)) - } else { - dp <- 2 - } - - if (is.na(dp)){ - warning("The environment variable 'BINNER_DP' is not numeric. Using 2 decimal places instead.", - call. = FALSE) - dp <- 2 - } - - if (dp > 5){ - warning("The environment variable 'BINNER_DP' is greater than 5. Using 2 decimal places instead.", - call. = FALSE) - dp <- 2 - } - - return(dp) + if (dp != "") { + dp <- suppressWarnings(as.numeric(dp)) + } else { + dp <- 2 + } + + if (is.na(dp)) { + warning("The environment variable 'BINNER_DP' is not numeric. Using 2 decimal places instead.", + call. = FALSE + ) + dp <- 2 + } + + if (dp > 5) { + warning("The environment variable 'BINNER_DP' is greater than 5. Using 2 decimal places instead.", + call. = FALSE + ) + dp <- 2 + } + + return(dp) } -binnerDPopt <- function(){ - dp <- options()$binner_dp - - if (is.null(dp)){ - dp <- 2 - } - - if (!is.numeric(dp)){ - warning("The global option 'binner_dp' is not numeric. Using 2 decimal places instead.", - call. = FALSE) - dp <- 2 - } - - if (dp > 5){ - warning("The global option 'binner_dp' is greater than 5. Using 2 decimal places instead.", - call. = FALSE) - dp <- 2 - } - - return(dp) +binnerDPopt <- function() { + dp <- options()$binner_dp + + if (is.null(dp)) { + dp <- 2 + } + + if (!is.numeric(dp)) { + warning("The global option 'binner_dp' is not numeric. Using 2 decimal places instead.", + call. = FALSE + ) + dp <- 2 + } + + if (dp > 5) { + warning("The global option 'binner_dp' is greater than 5. Using 2 decimal places instead.", + call. = FALSE + ) + dp <- 2 + } + + return(dp) } -binnerDP <- function(){ - - dp_env <- binnerDPenv() - - dp_opt <- binnerDPopt() - - dp <- c(dp_opt, - dp_env) %>% - unique() - - if (length(dp) > 1) { - warning("The environment variable 'BINNER_DP' and global option 'binner_dp' are differentially set. Using the value of 'binner_dp'.", - call. = FALSE) - - dp <- dp[1] - } - - return(dp) +binnerDP <- function() { + dp_env <- binnerDPenv() + + dp_opt <- binnerDPopt() + + dp <- c( + dp_opt, + dp_env + ) %>% + unique() + + if (length(dp) > 1) { + warning("The environment variable 'BINNER_DP' and global option 'binner_dp' are differentially set. Using the value of 'binner_dp'.", + call. = FALSE + ) + + dp <- dp[1] + } + + return(dp) } #' @importFrom tools file_path_sans_ext -headerTemp <- function(file,header_table){ - temp_dir <- paste0(tempdir(),'/binneR-headers') - - if (!dir.exists(temp_dir)){ - dir.create(temp_dir) - } - - file_name <- file %>% - basename() %>% - file_path_sans_ext(compression = TRUE) - - temp_file <- paste0(temp_dir,'/',file_name,'.rds') - saveRDS(header_table,temp_file) +headerTemp <- function(file, header_table) { + temp_dir <- paste0(tempdir(), "/binneR-headers") + + if (!dir.exists(temp_dir)) { + dir.create(temp_dir) + } + + file_name <- file %>% + basename() %>% + file_path_sans_ext(compression = TRUE) + + temp_file <- paste0(temp_dir, "/", file_name, ".rds") + saveRDS(header_table, temp_file) } -availableHeaderTemps <- function(files){ - temp_dir <- paste0(tempdir(),'/binneR-headers') - - temp_files <- files %>% - basename() %>% - file_path_sans_ext(compression = TRUE) %>% - {paste0(temp_dir,'/',.,'.rds')} - - temp_files <- temp_files[file.exists(temp_files)] - - return(temp_files) +availableHeaderTemps <- function(files) { + temp_dir <- paste0(tempdir(), "/binneR-headers") + + temp_files <- files %>% + basename() %>% + file_path_sans_ext(compression = TRUE) %>% + { + paste0(temp_dir, "/", ., ".rds") + } + + temp_files <- temp_files[file.exists(temp_files)] + + return(temp_files) } diff --git a/R/plot.R b/R/plot.R index 49ad45e..05021c6 100644 --- a/R/plot.R +++ b/R/plot.R @@ -1,46 +1,52 @@ #' @importFrom ggplot2 theme_bw element_blank element_line element_text -plotTheme <- function(){ - theme_bw() + - theme(plot.title = element_text(face = 'bold',hjust = 0.5), - plot.caption = element_text(hjust = 0), - panel.border = element_blank(), - panel.grid = element_blank(), - axis.title = element_text(face = 'bold'), - axis.line = element_line(), - legend.title = element_text(face = 'bold'), - strip.background = element_blank(), - strip.text = element_text(face = 'bold')) +plotTheme <- function() { + theme_bw() + + theme( + plot.title = element_text(face = "bold", hjust = 0.5), + plot.caption = element_text(hjust = 0), + panel.border = element_blank(), + panel.grid = element_blank(), + axis.title = element_text(face = "bold"), + axis.line = element_line(), + legend.title = element_text(face = "bold"), + strip.background = element_blank(), + strip.text = element_text(face = "bold") + ) } -binPlot <- function(dat,bin,m,dp,type,cls){ - pl <- ggplot(dat,aes(x = mz)) + - geom_density() + - xlim(m - 5 * 10^-(dp + 1), - m + 5 * 10^-(dp + 1)) + - plotTheme() + - scale_y_continuous(expand = c(0,0)) + - labs(title = bin, - x = 'm/z', - y = 'Density') - - if (type == 'cls') { - - if (length(cls) == 0) { - stop('No "cls" parameter found for this Binalysis class object.', - call. = FALSE) - } - - pl <- pl + - facet_wrap(as.formula(paste("~", cls))) - } - - if (type == 'sample') { - pl <- pl + - facet_wrap(~fileName) - } - - return(pl) +binPlot <- function(dat, bin, m, dp, type, cls) { + pl <- ggplot(dat, aes(x = mz)) + + geom_density() + + xlim( + m - 5 * 10^-(dp + 1), + m + 5 * 10^-(dp + 1) + ) + + plotTheme() + + scale_y_continuous(expand = c(0, 0)) + + labs( + title = bin, + x = "m/z", + y = "Density" + ) + + if (type == "cls") { + if (length(cls) == 0) { + stop('No "cls" parameter found for this Binalysis class object.', + call. = FALSE + ) + } + + pl <- pl + + facet_wrap(as.formula(paste("~", cls))) + } + + if (type == "sample") { + pl <- pl + + facet_wrap(~fileName) + } + + return(pl) } #' Plot a spectral bin feature @@ -48,7 +54,7 @@ binPlot <- function(dat,bin,m,dp,type,cls){ #' @description Kernal density plot of a specified spectral bin feature. #' @param x S4 object of class Binalysis #' @param bin 0.01amu bin to plot -#' @param type bin to be plotted as a total (all), class (cls) or +#' @param type bin to be plotted as a total (all), class (cls) or #' sample spectra. #' @seealso \code{\link{accurateData}}, \code{\link{binneRlyse}} #' @importFrom ggplot2 ggplot geom_density xlim xlab ggtitle theme @@ -57,144 +63,150 @@ binPlot <- function(dat,bin,m,dp,type,cls){ #' @importFrom stats as.formula #' @export -setMethod('plotBin',signature = 'Binalysis', - function(x,bin,type = c('all','cls','sample')){ - - type <- match.arg(type,c('all','cls','sample')) - - m <- bin %>% - str_replace_all('[:alpha:]','') %>% - as.numeric() - - mode <- bin %>% - str_sub(1,1) - - dat <- x %>% - spectra() %>% - .$fingerprints %>% - filter(polarity == mode & bin == m) - - if (nrow(dat) == 0) { - stop('Bin not found.',call. = FALSE) - } - - dp <- str_extract(bin,'(?<=[.])[\\w+.-]+') %>% - nchar() - - class <- cls(x) - - binPlot(dat,bin,m,dp,type,class) - } +setMethod("plotBin", + signature = "Binalysis", + function(x, bin, type = c("all", "cls", "sample")) { + type <- match.arg(type, c("all", "cls", "sample")) + + m <- bin %>% + str_replace_all("[:alpha:]", "") %>% + as.numeric() + + mode <- bin %>% + str_sub(1, 1) + + dat <- x %>% + spectra() %>% + .$fingerprints %>% + filter(polarity == mode & bin == m) + + if (nrow(dat) == 0) { + stop("Bin not found.", call. = FALSE) + } + + dp <- str_extract(bin, "(?<=[.])[\\w+.-]+") %>% + nchar() + + class <- cls(x) + + binPlot(dat, bin, m, dp, type, class) + } ) -plotChrom <- function(chromatograms,scans){ - pl <- ggplot(chromatograms, - aes(x = acquisitionNum,y = totIonCurrent)) + - geom_line() + - plotTheme() + - scale_y_continuous(expand = c(0,0)) + - labs(title = 'TIC chromatograms of infusion profile') + - facet_wrap(~polarity, - scales = 'free', - ncol = 1) + - xlab('Scan') + - ylab('Total Ion Current') - - if (length(scans) > 0) { - pl <- pl + - labs(caption = 'Red lines indcate scan range used for spectral binning.') + - geom_vline(xintercept = min(scans),colour = 'red',linetype = 2) + - geom_vline(xintercept = max(scans),colour = 'red',linetype = 2) - } - - return(pl) +plotChrom <- function(chromatograms, scans) { + pl <- ggplot( + chromatograms, + aes(x = acquisitionNum, y = totIonCurrent) + ) + + geom_line() + + plotTheme() + + scale_y_continuous(expand = c(0, 0)) + + labs(title = "TIC chromatograms of infusion profile") + + facet_wrap(~polarity, + scales = "free", + ncol = 1 + ) + + xlab("Scan") + + ylab("Total Ion Current") + + if (length(scans) > 0) { + pl <- pl + + labs(caption = "Red lines indcate scan range used for spectral binning.") + + geom_vline(xintercept = min(scans), colour = "red", linetype = 2) + + geom_vline(xintercept = max(scans), colour = "red", linetype = 2) + } + + return(pl) } #' Plot an infusion profile chromatogram #' @rdname plotChromatogram -#' @description Plot an averaged infusion profile chromatogram from a -#' Binalysis object. +#' @description Plot an averaged infusion profile chromatogram from a +#' Binalysis object. #' @param x S4 object of class \code{Binalysis} #' @seealso \code{\link{binneRlyse}} #' @importFrom ggplot2 geom_vline geom_line labs ylab #' @export -setMethod('plotChromatogram',signature = 'Binalysis', - function(x){ - - chromatograms <- x %>% - spectra() %>% - .$headers - scans <- x %>% - scans() - chromatograms <- chromatograms %>% - select(idx, - FileName, - acquisitionNum, - totIonCurrent, - polarity, - filterString) %>% - group_by(polarity,idx,filterString) %>% - mutate(acquisitionNum = seq_len(n())) %>% - group_by(polarity,acquisitionNum) %>% - summarise(totIonCurrent = mean(totIonCurrent)) - - chromatograms$polarity[chromatograms$polarity == 0] <- 'Negative mode' - chromatograms$polarity[chromatograms$polarity == 1] <- 'Positive mode' - - chromatograms %>% - plotChrom(scans) - } +setMethod("plotChromatogram", + signature = "Binalysis", + function(x) { + chromatograms <- x %>% + spectra() %>% + .$headers + scans <- x %>% + scans() + chromatograms <- chromatograms %>% + select( + idx, + FileName, + acquisitionNum, + totIonCurrent, + polarity, + filterString + ) %>% + group_by(polarity, idx, filterString) %>% + mutate(acquisitionNum = seq_len(n())) %>% + group_by(polarity, acquisitionNum) %>% + summarise(totIonCurrent = mean(totIonCurrent)) + + chromatograms$polarity[chromatograms$polarity == 0] <- "Negative mode" + chromatograms$polarity[chromatograms$polarity == 1] <- "Positive mode" + + chromatograms %>% + plotChrom(scans) + } ) #' Plot an infusion profile chromatogram from a file -#' @description Plot and averaged infusion profile from a vector of specified +#' @description Plot and averaged infusion profile from a vector of specified #' file paths. #' @param files character vector of file paths to use #' @param scans specify scans to highlight within the plot -#' @examples -#' file_paths <- system.file('example-data/1.mzML.gz',package = 'binneR') -#' -#' plotChromFromFile(file_paths, -#' scans = detectInfusionScans(file_paths)) +#' @examples +#' file_paths <- system.file("example-data/1.mzML.gz", package = "binneR") +#' +#' plotChromFromFile(file_paths, +#' scans = detectInfusionScans(file_paths) +#' ) #' @export -plotChromFromFile <- function(files, scans = c()){ - - chromatograms <- files %>% - map(~{ - openMSfile(.,backend = 'pwiz') %>% - header() %>% - select(acquisitionNum,totIonCurrent,polarity,filterString) %>% - group_by(polarity,filterString) %>% - mutate(acquisitionNum = seq_len(n())) %>% - group_by(polarity,acquisitionNum) %>% - summarise(totIonCurrent = mean(totIonCurrent)) - }) %>% - bind_rows(.id = 'FileName') %>% - group_by(polarity,acquisitionNum) %>% - summarise(totIonCurrent = mean(totIonCurrent)) %>% - as_tibble() - - chromatograms$polarity[chromatograms$polarity == 0] <- 'Negative mode' - chromatograms$polarity[chromatograms$polarity == 1] <- 'Positive mode' - - chromatograms %>% - plotChrom(scans) - +plotChromFromFile <- function(files, scans = c()) { + chromatograms <- files %>% + map(~ { + openMSfile(., backend = "pwiz") %>% + header() %>% + select(acquisitionNum, totIonCurrent, polarity, filterString) %>% + group_by(polarity, filterString) %>% + mutate(acquisitionNum = seq_len(n())) %>% + group_by(polarity, acquisitionNum) %>% + summarise(totIonCurrent = mean(totIonCurrent)) + }) %>% + bind_rows(.id = "FileName") %>% + group_by(polarity, acquisitionNum) %>% + summarise(totIonCurrent = mean(totIonCurrent)) %>% + as_tibble() + + chromatograms$polarity[chromatograms$polarity == 0] <- "Negative mode" + chromatograms$polarity[chromatograms$polarity == 1] <- "Positive mode" + + chromatograms %>% + plotChrom(scans) } #' @importFrom ggplot2 geom_segment -plotSpectrum <- function(spectra){ - ggplot(spectra,aes(x = `m/z`,xend = `m/z`,y = 0,yend = Intensity)) + - geom_segment() + - plotTheme() + - scale_y_continuous(expand = c(0,0)) + - facet_wrap(~Mode,ncol = 1,scales = 'free') + - labs(title = 'Averaged spectrum fingerprint', - x = 'm/z', - y = 'Intensity') +plotSpectrum <- function(spectra) { + ggplot(spectra, aes(x = `m/z`, xend = `m/z`, y = 0, yend = Intensity)) + + geom_segment() + + plotTheme() + + scale_y_continuous(expand = c(0, 0)) + + facet_wrap(~Mode, ncol = 1, scales = "free") + + labs( + title = "Averaged spectrum fingerprint", + x = "m/z", + y = "Intensity" + ) } #' Plot a fingerprint mass spectrum @@ -206,36 +218,44 @@ plotSpectrum <- function(spectra){ #' @importFrom dplyr summarise_all #' @export -setMethod('plotFingerprint',signature = 'Binalysis', - function(x){ - spectra <- x %>% - binnedData() %>% - map(~{ - tibble(Feature = colnames(.), - Intensity = colSums(.)) - }) %>% - bind_rows() %>% - mutate(Mode = str_sub(Feature,1,1), - `m/z` = str_remove_all(Feature,'[:alpha:]') %>% - as.numeric()) - - spectra$Mode[spectra$Mode == 'n'] <- 'Negative mode' - spectra$Mode[spectra$Mode == 'p'] <- 'Positive mode' - - plotSpectrum(spectra) - }) +setMethod("plotFingerprint", + signature = "Binalysis", + function(x) { + spectra <- x %>% + binnedData() %>% + map(~ { + tibble( + Feature = colnames(.), + Intensity = colSums(.) + ) + }) %>% + bind_rows() %>% + mutate( + Mode = str_sub(Feature, 1, 1), + `m/z` = str_remove_all(Feature, "[:alpha:]") %>% + as.numeric() + ) + + spectra$Mode[spectra$Mode == "n"] <- "Negative mode" + spectra$Mode[spectra$Mode == "p"] <- "Positive mode" + + plotSpectrum(spectra) + } +) #' @importFrom ggplot2 aes_string geom_histogram scale_y_continuous -plotHist <- function(d,x,histBins,title,xlab,ylab){ - ggplot(d,aes_string(x = x)) + - geom_histogram(fill = ggthemes::ptol_pal()(1),colour = 'black',bins = histBins) + - plotTheme() + - facet_wrap(~polarity) + - scale_y_continuous(expand = c(0,0)) + - labs(title = title, - x = xlab, - y = ylab) +plotHist <- function(d, x, histBins, title, xlab, ylab) { + ggplot(d, aes_string(x = x)) + + geom_histogram(fill = ggthemes::ptol_pal()(1), colour = "black", bins = histBins) + + plotTheme() + + facet_wrap(~polarity) + + scale_y_continuous(expand = c(0, 0)) + + labs( + title = title, + x = xlab, + y = ylab + ) } #' Plot bin purity histogram @@ -243,28 +263,27 @@ plotHist <- function(d,x,histBins,title,xlab,ylab){ #' @description Plot the bin purity distribution for a Binalysis object. #' @param x S4 object of class Binalysis #' @param histBins number of bins to use for histogram plotting -#' @seealso \code{\link{accurateData}}, \code{\link{binneRlyse}}, +#' @seealso \code{\link{accurateData}}, \code{\link{binneRlyse}}, #' \code{\link{plotCentrality}} #' @export -setMethod('plotPurity',signature = 'Binalysis',function(x,histBins = 30){ - - pur <- x %>% - accurateData() %>% - select(polarity,bin,purity) %>% - group_by(polarity,bin) %>% - summarise(purity = mean(purity),.groups = 'drop') - - pur$polarity[pur$polarity == 'n'] <- 'Negative mode' - pur$polarity[pur$polarity == 'p'] <- 'Positive mode' - - pur %>% - plotHist('purity', - histBins = histBins, - title = 'Bin Purity Distribution', - xlab = 'Purity', - ylab = 'Frequency') - +setMethod("plotPurity", signature = "Binalysis", function(x, histBins = 30) { + pur <- x %>% + accurateData() %>% + select(polarity, bin, purity) %>% + group_by(polarity, bin) %>% + summarise(purity = mean(purity), .groups = "drop") + + pur$polarity[pur$polarity == "n"] <- "Negative mode" + pur$polarity[pur$polarity == "p"] <- "Positive mode" + + pur %>% + plotHist("purity", + histBins = histBins, + title = "Bin Purity Distribution", + xlab = "Purity", + ylab = "Frequency" + ) }) #' Plot bin centrality histogram @@ -272,56 +291,57 @@ setMethod('plotPurity',signature = 'Binalysis',function(x,histBins = 30){ #' @description Plot the bin centrality distribution for a Binalysis object. #' @param x S4 object of class Binalysis #' @param histBins number of bins to use for histogram plotting -#' @seealso \code{\link{accurateData}}, \code{\link{binneRlyse}}, +#' @seealso \code{\link{accurateData}}, \code{\link{binneRlyse}}, #' \code{\link{plotPurity}} #' @export -setMethod('plotCentrality',signature = 'Binalysis',function(x,histBins = 30){ - - pur <- x %>% - accurateData() %>% - select(polarity,bin,centrality) %>% - group_by(polarity,bin) %>% - summarise(centrality = mean(centrality),.groups = 'drop') - - pur$polarity[pur$polarity == 'n'] <- 'Negative mode' - pur$polarity[pur$polarity == 'p'] <- 'Positive mode' - - pur %>% - plotHist('centrality', - histBins = histBins, - title = 'Bin Centrality Distribution', - xlab = 'Centrality', - ylab = 'Frequency') - +setMethod("plotCentrality", signature = "Binalysis", function(x, histBins = 30) { + pur <- x %>% + accurateData() %>% + select(polarity, bin, centrality) %>% + group_by(polarity, bin) %>% + summarise(centrality = mean(centrality), .groups = "drop") + + pur$polarity[pur$polarity == "n"] <- "Negative mode" + pur$polarity[pur$polarity == "p"] <- "Positive mode" + + pur %>% + plotHist("centrality", + histBins = histBins, + title = "Bin Centrality Distribution", + xlab = "Centrality", + ylab = "Frequency" + ) }) #' @importFrom ggplot2 geom_point guide_legend guides geom_hline -TICplot <- function(TICdat,TICmedian,by,colour){ - pl <- ggplot(TICdat,aes(x = Index,y = TIC,fill = Colour)) + - geom_hline(data = TICmedian,aes(yintercept = Median)) + - geom_hline(data = TICmedian,aes(yintercept = Q1),linetype = 2) + - geom_hline(data = TICmedian,aes(yintercept = Q3),linetype = 2) + - geom_hline(data = TICmedian,aes(yintercept = LowerOut),linetype = 3) + - geom_hline(data = TICmedian,aes(yintercept = UpperOut),linetype = 3) + - geom_point(shape = 21) + - plotTheme() + - facet_wrap(~Mode) + - labs(title = 'Sample TICs', - caption = 'The solid line shows the median TIC across the sample set. -The dashed line shows the inter-quartile range (IQR) and -the dotted line shows the outlier boundary (1.5 X IQR).') + - ylab('Total Ion Count') + - xlab(by) + - guides(fill = guide_legend(title = colour)) - - if (length(unique(TICdat$Colour)) <= 12) { - pl <- pl + - scale_fill_ptol() - } - - return(pl) +TICplot <- function(TICdat, TICmedian, by, colour) { + pl <- ggplot(TICdat, aes(x = Index, y = TIC, fill = Colour)) + + geom_hline(data = TICmedian, aes(yintercept = Median)) + + geom_hline(data = TICmedian, aes(yintercept = Q1), linetype = 2) + + geom_hline(data = TICmedian, aes(yintercept = Q3), linetype = 2) + + geom_hline(data = TICmedian, aes(yintercept = LowerOut), linetype = 3) + + geom_hline(data = TICmedian, aes(yintercept = UpperOut), linetype = 3) + + geom_point(shape = 21) + + plotTheme() + + facet_wrap(~Mode) + + labs( + title = "Sample TICs", + caption = "The solid line shows the median TIC across the sample set. +The dashed line shows the inter-quartile range (IQR) and +the dotted line shows the outlier boundary (1.5 X IQR)." + ) + + ylab("Total Ion Count") + + xlab(by) + + guides(fill = guide_legend(title = colour)) + + if (length(unique(TICdat$Colour)) <= 12) { + pl <- pl + + scale_fill_ptol() + } + + return(pl) } #' Plot sample total ion counts @@ -329,7 +349,7 @@ the dotted line shows the outlier boundary (1.5 X IQR).') + #' @description Plot sample total ion counts. #' @param x S4 object of class Binalysis #' @param by info column to plot against -#' @param colour info column to provide colour labels +#' @param colour info column to provide colour labels #' @seealso \code{\link{binneRlyse}} #' @importFrom stats IQR median #' @importFrom dplyr bind_cols @@ -338,34 +358,39 @@ the dotted line shows the outlier boundary (1.5 X IQR).') + #' @importFrom tidyr gather #' @export -setMethod('plotTIC',signature = 'Binalysis', - function(x, by = 'injOrder', colour = 'block'){ - rawInfo <- x %>% - sampleInfo() - - TICdat <- x %>% - binnedData %>% - map(rowSums) %>% - bind_cols() %>% - rowid_to_column(var = 'Sample') %>% - mutate(Colour = rawInfo[,colour] %>% unlist() %>% factor(), - Index = rawInfo[,by] %>% unlist()) %>% - gather('Mode','TIC',-Sample,-Colour,-Index) - - TICdat$Mode[TICdat$Mode == 'n'] <- 'Negative mode' - TICdat$Mode[TICdat$Mode == 'p'] <- 'Positive mode' - - TICmedian <- TICdat %>% - group_by(Mode) %>% - summarise(Median = median(TIC), - Q1 = Median - IQR(TIC) / 2, - Q3 = Median + IQR(TIC) / 2, - LowerOut = Q1 - IQR(TIC) * 1.5, - UpperOut = Q3 + IQR(TIC) * 1.5) - - TICmedian[TICmedian < 0] <- 0 - - pl <- TICplot(TICdat,TICmedian,by,colour) - return(pl) - } +setMethod("plotTIC", + signature = "Binalysis", + function(x, by = "injOrder", colour = "block") { + rawInfo <- x %>% + sampleInfo() + + TICdat <- x %>% + binnedData() %>% + map(rowSums) %>% + bind_cols() %>% + rowid_to_column(var = "Sample") %>% + mutate( + Colour = rawInfo[, colour] %>% unlist() %>% factor(), + Index = rawInfo[, by] %>% unlist() + ) %>% + gather("Mode", "TIC", -Sample, -Colour, -Index) + + TICdat$Mode[TICdat$Mode == "n"] <- "Negative mode" + TICdat$Mode[TICdat$Mode == "p"] <- "Positive mode" + + TICmedian <- TICdat %>% + group_by(Mode) %>% + summarise( + Median = median(TIC), + Q1 = Median - IQR(TIC) / 2, + Q3 = Median + IQR(TIC) / 2, + LowerOut = Q1 - IQR(TIC) * 1.5, + UpperOut = Q3 + IQR(TIC) * 1.5 + ) + + TICmedian[TICmedian < 0] <- 0 + + pl <- TICplot(TICdat, TICmedian, by, colour) + return(pl) + } ) diff --git a/R/readBinningParameters.R b/R/readBinningParameters.R index 674e8e0..847bd0f 100644 --- a/R/readBinningParameters.R +++ b/R/readBinningParameters.R @@ -3,37 +3,38 @@ #' @param file file path #' @return S4 object of class BinParameters. #' @seealso \code{\link{BinParameters-class}} -#' @examples -#' file <- system.file('binning_parameters.yml',package = 'binneR') +#' @examples +#' file <- system.file("binning_parameters.yml", package = "binneR") #' parameters <- readBinningParameters(file) #' @importFrom yaml read_yaml #' @importFrom stringr str_detect regex #' @export -readBinningParameters <- function(file){ - - parameters <- read_yaml(file) - - if ('scans' %in% names(parameters)) { - parameters$scans <- checkScans(parameters$scans) - } - - bp <- do.call(binParameters,parameters) - - return(bp) +readBinningParameters <- function(file) { + parameters <- read_yaml(file) + + if ("scans" %in% names(parameters)) { + parameters$scans <- checkScans(parameters$scans) + } + + bp <- do.call(binParameters, parameters) + + return(bp) } -checkScans <- function(scan_range){ - if ((!str_detect(scan_range,regex('c\\((.*?)\\)'))) & - (!str_detect(scan_range,regex('[0-9]+:[0-9]+')))){ - - stop(str_c('Field "scans" should be a either a numeric vector ', - '(eg. "c(1,3)") or numeric range (eg. "5:12").'), - call. = FALSE) - - } - - scan_range <- eval(parse(text = scan_range)) - - return(scan_range) -} +checkScans <- function(scan_range) { + if ((!str_detect(scan_range, regex("c\\((.*?)\\)"))) & + (!str_detect(scan_range, regex("[0-9]+:[0-9]+")))) { + stop( + str_c( + 'Field "scans" should be a either a numeric vector ', + '(eg. "c(1,3)") or numeric range (eg. "5:12").' + ), + call. = FALSE + ) + } + + scan_range <- eval(parse(text = scan_range)) + + return(scan_range) +} diff --git a/R/readFiles.R b/R/readFiles.R index b17853c..0a86de6 100644 --- a/R/readFiles.R +++ b/R/readFiles.R @@ -2,50 +2,50 @@ #' @name readFiles #' @description Apply spectral binning on multiple data files. #' @param files A vector of converted data file paths -#' @param dp An integer denoting the number of decimal places for spectral +#' @param dp An integer denoting the number of decimal places for spectral #' binning -#' @param scans A vector of scan numbers that should be retrieved -#' @return A list containing peak lists for the relevant scans with combined -#' scan ranges for each present mode in the data file. -#' @details -#' Parallel processing is managed by the \code{future} package. This can -#' be specified using the \code{plan() function}. See the example below +#' @param scans A vector of scan numbers that should be retrieved +#' @return A list containing peak lists for the relevant scans with combined +#' scan ranges for each present mode in the data file. +#' @details +#' Parallel processing is managed by the \code{future} package. This can +#' be specified using the \code{plan() function}. See the example below #' and \code{?future::plan} for details on how this can be specified. -#' @examples +#' @examples #' ## Example file path -#' file_paths <- system.file('example-data/1.mzML.gz',package = 'binneR') -#' +#' file_paths <- system.file("example-data/1.mzML.gz", package = "binneR") +#' #' ## Optionally declare parallel processing backend #' # plan(future::multisession,workers = 2) -#' +#' #' ## Process example file #' res <- readFiles(file_paths, -#' dp = 2, -#' scans = detectInfusionScans(file_paths)) +#' dp = 2, +#' scans = detectInfusionScans(file_paths) +#' ) #' #' @importFrom furrr future_map -#' @importFrom dplyr bind_rows +#' @importFrom dplyr bind_rows #' @importFrom tidyr spread #' @export readFiles <- function(files, - dp, - scans){ - - pl <- future_map(files, - sampProcess, - scans = scans, - dp = dp) %>% - set_names(files) %>% - bind_rows(.id = 'file') %>% - mutate(mz = str_c(polarity,mz)) %>% - split(.$polarity) %>% - future_map(~{ - - .x %>% - spread(key = 'mz',value = 'intensity',fill = 0) %>% - ungroup() %>% - select(-file,-polarity) - }) - return(pl) -} + dp, + scans) { + pl <- future_map(files, + sampProcess, + scans = scans, + dp = dp + ) %>% + set_names(files) %>% + bind_rows(.id = "file") %>% + mutate(mz = str_c(polarity, mz)) %>% + split(.$polarity) %>% + future_map(~ { + .x %>% + spread(key = "mz", value = "intensity", fill = 0) %>% + ungroup() %>% + select(-file, -polarity) + }) + return(pl) +} diff --git a/R/show-method.R b/R/show-method.R index 50922b0..4f49e85 100644 --- a/R/show-method.R +++ b/R/show-method.R @@ -7,52 +7,65 @@ #' @importFrom purrr map_chr #' @export -setMethod('show',signature = 'BinParameters', - function(object){ - cat('\n') - cat('Scans:', - paste(min(scans(object)),':', - max(scans(object)),sep = ''), - '\n') - if (length(cls(object)) > 0) { - cat('Class:',cls(object),'\n') - } - }) +setMethod("show", + signature = "BinParameters", + function(object) { + cat("\n") + cat( + "Scans:", + paste(min(scans(object)), ":", + max(scans(object)), + sep = "" + ), + "\n" + ) + if (length(cls(object)) > 0) { + cat("Class:", cls(object), "\n") + } + } +) #' @rdname show #' @export -setMethod('show',signature = 'Binalysis', - function(object){ - - cat('\n') - cat(str_c(blue('binneR'),red(str_c('v',version(object))),sep = ' ')) - cat('\n') - cat(creationDate(object)) - cat('\n') - - cat('Samples:',length(filePaths(object))) - cat('\n') - - if (length(binnedData(object)) > 0) { - var <- lapply(binnedData(object),ncol) - var <- map_chr(names(var),~{ - str_c(.,': ',var[[.]],' features') - }) %>% - str_c(collapse = '\n') - cat(var,sep = '\n') - } - if (nrow(accurateData(object)) > 0) { - cat('Average Purity:', - mean(accurateData(object)$purity, - na.rm = TRUE) %>% - round(3), - '\n') - cat('Average Centrality:', - mean(accurateData(object)$centrality, - na.rm = TRUE) %>% - round(3), - '\n') - } - cat('\n') - }) +setMethod("show", + signature = "Binalysis", + function(object) { + cat("\n") + cat(str_c(blue("binneR"), red(str_c("v", version(object))), sep = " ")) + cat("\n") + cat(creationDate(object)) + cat("\n") + + cat("Samples:", length(filePaths(object))) + cat("\n") + + if (length(binnedData(object)) > 0) { + var <- lapply(binnedData(object), ncol) + var <- map_chr(names(var), ~ { + str_c(., ": ", var[[.]], " features") + }) %>% + str_c(collapse = "\n") + cat(var, sep = "\n") + } + if (nrow(accurateData(object)) > 0) { + cat( + "Average Purity:", + mean(accurateData(object)$purity, + na.rm = TRUE + ) %>% + round(3), + "\n" + ) + cat( + "Average Centrality:", + mean(accurateData(object)$centrality, + na.rm = TRUE + ) %>% + round(3), + "\n" + ) + } + cat("\n") + } +) diff --git a/R/singleSample.R b/R/singleSample.R index b80d067..caeb933 100644 --- a/R/singleSample.R +++ b/R/singleSample.R @@ -5,85 +5,94 @@ #' @param verbose show console output #' @seealso \code{\link{Binalysis-class}} #' @return S4 object of class Binalysis. -#' @details -#' Parallel processing is managed by the \code{future} package. This can -#' be specified using the \code{plan() function}. See the example below +#' @details +#' Parallel processing is managed by the \code{future} package. This can +#' be specified using the \code{plan() function}. See the example below #' and \code{?future::plan} for details on how this can be specified. -#' -#' By default, spectral binning is performed at the recommended 2 decimal -#' places. This can be altered by setting either the global option +#' +#' By default, spectral binning is performed at the recommended 2 decimal +#' places. This can be altered by setting either the global option #' \code{binner_dp} or the environment variable \code{BINNER_DP}. -#' -#' @examples +#' +#' @examples #' \dontrun{ -#' file_path <- metaboData::filePaths('FIE-HRMS','BdistachyonTechnical')[1] -#' +#' file_path <- metaboData::filePaths("FIE-HRMS", "BdistachyonTechnical")[1] +#' #' ## Optionally declare parallel processing backend #' # plan(future::multisession,workers = 2) -#' +#' #' bd <- singleSample(file_path) #' } #' @importFrom utils capture.output #' @export -singleSample <- function(file, - class = NA, - verbose = TRUE){ - - if (length(file) > 1) { - stop('Only suitable for a single file!') - } - - if (length(class) > 1) { - stop('Only a single class can be affiliated!') - } - - - parameters <- detectParameters(file) - - i <- tibble(fileOrder = seq_len(length(scans(parameters))), - fileName = basename(file), - injOrder = seq_len(length(scans(parameters))), - name = str_c('Scan ',scans(parameters)), - class = class, - batch = 1, - block = 1) - - x <- new('Binalysis', - parameters, - creation_date = date(), - file_paths = file, - sample_info = i) - - if (!is.na(class)) { - cls(x) <- class - } - - if (verbose == TRUE) { - startTime <- proc.time() - message(str_c(blue('binneR'),red(str_c('v',version(x))),creationDate(x),sep = ' ')) - message(str_c(str_c(rep('_',console_width()),collapse = ''),sep = '')) - params <- parameters %>% - {capture.output(print(.))} %>% - {.[-1]} %>% - str_c(collapse = '\n') - message(params) - message(str_c(str_c(rep('_',console_width()),collapse = ''),'\n',sep = '')) - } - - x <- x %>% - ss(verbose = verbose) - - if (verbose == TRUE) { - message() - endTime <- proc.time() - ellapsed <- {endTime - startTime} %>% - .[3] %>% - round(1) %>% - seconds_to_period() %>% - str_c('[',.,']') - message(str_c(green('Completed! '),ellapsed,sep = '')) - } - - return(x) +singleSample <- function(file, + class = NA, + verbose = TRUE) { + if (length(file) > 1) { + stop("Only suitable for a single file!") + } + + if (length(class) > 1) { + stop("Only a single class can be affiliated!") + } + + + parameters <- detectParameters(file) + + i <- tibble( + fileOrder = seq_len(length(scans(parameters))), + fileName = basename(file), + injOrder = seq_len(length(scans(parameters))), + name = str_c("Scan ", scans(parameters)), + class = class, + batch = 1, + block = 1 + ) + + x <- new("Binalysis", + parameters, + creation_date = date(), + file_paths = file, + sample_info = i + ) + + if (!is.na(class)) { + cls(x) <- class + } + + if (verbose == TRUE) { + startTime <- proc.time() + message(str_c(blue("binneR"), red(str_c("v", version(x))), creationDate(x), sep = " ")) + message(str_c(str_c(rep("_", console_width()), collapse = ""), sep = "")) + params <- parameters %>% + { + capture.output(print(.)) + } %>% + { + .[-1] + } %>% + str_c(collapse = "\n") + message(params) + message(str_c(str_c(rep("_", console_width()), collapse = ""), "\n", sep = "")) + } + + x <- x %>% + ss(verbose = verbose) + + if (verbose == TRUE) { + message() + endTime <- proc.time() + ellapsed <- + { + endTime - startTime + } %>% + .[3] %>% + round(1) %>% + seconds_to_period() %>% + str_c("[", ., "]") + message(str_c(green("Completed! "), ellapsed, sep = "")) + } + + return(x) } diff --git a/R/spectralBinning-method.R b/R/spectralBinning-method.R index de00ffa..06ac034 100644 --- a/R/spectralBinning-method.R +++ b/R/spectralBinning-method.R @@ -1,4 +1,4 @@ -#' @importFrom dplyr group_by summarise arrange inner_join select +#' @importFrom dplyr group_by summarise arrange inner_join select #' @importFrom dplyr left_join filter distinct rename vars all_of #' @importFrom dplyr group_by_at #' @importFrom tibble tibble deframe @@ -6,188 +6,202 @@ #' @importFrom tidyr spread #' @importFrom stringr str_c -setMethod("spectralBinning", - signature = "Binalysis", - function(x,verbose = TRUE){ - - info <- sampleInfo(x) - files <- filePaths(x) - - if (isTRUE(verbose)) message('Reading raw data') - pks <- getPeaks(files,scans(x)) - - if (isTRUE(verbose)) message('Gathering bins') - bin_list <- calcBinList(pks) - - if (isTRUE(verbose)) message('Removing single scan events') - pks <- pks %>% - inner_join(bin_list,by = c("polarity", "bin")) - - if (length(cls(x)) > 0) { - cls <- cls(x) - classes <- info %>% - select(fileName,all_of(cls(x))) - } else { - cls <- 'class' - classes <- info %>% - select(fileName) %>% - mutate(class = NA) - } - - classes <- classes %>% - rowid_to_column(var = 'idx') - - n_scans <- nScans(x) - - if (isTRUE(verbose)) message('Averaging intensities across scans') - binned_data <- pks %>% - split(.$idx) %>% - future_map(~{ - .x %>% - group_by(idx,fileName,polarity,bin,scan) %>% - summarise(intensity = sum(intensity), - .groups = 'drop') %>% - group_by(idx,fileName,polarity,bin) %>% - summarise(intensity = sum(intensity)/n_scans, - .groups = 'drop') - }) %>% - bind_rows() - - pks <- pks %>% - left_join(classes,by = c("idx",'fileName')) %>% - split(.$idx) %>% - future_map(~{ - .x %>% - group_by_at( - vars( - all_of(c('idx', - 'fileName', - cls, - 'polarity','mz','bin')))) %>% - summarise(intensity = sum(intensity)/n_scans, - .groups = 'drop') - }) %>% - bind_rows() - - if (isTRUE(verbose)) message('Calculating bin metrics') - bin_measures <- calcBinMeasures(pks, - cls) - - if (isTRUE(verbose)) message('Calculating accurate m/z') - accurate_mz <- pks %>% - group_by_at(vars(all_of(c('idx','fileName',cls,'polarity','bin')))) %>% - filter(intensity == max(intensity)) %>% - arrange(bin) %>% - left_join(bin_measures,by = c('idx','fileName',cls, "polarity", "bin")) %>% - ungroup() - - mz <- accurate_mz %>% - select(polarity,bin,mz,intensity) %>% - group_by(polarity,bin) %>% - filter(intensity == max(intensity)) %>% - select(-intensity) %>% - mutate(mz = str_c(polarity,mz)) %>% - ungroup() %>% - distinct() - - if (isTRUE(verbose)) message('Building intensity matrix') - binned_data <- binned_data %>% - left_join(mz,by = c("polarity", "bin")) %>% - select(-bin) %>% - split(.$polarity) %>% - future_map(~{ - .x %>% - spread(mz,intensity,fill = 0) %>% - select(-idx,-fileName,-polarity) - }) - - if (isTRUE(verbose)) message('Gathering file headers') - headers <- getHeaders(files) - - binnedData(x) <- binned_data - accurateData(x) <- accurate_mz - spectra(x) <- list(headers = headers, - fingerprints = pks) - - return(x) - } +setMethod("spectralBinning", + signature = "Binalysis", + function(x, verbose = TRUE) { + info <- sampleInfo(x) + files <- filePaths(x) + + if (isTRUE(verbose)) message("Reading raw data") + pks <- getPeaks(files, scans(x)) + + if (isTRUE(verbose)) message("Gathering bins") + bin_list <- calcBinList(pks) + + if (isTRUE(verbose)) message("Removing single scan events") + pks <- pks %>% + inner_join(bin_list, by = c("polarity", "bin")) + + if (length(cls(x)) > 0) { + cls <- cls(x) + classes <- info %>% + select(fileName, all_of(cls(x))) + } else { + cls <- "class" + classes <- info %>% + select(fileName) %>% + mutate(class = NA) + } + + classes <- classes %>% + rowid_to_column(var = "idx") + + n_scans <- nScans(x) + + if (isTRUE(verbose)) message("Averaging intensities across scans") + binned_data <- pks %>% + split(.$idx) %>% + future_map(~ { + .x %>% + group_by(idx, fileName, polarity, bin, scan) %>% + summarise( + intensity = sum(intensity), + .groups = "drop" + ) %>% + group_by(idx, fileName, polarity, bin) %>% + summarise( + intensity = sum(intensity) / n_scans, + .groups = "drop" + ) + }) %>% + bind_rows() + + pks <- pks %>% + left_join(classes, by = c("idx", "fileName")) %>% + split(.$idx) %>% + future_map(~ { + .x %>% + group_by_at( + vars( + all_of(c( + "idx", + "fileName", + cls, + "polarity", "mz", "bin" + )) + ) + ) %>% + summarise( + intensity = sum(intensity) / n_scans, + .groups = "drop" + ) + }) %>% + bind_rows() + + if (isTRUE(verbose)) message("Calculating bin metrics") + bin_measures <- calcBinMeasures( + pks, + cls + ) + + if (isTRUE(verbose)) message("Calculating accurate m/z") + accurate_mz <- pks %>% + group_by_at(vars(all_of(c("idx", "fileName", cls, "polarity", "bin")))) %>% + filter(intensity == max(intensity)) %>% + arrange(bin) %>% + left_join(bin_measures, by = c("idx", "fileName", cls, "polarity", "bin")) %>% + ungroup() + + mz <- accurate_mz %>% + select(polarity, bin, mz, intensity) %>% + group_by(polarity, bin) %>% + filter(intensity == max(intensity)) %>% + select(-intensity) %>% + mutate(mz = str_c(polarity, mz)) %>% + ungroup() %>% + distinct() + + if (isTRUE(verbose)) message("Building intensity matrix") + binned_data <- binned_data %>% + left_join(mz, by = c("polarity", "bin")) %>% + select(-bin) %>% + split(.$polarity) %>% + future_map(~ { + .x %>% + spread(mz, intensity, fill = 0) %>% + select(-idx, -fileName, -polarity) + }) + + if (isTRUE(verbose)) message("Gathering file headers") + headers <- getHeaders(files) + + binnedData(x) <- binned_data + accurateData(x) <- accurate_mz + spectra(x) <- list( + headers = headers, + fingerprints = pks + ) + + return(x) + } ) -setMethod('ss',signature = 'Binalysis', - function(x,verbose){ - - file <- filePaths(x) - class <- cls(x) - - if (length(class) == 0){ - class <- NA - } - - if (isTRUE(verbose)) message('Reading raw data') - pks <- getPeaks(file,scans(x)) %>% - mutate(fileName = str_c('Scan ',scan)) - - if (isTRUE(verbose)) message('Calculating bins') - bin_list <- calcBinList(pks) - - if (isTRUE(verbose)) message('Removing single scan events') - pks <- pks %>% - inner_join(bin_list,by = c("polarity", "bin")) %>% - mutate(class = class) - - if (isTRUE(verbose)) message('Calculating intensity totals') - binned_data <- pks %>% - split(.$fileName) %>% - future_map(~{ - .x %>% - group_by(fileName,polarity,bin) %>% - summarise(intensity = sum(intensity)) - }) %>% - bind_rows() - - if (isTRUE(verbose)) message('Calculating bin measures') - bin_measures <- calcBinMeasures(pks, - 'class') - - if (isTRUE(verbose)) message('Calculating accurate m/z') - accurate_mz <- pks %>% - group_by(fileName,scan,class,polarity,bin) %>% - filter(intensity == max(intensity)) %>% - arrange(bin) - - accurate_mz <- accurate_mz %>% - left_join(bin_measures,by = c('fileName',"class", "polarity", "bin")) %>% - ungroup() %>% - select(scan,polarity,bin,mz,intensity,purity,centrality) - - mz <- accurate_mz %>% - group_by(polarity,bin) %>% - filter(intensity == max(intensity)) %>% - select(polarity,bin,mz) - - if (isTRUE(verbose)) message('Building intensity matrix') - binned_data <- binned_data %>% - left_join(mz,by = c("polarity", "bin")) %>% - select(-bin) %>% - ungroup() %>% - split(.$polarity) %>% - future_map(~{ - .x %>% - ungroup() %>% - mutate(mz = str_c(polarity,mz)) %>% - spread(mz,intensity,fill = 0) %>% - select(-fileName,-polarity) - }) - - headers <- getHeaders(file) - - cls(x) <- 'scan' - binnedData(x) <- binned_data - accurateData(x) <- accurate_mz %>% - ungroup() - spectra(x) <- list(headers = headers, fingerprints = pks %>% - ungroup() - ) - return(x) - } +setMethod("ss", + signature = "Binalysis", + function(x, verbose) { + file <- filePaths(x) + class <- cls(x) + + if (length(class) == 0) { + class <- NA + } + + if (isTRUE(verbose)) message("Reading raw data") + pks <- getPeaks(file, scans(x)) %>% + mutate(fileName = str_c("Scan ", scan)) + + if (isTRUE(verbose)) message("Calculating bins") + bin_list <- calcBinList(pks) + + if (isTRUE(verbose)) message("Removing single scan events") + pks <- pks %>% + inner_join(bin_list, by = c("polarity", "bin")) %>% + mutate(class = class) + + if (isTRUE(verbose)) message("Calculating intensity totals") + binned_data <- pks %>% + split(.$fileName) %>% + future_map(~ { + .x %>% + group_by(fileName, polarity, bin) %>% + summarise(intensity = sum(intensity)) + }) %>% + bind_rows() + + if (isTRUE(verbose)) message("Calculating bin measures") + bin_measures <- calcBinMeasures( + pks, + "class" + ) + + if (isTRUE(verbose)) message("Calculating accurate m/z") + accurate_mz <- pks %>% + group_by(fileName, scan, class, polarity, bin) %>% + filter(intensity == max(intensity)) %>% + arrange(bin) + + accurate_mz <- accurate_mz %>% + left_join(bin_measures, by = c("fileName", "class", "polarity", "bin")) %>% + ungroup() %>% + select(scan, polarity, bin, mz, intensity, purity, centrality) + + mz <- accurate_mz %>% + group_by(polarity, bin) %>% + filter(intensity == max(intensity)) %>% + select(polarity, bin, mz) + + if (isTRUE(verbose)) message("Building intensity matrix") + binned_data <- binned_data %>% + left_join(mz, by = c("polarity", "bin")) %>% + select(-bin) %>% + ungroup() %>% + split(.$polarity) %>% + future_map(~ { + .x %>% + ungroup() %>% + mutate(mz = str_c(polarity, mz)) %>% + spread(mz, intensity, fill = 0) %>% + select(-fileName, -polarity) + }) + + headers <- getHeaders(file) + + cls(x) <- "scan" + binnedData(x) <- binned_data + accurateData(x) <- accurate_mz %>% + ungroup() + spectra(x) <- list(headers = headers, fingerprints = pks %>% + ungroup()) + return(x) + } )