From c6bd03c31a040ec4b1a2df4ea66a09e0b25ccecd Mon Sep 17 00:00:00 2001 From: Philippine Louail <127301965+philouail@users.noreply.github.com> Date: Wed, 4 Dec 2024 16:43:59 +0100 Subject: [PATCH 1/5] Addition of cbind2 --- DESCRIPTION | 2 +- NAMESPACE | 2 ++ NEWS.md | 5 +++ R/MsBackend.R | 31 ++++++++++++++++++- R/MsBackendDataFrame.R | 17 ++++++++++ R/MsBackendMemory.R | 17 ++++++++++ R/Spectra.R | 30 +++++++++++++++++- .../test_MsBackend/test_spectra_subsetting.R | 13 ++++++++ man/MsBackend.Rd | 17 ++++++++-- man/combineSpectra.Rd | 22 ++++++++++++- man/hidden_aliases.Rd | 6 ++++ tests/testthat/test_MsBackendDataFrame.R | 16 ++++++++++ tests/testthat/test_MsBackendMemory.R | 16 ++++++++++ 13 files changed, 187 insertions(+), 7 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2057828e..b1a33c7d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Spectra Title: Spectra Infrastructure for Mass Spectrometry Data -Version: 1.17.1 +Version: 1.17.2 Description: The Spectra package defines an efficient infrastructure for storing and handling mass spectrometry spectra and functionality to subset, process, visualize and compare spectra data. 
It provides different diff --git a/NAMESPACE b/NAMESPACE index 8d8185f9..d70ef776 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -75,6 +75,7 @@ exportMethods(backendParallelFactor) exportMethods(backendRequiredSpectraVariables) exportMethods(bin) exportMethods(c) +exportMethods(cbind2) exportMethods(centroided) exportMethods(collisionEnergy) exportMethods(combinePeaks) @@ -309,4 +310,5 @@ importMethodsFrom(S4Vectors,extractROWS) importMethodsFrom(S4Vectors,isEmpty) importMethodsFrom(S4Vectors,lapply) importMethodsFrom(S4Vectors,split) +importMethodsFrom(methods,cbind2) importMethodsFrom(methods,show) diff --git a/NEWS.md b/NEWS.md index f498b62d..cd6c6579 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,10 @@ # Spectra 1.17 +## Change in 1.17.2 + +- Add `cbind2()` method to easily add multiple `spectraVariables` and their + content to the `spectraData` of a `Spectra` object. + ## Changes in 1.17.1 - Refactor `containsMz()` to support chunk-wise processing. diff --git a/R/MsBackend.R b/R/MsBackend.R index 186f26c6..8a72e416 100644 --- a/R/MsBackend.R +++ b/R/MsBackend.R @@ -182,10 +182,14 @@ #' @param value replacement value for `<-` methods. See individual #' method description or expected data type. #' -#' @param values for `filterValues()`: A `numeric` vector that define the +#' @param values For `filterValues()`: A `numeric` vector that define the #' values to filter the `object`. `values` needs to be of same length than #' parameter `spectraVariables` and in the same order. #' +#' @param y For `cbind2()`: A `data.frame` or `DataFrame` with the +#' spectra variables to be added to the backend. Need to be of the same +#' length as the number of spectra in the backend. +#' #' @param x Object extending `MsBackend`. #' #' @param ... Additional arguments. @@ -313,6 +317,11 @@ #' `dropNaSpectraVariables()` might still show columns containing `NA` values #' for *core* spectra variables. 
#' +#' - `cbind2()`: allows to appends multiple spectra variables to the backend at +#' once. It does so *blindly* and is therefore at the risk of the user. For a +#' more controlled way of adding spectra variables, the `joinSpectraData()` +#' should be used. +#' #' - `centroided()`, `centroided<-`: gets or sets the centroiding #' information of the spectra. `centroided()` returns a `logical` #' vector of length equal to the number of spectra with `TRUE` if a @@ -1022,6 +1031,26 @@ setMethod("peaksVariables", "MsBackend", function(object) { c("mz", "intensity") }) + +setClassUnion("dataframeOrDataFrameOrmatrix", c("data.frame", "DataFrame", "matrix")) +#' @exportMethod cbind2 +#' +#' @importMethodsFrom methods cbind2 +#' +#' @rdname MsBackend +setMethod("cbind2", signature = c("MsBackend", "dataframeOrDataFrameOrmatrix"), + function(x, y = data.frame(), ...) { + if (is(y, "matrix")) + y <- as.data.frame(y) + if (nrow(y) != length(x)) + stop("Length of 'y' does not match the number of spectra in 'x'") + for (i in colnames(y)) { + x[[i]] <- y[, i] + } + x +}) + + #' @exportMethod centroided #' #' @aliases centroided<-,MsBackend-method diff --git a/R/MsBackendDataFrame.R b/R/MsBackendDataFrame.R index 6959d771..1fe4f872 100644 --- a/R/MsBackendDataFrame.R +++ b/R/MsBackendDataFrame.R @@ -567,6 +567,23 @@ setMethod("[", "MsBackendDataFrame", function(x, i, j, ..., drop = FALSE) { .subset_backend_data_frame(x, i) }) +setClassUnion("dataframeOrDataFrameOrmatrix", + c("data.frame", "DataFrame", "matrix")) +#' @importMethodsFrom methods cbind2 +#' +#' @rdname hidden_aliases +setMethod("cbind2", signature = c("MsBackendDataFrame", + "dataframeOrDataFrameOrmatrix"), + function(x, y = data.frame(), ...) 
{ + if (is(y, "matrix")) + y <- as.data.frame(y) + if (nrow(y) != length(x)) + stop("Length of 'y' does not match the number of spectra in 'x'") + x@spectraData <- cbind(x@spectraData, y) + validObject(x) + x + }) + #' @rdname hidden_aliases setMethod("split", "MsBackendDataFrame", function(x, f, drop = FALSE, ...) { if (!is.factor(f)) diff --git a/R/MsBackendMemory.R b/R/MsBackendMemory.R index 4bde69ac..8c2b31c4 100644 --- a/R/MsBackendMemory.R +++ b/R/MsBackendMemory.R @@ -670,6 +670,23 @@ setMethod("[", "MsBackendMemory", function(x, i, j, ..., drop = FALSE) { .df_subset(x, i) }) +setClassUnion("dataframeOrDataFrameOrmatrix", + c("data.frame", "DataFrame", "matrix")) +#' @importMethodsFrom methods cbind2 +#' +#' @rdname hidden_aliases +setMethod("cbind2", signature = c("MsBackendMemory", + "dataframeOrDataFrameOrmatrix"), + function(x, y = data.frame(), ...) { + if (is(y, "matrix")) + y <- as.data.frame(y) + if (nrow(y) != length(x)) + stop("Length of 'y' does not match the number of spectra in 'x'") + x@spectraData <- cbind(x@spectraData, y) + validObject(x) + x + }) + #' @rdname hidden_aliases setMethod("split", "MsBackendMemory", function(x, f, drop = FALSE, ...) { if (!is.factor(f)) diff --git a/R/Spectra.R b/R/Spectra.R index 73520422..69661c3a 100644 --- a/R/Spectra.R +++ b/R/Spectra.R @@ -1447,6 +1447,7 @@ setReplaceMethod("[[", "Spectra", function(x, i, j, ..., value) { #' @aliases combineSpectra #' @aliases split #' @aliases joinSpectraData +#' @aliases cbind2 #' #' @description #' @@ -1463,6 +1464,15 @@ setReplaceMethod("[[", "Spectra", function(x, i, j, ..., value) { #' function and to eventually (if needed) apply the processing queue using #' the [applyProcessing()] function. #' +#' - `cbind2()`: Appends multiple spectra variables from a `data.frame`, +#' `DataFrame` or `matrix` to the `Spectra` object at once. It does so +#' *blindly* (e.g. do not check rownames compatibility) and is therefore at +#' the risk of the user. 
For a more controlled way of adding spectra +#' variables, the `joinSpectraData()` should be used. It will return a +#' `Spectra` object with the appended spectra variables. `cbind2()` does +#' check however that the number of rows of the `data.frame` or `DataFrame` +#' matches the number of spectra in the `Spectra` object. +#' #' - `combineSpectra()`: combines sets of spectra (defined with parameter `f`) #' into a single spectrum per set aggregating their MS data (i.e. their #' *peaks data* matrices with the *m/z* and intensity values of their @@ -1507,6 +1517,8 @@ setReplaceMethod("[[", "Spectra", function(x, i, j, ..., value) { #' should be explored and ideally be removed using for #' `QFeatures::reduceDataFrame()`, `PMS::reducePSMs()` or similar #' functions. +#' For a more general function that allows to append `data.frame`, +#' `DataFrame` and `matrix` see `cbind2()`. #' #' - `split()`: splits the `Spectra` object based on parameter `f` into a `list` #' of `Spectra` objects. @@ -1543,7 +1555,9 @@ setReplaceMethod("[[", "Spectra", function(x, i, j, ..., value) { #' #' @param x A `Spectra` object. #' -#' @param y A `DataFrame` with the spectra variables to join/add. +#' @param y For `joinSpectraData()`: `DataFrame` with the spectra variables +#' to join/add. For `cbind2()`: a `data.frame`, `DataFrame` or +#' `matrix`. #' #' @param ... Additional arguments. #' @@ -1660,6 +1674,10 @@ setReplaceMethod("[[", "Spectra", function(x, i, j, ..., value) { #' #' spectraVariables(sciex2) #' spectraData(sciex2)[1:13, c("spectrumId", "var1", "var2")] +#' +#' ## Append new spectra variables with cbind2() +#' df <- data.frame(cola = 4:5, colb = "b") +#' data_append <- cbind2(sciex1, df) NULL #' @rdname combineSpectra @@ -1669,6 +1687,16 @@ setMethod("c", "Spectra", function(x, ...) 
{ .concatenate_spectra(unname(list(unname(x), ...))) }) +setClassUnion("dataframeOrDataFrame", c("data.frame", "DataFrame")) +#' @rdname combineSpectra +#' +#' @export +setMethod("cbind2", signature(x = "Spectra", + y = "dataframeOrDataFrame"), function(x, y, ...) { + x@backend <- cbind2(x@backend, y, ...) + x + }) + #' @rdname combineSpectra setMethod("split", "Spectra", function(x, f, drop = FALSE, ...) { bcknds <- split(x@backend, f, ...) diff --git a/inst/test_backends/test_MsBackend/test_spectra_subsetting.R b/inst/test_backends/test_MsBackend/test_spectra_subsetting.R index 93adce0d..cadb2542 100644 --- a/inst/test_backends/test_MsBackend/test_spectra_subsetting.R +++ b/inst/test_backends/test_MsBackend/test_spectra_subsetting.R @@ -59,6 +59,19 @@ test_that("[", { expect_equal(res, be[which(l)]) }) +test_that("cbind2 works", { + seql <- length(be) + df <- data.frame(cola = seq_len(seql), colb = "b", colz = "z") + res <- cbind2(be, df) + expect_true(validObject(res)) + expect_equal(ncol(spectraData(res)), length(spectraVariables(be)) + 3) + expect_equal(res$cola, seq_len(seql)) + expect_equal(res$colb, rep("b", seql)) + expect_equal(res$colz, rep("z", seql)) + df2 <- data.frame(cola = 3:6, colb = "b", colz = "z") + expect_error(cbind2(be, df2), "does not match") +}) + #' extractByIndex. 
Uses [ if not implemented test_that("extractByIndex", { i <- sample(seq_along(be), floor(length(be) / 2)) diff --git a/man/MsBackend.Rd b/man/MsBackend.Rd index 279576a5..833cd2f6 100644 --- a/man/MsBackend.Rd +++ b/man/MsBackend.Rd @@ -34,6 +34,7 @@ \alias{acquisitionNum,MsBackend-method} \alias{peaksData,MsBackend-method} \alias{peaksVariables,MsBackend-method} +\alias{cbind2,MsBackend,dataframeOrDataFrameOrmatrix-method} \alias{centroided,MsBackend-method} \alias{centroided<-,MsBackend-method} \alias{collisionEnergy,MsBackend-method} @@ -133,6 +134,8 @@ \S4method{peaksVariables}{MsBackend}(object) +\S4method{cbind2}{MsBackend,dataframeOrDataFrameOrmatrix}(x, y = data.frame(), ...) + \S4method{centroided}{MsBackend}(object) \S4method{centroided}{MsBackend}(object) <- value @@ -325,6 +328,12 @@ in the individual \code{matrix} of the returned \code{list}. Defaults to \code{peaksVariables(object)} and depends on what \emph{peaks variables} the backend provides.} +\item{x}{Object extending \code{MsBackend}.} + +\item{y}{For \code{cbind2()}: A \code{data.frame} or \code{DataFrame} with the +spectra variables to be added to the backend. Need to be of the same +length as the number of spectra in the backend.} + \item{value}{replacement value for \verb{<-} methods. See individual method description or expected data type.} @@ -410,12 +419,10 @@ to be used to subset/filter \code{object}.} \item{msLevel.}{same as \code{msLevel} above.} -\item{values}{for \code{filterValues()}: A \code{numeric} vector that define the +\item{values}{For \code{filterValues()}: A \code{numeric} vector that define the values to filter the \code{object}. 
\code{values} needs to be of same length than parameter \code{spectraVariables} and in the same order.} -\item{x}{Object extending \code{MsBackend}.} - \item{use.names}{For \code{lengths()}: whether spectrum names should be used.} \item{drop}{For \code{[}: not considered.} @@ -600,6 +607,10 @@ object's \code{spectraData} that contain only missing values (\code{NA}). Note t while columns with only \code{NA}s are removed, a \code{spectraData()} call after \code{dropNaSpectraVariables()} might still show columns containing \code{NA} values for \emph{core} spectra variables. +\item \code{cbind2()}: allows to appends multiple spectra variables to the backend at +once. It does so \emph{blindly} and is therefore at the risk of the user. For a +more controlled way of adding spectra variables, the \code{joinSpectraData()} +should be used. \item \code{centroided()}, \verb{centroided<-}: gets or sets the centroiding information of the spectra. \code{centroided()} returns a \code{logical} vector of length equal to the number of spectra with \code{TRUE} if a diff --git a/man/combineSpectra.Rd b/man/combineSpectra.Rd index d4f7bdb0..07311ba6 100644 --- a/man/combineSpectra.Rd +++ b/man/combineSpectra.Rd @@ -5,7 +5,9 @@ \alias{combineSpectra} \alias{joinSpectraData} \alias{split} +\alias{cbind2} \alias{c,Spectra-method} +\alias{cbind2,Spectra,dataframeOrDataFrame-method} \alias{split,Spectra,ANY-method} \title{Merging, aggregating and splitting Spectra} \usage{ @@ -24,6 +26,8 @@ joinSpectraData(x, y, by.x = "spectrumId", by.y, suffix.y = ".y") \S4method{c}{Spectra}(x, ...) +\S4method{cbind2}{Spectra,dataframeOrDataFrame}(x, y, ...) + \S4method{split}{Spectra,ANY}(x, f, drop = FALSE, ...) } \arguments{ @@ -48,7 +52,9 @@ of the spectra. Defaults to \code{\link[=combinePeaksData]{combinePeaksData()}}. information. 
This is passed directly to the \code{\link[=backendInitialize]{backendInitialize()}} method of the \linkS4class{MsBackend}.} -\item{y}{A \code{DataFrame} with the spectra variables to join/add.} +\item{y}{For \code{joinSpectraData()}: \code{DataFrame} with the spectra variables +to join/add. For \code{cbind2()}: a \code{data.frame}, \code{DataFrame} or +\code{matrix}.} \item{by.x}{A \code{character(1)} specifying the spectra variable used for merging. Default is \code{"spectrumId"}.} @@ -76,6 +82,14 @@ objects. In such cases it is suggested to first change the backends of all \code{Spectra} to the same type of backend (using the \code{\link[=setBackend]{setBackend()}} function and to eventually (if needed) apply the processing queue using the \code{\link[=applyProcessing]{applyProcessing()}} function. +\item \code{cbind2()}: Appends multiple spectra variables from a \code{data.frame}, +\code{DataFrame} or \code{matrix} to the \code{Spectra} object at once. It does so +\emph{blindly} (e.g. do not check rownames compatibility) and is therefore at +the risk of the user. For a more controlled way of adding spectra +variables, the \code{joinSpectraData()} should be used. It will return a +\code{Spectra} object with the appended spectra variables. \code{cbind2()} does +check however that the number of rows of the \code{data.frame} or \code{DataFrame} +matches the number of spectra in the \code{Spectra} object. \item \code{combineSpectra()}: combines sets of spectra (defined with parameter \code{f}) into a single spectrum per set aggregating their MS data (i.e. their \emph{peaks data} matrices with the \emph{m/z} and intensity values of their @@ -120,6 +134,8 @@ throw a warning and only the last occurrence is kept. These should be explored and ideally be removed using for \code{QFeatures::reduceDataFrame()}, \code{PMS::reducePSMs()} or similar functions. 
+For a more general function that allows to append \code{data.frame}, +\code{DataFrame} and \code{matrix} see \code{cbind2()}. } \item \code{split()}: splits the \code{Spectra} object based on parameter \code{f} into a \code{list} of \code{Spectra} objects. @@ -228,6 +244,10 @@ sciex2 <- joinSpectraData(sciex1, spv, by.y = "spectrumId") spectraVariables(sciex2) spectraData(sciex2)[1:13, c("spectrumId", "var1", "var2")] + +## Append new spectra variables with cbind2() +df <- data.frame(cola = 4:5, colb = "b") +data_append <- cbind2(data, df) } \seealso{ \itemize{ diff --git a/man/hidden_aliases.Rd b/man/hidden_aliases.Rd index ce4e63e9..c03adb62 100644 --- a/man/hidden_aliases.Rd +++ b/man/hidden_aliases.Rd @@ -59,6 +59,7 @@ \alias{tic,MsBackendDataFrame-method} \alias{$,MsBackendDataFrame-method} \alias{$<-,MsBackendDataFrame-method} +\alias{cbind2,MsBackendDataFrame,dataframeOrDataFrameOrmatrix-method} \alias{split,MsBackendDataFrame,ANY-method} \alias{filterAcquisitionNum,MsBackendDataFrame-method} \alias{backendRequiredSpectraVariables,MsBackendHdf5Peaks-method} @@ -133,6 +134,7 @@ \alias{$,MsBackendMemory-method} \alias{$<-,MsBackendMemory-method} \alias{[,MsBackendMemory-method} +\alias{cbind2,MsBackendMemory,dataframeOrDataFrameOrmatrix-method} \alias{split,MsBackendMemory,ANY-method} \alias{filterAcquisitionNum,MsBackendMemory-method} \alias{backendRequiredSpectraVariables,MsBackendMzR-method} @@ -272,6 +274,8 @@ \S4method{[}{MsBackendDataFrame}(x, i, j, ..., drop = FALSE) +\S4method{cbind2}{MsBackendDataFrame,dataframeOrDataFrameOrmatrix}(x, y = data.frame(), ...) + \S4method{split}{MsBackendDataFrame,ANY}(x, f, drop = FALSE, ...) \S4method{filterAcquisitionNum}{MsBackendDataFrame}( @@ -432,6 +436,8 @@ \S4method{[}{MsBackendMemory}(x, i, j, ..., drop = FALSE) +\S4method{cbind2}{MsBackendMemory,dataframeOrDataFrameOrmatrix}(x, y = data.frame(), ...) + \S4method{split}{MsBackendMemory,ANY}(x, f, drop = FALSE, ...) 
\S4method{filterAcquisitionNum}{MsBackendMemory}( diff --git a/tests/testthat/test_MsBackendDataFrame.R b/tests/testthat/test_MsBackendDataFrame.R index 238e945a..2cc04795 100644 --- a/tests/testthat/test_MsBackendDataFrame.R +++ b/tests/testthat/test_MsBackendDataFrame.R @@ -635,6 +635,22 @@ test_that("[,MsBackendDataFrame works", { expect_equal(res, res_2) }) +test_that("cbind2, MsBackendDataFrame works", { + be <- MsBackendDataFrame() + df <- DataFrame(scanIndex = 1:2, a = "a", b = "b") + be <- backendInitialize(be, df) + df2 <- data.frame(cola = 3:4, colb = "b", colz = "z") + res <- cbind2(be, df2) + expect_true(validObject(res)) + expect_equal(ncol(spectraData(res)), ncol(spectraData(be)) +3) + expect_equal(res$cola, c(3, 4)) + expect_equal(res$colb, c("b", "b")) + expect_equal(res$colz, c("z", "z")) + expect_equal(res$scanIndex, 1:2) + df3 <- data.frame(colv = 1:6, colw = "b") + expect_error(cbind2(be, df3), "does not match") +}) + test_that("selectSpectraVariables,MsBackendDataFrame works", { be <- MsBackendDataFrame() res <- selectSpectraVariables(be, c("dataStorage", "msLevel")) diff --git a/tests/testthat/test_MsBackendMemory.R b/tests/testthat/test_MsBackendMemory.R index 119e2c56..c4df695f 100644 --- a/tests/testthat/test_MsBackendMemory.R +++ b/tests/testthat/test_MsBackendMemory.R @@ -564,6 +564,22 @@ test_that("[,MsBackendMemory works", { expect_equal(res, res_2) }) +test_that("cbind2, MsBackendMemory works", { + be <- new("MsBackendMemory") + df <- data.frame(scanIndex = 1:2, a = "a", b = "b") + be <- backendInitialize(be, df) + df2 <- data.frame(cola = 3:4, colb = "b", colz = "z") + res <- cbind2(be, df2) + expect_true(validObject(res)) + expect_equal(ncol(spectraData(res)), ncol(spectraData(be)) +3) + expect_equal(res$cola, c(3, 4)) + expect_equal(res$colb, c("b", "b")) + expect_equal(res$colz, c("z", "z")) + expect_equal(res$scanIndex, 1:2) + df3 <- data.frame(colv = 1:6, colw = "b") + expect_error(cbind2(be, df3), "does not match") +}) + 
test_that("split,MsBackendMemory works", { be <- new("MsBackendMemory") be <- backendInitialize(be, test_df) From 666172f50030b6ed7884a4804fa88a768742df33 Mon Sep 17 00:00:00 2001 From: Philippine Louail <127301965+philouail@users.noreply.github.com> Date: Wed, 4 Dec 2024 16:56:59 +0100 Subject: [PATCH 2/5] fix example --- R/Spectra.R | 2 +- inst/test_backends/test_MsBackend/test_spectra_subsetting.R | 6 ++++++ man/combineSpectra.Rd | 4 ++-- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/R/Spectra.R b/R/Spectra.R index 69661c3a..d57c49fa 100644 --- a/R/Spectra.R +++ b/R/Spectra.R @@ -1676,7 +1676,7 @@ setReplaceMethod("[[", "Spectra", function(x, i, j, ..., value) { #' spectraData(sciex2)[1:13, c("spectrumId", "var1", "var2")] #' #' ## Append new spectra variables with cbind2() -#' df <- data.frame(cola = 4:5, colb = "b") +#' df <- data.frame(cola = seq_len(length(sciex1)), colb = "b") #' data_append <- cbind2(sciex1, df) NULL diff --git a/inst/test_backends/test_MsBackend/test_spectra_subsetting.R b/inst/test_backends/test_MsBackend/test_spectra_subsetting.R index cadb2542..b17a8bec 100644 --- a/inst/test_backends/test_MsBackend/test_spectra_subsetting.R +++ b/inst/test_backends/test_MsBackend/test_spectra_subsetting.R @@ -70,6 +70,12 @@ test_that("cbind2 works", { expect_equal(res$colz, rep("z", seql)) df2 <- data.frame(cola = 3:6, colb = "b", colz = "z") expect_error(cbind2(be, df2), "does not match") + ## with matrix + m <- matrix(1:seql, ncol = 1, dimnames = list(NULL, "m")) + res <- cbind2(be, m) + expect_true(validObject(res)) + expect_equal(ncol(spectraData(res)), length(spectraVariables(be)) + 1) + expect_equal(res$m, 1:seql) }) #' extractByIndex. 
Uses [ if not implemented diff --git a/man/combineSpectra.Rd b/man/combineSpectra.Rd index 07311ba6..36fedfa1 100644 --- a/man/combineSpectra.Rd +++ b/man/combineSpectra.Rd @@ -246,8 +246,8 @@ spectraVariables(sciex2) spectraData(sciex2)[1:13, c("spectrumId", "var1", "var2")] ## Append new spectra variables with cbind2() -df <- data.frame(cola = 4:5, colb = "b") -data_append <- cbind2(data, df) +df <- data.frame(cola = seq_len(length(sciex1)), colb = "b") +data_append <- cbind2(sciex1, df) } \seealso{ \itemize{ From 01f29afeab93f53704c0840b648c8afe1b990b89 Mon Sep 17 00:00:00 2001 From: Philippine Louail <127301965+philouail@users.noreply.github.com> Date: Wed, 4 Dec 2024 17:15:30 +0100 Subject: [PATCH 3/5] add issue number to News.md --- NEWS.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index cd6c6579..40622b24 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,7 +3,8 @@ ## Change in 1.17.2 - Add `cbind2()` method to easily add multiple `spectraVariables` and their - content to the `spectraData` of a `Spectra` object. + content to the `spectraData` of a `Spectra` object. + See also [issue #342](https://github.com/rformassspectrometry/Spectra/issues/342) ## Changes in 1.17.1 From 24e065754d9532bda282f8ac7f193755784b1c75 Mon Sep 17 00:00:00 2001 From: Philippine Louail <127301965+philouail@users.noreply.github.com> Date: Thu, 5 Dec 2024 16:47:35 +0100 Subject: [PATCH 4/5] fix jo's comments --- R/MsBackend.R | 19 ++++++++++++------- R/MsBackendDataFrame.R | 8 +++++--- R/MsBackendMemory.R | 9 ++++++--- R/Spectra.R | 17 +++++++++-------- man/MsBackend.Rd | 10 ++++++---- man/combineSpectra.Rd | 10 ++++++---- 6 files changed, 44 insertions(+), 29 deletions(-) diff --git a/R/MsBackend.R b/R/MsBackend.R index 8a72e416..bf70aca5 100644 --- a/R/MsBackend.R +++ b/R/MsBackend.R @@ -188,7 +188,8 @@ #' #' @param y For `cbind2()`: A `data.frame` or `DataFrame` with the #' spectra variables to be added to the backend. 
Need to be of the same -#' length as the number of spectra in the backend. +#' length as the number of spectra in the backend. The number of rows and +#' their order has to match the number of spectra and their order in x. #' #' @param x Object extending `MsBackend`. #' @@ -318,9 +319,10 @@ #' for *core* spectra variables. #' #' - `cbind2()`: allows to appends multiple spectra variables to the backend at -#' once. It does so *blindly* and is therefore at the risk of the user. For a -#' more controlled way of adding spectra variables, the `joinSpectraData()` -#' should be used. +#' once. The `Spectra` and the values for the new spectra variables have to +#' be in a matching order. Replacing existing spectra variables is not +#' supported through this function. For a more controlled way of adding +#' spectra variables, the `joinSpectraData()` should be used. #' #' - `centroided()`, `centroided<-`: gets or sets the centroiding #' information of the spectra. `centroided()` returns a `logical` @@ -1042,8 +1044,11 @@ setMethod("cbind2", signature = c("MsBackend", "dataframeOrDataFrameOrmatrix"), function(x, y = data.frame(), ...) 
{ if (is(y, "matrix")) y <- as.data.frame(y) + if (any(colnames(x) %in% colnames(y))) + stop("spectra variables in 'y' are already present in 'x' ", + "replacing them is not allowed") if (nrow(y) != length(x)) - stop("Length of 'y' does not match the number of spectra in 'x'") + stop("Number of row in 'y' does not match the number of spectra in 'x'") for (i in colnames(y)) { x[[i]] <- y[, i] } @@ -1373,7 +1378,7 @@ setMethod("filterRanges", "MsBackend", return(object) if (!is.numeric(ranges)) stop("filterRanges only support filtering for numerical ", - "'spectraVariables'") + "'spectraVariables'") match <- match.arg(match) if (is.character(spectraVariables)){ if(!all(spectraVariables %in% spectraVariables(object))) @@ -1383,7 +1388,7 @@ setMethod("filterRanges", "MsBackend", "function to list possible values.") } else stop("The 'spectraVariables' parameter needs to be of type ", - "'character'.") + "'character'.") if (length(spectraVariables) != length(ranges) / 2) stop("Length of 'ranges' needs to be twice the length of ", "the parameter 'spectraVariables' and define the lower ", diff --git a/R/MsBackendDataFrame.R b/R/MsBackendDataFrame.R index 1fe4f872..7046f103 100644 --- a/R/MsBackendDataFrame.R +++ b/R/MsBackendDataFrame.R @@ -567,8 +567,6 @@ setMethod("[", "MsBackendDataFrame", function(x, i, j, ..., drop = FALSE) { .subset_backend_data_frame(x, i) }) -setClassUnion("dataframeOrDataFrameOrmatrix", - c("data.frame", "DataFrame", "matrix")) #' @importMethodsFrom methods cbind2 #' #' @rdname hidden_aliases @@ -577,8 +575,12 @@ setMethod("cbind2", signature = c("MsBackendDataFrame", function(x, y = data.frame(), ...) 
{ if (is(y, "matrix")) y <- as.data.frame(y) + if (any(colnames(x) %in% colnames(y))) + stop("spectra variables in 'y' are already present in 'x' ", + "replacing them is not allowed") if (nrow(y) != length(x)) - stop("Length of 'y' does not match the number of spectra in 'x'") + stop("Number of row in 'y' does not match the number of ", + "spectra in 'x'") x@spectraData <- cbind(x@spectraData, y) validObject(x) x diff --git a/R/MsBackendMemory.R b/R/MsBackendMemory.R index 8c2b31c4..b25c969d 100644 --- a/R/MsBackendMemory.R +++ b/R/MsBackendMemory.R @@ -670,8 +670,6 @@ setMethod("[", "MsBackendMemory", function(x, i, j, ..., drop = FALSE) { .df_subset(x, i) }) -setClassUnion("dataframeOrDataFrameOrmatrix", - c("data.frame", "DataFrame", "matrix")) #' @importMethodsFrom methods cbind2 #' #' @rdname hidden_aliases @@ -680,8 +678,13 @@ setMethod("cbind2", signature = c("MsBackendMemory", function(x, y = data.frame(), ...) { if (is(y, "matrix")) y <- as.data.frame(y) + if (any(colnames(x) %in% colnames(y))) + stop("spectra variables in 'y' are already present in 'x' ", + "replacing them is not allowed") + if (nrow(y) != length(x)) - stop("Length of 'y' does not match the number of spectra in 'x'") + stop("Number of row in 'y' does not match the number of ", + "spectra in 'x'") x@spectraData <- cbind(x@spectraData, y) validObject(x) x diff --git a/R/Spectra.R b/R/Spectra.R index d57c49fa..aa45025a 100644 --- a/R/Spectra.R +++ b/R/Spectra.R @@ -1467,7 +1467,8 @@ setReplaceMethod("[[", "Spectra", function(x, i, j, ..., value) { #' - `cbind2()`: Appends multiple spectra variables from a `data.frame`, #' `DataFrame` or `matrix` to the `Spectra` object at once. It does so #' *blindly* (e.g. do not check rownames compatibility) and is therefore at -#' the risk of the user. For a more controlled way of adding spectra +#' the risk of the user. The function also does not allow to replace existing +#' spectra variables.
For a more controlled way of adding spectra #' variables, the `joinSpectraData()` should be used. It will return a #' `Spectra` object with the appended spectra variables. `cbind2()` does #' check however that the number of rows of the `data.frame` or `DataFrame` @@ -1556,8 +1557,9 @@ setReplaceMethod("[[", "Spectra", function(x, i, j, ..., value) { #' @param x A `Spectra` object. #' #' @param y For `joinSpectraData()`: `DataFrame` with the spectra variables -#' to join/add. For `cbind2()`: a `data.frame`, `DataFrame` or -#' `matrix`. +#' to join/add. For `cbind2()`: a `data.frame`, `DataFrame` or +#' `matrix`. The number of rows and their order has to match the +#' number of spectra in `x`, respectively their order. #' #' @param ... Additional arguments. #' @@ -1687,15 +1689,15 @@ setMethod("c", "Spectra", function(x, ...) { .concatenate_spectra(unname(list(unname(x), ...))) }) -setClassUnion("dataframeOrDataFrame", c("data.frame", "DataFrame")) #' @rdname combineSpectra #' #' @export setMethod("cbind2", signature(x = "Spectra", - y = "dataframeOrDataFrame"), function(x, y, ...) { - x@backend <- cbind2(x@backend, y, ...) - x - }) + y = "dataframeOrDataFrameOrmatrix"), + function(x, y, ...) { + x@backend <- cbind2(x@backend, y, ...) + x + }) #' @rdname combineSpectra setMethod("split", "Spectra", function(x, f, drop = FALSE, ...) { diff --git a/man/MsBackend.Rd b/man/MsBackend.Rd index 833cd2f6..3ff8c4e5 100644 --- a/man/MsBackend.Rd +++ b/man/MsBackend.Rd @@ -332,7 +332,8 @@ backend provides.} \item{y}{For \code{cbind2()}: A \code{data.frame} or \code{DataFrame} with the spectra variables to be added to the backend. Need to be of the same -length as the number of spectra in the backend.} +length as the number of spectra in the backend. The number of rows and +their order has to match the number of spectra and their order in x.} \item{value}{replacement value for \verb{<-} methods.
See individual method description or expected data type.} @@ -608,9 +609,10 @@ while columns with only \code{NA}s are removed, a \code{spectraData()} call afte \code{dropNaSpectraVariables()} might still show columns containing \code{NA} values for \emph{core} spectra variables. \item \code{cbind2()}: allows to appends multiple spectra variables to the backend at -once. It does so \emph{blindly} and is therefore at the risk of the user. For a -more controlled way of adding spectra variables, the \code{joinSpectraData()} -should be used. +once. The \code{Spectra} and the values for the new spectra variables have to +be in a matching order. Replacing existing spectra variables is not +supported through this function. For a more controlled way of adding +spectra variables, the \code{joinSpectraData()} should be used. \item \code{centroided()}, \verb{centroided<-}: gets or sets the centroiding information of the spectra. \code{centroided()} returns a \code{logical} vector of length equal to the number of spectra with \code{TRUE} if a diff --git a/man/combineSpectra.Rd b/man/combineSpectra.Rd index 36fedfa1..19fcded3 100644 --- a/man/combineSpectra.Rd +++ b/man/combineSpectra.Rd @@ -7,7 +7,7 @@ \alias{split} \alias{cbind2} \alias{c,Spectra-method} -\alias{cbind2,Spectra,dataframeOrDataFrame-method} +\alias{cbind2,Spectra,dataframeOrDataFrameOrmatrix-method} \alias{split,Spectra,ANY-method} \title{Merging, aggregating and splitting Spectra} \usage{ @@ -26,7 +26,7 @@ joinSpectraData(x, y, by.x = "spectrumId", by.y, suffix.y = ".y") \S4method{c}{Spectra}(x, ...) -\S4method{cbind2}{Spectra,dataframeOrDataFrame}(x, y, ...) +\S4method{cbind2}{Spectra,dataframeOrDataFrameOrmatrix}(x, y, ...) \S4method{split}{Spectra,ANY}(x, f, drop = FALSE, ...) } @@ -54,7 +54,8 @@ of the \linkS4class{MsBackend}.} \item{y}{For \code{joinSpectraData()}: \code{DataFrame} with the spectra variables to join/add. 
For \code{cbind2()}: a \code{data.frame}, \code{DataFrame} or -\code{matrix}.} +\code{matrix}. The number of rows and their order has to match the +number of spectra in \code{x}, respectively their order.} \item{by.x}{A \code{character(1)} specifying the spectra variable used for merging. Default is \code{"spectrumId"}.} @@ -85,7 +86,8 @@ the \code{\link[=applyProcessing]{applyProcessing()}} function. \item \code{cbind2()}: Appends multiple spectra variables from a \code{data.frame}, \code{DataFrame} or \code{matrix} to the \code{Spectra} object at once. It does so \emph{blindly} (e.g. do not check rownames compatibility) and is therefore at -the risk of the user. For a more controlled way of adding spectra +the risk of the user. The function also does not allow to replace existing +spectra variables. For a more controlled way of adding spectra variables, the \code{joinSpectraData()} should be used. It will return a \code{Spectra} object with the appended spectra variables. \code{cbind2()} does check however that the number of rows of the \code{data.frame} or \code{DataFrame} From 0d53e26dc6ffb8254475dda22ebe7de29626c861 Mon Sep 17 00:00:00 2001 From: Philippine Louail <127301965+philouail@users.noreply.github.com> Date: Thu, 5 Dec 2024 17:38:38 +0100 Subject: [PATCH 5/5] fix colnames check + test --- R/MsBackend.R | 2 +- R/MsBackendDataFrame.R | 2 +- R/MsBackendMemory.R | 2 +- inst/test_backends/test_MsBackend/test_spectra_subsetting.R | 3 +++ 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/R/MsBackend.R b/R/MsBackend.R index bf70aca5..92b8214a 100644 --- a/R/MsBackend.R +++ b/R/MsBackend.R @@ -1044,7 +1044,7 @@ setMethod("cbind2", signature = c("MsBackend", "dataframeOrDataFrameOrmatrix"), function(x, y = data.frame(), ...) 
{ if (is(y, "matrix")) y <- as.data.frame(y) - if (any(colnames(x) %in% colnames(y))) + if (any(spectraVariables(x) %in% colnames(y))) stop("spectra variables in 'y' are already present in 'x' ", "replacing them is not allowed") if (nrow(y) != length(x)) diff --git a/R/MsBackendDataFrame.R b/R/MsBackendDataFrame.R index 7046f103..28d692cc 100644 --- a/R/MsBackendDataFrame.R +++ b/R/MsBackendDataFrame.R @@ -575,7 +575,7 @@ setMethod("cbind2", signature = c("MsBackendDataFrame", function(x, y = data.frame(), ...) { if (is(y, "matrix")) y <- as.data.frame(y) - if (any(colnames(x) %in% colnames(y))) + if (any(spectraVariables(x) %in% colnames(y))) stop("spectra variables in 'y' are already present in 'x' ", "replacing them is not allowed") if (nrow(y) != length(x)) diff --git a/R/MsBackendMemory.R b/R/MsBackendMemory.R index b25c969d..4d232496 100644 --- a/R/MsBackendMemory.R +++ b/R/MsBackendMemory.R @@ -678,7 +678,7 @@ setMethod("cbind2", signature = c("MsBackendMemory", function(x, y = data.frame(), ...) { if (is(y, "matrix")) y <- as.data.frame(y) - if (any(colnames(x) %in% colnames(y))) + if (any(spectraVariables(x) %in% colnames(y))) stop("spectra variables in 'y' are already present in 'x' ", "replacing them is not allowed") diff --git a/inst/test_backends/test_MsBackend/test_spectra_subsetting.R b/inst/test_backends/test_MsBackend/test_spectra_subsetting.R index b17a8bec..76d676a4 100644 --- a/inst/test_backends/test_MsBackend/test_spectra_subsetting.R +++ b/inst/test_backends/test_MsBackend/test_spectra_subsetting.R @@ -76,6 +76,9 @@ test_that("cbind2 works", { expect_true(validObject(res)) expect_equal(ncol(spectraData(res)), length(spectraVariables(be)) + 1) expect_equal(res$m, 1:seql) + ## no replacing + expect_error(cbind2(be, data.frame(scanIndex = 1:seql)), + "are already present") }) #' extractByIndex. Uses [ if not implemented