diff --git a/DESCRIPTION b/DESCRIPTION
index b4cfacc7..6159510b 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mixOmics
 Type: Package
 Title: Omics Data Integration Project
-Version: 6.17.23
+Version: 6.17.24
 Depends: R (>= 3.5.0),
     MASS,
     lattice,
@@ -50,7 +50,7 @@ URL: http://www.mixOmics.org
 BugReports: https://github.com/mixOmicsTeam/mixOmics/issues/
 Repository: Bioconductor
 VignetteBuilder: knitr
-Date: 2021-07-13
+Date: 2021-07-15
 Packaged: 2017-02-06 06:49:17 UTC; klecao
 NeedsCompilation: no
 biocViews: ImmunoOncology,
diff --git a/NAMESPACE b/NAMESPACE
index 5ae5e015..3ca7f05b 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -146,6 +146,7 @@ export(plotArrow)
 export(plotDiablo)
 export(plotIndiv)
 export(plotLoadings)
+export(plotMarkers)
 export(plotVar)
 export(pls)
 export(plsda)
diff --git a/NEWS.md b/NEWS.md
index 5ffb0f18..ffc21b0d 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -2,6 +2,7 @@
 
 ### new features / enhancements / changes
 
+* new function `plotMarkers` to visualise the selected features in block analyses (see #134)
 * `auroc` title now fixed (#135)
 * `cimDiablo` takes `trim` argument to customise outlier filtering (#136)
 * `plotIndiv.pca` default shape set to `16`
diff --git a/R/plotMarkers.R b/R/plotMarkers.R
new file mode 100644
index 00000000..0857be14
--- /dev/null
+++ b/R/plotMarkers.R
@@ -0,0 +1,124 @@
+#' Plot the values for multivariate markers in block analyses
+#'
+#' Plots the standardised values (after centring and/or scaling) for the
+#' selected variables for a given block on a given component. Only applies to
+#' \code{block.splsda} or \code{block.spls}.
+#'
+#' @param object An object of class \code{block.splsda} or \code{block.spls}
+#' @param block Name or index of the block to use
+#' @param comp Integer, the component to use
+#' @param markers Character or integer, only include these markers. If integer,
+#' the top 'markers' features are shown
+#' @param group Factor, the grouping variable (only required for
+#' \code{block.spls} objects)
+#' @template arg/col.per.group
+#' @param global Logical indicating whether to show the global plots (TRUE) or
+#' segregate by feature (FALSE)
+#' @param title The plot title
+#'
+#' @return A ggplot object
+#' @seealso \code{\link{plotLoadings}}, \code{\link{block.splsda}}, \code{\link{block.spls}}
+#' @export
+#'
+#' @examples
+#' # see ?block.splsda and ?block.spls
+plotMarkers <-
+    function(object,
+             block,
+             markers = NULL,
+             comp = 1,
+             group = NULL,
+             col.per.group = NULL,
+             global = FALSE,
+             title = NULL)
+    {
+
+        ## resolve 'block' to a single, valid block name
+        blocks <- names(object$X)
+        if (is.numeric(block))
+        {
+            # translate an index to its name so that every check and look-up
+            # below works on the block name (an out-of-range index gives NA)
+            block <- blocks[block]
+        }
+        if (length(block) != 1 || !block %in% blocks)
+            stop(sprintf("block must be one of: %s", paste0(blocks, collapse = ', ')), call. = FALSE)
+        df <- data.frame(object$X[[block]], check.names = FALSE)
+
+        ## group factor
+        group <- .get.group(group, object, n_ind = nrow(df))
+
+        col.group <- .get.cols.and.group(col.per.group = col.per.group,
+                                         group = group)
+        group <- col.group$group
+        col.per.group <- col.group$col.per.group
+
+        ## selected variables on this component, with their loadings
+        vars <- selectVar(object, block=block, comp=comp)[[1]]$value
+        var.names <- rownames(vars)
+        # sign of each selected variable's loading, looked up by feature name
+        # (assumes the loadings are the first column of 'value')
+        loading.sign <- structure(sign(vars[, 1]), names = var.names)
+
+        # drop = FALSE keeps a data.frame when only one variable is selected
+        df <- df[, var.names, drop = FALSE]
+        df$group <- group
+        df <-
+            melt(df,
+                 id.vars = 'group',
+                 variable.name = 'feature',
+                 value.name = 'value')
+        df$feature <- factor(df$feature, levels = var.names, ordered = TRUE)
+        # the facet label is the sign of the feature's loading, not the sign of
+        # the individual (standardised) data values
+        feature.sign <- unname(loading.sign[as.character(df$feature)])
+        df$sign <- ifelse(feature.sign > 0, 'Positive Loading', 'Negative Loading')
+        # to show +ves on top
+        df$sign <- factor(df$sign, levels = c('Positive Loading', 'Negative Loading'), ordered = TRUE)
+
+        if (!is.null(markers))
+        {
+            if (is.numeric(markers))
+            {
+                # indices refer to the selected features as ranked by selectVar
+                markers <- var.names[markers]
+                if (anyNA(markers))
+                    stop("'markers' indices must be between 1 and ", length(var.names), call. = FALSE)
+            }
+            if ( is.character(markers))
+            {
+                invalid.markers <- setdiff(markers, df$feature)
+                if (length(invalid.markers) > 0)
+                    stop("invalid feature names: ", paste0(invalid.markers, collapse = ", "), call. = FALSE)
+                # base subsetting: no dplyr import, no masking by stats::filter
+                df <- df[df$feature %in% markers, , drop = FALSE]
+            }
+        }
+        if (global)
+        {
+            # a single facet column pools all features together
+            df$feature <- 'plotMarkers'
+        }
+
+        if (is.null(title))
+            title <- sprintf("Block: %s | Component: %s", block, comp)
+        p <- ggplot(df, aes_string('group', 'value', fill='group')) +
+            geom_violin(adjust=0.9) +
+            geom_boxplot(width=0.1) +
+            scale_fill_manual(values = col.per.group) +
+            theme_classic() +
+            labs(x='',
+                 y='value (standardised)',
+                 title = title) +
+            theme(legend.position = 'none',
+                  axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
+                  plot.title = element_text(hjust = 0.5, colour = "grey40"),
+                  strip.background = element_rect(colour="black", fill="grey80"))
+        if (global)
+        {
+            p <- p + facet_grid(sign~feature, scales = 'free')
+        } else {
+            p <- p + facet_grid(.~feature, scales = 'free')
+        }
+        p
+}
diff --git a/R/selectVar.R b/R/selectVar.R
index a39516b4..099db16e 100644
--- a/R/selectVar.R
+++ b/R/selectVar.R
@@ -10,7 +10,7 @@
 #'
 #' \code{selectVar} provides the variables selected on a given component. \
 #' \describe{ \item{list("name")}{outputs the name of the selected variables
-#' (provided that the input data have colnames) ranked in decreasing order of
+#' (provided that the input data have column names) ranked in decreasing order of
 #' importance.} \item{list("value")}{outputs the loading value for each
 #' selected variable, the loadings are ranked according to their absolute
 #' value.} } These functions are only implemented for the sparse versions.
@@ -18,7 +18,8 @@
 #' @aliases selectVar selectVar.mixo_pls selectVar.mixo_spls selectVar.pca
 #' selectVar.sgcca selectVar.rgcca select.var
 #' @param object object of class inherited from \code{"pls"}, \code{"spls"},
-#' \code{"plsda"},\code{"splsda"}, \code{"pca"}, \code{"spca"}, \code{"sipca"}.
+#' \code{"plsda"},\code{"splsda"},\code{"sgcca"}, \code{"rgcca"},
+#' \code{"pca"}, \code{"spca"}, \code{"sipca"}.
#' @param comp integer value indicating the component of interest. #' @param block for an object of class \code{"sgcca"}, the block data sets can #' be specified as an input vector, for example \code{c(1,2)} for the first two diff --git a/README.md b/README.md index 4c43e240..2048633d 100644 --- a/README.md +++ b/README.md @@ -251,6 +251,9 @@ Thank you for using `mixOmics`! #### July 2021 +- new function `plotMarkers` to visualise the selected features in + block analyses (see + <https://github.com/mixOmicsTeam/mixOmics/issues/134>) - `tune.spls` now able to tune the selected variables on both `X` and `Y`. See `?tune.spls` - new function `impute.nipals` to impute missing values using the diff --git a/examples/block.spls-examples.R b/examples/block.spls-examples.R index 708d171d..ab591bb2 100644 --- a/examples/block.spls-examples.R +++ b/examples/block.spls-examples.R @@ -11,7 +11,7 @@ design # set number of component per data set ncomp = c(2) # set number of variables to select, per component and per data set (this is set arbitrarily) -list.keepX = list(mrna = rep(20, 2), mirna = rep(10,2)) +list.keepX = list(mrna = rep(5, 2), mirna = rep(5,2)) list.keepY = c(rep(10, 2)) TCGA.block.spls = block.spls(X = data, Y = breast.TCGA$data.train$protein, @@ -22,6 +22,16 @@ plotIndiv(TCGA.block.spls, group = breast.TCGA$data.train$subtype, ind.names = # illustrates coefficient weights in each block plotLoadings(TCGA.block.spls, ncomp = 1) plotVar(TCGA.block.spls, style = 'graphics', legend = TRUE) + +## plot markers (selected markers) for mrna and mirna +group <- breast.TCGA$data.train$subtype +# mrna: show each selected feature separately and group by subtype +plotMarkers(object = TCGA.block.spls, comp = 1, block = 'mrna', group = group) +# mrna: aggregate all selected features, separate by loadings signs and group by subtype +plotMarkers(object = TCGA.block.spls, comp = 1, block = 'mrna', group = group, global = TRUE) +# proteins +plotMarkers(object = TCGA.block.spls, comp = 1, block = 'Y', group = group) + \dontrun{ 
network(TCGA.block.spls) } diff --git a/examples/block.splsda-examples.R b/examples/block.splsda-examples.R index 828cc7b0..a9a35227 100644 --- a/examples/block.splsda-examples.R +++ b/examples/block.splsda-examples.R @@ -12,7 +12,7 @@ design # set number of component per data set ncomp = c(2) # set number of variables to select, per component and per data set (this is set arbitrarily) -list.keepX = list(mrna = rep(20, 2), mirna = rep(10,2), protein = rep(10, 2)) +list.keepX = list(mrna = rep(5,2), mirna = rep(5,2), protein = rep(5,2)) TCGA.block.splsda = block.splsda(X = data, Y = breast.TCGA$data.train$subtype, @@ -34,3 +34,17 @@ TCGA.block.splsda$design # illustrates coefficient weights in each block plotLoadings(TCGA.block.splsda, ncomp = 1, contrib = 'max') plotVar(TCGA.block.splsda, style = 'graphics', legend = TRUE) + +## plot markers (selected variables) for mrna and mirna +# mrna: show each selected feature separately +plotMarkers(object = TCGA.block.splsda, comp = 1, block = 'mrna') +# mrna: aggregate all selected features and separate by loadings signs +plotMarkers(object = TCGA.block.splsda, comp = 1, block = 'mrna', global = TRUE) +# proteins +plotMarkers(object = TCGA.block.splsda, comp = 1, block = 'protein') +# show top 5 markers +plotMarkers(object = TCGA.block.splsda, comp = 1, block = 'protein', markers = 1:5) +# show specific markers +my.markers <- selectVar(TCGA.block.splsda, comp = 1)[['protein']]$name[c(1,3,5)] +my.markers +plotMarkers(object = TCGA.block.splsda, comp = 1, block = 'protein', markers = my.markers) diff --git a/inst/README-WhatsNew.Rmd b/inst/README-WhatsNew.Rmd index f83f8552..5e7ccfe4 100644 --- a/inst/README-WhatsNew.Rmd +++ b/inst/README-WhatsNew.Rmd @@ -19,6 +19,7 @@ opts_chunk$set( echo = TRUE, eval = FALSE, warning = FALSE, message = FALSE) #### July 2021 +* new function `plotMarkers` to visualise the selected features in block analyses (see https://github.com/mixOmicsTeam/mixOmics/issues/134) * `tune.spls` now able to 
tune the selected variables on both `X` and `Y`. See `?tune.spls` * new function `impute.nipals` to impute missing values using the nipals algorithm * new function `tune.spca` to tune the number of selected variables for pca components diff --git a/man/block.spls.Rd b/man/block.spls.Rd index bded0629..98510258 100644 --- a/man/block.spls.Rd +++ b/man/block.spls.Rd @@ -148,7 +148,7 @@ design # set number of component per data set ncomp = c(2) # set number of variables to select, per component and per data set (this is set arbitrarily) -list.keepX = list(mrna = rep(20, 2), mirna = rep(10,2)) +list.keepX = list(mrna = rep(5, 2), mirna = rep(5,2)) list.keepY = c(rep(10, 2)) TCGA.block.spls = block.spls(X = data, Y = breast.TCGA$data.train$protein, @@ -159,6 +159,16 @@ plotIndiv(TCGA.block.spls, group = breast.TCGA$data.train$subtype, ind.names = # illustrates coefficient weights in each block plotLoadings(TCGA.block.spls, ncomp = 1) plotVar(TCGA.block.spls, style = 'graphics', legend = TRUE) + +## plot markers (selected markers) for mrna and mirna +group <- breast.TCGA$data.train$subtype +# mrna: show each selected feature separately and group by subtype +plotMarkers(object = TCGA.block.spls, comp = 1, block = 'mrna', group = group) +# mrna: aggregate all selected features, separate by loadings signs and group by subtype +plotMarkers(object = TCGA.block.spls, comp = 1, block = 'mrna', group = group, global = TRUE) +# proteins +plotMarkers(object = TCGA.block.spls, comp = 1, block = 'Y', group = group) + \dontrun{ network(TCGA.block.spls) } diff --git a/man/block.splsda.Rd b/man/block.splsda.Rd index 92b14d88..459f5aca 100644 --- a/man/block.splsda.Rd +++ b/man/block.splsda.Rd @@ -155,7 +155,7 @@ design # set number of component per data set ncomp = c(2) # set number of variables to select, per component and per data set (this is set arbitrarily) -list.keepX = list(mrna = rep(20, 2), mirna = rep(10,2), protein = rep(10, 2)) +list.keepX = list(mrna = rep(5,2), mirna = 
rep(5,2), protein = rep(5,2)) TCGA.block.splsda = block.splsda(X = data, Y = breast.TCGA$data.train$subtype, @@ -177,6 +177,20 @@ TCGA.block.splsda$design # illustrates coefficient weights in each block plotLoadings(TCGA.block.splsda, ncomp = 1, contrib = 'max') plotVar(TCGA.block.splsda, style = 'graphics', legend = TRUE) + +## plot markers (selected variables) for mrna and mirna +# mrna: show each selected feature separately +plotMarkers(object = TCGA.block.splsda, comp = 1, block = 'mrna') +# mrna: aggregate all selected features and separate by loadings signs +plotMarkers(object = TCGA.block.splsda, comp = 1, block = 'mrna', global = TRUE) +# proteins +plotMarkers(object = TCGA.block.splsda, comp = 1, block = 'protein') +# show top 5 markers +plotMarkers(object = TCGA.block.splsda, comp = 1, block = 'protein', markers = 1:5) +# show specific markers +my.markers <- selectVar(TCGA.block.splsda, comp = 1)[['protein']]$name[c(1,3,5)] +my.markers +plotMarkers(object = TCGA.block.splsda, comp = 1, block = 'protein', markers = my.markers) } \references{ On multiple integration with sPLS-DA and 4 data blocks: diff --git a/man/plotMarkers.Rd b/man/plotMarkers.Rd new file mode 100644 index 00000000..5ca17632 --- /dev/null +++ b/man/plotMarkers.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotMarkers.R +\name{plotMarkers} +\alias{plotMarkers} +\title{Plot the values for multivariate markers in block analyses} +\usage{ +plotMarkers( + object, + block, + markers = NULL, + comp = 1, + group = NULL, + col.per.group = NULL, + global = FALSE, + title = NULL +) +} +\arguments{ +\item{object}{An object of class \code{block.splsda} or \code{block.spls}} + +\item{block}{Name or index of the block to use} + +\item{markers}{Character or integer, only include these markers. 
If integer, +the top 'markers' features are shown} + +\item{comp}{Integer, the component to use} + +\item{group}{Factor, the grouping variable (only required for +\code{block.spls} objects)} + +\item{col.per.group}{character (or symbol) color to be used when 'group' is +defined. Vector of the same length as the number of groups.} + +\item{global}{Logical indicating whether to show the global plots (TRUE) or +segregate by feature (FALSE)} + +\item{title}{The plot title} +} +\value{ +A ggplot object +} +\description{ +Plots the standardised values (after centring and/or scaling) for the +selected variables for a given block on a given component. Only applies to +\code{block.splsda} or \code{block.spls}. +} +\examples{ +# see ?block.splsda and ?block.spls +} +\seealso{ +\code{\link{plotLoadings}}, \code{\link{block.splsda}}, \code{\link{block.spls}} +} diff --git a/man/selectVar.Rd b/man/selectVar.Rd index 92f5602f..394ef939 100644 --- a/man/selectVar.Rd +++ b/man/selectVar.Rd @@ -26,7 +26,8 @@ selectVar(...) \item{...}{other arguments.} \item{object}{object of class inherited from \code{"pls"}, \code{"spls"}, -\code{"plsda"},\code{"splsda"}, \code{"pca"}, \code{"spca"}, \code{"sipca"}.} +\code{"plsda"},\code{"splsda"},\code{"sgcca"}, \code{"rgcca"}, +\code{"pca"}, \code{"spca"}, \code{"sipca"}.} \item{comp}{integer value indicating the component of interest.} @@ -45,7 +46,7 @@ versions for our internal functions). \details{ \code{selectVar} provides the variables selected on a given component. \ \describe{ \item{list("name")}{outputs the name of the selected variables -(provided that the input data have colnames) ranked in decreasing order of +(provided that the input data have column names) ranked in decreasing order of importance.} \item{list("value")}{outputs the loading value for each selected variable, the loadings are ranked according to their absolute value.} } These functions are only implemented for the sparse versions.