-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #627 from RECETOX/hechth/issue626
Added first set of bioconductor-msnbase tools
- Loading branch information
Showing
6 changed files
with
2,263 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Tool Shed metadata for the bioconductor-msnbase tool suite. | ||
name: bioconductor-msnbase | ||
owner: recetox | ||
remote_repository_url: "https://github.com/RECETOX/galaxytools/tree/master/tools/bioconductor-msnbase" | ||
homepage_url: "https://bioconductor.org/packages/release/bioc/html/MSnbase.html" | ||
categories: | ||
- Metabolomics | ||
- Proteomics | ||
description: "MSnbase provides infrastructure for manipulation, processing and visualisation of mass spectrometry and proteomics data, ranging from raw to quantitative and annotated data. " | ||
# Literal block scalar: the text below is taken verbatim, so it must not be wrapped in quotes. | ||
long_description: | | ||
MSnbase is an R/Bioconductor package that provides infrastructure for plotting, manipulation and processing mass spectrometry and proteomics data. | ||
The project was started by Laurent Gatto in October 2010 (Mon Oct 4 23:35:23 2010, according to the git log) and has, since then, | ||
benefited from various contributions, in particular Sebastian Gibb and Johannes Rainer. | ||
The official package page is the Bioconductor landing page (release or devel versions). | ||
The github page is for active development, issue tracking and forking/pulling purposes. | ||
# One Tool Shed repository is generated per tool, named after the tool id. | ||
auto_tool_repositories: | ||
name_template: "{{ tool_id }}" | ||
description_template: "{{ tool_name }} tool from the bioconductor-msnbase package" | ||
suite: | ||
name: suite_bioconductor_msnbase | ||
description: MSnbase is an R/Bioconductor package that provides infrastructure for plotting, manipulation and processing mass spectrometry and proteomics data. | ||
type: repository_suite_definition |
142 changes: 142 additions & 0 deletions
142
tools/bioconductor-msnbase/bioconductor_msnbase_centroid.xml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
<tool id="bioconductor_msnbase_centroid" name="bioconductor-msnbase centroid" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01" license="MIT"> | ||
<description>centroid raw profile-mode MS data</description> | ||
<macros> | ||
<import>macros.xml</import> | ||
</macros> | ||
<expand macro="xrefs"/> | ||
<expand macro="creator"/> | ||
<expand macro="requirements"/> | ||
<command detect_errors="exit_code"><![CDATA[ | ||
Rscript "${run_script}" | ||
]]></command> | ||
<configfiles> | ||
<configfile name="run_script"><![CDATA[ | ||
## Cheetah-templated R script: read the mzML input, centroid it with pickPeaks, write centroided.mzml. | ||
## The MS2 checkbox renders as 2 when checked and 1 otherwise; msLevel. is the exact readMSData argument name. | ||
data_prof <- MSnbase::readMSData("$input_file", msLevel. = $mslevel) | ||
## Refinement-specific arguments carry a leading comma so that the call never ends | ||
## with an empty trailing argument when no refinement method is selected. | ||
data_centroided <- MSnbase::pickPeaks( | ||
data_prof, | ||
halfWindowSize = ${halfWindowSize}, | ||
method = "${estimate_noise_method}", | ||
SNR = ${snr}, | ||
refineMz = "${refinement.method}" | ||
#if "$refinement.method" == "kNeighbors" | ||
, k = ${refinement.k} | ||
#else if "$refinement.method" == "descendPeak" | ||
, signalPercentage = ${refinement.signal_percentage} | ||
, stopAtTwo = ${refinement.stop_at_two} | ||
#end if | ||
) | ||
## The file name must stay centroided.mzml: the output dataset is collected from the working directory under that name. | ||
MSnbase::writeMSData( | ||
data_centroided, | ||
file = "centroided.mzml", | ||
copy = TRUE, | ||
outformat = "mzml" | ||
) | ||
]]></configfile> | ||
</configfiles> | ||
<inputs> | ||
<!-- Values are substituted by name into the templated R script rather than passed as command-line flags, so every parameter is declared with name= (the resulting parameter names are unchanged). --> | ||
<param name="input_file" type="data" format="mzml" label="Input mzML File" | ||
help="The input mzML file containing the mass spectrometry data to be centroided."/> | ||
<!-- Boolean rendered directly as the MS level: 2 when checked, 1 when unchecked. --> | ||
<param name="mslevel" type="boolean" truevalue="2" falsevalue="1" checked="false" label="MS2" | ||
help="Specify if the dataset contains MS2 (tandem mass spectrometry) data." /> | ||
<param name="halfWindowSize" type="integer" label="Half window size" min="1" value="2" | ||
help="The half window size for the centroiding method. This determines the number of data points on either side of the center point to include in the centroiding calculation."/> | ||
<param name="estimate_noise_method" type="select" label="Noise estimation method" | ||
help="Method to choose to estimate the noise in the spectrum."> | ||
<option value="MAD" selected="true">Median Absolute Deviation</option> | ||
<option value="SuperSmoother">Friedman's Super Smoother</option> | ||
</param> | ||
<param name="snr" type="float" min="0" value="3" label="Signal-to-noise ratio (SNR)" | ||
help="The signal-to-noise ratio threshold for removing noisy signals." /> | ||
<!-- The selected refinement method decides which extra parameters are shown and rendered into the script. --> | ||
<conditional name="refinement"> | ||
<param name="method" type="select" label="Peak refinement method" | ||
help="The method refines the m/z value of the identified centroids by considering data points that belong (most likely) to the same mass peak. | ||
The m/z value is calculated as an intensity weighted average of the m/z values within the peak region. | ||
How the peak region is defined depends on the method chosen."> | ||
<option value="none" selected="true">None</option> | ||
<option value="kNeighbors">K-Neighbors</option> | ||
<option value="descendPeak">Descend Peak</option> | ||
</param> | ||
<when value="kNeighbors"> | ||
<param name="k" type="integer" min="1" max="10" value="2" label="K" | ||
help="The number of 2*K nearest neighbors to consider for m/z interpolation during peak refinement."/> | ||
</when> | ||
<when value="descendPeak"> | ||
<param name="signal_percentage" type="integer" min="0" max="100" value="20" label="Intensity threshold (%)" | ||
help="The signal intensity cutoff threshold (as a percentage) for including values in the m/z calculation during peak refinement." /> | ||
<!-- Rendered as the R literals TRUE / FALSE. --> | ||
<param name="stop_at_two" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Stop at two" | ||
help="Specify whether to stop the descent only after encountering two increasing scans, instead of stopping at the first increasing scan." /> | ||
</when> | ||
<when value="none"/> | ||
</conditional> | ||
</inputs> | ||
<!-- Single mzML output, collected from the file centroided.mzml in the job working directory. --> | ||
<outputs> | ||
<data name="output_file" format="mzml" label="${on_string} centroided with refinement ${refinement.method}" from_work_dir="centroided.mzml"/> | ||
</outputs> | ||
<!-- All tests use the same input file and the shared assertions_centroiding macro (expanded from the imported macros; its contents are not visible here). --> | ||
<tests> | ||
<!-- Test 1: only the input is set; all other parameters keep their defaults. --> | ||
<test> | ||
<param name="input_file" value="29_qc_no_dil_milliq_subset.mzML"/> | ||
<output name="output_file"> | ||
<assert_contents> | ||
<expand macro="assertions_centroiding"/> | ||
</assert_contents> | ||
</output> | ||
</test> | ||
<!-- Test 2: kNeighbors refinement with k = 3. --> | ||
<test> | ||
<param name="input_file" value="29_qc_no_dil_milliq_subset.mzML"/> | ||
<param name="method" value="kNeighbors"/> | ||
<param name="k" value="3"/> | ||
<output name="output_file"> | ||
<assert_contents> | ||
<expand macro="assertions_centroiding"/> | ||
</assert_contents> | ||
</output> | ||
</test> | ||
<!-- Test 3: descendPeak refinement with a signal percentage of 10; stop_at_two is not set here. --> | ||
<test> | ||
<param name="input_file" value="29_qc_no_dil_milliq_subset.mzML"/> | ||
<param name="method" value="descendPeak"/> | ||
<param name="signal_percentage" value="10"/> | ||
<output name="output_file"> | ||
<assert_contents> | ||
<expand macro="assertions_centroiding"/> | ||
</assert_contents> | ||
</output> | ||
</test> | ||
</tests> | ||
<help><![CDATA[ | ||
.. class:: infomark | ||
**What it does** | ||
This tool performs centroiding on mass spectrometry data using the MSnbase package in R. Centroiding is a process that converts profile mode data to centroid mode by identifying the peaks in the mass spectrum and representing them as single points. | ||
**Usage** | ||
- **Input**: Provide the input mzML file containing the mass spectrometry data to be centroided. | ||
- **Parameters**: | ||
- **Input mzML File**: The input mzML file containing the mass spectrometry data to be centroided. | ||
- **MS2**: Specify if the dataset contains MS2 (tandem mass spectrometry) data. | ||
- **Half window size**: The number of data points on either side of the center point to include in the centroiding calculation. | ||
- **Noise estimation method**: Choose the method to estimate the noise in the spectrum. Options include Median Absolute Deviation (MAD) and Friedman's Super Smoother. | ||
- **Signal-to-noise ratio (SNR)**: The signal-to-noise ratio threshold for removing noisy signals. A higher value will result in more noise being filtered out. | ||
- **Peak refinement method**: Select the method to refine the m/z value of the identified centroids. Options include None, K-Neighbors, and Descend Peak. | ||
- **K**: The number of 2*K nearest neighbors to consider for m/z interpolation during peak refinement (only applicable if K-Neighbors method is selected). | ||
- **Intensity threshold (%)**: The signal intensity cutoff threshold (as a percentage) for including values in the m/z calculation during peak refinement (only applicable if Descend Peak method is selected). | ||
- **Stop at two**: Specify whether to stop the descent only after encountering two increasing scans, instead of stopping at the first increasing scan (only applicable if Descend Peak method is selected). | ||
- **Output**: The centroided mzML file. | ||
**Input** | ||
- **Input mzML File**: The input mzML file containing the mass spectrometry data to be centroided. | ||
**Output** | ||
- **Output mzML File**: The resulting mzML file after applying the centroiding algorithm. | ||
**References** | ||
For more detailed information, please refer to the original documentation available via Bioconductor: https://bioconductor.org/packages/release/bioc/html/MSnbase.html | ||
]]></help> | ||
<expand macro="citations"/> | ||
</tool> |
105 changes: 105 additions & 0 deletions
105
tools/bioconductor-msnbase/bioconductor_msnbase_smooth_chromatogram.xml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
<tool id="bioconductor_msnbase_smooth_chromatogram" name="bioconductor-msnbase smooth chromatogram" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01" license="MIT"> | ||
<description>smooth consecutive spectra using a moving window filter (combineSpectraMovingWindow)</description> | ||
<macros> | ||
<import>macros.xml</import> | ||
</macros> | ||
<edam_topics> | ||
<edam_topic>topic_3170</edam_topic> <!-- Proteomics --> | ||
<edam_topic>topic_3391</edam_topic> <!-- Data handling --> | ||
</edam_topics> | ||
<edam_operations> | ||
<edam_operation>operation_3632</edam_operation> <!-- Data smoothing --> | ||
<edam_operation>operation_2945</edam_operation> <!-- Data handling --> | ||
</edam_operations> | ||
|
||
<expand macro="xrefs"/> | ||
<expand macro="creator"/> | ||
<expand macro="requirements"/> | ||
|
||
<command detect_errors="exit_code"><![CDATA[ | ||
Rscript "${run_script}" | ||
]]></command> | ||
<configfiles> | ||
<configfile name="run_script"><![CDATA[ | ||
## Cheetah-templated R script: read the mzML input, combine consecutive spectra | ||
## with a moving window, and write the result to smoothed.mzml. | ||
spectra_raw <- MSnbase::readMSData("${input_file}", mode = "onDisk") | ||
spectra_smoothed <- MSnbase::combineSpectraMovingWindow( | ||
spectra_raw, | ||
halfWindowSize = ${halfWindowSize}, | ||
weighted = ${weighted}, | ||
timeDomain = ${qtof}, | ||
ppm = ${ppm}, | ||
mzd = 0, | ||
intensityFun = ${intensity_function}, | ||
BPPARAM = BiocParallel::bpparam() | ||
) | ||
## The file name must stay smoothed.mzml: the output dataset is collected from the working directory under that name. | ||
MSnbase::writeMSData(spectra_smoothed, file = "smoothed.mzml", copy = TRUE, outformat = "mzml") | ||
]]></configfile> | ||
</configfiles> | ||
<inputs> | ||
<!-- Values are substituted by name into the templated R script rather than passed as command-line flags, so every parameter is declared with name= (the resulting parameter names are unchanged). --> | ||
<param name="input_file" type="data" format="mzml" label="Input mzML File" help="The input mzML file containing the MSnbase data."/> | ||
<param name="halfWindowSize" type="integer" label="Half window size" min="2" value="4" | ||
help="The half window size for the moving window smoothing method. This determines the number of data points on either side of the center point to include in the smoothing calculation."/> | ||
<!-- Both booleans are rendered as the R literals TRUE / FALSE. --> | ||
<param name="weighted" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Weighted" | ||
help="Specify whether to apply a weighted moving average, where the weights depend on the distance from the center of the window." /> | ||
<param name="qtof" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="QTOF" | ||
help="Indicate if the data was acquired using a QTOF (Quadrupole Time-of-Flight) instrument, which affects the time domain processing." /> | ||
<param name="ppm" type="integer" min="0" value="3" label="ppm tolerance" | ||
help="The parts-per-million (ppm) tolerance for considering mz values as belonging to the same ion. A higher value allows for more variation in mz values." /> | ||
<!-- Option values are R function names inserted unquoted into the script. --> | ||
<param name="intensity_function" type="select" label="Function to combine intensities" | ||
help="Choose the function to combine the intensity values of the same m/z value within the moving window. Options include mean, median, min, max, and sum."> | ||
<option value="base::mean" selected="true">mean</option> | ||
<option value="stats::median">median</option> | ||
<option value="min">min</option> | ||
<option value="max">max</option> | ||
<option value="sum">sum</option> | ||
</param> | ||
</inputs> | ||
<!-- Single mzML output, collected from the file smoothed.mzml in the job working directory. --> | ||
<outputs> | ||
<data name="output_file" format="mzml" label="Chromatogram smoothing on ${on_string}" from_work_dir="smoothed.mzml"/> | ||
</outputs> | ||
<tests> | ||
<!-- Default-parameter run: checks the shared assertions_smoothing macro (expanded from the imported macros; its contents are not visible here) and an output size of 173532 with a tolerance of 100. --> | ||
<test> | ||
<param name="input_file" value="29_qc_no_dil_milliq_subset.mzML"/> | ||
<output name="output_file"> | ||
<assert_contents> | ||
<expand macro="assertions_smoothing"/> | ||
<has_size size="173532" delta="100"/> | ||
</assert_contents> | ||
</output> | ||
</test> | ||
</tests> | ||
<help><![CDATA[ | ||
.. class:: infomark | ||
**What it does** | ||
This tool performs chromatogram smoothing on mass spectrometry data using the MSnbase package in R. It applies a moving window smoothing method to the input mzML file, which helps in reducing noise and improving the signal quality. | ||
**Usage** | ||
- **Input**: Provide the input mzML file containing the mass spectrometry data to be smoothed. | ||
- **Parameters**: | ||
- **Half window size**: The number of data points on either side of the center point to include in the smoothing calculation. | ||
- **Weighted**: Whether to apply a weighted moving average. | ||
- **QTOF**: Indicate if the data was acquired using a QTOF instrument. | ||
- **ppm tolerance**: The parts-per-million tolerance for considering mz values as belonging to the same ion. | ||
- **Function to combine intensities**: Choose the function to combine the intensity values within the moving window. | ||
- **Output**: The smoothed mzML file. | ||
**Input** | ||
- **Input mzML File**: The input mzML file containing the MSnbase data to be smoothed. | ||
**Output** | ||
- **Output mzML File**: The resulting mzML file after applying the smoothing algorithm. | ||
]]></help> | ||
<expand macro="citations"/> | ||
</tool> |
Oops, something went wrong.