Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

matchms_filtering: Added derive_precursor_mz_from_parent_mass filter #475

Merged
merged 5 commits into from
Dec 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion tools/matchms/matchms_filtering.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<tool id="matchms_filtering" name="matchms filtering" version="@TOOL_VERSION@+galaxy0" profile="21.09">
<tool id="matchms_filtering" name="matchms filtering" version="@TOOL_VERSION@+galaxy1" profile="21.09">
<description>filter and normalize mass spectrometry data</description>

<macros>
Expand Down Expand Up @@ -55,6 +55,10 @@
#if $require_inchi_is_true == "TRUE"
-require_inchi \
#end if
#if $derive_precursor_mz_from_parent_mass.is_true == "TRUE"
-derive_precursor_mz_from_parent_mass \
--estimate_from_adduct "${derive_precursor_mz_from_parent_mass.estimate_from_adduct}" \
#end if
#if $reduce_to_top_n_peaks.is_true == "TRUE"
-reduce_to_top_n_peaks \
--n_max "$reduce_to_top_n_peaks.n_max" \
Expand Down Expand Up @@ -104,6 +108,18 @@
<param name="require_inchi_is_true" label="Require INCHI" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false"
help="Remove spectra that does not contain INCHI." />

<conditional name="derive_precursor_mz_from_parent_mass">
    <!-- Optional filter: when enabled, the wrapper is invoked with -derive_precursor_mz_from_parent_mass
         and the nested boolean is forwarded as the value of the estimate_from_adduct option. -->
    <param name="is_true" label="Derive precursor_mz from parent_mass" type="select"
           help="Derives the precursor_mz from the parent mass and adduct or charge.">
        <option value="FALSE" selected="true">FALSE</option>
        <option value="TRUE">TRUE</option>
    </param>
    <when value="TRUE">
        <!-- The rendered value is substituted into the quoted estimate_from_adduct option on the command
             line. matchms_filtering_wrapper.py parses that option with argparse type=str and passes the
             string to the filter unchanged. Any non-empty string, including "FALSE", is truthy in Python,
             so the unchecked state has to render as an empty string for the option to really be off.
             NOTE(review): assumes the matchms helper only truth-tests this argument; confirm. -->
        <param label="Estimate from adduct" name="estimate_from_adduct" type="boolean" truevalue="TRUE" falsevalue="" checked="false"
               help="When checked, the adduct is used to estimate the precursor_mz." />
    </when>
    <when value="FALSE"></when>
</conditional>

<conditional name="reduce_to_top_n_peaks">
<param name="is_true" label="Reduce to top n peaks" type="select"
help="Lowest intensity peaks will be removed when it has more peaks than desired.">
Expand Down Expand Up @@ -174,6 +190,14 @@
</section>
<output name="output" file="filtering/reduce_to_top_n_peaks.msp" ftype="msp"/>
</test>
<test>
<!-- Covers the derive_precursor_mz_from_parent_mass filter with estimate_from_adduct enabled:
     filters filtering/derive_precursor_mz.msp and compares the result with
     filtering/derive_precursor_mz_out.msp.
     NOTE(review): only the estimate_from_adduct=TRUE path is exercised here; there is no test
     for the unchecked state. -->
<param name="spectra" value="filtering/derive_precursor_mz.msp" ftype="msp"/>
<section name="derive_precursor_mz_from_parent_mass">
<param name="is_true" value="TRUE"/>
<param name="estimate_from_adduct" value="TRUE"/>
</section>
<output name="output" file="filtering/derive_precursor_mz_out.msp" ftype="msp"/>
</test>
</tests>

<help>
Expand Down
10 changes: 10 additions & 0 deletions tools/matchms/matchms_filtering_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
add_retention_index, add_retention_time, clean_compound_name
from matchms.filtering import default_filters, normalize_intensities, reduce_to_number_of_peaks, select_by_mz, \
select_by_relative_intensity
from matchms.filtering.filter_utils.derive_precursor_mz_and_parent_mass import derive_precursor_mz_from_parent_mass
from matchms.importing import load_from_mgf, load_from_msp


Expand Down Expand Up @@ -39,6 +40,9 @@ def main(argv):
help="Remove spectra that does not contain SMILES.")
parser.add_argument("-require_inchi", action='store_true',
help="Remove spectra that does not contain INCHI.")
parser.add_argument("-derive_precursor_mz_from_parent_mass", action='store_true',
help="Derives the precursor_mz from the parent mass and adduct or charge.")
parser.add_argument("--estimate_from_adduct", type=str, help="estimate from adduct.")
parser.add_argument("-reduce_to_top_n_peaks", action='store_true',
help="reduce to top n peaks filter.")
parser.add_argument("--n_max", type=int, help="Maximum number of peaks. Remove peaks if more peaks are found.")
Expand All @@ -51,6 +55,7 @@ def main(argv):
or args.mz_range
or args.require_smiles
or args.require_inchi
or args.derive_precursor_mz_from_parent_mass
or args.reduce_to_top_n_peaks):
raise ValueError('No filter selected.')

Expand Down Expand Up @@ -84,6 +89,11 @@ def main(argv):
if args.reduce_to_top_n_peaks:
spectrum = reduce_to_number_of_peaks(spectrum_in=spectrum, n_max=args.n_max)

if args.derive_precursor_mz_from_parent_mass:
spectrum.set("parent_mass", float(spectrum.get('parent_mass')))
precursor_mz = derive_precursor_mz_from_parent_mass(spectrum, args.estimate_from_adduct)
spectrum.set("precursor_mz", precursor_mz)

if args.require_smiles and spectrum is not None:
spectrum = require_key(spectrum, "smiles")

Expand Down
48 changes: 48 additions & 0 deletions tools/matchms/test-data/filtering/derive_precursor_mz.msp
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
SCANNUMBER: -1
IONMODE: positive
SPECTRUMTYPE: Centroid
FORMULA: C20H12
INCHIKEY: CSHWQDPOILHKBI-UHFFFAOYSA-N
SMILES: C1=CC2=C3C(=C1)C1=CC=CC4=C1C(=CC=C4)C3=CC=C2
AUTHORS: Price et al., RECETOX, Masaryk University (CZ)
INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS
IONIZATION: EI+
LICENSE: CC BY-NC
COMPOUND_NAME: Perylene
RETENTION_TIME: None
RETENTION_INDEX: 2886.9
ADDUCT: [M]+
COLLISION_ENERGY: 70eV
INSTRUMENT_TYPE: GC-EI-Orbitrap
CHARGE: 1
PARENT_MASS: 251.08595400000002
NUM PEAKS: 3
250.07765 0.3282529462971431
252.09323 1.0
253.09656 0.20573802940517583

SCANNUMBER: -1
IONMODE: positive
SPECTRUMTYPE: Centroid
FORMULA: C14H10
INCHIKEY: YNPNZTXNASCQKK-UHFFFAOYSA-N
SMILES: C1=CC2=C(C=C1)C1=C(C=CC=C1)C=C2
AUTHORS: Price et al., RECETOX, Masaryk University (CZ)
INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS
IONIZATION: EI+
LICENSE: CC BY-NC
COMPOUND_NAME: Phenanthrene
RETENTION_TIME: None
RETENTION_INDEX: 1832.9
ADDUCT: [M]+
COLLISION_ENERGY: 70eV
INSTRUMENT_TYPE: GC-EI-Orbitrap
CHARGE: 1
PARENT_MASS: 177.070224
NUM PEAKS: 5
152.0619 0.1657993569424221
176.062 0.24558560966311757
177.06982 0.12764433529926775
178.0775 1.0
179.08078 0.16394988149600653

50 changes: 50 additions & 0 deletions tools/matchms/test-data/filtering/derive_precursor_mz_out.msp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
SCANNUMBER: -1
IONMODE: positive
SPECTRUMTYPE: Centroid
FORMULA: C20H12
INCHIKEY: CSHWQDPOILHKBI-UHFFFAOYSA-N
SMILES: C1=CC2=C3C(=C1)C1=CC=CC4=C1C(=CC=C4)C3=CC=C2
AUTHORS: Price et al., RECETOX, Masaryk University (CZ)
INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS
IONIZATION: EI+
LICENSE: CC BY-NC
COMPOUND_NAME: Perylene
RETENTION_TIME: None
RETENTION_INDEX: 2886.9
ADDUCT: [M]+
COLLISION_ENERGY: 70eV
INSTRUMENT_TYPE: GC-EI-Orbitrap
CHARGE: 1
PARENT_MASS: 251.08595400000002
PRECURSOR_MZ: 251.08540542009078
NUM PEAKS: 3
250.07765 0.3282529462971431
252.09323 1.0
253.09656 0.20573802940517583

SCANNUMBER: -1
IONMODE: positive
SPECTRUMTYPE: Centroid
FORMULA: C14H10
INCHIKEY: YNPNZTXNASCQKK-UHFFFAOYSA-N
SMILES: C1=CC2=C(C=C1)C1=C(C=CC=C1)C=C2
AUTHORS: Price et al., RECETOX, Masaryk University (CZ)
INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS
IONIZATION: EI+
LICENSE: CC BY-NC
COMPOUND_NAME: Phenanthrene
RETENTION_TIME: None
RETENTION_INDEX: 1832.9
ADDUCT: [M]+
COLLISION_ENERGY: 70eV
INSTRUMENT_TYPE: GC-EI-Orbitrap
CHARGE: 1
PARENT_MASS: 177.070224
PRECURSOR_MZ: 177.06967542009076
NUM PEAKS: 5
152.0619 0.1657993569424221
176.062 0.24558560966311757
177.06982 0.12764433529926775
178.0775 1.0
179.08078 0.16394988149600653