Skip to content

Commit

Permalink
feature: add support for parsing ProForma style formulas
Browse files Browse the repository at this point in the history
  • Loading branch information
mobiusklein committed Jul 19, 2024
1 parent 81e2eec commit e840749
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 54 deletions.
6 changes: 6 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@

# Neither target produces a file of the same name; declare them phony so a
# stray file or directory called `all` or `publish-references` can never
# cause make to consider them up to date and skip the recipe.
.PHONY: all publish-references

# Default target: refresh every artifact derived from the reference data.
all: publish-references

# Copy the canonical reference molecule table from the specification into the
# Python package data directory, then run the script that renders the
# Markdown version of the table (executed from the script's own directory).
publish-references:
	cp ./specification/reference_data/reference_molecules.json ./implementations/python/mzpaf/data/reference_molecules.json
	cd ./specification/reference_data/ && python reference_mol_to_md.py
10 changes: 8 additions & 2 deletions implementations/python/mzpaf/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@
except ImportError:
Composition = None
try:
from pyteomics.proforma import ProForma
from pyteomics.proforma import (ProForma, FormulaModification)
except ImportError:
ProForma = None
FormulaModification = None

from .reference import ReferenceMolecule

Expand All @@ -34,7 +35,7 @@
(?P<reference_label>[^\]]+)
\])
))|
(?:f\{(?P<formula>[A-Za-z0-9]+)\})|
(?:f\{(?P<formula>[A-Za-z0-9\[\]]+)\})|
(?:_\{
(?P<named_compound>[^\{\}\s,/]+)
\})|
Expand Down Expand Up @@ -723,6 +724,11 @@ def _populate_from_dict(self, data):
self.formula = descr['formula']
return self

def to_composition(self) -> "Composition":
    """Resolve this annotation's formula into an elemental composition.

    The formula string stored on ``self.formula`` is handed to pyteomics'
    ProForma ``FormulaModification`` and the ``'composition'`` entry of its
    resolved description is returned.

    Returns
    -------
    Composition
        The value stored under ``'composition'`` in the resolved formula.

    Raises
    ------
    ImportError
        If the optional ``pyteomics`` names this method relies on could not
        be imported.
    """
    # `Composition` and `FormulaModification` are imported in separate
    # try/except blocks, so either may be None independently. Check both so
    # a missing ProForma module surfaces as ImportError rather than a
    # "'NoneType' object is not callable" TypeError.
    if Composition is None or FormulaModification is None:
        raise ImportError("Cannot use `to_composition` without `pyteomics`")
    return FormulaModification(self.formula).resolve()['composition']


class SMILESAnnotation(IonAnnotationBase):
__slots__ = ("smiles", )
Expand Down
46 changes: 45 additions & 1 deletion implementations/python/mzpaf/data/reference_molecules.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,228 +27,272 @@
"TMT126": {
"label_type": "TMT",
"molecule_type": "reporter",
"chemical_formula": "C8N1H15",
"ion_mz": 126.127726
},
"TMT127N": {
"label_type": "TMT",
"molecule_type": "reporter",
"chemical_formula": "C8[15N1]H15",
"ion_mz": 127.124761
},
"TMT127C": {
"label_type": "TMT",
"molecule_type": "reporter",
"chemical_formula": "C7[13C1]N1H15",
"ion_mz": 127.131081
},
"TMT128N": {
"label_type": "TMT",
"molecule_type": "reporter",
"chemical_formula": "C7[13C1][15N1]H15",
"ion_mz": 128.128116
},
"TMT128C": {
"label_type": "TMT",
"molecule_type": "reporter",
"chemical_formula": "C6[13C2]N1H15",
"ion_mz": 128.134436
},
"TMT129N": {
"label_type": "TMT",
"molecule_type": "reporter",
"chemical_formula": "C6[13C2][15N1]H15",
"ion_mz": 129.131471
},
"TMT129C": {
"label_type": "TMT",
"molecule_type": "reporter",
"chemical_formula": "C5[13C3]N1H15",
"ion_mz": 129.13779
},
"TMT130N": {
"label_type": "TMT",
"molecule_type": "reporter",
"chemical_formula": "C5[13C3][15N1]H15",
"ion_mz": 130.134825
},
"TMT130C": {
"label_type": "TMT",
"molecule_type": "reporter",
"chemical_formula": "C4[13C4]N1H15",
"ion_mz": 130.141145
},
"TMT131N": {
"label_type": "TMT",
"molecule_type": "reporter",
"chemical_formula": "C4[13C4][15N1]H15",
"ion_mz": 131.13818
},
"TMT131C": {
"label_type": "TMT",
"molecule_type": "reporter",
"chemical_formula": "C3[13C5]N1H15",
"ion_mz": 131.1445
},
"TMT132N": {
"label_type": "TMT",
"molecule_type": "reporter",
"chemical_formula": "C3[13C5][15N1]H15",
"ion_mz": 132.141535
},
"TMT132C": {
"label_type": "TMT",
"molecule_type": "reporter",
"chemical_formula": "C2[13C6]N1H15",
"ion_mz": 132.147855
},
"TMT133N": {
"label_type": "TMT",
"molecule_type": "reporter",
"chemical_formula": "C2[13C6][15N1]H15",
"ion_mz": 133.14489
},
"TMT133C": {
"label_type": "TMT",
"molecule_type": "reporter",
"chemical_formula": "C1[13C7]N1H15",
"ion_mz": 133.15121
},
"TMT134N": {
"label_type": "TMT",
"molecule_type": "reporter",
"chemical_formula": "C1[13C7][15N1]H15",
"ion_mz": 134.148245
},
"TMT134C": {
"label_type": "TMT",
"molecule_type": "reporter",
"chemical_formula": "[13C8]N1H15",
"ion_mz": 134.154565
},
"TMT135N": {
"label_type": "TMT",
"molecule_type": "reporter",
"chemical_formula": "[13C8][15N1]H15",
"ion_mz": 135.1516
},
"TMTzero": {
"label_type": "TMTzero",
"molecule_type": "reporter+balance",
"chemical_formula": "C12H20N2O2",
"neutral_mass": 224.152478,
"ion_mz": 225.15975447
},
"TMTpro_zero": {
"label_type": "TMTpro_zero",
"molecule_type": "reporter+balance",
"chemical_formula": "C15H25N3O3",
"neutral_mass": 295.189592,
"ion_mz": 296.1968685
},
"TMT2plex": {
"label_type": "TMT2plex",
"molecule_type": "reporter+balance",
"chemical_formula": "C11[13C1]H20N2O2",
"neutral_mass": 225.155833,
"ion_mz": 226.16310947
},
"TMT6plex": {
"label_type": "TMT6plex",
"molecule_type": "reporter+balance",
"chemical_formula": "C8[13C5]H20N1[15N1]O2",
"neutral_mass": 229.162932,
"ion_mz": 230.17020847
},
"TMTpro": {
"label_type": "TMTpro",
"molecule_type": "reporter+balance",
"chemical_formula": "C8[13C7]H25[15N2]N1O3",
"neutral_mass": 304.207146,
"ion_mz": 305.21442247
},
"iTRAQ113": {
"label_type": "iTRAQ",
"molecule_type": "reporter",
"chemical_formula": "C6N2H12",
"ion_mz": 113.1078
},
"iTRAQ114": {
"label_type": "iTRAQ",
"molecule_type": "reporter",
"chemical_formula": "C5[13C1]N2H12",
"ion_mz": 114.1112
},
"iTRAQ115": {
"label_type": "iTRAQ",
"molecule_type": "reporter",
"chemical_formula": "C5[13C1]N1[15N1]H12",
"ion_mz": 115.1082
},
"iTRAQ116": {
"label_type": "iTRAQ",
"molecule_type": "reporter",
"chemical_formula": "C4[13C2]N1[15N1]H12",
"ion_mz": 116.1116
},
"iTRAQ117": {
"label_type": "iTRAQ",
"molecule_type": "reporter",
"chemical_formula": "C3[13C3]N1[15N1]H12",
"ion_mz": 117.1149
},
"iTRAQ118": {
"label_type": "iTRAQ",
"molecule_type": "reporter",
"chemical_formula": "C3[13C3][15N2]H12",
"ion_mz": 118.112
},
"iTRAQ119": {
"label_type": "iTRAQ",
"molecule_type": "reporter",
"chemical_formula": "C2[13C4][15N2]H12",
"ion_mz": 119.1153
},
"iTRAQ121": {
"label_type": "iTRAQ",
"molecule_type": "reporter",
"chemical_formula": "[13C6][15N2]H12",
"ion_mz": 121.122
},
"iTRAQ4plex": {
"label_type": "iTRAQ4plex",
"molecule_type": "reporter+balance",
"chemical_formula": "C4[13C3]N1[15N1]O1H12",
"neutral_mass": 144.102063,
"ion_mz": 145.10933947
},
"iTRAQ8plex": {
"label_type": "iTRAQ8plex",
"molecule_type": "reporter+balance",
"chemical_formula": "C7[13C7]N3[15N1]O3H24",
"neutral_mass": 304.205360,
"ion_mz": 305.21263647
},
"TMT126-ETD": {
"label_type": "TMT",
"molecule_type": "reporter",
"chemical_formula": "C7N1H15",
"ion_mz": 114.127725
},
"TMT127N-ETD": {
"label_type": "TMT",
"molecule_type": "reporter",
"chemical_formula": "C7[15N1]H15",
"ion_mz": 115.12476
},
"TMT127C-ETD": {
"label_type": "TMT",
"molecule_type": "reporter",
"chemical_formula": "C6[13C1]N1H15",
"ion_mz": 114.127725
},
"TMT128N-ETD": {
"label_type": "TMT",
"molecule_type": "reporter",
"chemical_formula": "C6[13C1][15N1]H15",
"ion_mz": 115.12476
},
"TMT128C-ETD": {
"label_type": "TMT",
"molecule_type": "reporter",
"chemical_formula": "C5[13C2]N1H15",
"ion_mz": 116.134433
},
"TMT129N-ETD": {
"label_type": "TMT",
"molecule_type": "reporter",
"chemical_formula": "C5[13C2][15N1]H15",
"ion_mz": 117.131468
},
"TMT129C-ETD": {
"label_type": "TMT",
"molecule_type": "reporter",
"chemical_formula": "C4[13C3]N1H15",
"ion_mz": 116.134433
},
"TMT130N-ETD": {
"label_type": "TMT",
"molecule_type": "reporter",
"chemical_formula": "C4[13C3][15N1]H15",
"ion_mz": 117.131468
},
"TMT130C-ETD": {
"label_type": "TMT",
"molecule_type": "reporter",
"chemical_formula": "C3[13C4]N1H15",
"ion_mz": 118.141141
},
"TMT131N-ETD": {
"label_type": "TMT",
"molecule_type": "reporter",
"chemical_formula": "C3[13C4][15N1]H15",
"ion_mz": 119.138176
},
"TMT131C-ETD": {
"label_type": "TMT",
"molecule_type": "reporter",
"chemical_formula": "C2[13C5]N1H15",
"ion_mz": 118.141141
}
}
}
Loading

0 comments on commit e840749

Please sign in to comment.