Skip to content

Commit

Permalink
incorporate reference molecules
Browse files Browse the repository at this point in the history
  • Loading branch information
mobiusklein committed Sep 10, 2023
1 parent 6803fc8 commit 5f25749
Show file tree
Hide file tree
Showing 5 changed files with 312 additions and 9 deletions.
24 changes: 23 additions & 1 deletion implementations/python/mzpaf/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,15 @@
import re
from sys import intern
from typing import Any, List, Optional, Pattern, Dict, Tuple, Union
import warnings

try:
from pyteomics.proforma import ProForma
except ImportError:
ProForma = None

from .reference import ReferenceMolecule

JSONDict = Dict[str, Union[List, Dict, int, float, str, bool, None]]

annotation_pattern = re.compile(r"""
Expand Down Expand Up @@ -602,22 +605,41 @@ class ReferenceIonAnnotation(IonAnnotationBase):
The reference identifier.
"""

__slots__ = ("reference", )
__slots__ = ("_reference", "reference_molecule")

series_label = "reference"
_molecule_description_fields = {
"reference": "The molecule refernce identifier"
}

_reference: str
reference: str
reference_molecule: Optional[ReferenceMolecule]

def __init__(
    self,
    series,
    reference,
    neutral_losses=None,
    isotope=None,
    adducts=None,
    charge=None,
    analyte_reference=None,
    mass_error=None,
    confidence=None,
    rest=None,
    is_auxiliary=None,
):
    """Initialize the annotation and record its reference identifier.

    Every argument except ``reference`` is forwarded positionally, in the
    order shown below, to the parent class initializer. ``reference`` is
    assigned through the ``reference`` property.
    """
    super().__init__(
        series,
        neutral_losses,
        isotope,
        adducts,
        charge,
        analyte_reference,
        mass_error,
        confidence,
        rest,
        is_auxiliary,
    )
    # Give the backing slot a value first, then assign through the
    # property so its setter logic runs as well.
    self._reference = None
    self.reference = reference

@property
def reference(self) -> str:
    """The reference identifier stored on this annotation."""
    return self._reference

@reference.setter
def reference(self, value):
    # Store the identifier, then refresh ``reference_molecule`` to match it.
    # ``None`` clears the molecule; an identifier that
    # ``ReferenceMolecule.get`` rejects with ``KeyError`` emits a warning
    # and also leaves the molecule as ``None``.
    self._reference = value
    molecule = None
    if value is not None:
        try:
            molecule = ReferenceMolecule.get(value)
        except KeyError:
            warnings.warn(f"Could not find a reference entry for {value}")
    self.reference_molecule = molecule

def _format_ion(self):
return f"r[{self.reference}]"

Expand Down
Empty file.
254 changes: 254 additions & 0 deletions implementations/python/mzpaf/data/reference_molecules.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,254 @@
{
"Hex": {
"molecule_type": "monosaccharide",
"neutral_mass": 162.0528234185,
"chemical_formula": "C6H10O5"
},
"HexNAc": {
"molecule_type": "monosaccharide",
"neutral_mass": 203.07937251951,
"chemical_formula": "C8H13N1O5"
},
"dHex": {
"molecule_type": "monosaccharide",
"neutral_mass": 146.05790879894,
"chemical_formula": "C6H10O4"
},
"NeuAc": {
"molecule_type": "monosaccharide",
"neutral_mass": 291.09541650647,
"chemical_formula": "C11H17N1O8"
},
"NeuGc": {
"molecule_type": "monosaccharide",
"neutral_mass": 307.09033112603,
"chemical_formula": "C11H17N1O9"
},
"TMT126": {
"label_type": "TMT",
"molecule_type": "reporter",
"ion_mz": 126.127726
},
"TMT127N": {
"label_type": "TMT",
"molecule_type": "reporter",
"ion_mz": 127.124761
},
"TMT127C": {
"label_type": "TMT",
"molecule_type": "reporter",
"ion_mz": 127.131081
},
"TMT128N": {
"label_type": "TMT",
"molecule_type": "reporter",
"ion_mz": 128.128116
},
"TMT128C": {
"label_type": "TMT",
"molecule_type": "reporter",
"ion_mz": 128.134436
},
"TMT129N": {
"label_type": "TMT",
"molecule_type": "reporter",
"ion_mz": 129.131471
},
"TMT129C": {
"label_type": "TMT",
"molecule_type": "reporter",
"ion_mz": 129.13779
},
"TMT130N": {
"label_type": "TMT",
"molecule_type": "reporter",
"ion_mz": 130.134825
},
"TMT130C": {
"label_type": "TMT",
"molecule_type": "reporter",
"ion_mz": 130.141145
},
"TMT131N": {
"label_type": "TMT",
"molecule_type": "reporter",
"ion_mz": 131.13818
},
"TMT131C": {
"label_type": "TMT",
"molecule_type": "reporter",
"ion_mz": 131.1445
},
"TMT132N": {
"label_type": "TMT",
"molecule_type": "reporter",
"ion_mz": 132.141535
},
"TMT132C": {
"label_type": "TMT",
"molecule_type": "reporter",
"ion_mz": 132.147855
},
"TMT133N": {
"label_type": "TMT",
"molecule_type": "reporter",
"ion_mz": 133.14489
},
"TMT133C": {
"label_type": "TMT",
"molecule_type": "reporter",
"ion_mz": 133.15121
},
"TMT134N": {
"label_type": "TMT",
"molecule_type": "reporter",
"ion_mz": 134.148245
},
"TMT134C": {
"label_type": "TMT",
"molecule_type": "reporter",
"ion_mz": 134.154565
},
"TMT135N": {
"label_type": "TMT",
"molecule_type": "reporter",
"ion_mz": 135.1516
},
"TMTzero": {
"label_type": "TMTzero",
"molecule_type": "reporter+balance",
"neutral_mass": 224.152478,
"ion_mz": 225.15975447
},
"TMTpro_zero": {
"label_type": "TMTpro_zero",
"molecule_type": "reporter+balance",
"neutral_mass": 295.189592,
"ion_mz": 296.1968685
},
"TMT2plex": {
"label_type": "TMT2plex",
"molecule_type": "reporter+balance",
"neutral_mass": 225.155833,
"ion_mz": 226.16310947
},
"TMT6plex": {
"label_type": "TMT6plex",
"molecule_type": "reporter+balance",
"neutral_mass": 229.162932,
"ion_mz": 230.17020847
},
"TMTpro": {
"label_type": "TMTpro",
"molecule_type": "reporter+balance",
"neutral_mass": 304.207146,
"ion_mz": 305.21442247
},
"iTRAQ113": {
"label_type": "iTRAQ",
"molecule_type": "reporter",
"ion_mz": 113.1078
},
"iTRAQ114": {
"label_type": "iTRAQ",
"molecule_type": "reporter",
"ion_mz": 114.1112
},
"iTRAQ115": {
"label_type": "iTRAQ",
"molecule_type": "reporter",
"ion_mz": 115.1082
},
"iTRAQ116": {
"label_type": "iTRAQ",
"molecule_type": "reporter",
"ion_mz": 116.1116
},
"iTRAQ117": {
"label_type": "iTRAQ",
"molecule_type": "reporter",
"ion_mz": 117.1149
},
"iTRAQ118": {
"label_type": "iTRAQ",
"molecule_type": "reporter",
"ion_mz": 118.112
},
"iTRAQ119": {
"label_type": "iTRAQ",
"molecule_type": "reporter",
"ion_mz": 119.1153
},
"iTRAQ121": {
"label_type": "iTRAQ",
"molecule_type": "reporter",
"ion_mz": 121.122
},
"iTRAQ4plex": {
"label_type": "iTRAQ4plex",
"molecule_type": "reporter+balance",
"neutral_mass": 144.102063,
"ion_mz": 145.10933947
},
"iTRAQ8plex": {
"label_type": "iTRAQ8plex",
"molecule_type": "reporter+balance",
"neutral_mass": 304.205360,
"ion_mz": 305.21263647
},
"TMT126-ETD": {
"label_type": "TMT",
"molecule_type": "reporter",
"ion_mz": 114.127725
},
"TMT127N-ETD": {
"label_type": "TMT",
"molecule_type": "reporter",
"ion_mz": 115.12476
},
"TMT127C-ETD": {
"label_type": "TMT",
"molecule_type": "reporter",
"ion_mz": 114.127725
},
"TMT128N-ETD": {
"label_type": "TMT",
"molecule_type": "reporter",
"ion_mz": 115.12476
},
"TMT128C-ETD": {
"label_type": "TMT",
"molecule_type": "reporter",
"ion_mz": 116.134433
},
"TMT129N-ETD": {
"label_type": "TMT",
"molecule_type": "reporter",
"ion_mz": 117.131468
},
"TMT129C-ETD": {
"label_type": "TMT",
"molecule_type": "reporter",
"ion_mz": 116.134433
},
"TMT130N-ETD": {
"label_type": "TMT",
"molecule_type": "reporter",
"ion_mz": 117.131468
},
"TMT130C-ETD": {
"label_type": "TMT",
"molecule_type": "reporter",
"ion_mz": 118.141141
},
"TMT131N-ETD": {
"label_type": "TMT",
"molecule_type": "reporter",
"ion_mz": 119.138176
},
"TMT131C-ETD": {
"label_type": "TMT",
"molecule_type": "reporter",
"ion_mz": 118.141141
}
}
37 changes: 29 additions & 8 deletions implementations/python/mzpaf/reference.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@

import json
from importlib.resources import open_text

from dataclasses import dataclass, field, asdict
from typing import Any, List, Optional, Pattern, Dict, Tuple, Type, Union
from typing import Any, List, Optional, Pattern, Dict, Tuple, Type, Union, ClassVar

PROTON = 1.00727646677

Expand All @@ -14,26 +14,31 @@ def _mass_charge_ratio(neutral_mass, z, charge_carrier=PROTON):
return (neutral_mass + (z * charge_carrier)) / abs(z)


@dataclass
@dataclass(frozen=True)
class ReferenceMolecule:
name: str
molecule_type: str
label_type: str = field(default=None)
cv_term: Optional[str] = field(default=None)
label_type: Optional[str] = field(default=None)
neutral_mass: float = field(default=None)
ion_mz: float = field(default=None)
chemical_formula: str = field(default=None)
ion_chemical_formula: str = field(default=None)
chemical_formula: Optional[str] = field(default=None)
ion_chemical_formula: Optional[str] = field(default=None)
references: List[str] = field(default_factory=list)

_registry: ClassVar[Dict[str, 'ReferenceMolecule']] = None

def __post_init__(self):
    """Fill in whichever of ``neutral_mass`` / ``ion_mz`` was not supplied.

    If only ``ion_mz`` is given, ``neutral_mass`` is computed with
    ``_neutral_mass(ion_mz, 1)``. If only ``neutral_mass`` is given,
    ``ion_mz`` is computed with ``_mass_charge_ratio(neutral_mass, 1)``.
    Values that were supplied are never overwritten.

    Raises:
        ValueError: if neither ``neutral_mass`` nor ``ion_mz`` is provided.
    """
    if self.neutral_mass is None and self.ion_mz is None:
        raise ValueError("Must provide at least one of `neutral_mass` or `ion_mz`!")

    # The dataclass is frozen, so derived values have to be written with
    # ``object.__setattr__`` rather than ordinary attribute assignment.
    if self.neutral_mass is None:
        object.__setattr__(self, "neutral_mass",
                           _neutral_mass(self.ion_mz, 1))
    elif self.ion_mz is None:
        # The derived m/z belongs in ``ion_mz``; writing it to
        # ``neutral_mass`` would corrupt the mass and leave ``ion_mz`` unset.
        object.__setattr__(self, "ion_mz",
                           _mass_charge_ratio(self.neutral_mass, 1))

Expand All @@ -44,6 +49,22 @@ def to_dict(self):
def from_dict(cls, state, **kwargs):
return cls(**state)

@classmethod
def _load_registry(cls):
    """Load the packaged reference molecules into ``cls._registry``.

    Reads ``reference_molecules.json`` from the ``mzpaf.data`` package and
    builds one instance per entry via ``cls.from_dict``, using the entry's
    JSON key as its ``name``.

    The mapping is built in a local variable and assigned to
    ``cls._registry`` only once every entry has been constructed. If
    loading fails part-way, ``_registry`` keeps its previous value instead
    of being left half-populated.
    """
    with open_text("mzpaf.data", "reference_molecules.json") as stream:
        data = json.load(stream)
    registry = {
        # The JSON key is authoritative for the name, so it is applied last.
        key: cls.from_dict({**entry, "name": key})
        for key, entry in data.items()
    }
    cls._registry = registry

@classmethod
def get(cls, name: str) -> 'ReferenceMolecule':
if cls._registry is None:
cls._load_registry()
return cls._registry[name]



def load_json(stream) -> Dict[str, ReferenceMolecule]:
if isinstance(stream, dict):
Expand Down
6 changes: 6 additions & 0 deletions specification/reference_data/reference_molecule_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@
"type": "string",
"description": "The formal name for this molecule by which it should be referenced"
},
"cv_term": {
"type": "array",
"items": {
"type":"string"
}
},
"neutral_mass": {
"type": "number",
"description": "The neutral mass of the molecule not including any charge or charge carrier"
Expand Down

0 comments on commit 5f25749

Please sign in to comment.