diff --git a/MDANSE/Src/MDANSE/Chemistry/__init__.py b/MDANSE/Src/MDANSE/Chemistry/__init__.py index bc01a1b002..fc4e9ef8e0 100644 --- a/MDANSE/Src/MDANSE/Chemistry/__init__.py +++ b/MDANSE/Src/MDANSE/Chemistry/__init__.py @@ -13,6 +13,9 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . # +import os +import json + from MDANSE.Chemistry.Databases import ( AtomsDatabase, MoleculesDatabase, @@ -27,3 +30,6 @@ RESIDUES_DATABASE = ResiduesDatabase() NUCLEOTIDES_DATABASE = NucleotidesDatabase() + +with open(os.path.join(os.path.dirname(__file__), "residues_alt_names.json"), "r") as f: + RESIDUE_ALT_NAMES = json.load(f) diff --git a/MDANSE/Src/MDANSE/Chemistry/residues_alt_names.json b/MDANSE/Src/MDANSE/Chemistry/residues_alt_names.json new file mode 100644 index 0000000000..e9a7bdda80 --- /dev/null +++ b/MDANSE/Src/MDANSE/Chemistry/residues_alt_names.json @@ -0,0 +1,4 @@ +{ + "HIS": ["HID", "HIE", "HIP"], + "CYS": ["CYS", "CYX"] +} \ No newline at end of file diff --git a/MDANSE/Src/MDANSE/IO/PDB.py b/MDANSE/Src/MDANSE/IO/PDB.py index 93ce550300..22e7d98c55 100644 --- a/MDANSE/Src/MDANSE/IO/PDB.py +++ b/MDANSE/Src/MDANSE/IO/PDB.py @@ -73,12 +73,11 @@ @undocumented: export_filters @undocumented: DummyChain """ - import string import numpy as np -from MDANSE.Chemistry import NUCLEOTIDES_DATABASE, RESIDUES_DATABASE +from MDANSE.Chemistry import NUCLEOTIDES_DATABASE, RESIDUES_DATABASE, RESIDUE_ALT_NAMES from MDANSE.IO.FortranFormat import FortranFormat, FortranLine from MDANSE.IO.PDBExportFilters import export_filters from MDANSE.IO.TextFile import TextFile @@ -490,7 +489,7 @@ def nextResidue(self, name, number=None, terminus=None): name = name.upper() if self.export_filter is not None: name, number = self.export_filter.processResidue(name, number, terminus) - self.het_flag = not (name in RESIDUES_DATABASE or name in NUCLEOTIDES_DATABASE) + self.het_flag = not (name in RESIDUES_DATABASE or name in RESIDUE_ALT_NAMES or name in NUCLEOTIDES_DATABASE) self.data["residue_name"] = name self.data["residue_number"] = (self.data["residue_number"] + 1) % 10000 self.data["insertion_code"] = "" @@ -1052,7 +1051,8 @@ def isCompatible(self, chain_data, residue_data): return ( chain_data["chain_id"] == self.chain_id and chain_data["segment_id"] == self.segment_id - and residue_data["residue_name"] in RESIDUES_DATABASE + and (residue_data["residue_name"] in RESIDUES_DATABASE + or residue_data["residue_name"] in RESIDUE_ALT_NAMES) ) @@ -1097,6 +1097,7 @@ def isCompatible(self, chain_data, residue_data): chain_data["chain_id"] == self.chain_id and chain_data["segment_id"] == self.segment_id and residue_data["residue_name"] not in RESIDUES_DATABASE + and residue_data["residue_name"] not in RESIDUE_ALT_NAMES and residue_data["residue_name"] not in NUCLEOTIDES_DATABASE ) @@ -1279,7 +1280,7 @@ def extractData(self, data): def newResidue(self, residue_data): name = residue_data["residue_name"] residue_number = residue_data["residue_number"] - if name in RESIDUES_DATABASE: + if name in RESIDUES_DATABASE or name in RESIDUE_ALT_NAMES: residue = PDBAminoAcidResidue(name, [], residue_number) elif name in NUCLEOTIDES_DATABASE: residue = PDBNucleotideResidue(name, [], residue_number) diff --git a/MDANSE/Src/MDANSE/IO/PDBReader.py b/MDANSE/Src/MDANSE/IO/PDBReader.py index f73de7115c..3eb00c1dff 100644 --- a/MDANSE/Src/MDANSE/IO/PDBReader.py +++ b/MDANSE/Src/MDANSE/IO/PDBReader.py @@ -37,6 +37,7 @@ MOLECULES_DATABASE, NUCLEOTIDES_DATABASE, RESIDUES_DATABASE, + RESIDUE_ALT_NAMES ) from MDANSE.MolecularDynamics.Configuration import RealConfiguration from MDANSE.IO.PDB import PDBMolecule, PDBNucleotideChain, PDBPeptideChain, Structure @@ -473,17 +474,52 @@ def _process_residue(self, residue): pdb_atoms = [at.name for at in residue] - atoms_found = [None] * len(pdb_atoms) - - for comp, pdb_atom in enumerate(pdb_atoms): - for at, info in RESIDUES_DATABASE[code]["atoms"].items(): - if pdb_atom == at or pdb_atom in info["alternatives"]: - atoms_found[comp] = at + if code in RESIDUE_ALT_NAMES: + # RESIDUES_DATABASE is a many-to-one map multiple codes + # can map to one residue in MDANSE. + # If the code is in RESIDUE_ALT_NAMES then it is a + # many-to-many map. In this case the code can map to many + # different residues. + for new_code in RESIDUE_ALT_NAMES[code]: + # go through each name that code could be + atoms_found = [None] * len(pdb_atoms) + for comp, pdb_atom in enumerate(pdb_atoms): + if len(atoms_found) != len(RESIDUES_DATABASE[new_code]["atoms"].items()): + # different number of atoms break and try the next name + break + for at, info in RESIDUES_DATABASE[new_code]["atoms"].items(): + if pdb_atom == at or pdb_atom in info["alternatives"]: + atoms_found[comp] = at + break + else: + # unable to match the atom in the RESIDUES_DATABASE + # try the next residue name + break + if None not in atoms_found: + # matched all atoms found the residue so break break else: + # went through all alternative names looks like we were + # unable to match the code to the MDANSE residues raise PDBReaderError( - "The atom {}{}:{} is unknown".format(code, residue.number, pdb_atom) + "Unable to fine residue for {}{}".format(code, residue.number) ) + # found the match from the alternate name to the name in + # mdanse lets rename the code + code = new_code + residue.name = code + + else: + atoms_found = [None] * len(pdb_atoms) + for comp, pdb_atom in enumerate(pdb_atoms): + for at, info in RESIDUES_DATABASE[code]["atoms"].items(): + if pdb_atom == at or pdb_atom in info["alternatives"]: + atoms_found[comp] = at + break + else: + raise PDBReaderError( + "The atom {}{}:{} is unknown".format(code, residue.number, pdb_atom) + ) resname = "{}{}".format(code, residue.number) new_residue = Residue(code, resname, variant=None)