Skip to content

Commit

Permalink
Merge pull request #428 from ISISNeutronMuon/chi/pdb-fix
Browse files Browse the repository at this point in the history
Fix pdb conversion
  • Loading branch information
MBartkowiakSTFC authored Apr 30, 2024
2 parents a4424f1 + 834f793 commit 3694932
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 12 deletions.
6 changes: 6 additions & 0 deletions MDANSE/Src/MDANSE/Chemistry/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
import os
import json

from MDANSE.Chemistry.Databases import (
AtomsDatabase,
MoleculesDatabase,
Expand All @@ -27,3 +30,6 @@
RESIDUES_DATABASE = ResiduesDatabase()

NUCLEOTIDES_DATABASE = NucleotidesDatabase()

with open(os.path.join(os.path.dirname(__file__), "residues_alt_names.json"), "r") as f:
RESIDUE_ALT_NAMES = json.load(f)
4 changes: 4 additions & 0 deletions MDANSE/Src/MDANSE/Chemistry/residues_alt_names.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"HIS": ["HID", "HIE", "HIP"],
"CYS": ["CYS", "CYX"]
}
17 changes: 12 additions & 5 deletions MDANSE/Src/MDANSE/IO/PDB.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,11 @@
@undocumented: export_filters
@undocumented: DummyChain
"""

import string

import numpy as np

from MDANSE.Chemistry import NUCLEOTIDES_DATABASE, RESIDUES_DATABASE
from MDANSE.Chemistry import NUCLEOTIDES_DATABASE, RESIDUES_DATABASE, RESIDUE_ALT_NAMES
from MDANSE.IO.FortranFormat import FortranFormat, FortranLine
from MDANSE.IO.PDBExportFilters import export_filters
from MDANSE.IO.TextFile import TextFile
Expand Down Expand Up @@ -490,7 +489,11 @@ def nextResidue(self, name, number=None, terminus=None):
name = name.upper()
if self.export_filter is not None:
name, number = self.export_filter.processResidue(name, number, terminus)
self.het_flag = not (name in RESIDUES_DATABASE or name in NUCLEOTIDES_DATABASE)
self.het_flag = not (
name in RESIDUES_DATABASE
or name in RESIDUE_ALT_NAMES
or name in NUCLEOTIDES_DATABASE
)
self.data["residue_name"] = name
self.data["residue_number"] = (self.data["residue_number"] + 1) % 10000
self.data["insertion_code"] = ""
Expand Down Expand Up @@ -1052,7 +1055,10 @@ def isCompatible(self, chain_data, residue_data):
return (
chain_data["chain_id"] == self.chain_id
and chain_data["segment_id"] == self.segment_id
and residue_data["residue_name"] in RESIDUES_DATABASE
and (
residue_data["residue_name"] in RESIDUES_DATABASE
or residue_data["residue_name"] in RESIDUE_ALT_NAMES
)
)


Expand Down Expand Up @@ -1097,6 +1103,7 @@ def isCompatible(self, chain_data, residue_data):
chain_data["chain_id"] == self.chain_id
and chain_data["segment_id"] == self.segment_id
and residue_data["residue_name"] not in RESIDUES_DATABASE
and residue_data["residue_name"] not in RESIDUE_ALT_NAMES
and residue_data["residue_name"] not in NUCLEOTIDES_DATABASE
)

Expand Down Expand Up @@ -1279,7 +1286,7 @@ def extractData(self, data):
def newResidue(self, residue_data):
name = residue_data["residue_name"]
residue_number = residue_data["residue_number"]
if name in RESIDUES_DATABASE:
if name in RESIDUES_DATABASE or name in RESIDUE_ALT_NAMES:
residue = PDBAminoAcidResidue(name, [], residue_number)
elif name in NUCLEOTIDES_DATABASE:
residue = PDBNucleotideResidue(name, [], residue_number)
Expand Down
54 changes: 47 additions & 7 deletions MDANSE/Src/MDANSE/IO/PDBReader.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
MOLECULES_DATABASE,
NUCLEOTIDES_DATABASE,
RESIDUES_DATABASE,
RESIDUE_ALT_NAMES,
)
from MDANSE.MolecularDynamics.Configuration import RealConfiguration
from MDANSE.IO.PDB import PDBMolecule, PDBNucleotideChain, PDBPeptideChain, Structure
Expand Down Expand Up @@ -473,17 +474,56 @@ def _process_residue(self, residue):

pdb_atoms = [at.name for at in residue]

atoms_found = [None] * len(pdb_atoms)

for comp, pdb_atom in enumerate(pdb_atoms):
for at, info in RESIDUES_DATABASE[code]["atoms"].items():
if pdb_atom == at or pdb_atom in info["alternatives"]:
atoms_found[comp] = at
if code in RESIDUE_ALT_NAMES:
# RESIDUES_DATABASE is a many-to-one map multiple codes
# can map to one residue in MDANSE.
# If the code is in RESIDUE_ALT_NAMES then it is a
# many-to-many map. In this case the code can map to many
# different residues.
for new_code in RESIDUE_ALT_NAMES[code]:
# go through each name that code could be
atoms_found = [None] * len(pdb_atoms)
for comp, pdb_atom in enumerate(pdb_atoms):
if len(atoms_found) != len(
RESIDUES_DATABASE[new_code]["atoms"].items()
):
# different number of atoms break and try the next name
break
for at, info in RESIDUES_DATABASE[new_code]["atoms"].items():
if pdb_atom == at or pdb_atom in info["alternatives"]:
atoms_found[comp] = at
break
else:
# unable to match the atom in the RESIDUES_DATABASE
# try the next residue name
break
if None not in atoms_found:
# matched all atoms found the residue so break
break
else:
# went through all alternative names looks like we were
# unable to match the code to the MDANSE residues
raise PDBReaderError(
"The atom {}{}:{} is unknown".format(code, residue.number, pdb_atom)
"Unable to find residue for {}{}".format(code, residue.number)
)
# found the match from the alternate name to the name in
# mdanse lets rename the code
code = new_code
residue.name = code

else:
atoms_found = [None] * len(pdb_atoms)
for comp, pdb_atom in enumerate(pdb_atoms):
for at, info in RESIDUES_DATABASE[code]["atoms"].items():
if pdb_atom == at or pdb_atom in info["alternatives"]:
atoms_found[comp] = at
break
else:
raise PDBReaderError(
"The atom {}{}:{} is unknown".format(
code, residue.number, pdb_atom
)
)

resname = "{}{}".format(code, residue.number)
new_residue = Residue(code, resname, variant=None)
Expand Down

0 comments on commit 3694932

Please sign in to comment.