Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SMILES naming fix and associated tests #236

Merged
merged 2 commits into from
Dec 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion chai_lab/data/sources/rdkit.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# See the LICENSE file for details.

import logging
from collections import defaultdict
from pathlib import Path

import antipickle
Expand Down Expand Up @@ -157,8 +158,13 @@ def generate(self, smiles: str) -> ConformerData:

AllChem.EmbedMultipleConfs(mol_with_hs, numConfs=1, params=params)
AllChem.RemoveHs(mol_with_hs)

element_counter: dict = defaultdict(int)
for atom in mol_with_hs.GetAtoms():
atom.SetProp("name", atom.GetSymbol())
elem = atom.GetSymbol()
element_counter[elem] += 1 # Start each counter at 1
atom.SetProp("name", elem + str(element_counter[elem]))

retval = self._load_ref_conformer_from_rdkit(mol_with_hs)
retval.atom_names = [a.upper() for a in retval.atom_names]
return retval
Expand Down
9 changes: 9 additions & 0 deletions tests/test_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,12 @@ def test_fasta_parsing():
assert records[0].sequence == "RKDES"
assert records[1].header == "bar"
assert records[1].sequence == "KEDESRRR"


def test_smiles_parsing():
    """A FASTA file whose only entry is a SMILES string parses to one record."""
    fasta_text = ">smiles\nCc1cc2nc3c(=O)[nH]c(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)CO)c2cc1C"
    with TemporaryDirectory() as workdir:
        # Write the entry to disk first: read_fasta is handed a path here.
        fasta_path = Path(workdir).joinpath("test.fasta")
        fasta_path.write_text(fasta_text)
        parsed = read_fasta(fasta_path)
        n_records = len(parsed)
        assert n_records == 1
24 changes: 24 additions & 0 deletions tests/test_rdkit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Copyright (c) 2024 Chai Discovery, Inc.
# Licensed under the Apache License, Version 2.0.
# See the LICENSE file for details.

from chai_lab.data.sources.rdkit import RefConformerGenerator


def test_ref_conformer_from_smiles():
    """Atom names of a conformer generated from SMILES must all be distinct."""
    generator = RefConformerGenerator()
    ref = generator.generate(
        "Cc1cc2nc3c(=O)[nH]c(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)CO)c2cc1C"
    )

    # One unique name per atom: a duplicate name would shrink the set.
    unique_names = set(ref.atom_names)
    assert len(unique_names) == ref.num_atoms


def test_ref_conformer_glycan_ccd():
    """CCD code "MAN" (a sugar ring) yields a conformer with unique atom names."""
    ref = RefConformerGenerator().get("MAN")
    assert ref is not None

    # One unique name per atom: a duplicate name would shrink the set.
    distinct_names = set(ref.atom_names)
    assert len(distinct_names) == ref.num_atoms
Loading