From 76f39d771a7e9a8998258424a7386cc2284cda0f Mon Sep 17 00:00:00 2001
From: Kevin Wu
Date: Tue, 10 Dec 2024 17:14:13 +0000
Subject: [PATCH 1/2] Add tests and SMILES naming fix

---
 chai_lab/data/sources/rdkit.py |  8 +++++++-
 tests/test_parsing.py          | 10 ++++++++++
 tests/test_rdkit.py            | 24 ++++++++++++++++++++++++
 3 files changed, 41 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_rdkit.py

diff --git a/chai_lab/data/sources/rdkit.py b/chai_lab/data/sources/rdkit.py
index 1c0ab3f..2096967 100644
--- a/chai_lab/data/sources/rdkit.py
+++ b/chai_lab/data/sources/rdkit.py
@@ -3,6 +3,7 @@
 # See the LICENSE file for details.
 
 import logging
+from collections import defaultdict
 from pathlib import Path
 
 import antipickle
@@ -157,8 +158,13 @@ def generate(self, smiles: str) -> ConformerData:
 
         AllChem.EmbedMultipleConfs(mol_with_hs, numConfs=1, params=params)
         AllChem.RemoveHs(mol_with_hs)
+
+        element_counter: dict = defaultdict(int)
         for atom in mol_with_hs.GetAtoms():
-            atom.SetProp("name", atom.GetSymbol())
+            elem = atom.GetSymbol()
+            element_counter[elem] += 1  # Start each counter at 1
+            atom.SetProp("name", elem + str(element_counter[elem]))
+
         retval = self._load_ref_conformer_from_rdkit(mol_with_hs)
         retval.atom_names = [a.upper() for a in retval.atom_names]
         return retval
diff --git a/tests/test_parsing.py b/tests/test_parsing.py
index f852206..8ea74a7 100644
--- a/tests/test_parsing.py
+++ b/tests/test_parsing.py
@@ -68,3 +68,13 @@ def test_fasta_parsing():
     assert records[0].sequence == "RKDES"
     assert records[1].header == "bar"
     assert records[1].sequence == "KEDESRRR"
+
+
+def test_smiles_parsing():
+    """"""
+    smiles = ">smiles\nCc1cc2nc3c(=O)[nH]c(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)CO)c2cc1C"
+    with TemporaryDirectory() as tmpdir:
+        fa_file = Path(tmpdir) / "test.fasta"
+        fa_file.write_text(smiles)
+        records = read_fasta(fa_file)
+    assert len(records) == 1
diff --git a/tests/test_rdkit.py b/tests/test_rdkit.py
new file mode 100644
index 0000000..5968930
--- /dev/null
+++ b/tests/test_rdkit.py
@@ -0,0 +1,24 @@
+# Copyright (c) 2024 Chai Discovery, Inc.
+# Licensed under the Apache License, Version 2.0.
+# See the LICENSE file for details.
+
+from chai_lab.data.sources.rdkit import RefConformerGenerator
+
+
+def test_ref_conformer_from_smiles():
+    """Test ref conformer generation from SMILES."""
+    smiles = "Cc1cc2nc3c(=O)[nH]c(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)CO)c2cc1C"
+    rcg = RefConformerGenerator()
+
+    conformer = rcg.generate(smiles)
+
+    assert len(set(conformer.atom_names)) == conformer.num_atoms
+
+
+def test_ref_conformer_glycan_ccd():
+    """Ref conformer from CCD code for a sugar ring."""
+    rcg = RefConformerGenerator()
+    conformer = rcg.get("MAN")
+    assert conformer is not None
+
+    assert len(set(conformer.atom_names)) == conformer.num_atoms

From ad24c04d90c1eadcd6a8814224e7d9861a7bae10 Mon Sep 17 00:00:00 2001
From: Kevin Wu
Date: Tue, 10 Dec 2024 17:16:52 +0000
Subject: [PATCH 2/2] Cleanup

---
 tests/test_parsing.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/test_parsing.py b/tests/test_parsing.py
index 8ea74a7..141979c 100644
--- a/tests/test_parsing.py
+++ b/tests/test_parsing.py
@@ -71,7 +71,6 @@ def test_fasta_parsing():
 
 
 def test_smiles_parsing():
-    """"""
     smiles = ">smiles\nCc1cc2nc3c(=O)[nH]c(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)CO)c2cc1C"
     with TemporaryDirectory() as tmpdir:
         fa_file = Path(tmpdir) / "test.fasta"