From 8d80b5b6984b388e096f47045110b107b8ac8b2f Mon Sep 17 00:00:00 2001 From: Aliaksandr Dziarkach <18146690+AliaksandrDziarkach@users.noreply.github.com> Date: Thu, 31 Oct 2024 18:31:33 +0300 Subject: [PATCH] #2575 - Saving atom to monomer connections to Mol and HELM (#2592) --- .../ref/formats/ket_to_helm.py.out | 1 + .../integration/tests/formats/ket_to_helm.py | 1 + .../ref/helm_monomer_molecule_direct.ket | 223 ++++++++++++++++++ .../molecule/src/sequence_saver.cpp | 38 +++ 4 files changed, 263 insertions(+) create mode 100644 api/tests/integration/tests/formats/ref/helm_monomer_molecule_direct.ket diff --git a/api/tests/integration/ref/formats/ket_to_helm.py.out b/api/tests/integration/ref/formats/ket_to_helm.py.out index 563875c237..5970164d51 100644 --- a/api/tests/integration/ref/formats/ket_to_helm.py.out +++ b/api/tests/integration/ref/formats/ket_to_helm.py.out @@ -12,6 +12,7 @@ helm_mixed_base.ket:SUCCEED helm_mixed_custom.ket:SUCCEED helm_molecule_2418.ket:SUCCEED helm_monomer_molecule.ket:SUCCEED +helm_monomer_molecule_direct.ket:SUCCEED helm_multi_char_rna.ket:SUCCEED helm_peptide.ket:SUCCEED helm_rna_without_base.ket:SUCCEED diff --git a/api/tests/integration/tests/formats/ket_to_helm.py b/api/tests/integration/tests/formats/ket_to_helm.py index 81b553493d..e3e69cd297 100644 --- a/api/tests/integration/tests/formats/ket_to_helm.py +++ b/api/tests/integration/tests/formats/ket_to_helm.py @@ -53,6 +53,7 @@ def find_diff(a, b): "helm_smiles_sugar": "RNA1{[C(C(CO[*:1])O[*:2])[*:3] |$;;;;_R1;;_R2;_R3$|](A)P}$$$$V2.0", "helm_molecule_2418": "PEPTIDE1{A}|CHEM1{[C1C=CC=CC=1[*:1] |$;;;;;;_R1$|]}$PEPTIDE1,CHEM1,1:R1-1:R1$$$V2.0", "helm_chem_rna_hydro": "CHEM1{[MCC]}|RNA1{R(U)P}$CHEM1,RNA1,1:pair-3:pair$$$V2.0", + "helm_monomer_molecule_direct": "PEPTIDE1{A}|CHEM1{[C(=C)N[*:1] |$;;;_R1$|]}$PEPTIDE1,CHEM1,1:R2-1:R1$$$V2.0", } for filename in sorted(helm_data.keys()): diff --git a/api/tests/integration/tests/formats/ref/helm_monomer_molecule_direct.ket b/api/tests/integration/tests/formats/ref/helm_monomer_molecule_direct.ket new file mode 100644 index 0000000000..eb20e415f8 --- /dev/null +++ b/api/tests/integration/tests/formats/ref/helm_monomer_molecule_direct.ket @@ -0,0 +1,223 @@ +{ + "root": { + "nodes": [ + { + "$ref": "monomer5635" + }, + { + "$ref": "mol0" + } + ], + "connections": [ + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer5635", + "attachmentPointId": "R2" + }, + "endpoint2": { + "moleculeId": "mol0", + "atomId": "1" + } + } + ], + "templates": [ + { + "$ref": "monomerTemplate-A___Alanine" + } + ] + }, + "mol0": { + "type": "molecule", + "atoms": [ + { + "label": "C", + "location": [ + 22.70380779065583, + -10.331249794220879, + 0 + ] + }, + { + "label": "N", + "location": [ + 23.569833212558642, + -10.831249821106224, + 0 + ] + }, + { + "label": "C", + "location": [ + 21.837782294247205, + -10.831249783853321, + 0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 2, + "atoms": [ + 0, + 2 + ] + } + ], + "stereoFlagPosition": { + "x": 23.569833212558642, + "y": 9.331249794220879, + "z": 0 + } + }, + "monomer5635": { + "type": "monomer", + "id": "5635", + "position": { + "x": 25.774999999999988, + "y": -10.725000000000009 + }, + "alias": "A", + "templateId": "A___Alanine" + }, + "monomerTemplate-A___Alanine": { + "type": "monomerTemplate", + "atoms": [ + { + "label": "N", + "location": [ + -1.2549, + -0.392, + 0 + ] + }, + { + "label": "C", + "location": [ + -0.272, + 0.2633, + 0 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + -0.3103, + 1.7393, + 0 + ] + }, + { + "label": "C", + "location": [ + 1.0523, + -0.392, + 0 + ] + }, + { + "label": "O", + "location": [ + 1.0829, + -1.5722, + 0 + ] + }, + { + "label": "O", + "location": [ + 2.0353, + 0.2633, + 0 + ] + }, + { + "label": "H", + "location": [ + -2.3334, + 0.0905, + 0 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 1, + 0 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 2 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 1, + 3 + ] + }, + { + "type": 2, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 6 + ] + } + ], + "class": "AminoAcid", + "classHELM": "PEPTIDE", + "id": "A___Alanine", + "fullName": "Alanine", + "alias": "A", + "attachmentPoints": [ + { + "attachmentAtom": 0, + "leavingGroup": { + "atoms": [ + 6 + ] + }, + "type": "left" + }, + { + "attachmentAtom": 3, + "leavingGroup": { + "atoms": [ + 5 + ] + }, + "type": "right" + } + ], + "naturalAnalogShort": "A" + } +} \ No newline at end of file diff --git a/core/indigo-core/molecule/src/sequence_saver.cpp b/core/indigo-core/molecule/src/sequence_saver.cpp index 01ff9a7cd5..ffa730b1c3 100644 --- a/core/indigo-core/molecule/src/sequence_saver.cpp +++ b/core/indigo-core/molecule/src/sequence_saver.cpp @@ -1319,6 +1319,25 @@ std::string SequenceSaver::saveHELM(KetDocument& document, std::vector> molecules_connections; + if (molecules.Size() > 0) + { + auto process_ep = [&molecules_connections](const KetConnectionEndPoint& ep) { + if (ep.hasStringProp("moleculeId")) + { + const auto& mol_id = ep.getStringProp("moleculeId"); + if (molecules_connections.count(mol_id) == 0) + molecules_connections.try_emplace(mol_id); + if (ep.hasStringProp("atomId")) + molecules_connections.at(mol_id).push_back(std::stoi(ep.getStringProp("atomId"))); + } + }; + for (const auto& connection : document.nonSequenceConnections()) + { + process_ep(connection.ep1()); + process_ep(connection.ep2()); + } + } for (rapidjson::SizeType i = 0; i < molecules.Size(); i++) { const auto& molecule = molecules[i]; @@ -1341,6 +1360,7 @@ std::string SequenceSaver::saveHELM(KetDocument& document, std::vectorsgroups; + int ap_count = 0; for (int i = sgroups.begin(); i != sgroups.end(); i = sgroups.next(i)) { auto& sgroup = sgroups.getSGroup(i); @@ -1355,6 +1375,7 @@ std::string SequenceSaver::saveHELM(KetDocument& document, std::vectorsecond.emplace(ap.aidx, apid); @@ -1374,6 +1395,23 @@ std::string SequenceSaver::saveHELM(KetDocument& document, std::vector 0 && ap_count == 0) + { + int ap_idx = 1; + auto res = mol_atom_to_ap.try_emplace(mol_id); + auto& atom_to_ap = res.first; + static std::string apid_prefix{'R'}; + for (auto atom_id : molecules_connections.at(mol_id)) + { + std::string apid = apid_prefix + std::to_string(ap_idx); + atom_to_ap->second.emplace(atom_id, apid); + // add leaving atom and set it as R-site + auto leaving_atom = pbmol->addAtom(ELEM_RSITE); + pbmol->addBond(atom_id, leaving_atom, BOND_SINGLE); + pbmol->allowRGroupOnRSite(leaving_atom, ap_idx++); + } + } // generate smiles std::string smiles; StringOutput s_out(smiles);