From 3b84ad5de0601b36a7ace686d7bd4bdf3384edb6 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Thu, 1 Apr 2021 11:30:00 +0200 Subject: [PATCH 01/14] Update interpretation of drugbank actions --- indra/sources/drugbank/processor.py | 43 +++++++++++++++++------------ 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/indra/sources/drugbank/processor.py b/indra/sources/drugbank/processor.py index ea0912a263..126d53197c 100644 --- a/indra/sources/drugbank/processor.py +++ b/indra/sources/drugbank/processor.py @@ -1,11 +1,11 @@ -import logging from xml.etree import ElementTree -from indra.statements import * -from indra.databases.identifiers import ensure_chebi_prefix, \ - ensure_chembl_prefix + +import logging + +from indra.databases.identifiers import ensure_chebi_prefix, ensure_chembl_prefix +from indra.ontology.standardize import get_standard_agent +from indra.statements import Activation, Complex, DecreaseAmount, Evidence, IncreaseAmount, Inhibition from indra.statements.validate import assert_valid_db_refs -from indra.ontology.standardize import standardize_name_db_refs, \ - get_standard_agent logger = logging.getLogger(__name__) @@ -28,6 +28,7 @@ class DrugbankProcessor: statements : list of indra.statements.Statement A list of INDRA Statements that were extracted from DrugBank content. """ + def __init__(self, xml_tree: ElementTree.ElementTree): self.xml_tree = xml_tree self.statements = [] @@ -60,7 +61,7 @@ def _extract_statements_for_drug(drug_element): @staticmethod def _get_statement_type(action): if action in neutral_actions: - return None + return _complex elif action in activation_actions: return Activation elif action in inhibition_actions: @@ -70,9 +71,10 @@ def _get_statement_type(action): elif action in increase_amount_actions: return IncreaseAmount elif action == 'N/A': - return Inhibition + return _complex else: - return None + logger.warning('unhandled DrugBank action: %s', action) + return _complex @staticmethod def _get_target_agent(target_element): @@ -160,22 +162,27 @@ def db_findall(element, path): return element.findall(path, namespaces=drugbank_ns) -activation_actions = {'substrate', 'agonist', 'inducer', 'potentiator', - 'stimulator', 'cofactor', 'activator', 'ligand', - 'chaperone', 'partial agonist', 'protector', +def _complex(a, b, evidence): + return Complex([a, b], evidence=evidence) + + +activation_actions = {'inducer', 'potentiator', + 'stimulator', 'cofactor', 'activator', + 'protector', 'positive allosteric modulator', 'positive modulator'} -inhibition_actions = {'antagonist', 'inhibitor', 'binder', 'antibody', +inhibition_actions = {'inhibitor', 'binder', 'antibody', 'inactivator', 'binding', 'blocker', 'negative modulator', - 'inverse agonist', 'neutralizer', 'weak inhibitor', - 'suppressor', 'disruptor', + 'neutralizer', 'weak inhibitor', + 'suppressor', 'disruptor', 'chelator', 'inhibitory allosteric modulator'} -decrease_amount_actions = {'downregulator', 'metabolizer', 'chelator', +decrease_amount_actions = {'downregulator', 'metabolizer', 'degradation', 'incorporation into and destabilization'} -increase_amount_actions = {'stabilization'} +increase_amount_actions = {'stabilization', 'chaperone'} neutral_actions = {'modulator', 'other/unknown', 'unknown', 'other', - 'regulator'} \ No newline at end of file + 'regulator', 'antagonist', 'substrate', 'agonist', + 'ligand', 'inverse agonist', 'partial agonist'} From deedfbfd6f95563ea9b5ef16d6ff21eaac12c766 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Thu, 1 Apr 2021 11:31:48 +0200 Subject: [PATCH 02/14] Bring over code from #1264 --- indra/sources/drugbank/api.py | 1 + 1 file changed, 1 insertion(+) diff --git a/indra/sources/drugbank/api.py b/indra/sources/drugbank/api.py index 5eba90548c..1cca7bc516 100644 --- a/indra/sources/drugbank/api.py +++ b/indra/sources/drugbank/api.py @@ -1,6 +1,7 @@ import logging from typing import Optional, Sequence, Union from xml.etree import ElementTree + from .processor import DrugbankProcessor logger = logging.getLogger(__name__) From acf8eb6f654e3239e4df0529a1431027e62b999e Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Thu, 1 Apr 2021 11:55:13 +0200 Subject: [PATCH 03/14] Fix import --- indra/sources/drugbank/processor.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/indra/sources/drugbank/processor.py b/indra/sources/drugbank/processor.py index 126d53197c..dc1ac10551 100644 --- a/indra/sources/drugbank/processor.py +++ b/indra/sources/drugbank/processor.py @@ -1,6 +1,5 @@ -from xml.etree import ElementTree - import logging +from xml.etree import ElementTree from indra.databases.identifiers import ensure_chebi_prefix, ensure_chembl_prefix from indra.ontology.standardize import get_standard_agent From e0baaca6b2a40f4c6e2a94dd1f68e31d371fab4b Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Thu, 1 Apr 2021 11:55:35 +0200 Subject: [PATCH 04/14] Update priority order of mapping to INDRA statements and logging --- indra/sources/drugbank/processor.py | 54 ++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 12 deletions(-) diff --git a/indra/sources/drugbank/processor.py b/indra/sources/drugbank/processor.py index dc1ac10551..ec51e723a6 100644 --- a/indra/sources/drugbank/processor.py +++ b/indra/sources/drugbank/processor.py @@ -10,6 +10,8 @@ drugbank_ns = {'db': 'http://www.drugbank.ca'} +_UNHANDLED = set() + class DrugbankProcessor: """Processor to extract INDRA Statements from DrugBank content. @@ -59,9 +61,7 @@ def _extract_statements_for_drug(drug_element): @staticmethod def _get_statement_type(action): - if action in neutral_actions: - return _complex - elif action in activation_actions: + if action in activation_actions: return Activation elif action in inhibition_actions: return Inhibition @@ -69,11 +69,13 @@ def _get_statement_type(action): return DecreaseAmount elif action in increase_amount_actions: return IncreaseAmount - elif action == 'N/A': + elif action in neutral_actions or action == 'N/A': return _complex - else: + elif action in skip_actions: + return None + elif action not in _UNHANDLED: + _UNHANDLED.add(action) logger.warning('unhandled DrugBank action: %s', action) - return _complex @staticmethod def _get_target_agent(target_element): @@ -176,12 +178,40 @@ def _complex(a, b, evidence): 'suppressor', 'disruptor', 'chelator', 'inhibitory allosteric modulator'} -decrease_amount_actions = {'downregulator', 'metabolizer', - 'degradation', - 'incorporation into and destabilization'} +decrease_amount_actions = { + 'downregulator', + 'metabolizer', + 'degradation', + 'incorporation into and destabilization', + 'cleavage', + 'inhibition of synthesis', +} increase_amount_actions = {'stabilization', 'chaperone'} -neutral_actions = {'modulator', 'other/unknown', 'unknown', 'other', - 'regulator', 'antagonist', 'substrate', 'agonist', - 'ligand', 'inverse agonist', 'partial agonist'} +neutral_actions = { + 'modulator', + 'regulator', + 'antagonist', + 'substrate', + 'agonist', + 'ligand', + 'intercalation', # e.g., Doxorubicin intercalates DNA to prevent transcription + 'inverse agonist', + 'aggregation inhibitor', # e.g., inhibits process on a protein's aggregation (like APP or LRRK) + 'partial agonist', + 'partial antagonist', + 'antisense oligonucleotide', + 'adduct', + 'component of', + 'product of', + 'reducer', + 'oxidizer', + 'acetylation', # map to Ac INDRA statement?, but I'm not convinced by the idea of splitting up actions +} + +skip_actions = { + 'other/unknown', + 'unknown', + 'other', +} From e9a68f74a4c4c69ca50652357cbcc6c6b74c2ae3 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Thu, 1 Apr 2021 11:55:40 +0200 Subject: [PATCH 05/14] Update processor.py --- indra/sources/drugbank/processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/indra/sources/drugbank/processor.py b/indra/sources/drugbank/processor.py index ec51e723a6..b0a60f0b50 100644 --- a/indra/sources/drugbank/processor.py +++ b/indra/sources/drugbank/processor.py @@ -176,7 +176,7 @@ def _complex(a, b, evidence): 'inactivator', 'binding', 'blocker', 'negative modulator', 'neutralizer', 'weak inhibitor', 'suppressor', 'disruptor', 'chelator', - 'inhibitory allosteric modulator'} + 'inhibitory allosteric modulator', 'translocation inhibitor'} decrease_amount_actions = { 'downregulator', From 9550a76c8b987e9521199b941876ba3c6c405d41 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Thu, 1 Apr 2021 12:02:02 +0200 Subject: [PATCH 06/14] Update processor.py --- indra/sources/drugbank/processor.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/indra/sources/drugbank/processor.py b/indra/sources/drugbank/processor.py index b0a60f0b50..c3f71f5d07 100644 --- a/indra/sources/drugbank/processor.py +++ b/indra/sources/drugbank/processor.py @@ -69,7 +69,9 @@ def _get_statement_type(action): return DecreaseAmount elif action in increase_amount_actions: return IncreaseAmount - elif action in neutral_actions or action == 'N/A': + elif action == 'N/A': + return Inhibition + elif action in neutral_actions: return _complex elif action in skip_actions: return None @@ -176,7 +178,9 @@ def _complex(a, b, evidence): 'inactivator', 'binding', 'blocker', 'negative modulator', 'neutralizer', 'weak inhibitor', 'suppressor', 'disruptor', 'chelator', - 'inhibitory allosteric modulator', 'translocation inhibitor'} + 'inhibitory allosteric modulator', 'translocation inhibitor', + 'nucleotide exchange blocker', + } decrease_amount_actions = { 'downregulator', @@ -208,6 +212,9 @@ def _complex(a, b, evidence): 'reducer', 'oxidizer', 'acetylation', # map to Ac INDRA statement?, but I'm not convinced by the idea of splitting up actions + 'allosteric modulator', + 'deoxidizer', + 'cross-linking/alkylation', # e.g. Busulfan (DB01008) alkalytes DNA } skip_actions = { From fbe73f9d2ff904e67dd03e3d3ed2eeecd2123439 Mon Sep 17 00:00:00 2001 From: Ben Gyori Date: Thu, 1 Apr 2021 23:37:15 -0400 Subject: [PATCH 07/14] Revert some long lines --- indra/sources/drugbank/processor.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/indra/sources/drugbank/processor.py b/indra/sources/drugbank/processor.py index c3f71f5d07..f8e305424b 100644 --- a/indra/sources/drugbank/processor.py +++ b/indra/sources/drugbank/processor.py @@ -1,9 +1,11 @@ import logging from xml.etree import ElementTree -from indra.databases.identifiers import ensure_chebi_prefix, ensure_chembl_prefix +from indra.databases.identifiers import ensure_chebi_prefix,\ + ensure_chembl_prefix from indra.ontology.standardize import get_standard_agent -from indra.statements import Activation, Complex, DecreaseAmount, Evidence, IncreaseAmount, Inhibition +from indra.statements import Activation, Complex, DecreaseAmount, Evidence,\ + IncreaseAmount, Inhibition from indra.statements.validate import assert_valid_db_refs logger = logging.getLogger(__name__) @@ -178,8 +180,8 @@ def _complex(a, b, evidence): 'inactivator', 'binding', 'blocker', 'negative modulator', 'neutralizer', 'weak inhibitor', 'suppressor', 'disruptor', 'chelator', - 'inhibitory allosteric modulator', 'translocation inhibitor', - 'nucleotide exchange blocker', + 'inhibitory allosteric modulator', + 'translocation inhibitor', 'nucleotide exchange blocker', } decrease_amount_actions = { @@ -200,9 +202,11 @@ def _complex(a, b, evidence): 'substrate', 'agonist', 'ligand', - 'intercalation', # e.g., Doxorubicin intercalates DNA to prevent transcription + # e.g., Doxorubicin intercalates DNA to prevent transcription + 'intercalation', 'inverse agonist', - 'aggregation inhibitor', # e.g., inhibits process on a protein's aggregation (like APP or LRRK) + # e.g., inhibits process on a protein's aggregation (like APP or LRRK) + 'aggregation inhibitor', 'partial agonist', 'partial antagonist', 'antisense oligonucleotide', @@ -211,7 +215,9 @@ def _complex(a, b, evidence): 'product of', 'reducer', 'oxidizer', - 'acetylation', # map to Ac INDRA statement?, but I'm not convinced by the idea of splitting up actions + # map to Ac INDRA statement?, but I'm not convinced by the idea of + # splitting up actions + 'acetylation', 'allosteric modulator', 'deoxidizer', 'cross-linking/alkylation', # e.g. Busulfan (DB01008) alkalytes DNA From a34ccc1227d75c86b2c893298f459721dd8affe1 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Fri, 2 Apr 2021 18:26:39 +0200 Subject: [PATCH 08/14] Update interpretations of "binder and binding" --- indra/sources/drugbank/processor.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/indra/sources/drugbank/processor.py b/indra/sources/drugbank/processor.py index f8e305424b..0a95d7b14d 100644 --- a/indra/sources/drugbank/processor.py +++ b/indra/sources/drugbank/processor.py @@ -176,8 +176,8 @@ def _complex(a, b, evidence): 'protector', 'positive allosteric modulator', 'positive modulator'} -inhibition_actions = {'inhibitor', 'binder', 'antibody', - 'inactivator', 'binding', 'blocker', 'negative modulator', +inhibition_actions = {'inhibitor', 'antibody', + 'inactivator', 'blocker', 'negative modulator', 'neutralizer', 'weak inhibitor', 'suppressor', 'disruptor', 'chelator', 'inhibitory allosteric modulator', @@ -196,6 +196,8 @@ def _complex(a, b, evidence): increase_amount_actions = {'stabilization', 'chaperone'} neutral_actions = { + 'binder', + 'binding', 'modulator', 'regulator', 'antagonist', From fb87e695d868756f36928287ffb812d79961e6cb Mon Sep 17 00:00:00 2001 From: Ben Gyori Date: Fri, 2 Apr 2021 15:19:15 -0400 Subject: [PATCH 09/14] Use Complex instead of Inhibition for missing actions --- indra/sources/drugbank/processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/indra/sources/drugbank/processor.py b/indra/sources/drugbank/processor.py index 0a95d7b14d..d12da312d8 100644 --- a/indra/sources/drugbank/processor.py +++ b/indra/sources/drugbank/processor.py @@ -72,7 +72,7 @@ def _get_statement_type(action): elif action in increase_amount_actions: return IncreaseAmount elif action == 'N/A': - return Inhibition + return _complex elif action in neutral_actions: return _complex elif action in skip_actions: From d355891e091b52f0c19d3105c0b6638761e20580 Mon Sep 17 00:00:00 2001 From: Ben Gyori Date: Fri, 2 Apr 2021 15:55:45 -0400 Subject: [PATCH 10/14] Skip unknown pharmacological actions too --- indra/sources/drugbank/processor.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/indra/sources/drugbank/processor.py b/indra/sources/drugbank/processor.py index d12da312d8..de218b5b71 100644 --- a/indra/sources/drugbank/processor.py +++ b/indra/sources/drugbank/processor.py @@ -49,7 +49,14 @@ def _extract_statements_for_drug(drug_element): actions = {a.text for a in db_findall(target_element, 'db:actions/db:action')} if not actions: - actions = {'N/A'} + # See https://dev.drugbank.com/guides/terms/pharmacological-action + pharm_action = db_find(target_element, 'db:known-action') + # Skip if it's not known that it's a direct interaction + if pharm_action.text in {'no', 'unknown'}: + actions = set() + # Otherwise use the N/A action which ultimately maps to Complex + else: + actions = {'N/A'} for action in actions: stmt_type = DrugbankProcessor._get_statement_type(action) if not stmt_type: From c04546f668a7812136b125519f110af886d54ae2 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Tue, 14 Jun 2022 21:59:51 +0200 Subject: [PATCH 11/14] Update relation interpretation --- indra/sources/drugbank/processor.py | 63 ++++++++++++++++++++--------- 1 file changed, 44 insertions(+), 19 deletions(-) diff --git a/indra/sources/drugbank/processor.py b/indra/sources/drugbank/processor.py index de218b5b71..75b4774c51 100644 --- a/indra/sources/drugbank/processor.py +++ b/indra/sources/drugbank/processor.py @@ -178,18 +178,48 @@ def _complex(a, b, evidence): return Complex([a, b], evidence=evidence) -activation_actions = {'inducer', 'potentiator', - 'stimulator', 'cofactor', 'activator', - 'protector', - 'positive allosteric modulator', 'positive modulator'} - -inhibition_actions = {'inhibitor', 'antibody', - 'inactivator', 'blocker', 'negative modulator', - 'neutralizer', 'weak inhibitor', - 'suppressor', 'disruptor', 'chelator', - 'inhibitory allosteric modulator', - 'translocation inhibitor', 'nucleotide exchange blocker', - } +activation_actions = { + 'inducer', + 'potentiator', + 'stimulator', + 'cofactor', + 'activator', + 'protector', + 'positive allosteric modulator', + 'positive modulator', + # All agonists activate receptors, The only differences are potency, + # how efficiently they bind and how long they stay at the receptor site + 'agonist', + 'partial agonist', +} + +inhibition_actions = { + 'inhibitor', + 'antibody', + 'inactivator', + 'blocker', + 'negative modulator', + 'neutralizer', + 'weak inhibitor', + 'suppressor', + 'disruptor', + 'chelator', + 'inhibitory allosteric modulator', + 'translocation inhibitor', + 'nucleotide exchange blocker', + # Antagonists can either bind to the receptor and do nothing and prevent + # physiologic agonists to bind (which can be overcome with higher agonist + # dosage [except in the case of irreversible antagonism which obviously + # can't be competed with]) or can be a noncompetitive antagonist that will + # change the structure of the active site and prevent agonist binding. + 'antagonist', + 'partial antagonist', + # Inverse agonists act exactly the same as competitive antagonists + # unless there is a basal physiological agonism. In which case the + # inverse agonist will have more of an opposite effect than just a + # pure antagonist would have. + 'inverse agonist', +} decrease_amount_actions = { 'downregulator', @@ -198,6 +228,7 @@ def _complex(a, b, evidence): 'incorporation into and destabilization', 'cleavage', 'inhibition of synthesis', + 'antisense oligonucleotide', } increase_amount_actions = {'stabilization', 'chaperone'} @@ -207,18 +238,12 @@ def _complex(a, b, evidence): 'binding', 'modulator', 'regulator', - 'antagonist', 'substrate', - 'agonist', 'ligand', # e.g., Doxorubicin intercalates DNA to prevent transcription 'intercalation', - 'inverse agonist', # e.g., inhibits process on a protein's aggregation (like APP or LRRK) 'aggregation inhibitor', - 'partial agonist', - 'partial antagonist', - 'antisense oligonucleotide', 'adduct', 'component of', 'product of', @@ -229,7 +254,7 @@ def _complex(a, b, evidence): 'acetylation', 'allosteric modulator', 'deoxidizer', - 'cross-linking/alkylation', # e.g. Busulfan (DB01008) alkalytes DNA + 'cross-linking/alkylation', # e.g. Busulfan (DB01008) alkalytes DNA } skip_actions = { From a9b272aa61ad64bbc24af0c1d0a24eb209803fa1 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Tue, 14 Jun 2022 22:00:02 +0200 Subject: [PATCH 12/14] Update .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 65e3618c41..a56bc0e51b 100644 --- a/.gitignore +++ b/.gitignore @@ -122,3 +122,4 @@ indra/tests/tempfile.html .pytest_cache .DS_Store .noseids +.venv From 1735705930eef15397f0df8bb34c47059529ea55 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Tue, 14 Jun 2022 22:04:40 +0200 Subject: [PATCH 13/14] Update api.py --- indra/sources/drugbank/api.py | 1 - 1 file changed, 1 deletion(-) diff --git a/indra/sources/drugbank/api.py b/indra/sources/drugbank/api.py index 1cca7bc516..5eba90548c 100644 --- a/indra/sources/drugbank/api.py +++ b/indra/sources/drugbank/api.py @@ -1,7 +1,6 @@ import logging from typing import Optional, Sequence, Union from xml.etree import ElementTree - from .processor import DrugbankProcessor logger = logging.getLogger(__name__) From 91ba79503f71e660fb47e1e435b868e44b4ef401 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Tue, 14 Jun 2022 22:16:34 +0200 Subject: [PATCH 14/14] Add new actions in 5.1.9 --- indra/sources/drugbank/processor.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/indra/sources/drugbank/processor.py b/indra/sources/drugbank/processor.py index 75b4774c51..e88048a56f 100644 --- a/indra/sources/drugbank/processor.py +++ b/indra/sources/drugbank/processor.py @@ -206,6 +206,7 @@ def _complex(a, b, evidence): 'chelator', 'inhibitory allosteric modulator', 'translocation inhibitor', + 'inhibits downstream inflammation cascades', 'nucleotide exchange blocker', # Antagonists can either bind to the receptor and do nothing and prevent # physiologic agonists to bind (which can be overcome with higher agonist @@ -231,7 +232,11 @@ def _complex(a, b, evidence): 'antisense oligonucleotide', } -increase_amount_actions = {'stabilization', 'chaperone'} +increase_amount_actions = { + 'stabilization', + 'chaperone', + 'gene replacement', +} neutral_actions = { 'binder', @@ -255,6 +260,7 @@ def _complex(a, b, evidence): 'allosteric modulator', 'deoxidizer', 'cross-linking/alkylation', # e.g. Busulfan (DB01008) alkalytes DNA + 'multitarget', } skip_actions = {