From 1a2d5c4c17bc5c793df92a4bd406f6857be5b420 Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Fri, 1 Mar 2024 14:36:19 -0800 Subject: [PATCH 1/2] Add test_ensembl_protein --- tests/test_match.py | 129 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) diff --git a/tests/test_match.py b/tests/test_match.py index de736ff..4576c1c 100644 --- a/tests/test_match.py +++ b/tests/test_match.py @@ -79,6 +79,135 @@ def test_checks_by_source_id_kras(self, conn): ] assert "KRAS" in kras + # KBDEV-1163 + # Testing if the addition of Ensembl protein Features is limiting results + # returned by get_equivalent_features() since SimilarTo queryType queries + # aren't traversing the graph to its whole depth. + @pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason="excluding data-specific test") + def test_ensembl_protein(self, conn): + for feature, expected in [ + # Sample + ( + 'EGFR', + [ + 'EGFR', + 'ERBB', + 'ENSG00000146648', + 'ENSG00000146648.17', + 'ENST00000275493', + 'ENST00000275493.6', + 'NM_001346897', + 'NM_001346897.2', + 'NP_001333826', + 'NP_001333826.1', + ], + ), + ( + 'NM_001346897', + [ + 'EGFR', + 'ERBB', + 'ENSG00000146648', + 'ENSG00000146648.17', + 'NM_001346897', + 'NM_001346897.2', + 'NP_001333826', + 'NP_001333826.1', + ], + ), + ( + 'NM_001346897.2', + [ + 'EGFR', + 'ERBB', + 'ENSG00000146648', + 'ENSG00000146648.17', + 'NM_001346897', + 'NM_001346897.2', + 'NP_001333826', + 'NP_001333826.1', + ], + ), + ( + 'NP_001333826', + [ + 'EGFR', + 'ERBB', + 'ENSG00000146648', # Warn: Versionized ENSG won't be returned due to API limitations + 'NM_001346897', + 'NM_001346897.2', + 'NP_001333826', + 'NP_001333826.1', + ], + ), + ( + 'NP_001333826.1', + [ + 'EGFR', + 'ERBB', + 'ENSG00000146648', # Warn: Versionized ENSG won't be returned due to API limitations + 'NM_001346897', + 'NM_001346897.2', + 'NP_001333826', + 'NP_001333826.1', + ], + ), + ( + 'ENSG00000146648', + [ + 'EGFR', + 'ERBB', + 'ENSG00000146648', + 
'ENSG00000146648.17', + 'ENST00000275493', + 'ENST00000275493.6', + 'NM_001346897', + 'NM_001346897.2', + 'NP_001333826', # Warn: Versionized NP won't be returned due to API limitations + ], + ), + ( + 'ENSG00000146648.17', + [ + 'EGFR', + 'ERBB', + 'ENSG00000146648', + 'ENSG00000146648.17', + 'ENST00000275493', + 'ENST00000275493.6', + 'NM_001346897', + 'NM_001346897.2', + 'NP_001333826', # Warn: Versionized NP won't be returned due to API limitations + ], + ), + ( + 'ENST00000275493', + [ + 'EGFR', + 'ERBB', + 'ENSG00000146648', + 'ENSG00000146648.17', + 'ENST00000275493', + 'ENST00000275493.6', + ], + ), + ( + 'ENST00000275493.6', + [ + 'EGFR', + 'ERBB', + 'ENSG00000146648', + 'ENSG00000146648.17', + 'ENST00000275493', + 'ENST00000275493.6', + ], + ), + ]: + equivalent_features = match.get_equivalent_features(conn, feature) + equivalent_features = [el['displayName'] for el in equivalent_features] + for equivalent_feature in expected: + assert equivalent_feature in equivalent_features + class TestMatchCopyVariant: def test_bad_category(self, conn): From 16195a08de536571b789b0a7bc5b454c095b2484 Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Fri, 1 Mar 2024 14:40:42 -0800 Subject: [PATCH 2/2] Black linting --- graphkb/genes.py | 1 + graphkb/match.py | 17 +++++++++++------ tests/data.py | 1 - tests/test_genes.py | 1 + tests/test_vocab.py | 1 + 5 files changed, 14 insertions(+), 7 deletions(-) diff --git a/graphkb/genes.py b/graphkb/genes.py index b6e06b5..7b6979e 100644 --- a/graphkb/genes.py +++ b/graphkb/genes.py @@ -1,4 +1,5 @@ """Methods for retrieving gene annotation lists from GraphKB.""" + from typing import Any, Dict, List, Sequence, Set, Tuple, cast from . 
import GraphKBConnection diff --git a/graphkb/match.py b/graphkb/match.py index 3e4e0ab..ec82e57 100644 --- a/graphkb/match.py +++ b/graphkb/match.py @@ -1,6 +1,7 @@ """ Functions which return Variants from GraphKB which match some input variant definition """ + from typing import Dict, List, Optional, Set, Union, cast from . import GraphKBConnection @@ -303,9 +304,11 @@ def compare_positional_variants( if not positions_overlap( cast(BasicPosition, variant["break1Start"]), cast(BasicPosition, reference_variant["break1Start"]), - None - if "break1End" not in reference_variant - else cast(BasicPosition, reference_variant["break1End"]), + ( + None + if "break1End" not in reference_variant + else cast(BasicPosition, reference_variant["break1End"]) + ), ): return False @@ -318,9 +321,11 @@ def compare_positional_variants( if not positions_overlap( cast(BasicPosition, variant["break2Start"]), cast(BasicPosition, reference_variant["break2Start"]), - None - if "break2End" not in reference_variant - else cast(BasicPosition, reference_variant["break2End"]), + ( + None + if "break2End" not in reference_variant + else cast(BasicPosition, reference_variant["break2End"]) + ), ): return False diff --git a/tests/data.py b/tests/data.py index 3c24fe0..764f905 100644 --- a/tests/data.py +++ b/tests/data.py @@ -5,7 +5,6 @@ Array of variants (diplayName and type) that MUST NOT be matching, but not restricted to """ - # Screening structural variant to rule out small events [KBDEV_1056] structuralVariants = { # Unambiguous structural variations diff --git a/tests/test_genes.py b/tests/test_genes.py index b3670f4..2404213 100644 --- a/tests/test_genes.py +++ b/tests/test_genes.py @@ -1,6 +1,7 @@ """ Tests here depend on specific data in GraphKB which can change. 
To avoid this, expected/stable values are chosen """ + import os import pytest diff --git a/tests/test_vocab.py b/tests/test_vocab.py index 5e40e04..dc2e319 100644 --- a/tests/test_vocab.py +++ b/tests/test_vocab.py @@ -1,6 +1,7 @@ """ Tests here depend on specific data in GraphKB which can change. To avoid this, expected/stable values are chosen """ + import os import pytest