Remove HPO Recursion (#464)
* remove overcomplicated logic when searching for HPO terms related to a query term

* Bump version: 6.2.0 → 6.2.1
MattWellie authored Dec 10, 2024
1 parent c0a6dce commit 6c35f3c
Showing 7 changed files with 15 additions and 70 deletions.
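In outline, the commit replaces a hand-rolled recursive walk over `is_a` (and `replaced_by`) edges with a single `networkx.dfs_successors` traversal of the obonet graph. A minimal sketch of the idea follows; it is not taken from the diff and assumes an obonet-style graph whose edges point from a child term to its parent:

```python
# Illustrative sketch only: obonet's read_obo() stores is_a relationships as
# edges from child term to parent term, so a traversal from a query term
# visits that term's ancestors.
import networkx as nx

hpo = nx.MultiDiGraph()
hpo.add_edge('HP:4', 'HP:2')  # HP:4 is_a HP:2
hpo.add_edge('HP:2', 'HP:1')  # HP:2 is_a HP:1


# Roughly the removed approach: recurse over parent edges by hand.
def ancestors_recursive(graph: nx.MultiDiGraph, term: str) -> set[str]:
    found = {term}
    for parent in graph.successors(term):
        found |= ancestors_recursive(graph, parent)
    return found


# The replacement approach: one library call. The keys of dfs_successors()
# are the query term and every ancestor that itself has a parent, so the
# terminal root node does not appear as a key.
ancestors_dfs = set(nx.dfs_successors(hpo, 'HP:4'))

print(ancestors_recursive(hpo, 'HP:4'))  # HP:4, HP:2 and HP:1
print(ancestors_dfs)                     # HP:4 and HP:2
```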
2 changes: 1 addition & 1 deletion .bumpversion.cfg
@@ -1,5 +1,5 @@
[bumpversion]
-current_version = 6.2.0
+current_version = 6.2.1
commit = True
tag = False

2 changes: 1 addition & 1 deletion .github/workflows/index_page_builder.yaml
@@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest

env:
-VERSION: 6.2.0
+VERSION: 6.2.1

steps:

4 changes: 2 additions & 2 deletions README.md
@@ -32,15 +32,15 @@ The Dockerfile offers two builds, use the `--target` flag to specify which build
- one for a Google cloud environment (called `talos_gcloud`).

```bash
-DOCKERBUILDKIT=1 docker build --target talos_none -t talos:6.2.0 .
+DOCKERBUILDKIT=1 docker build --target talos_none -t talos:6.2.1 .
```

## Workflow Demonstration with Nextflow

A Nextflow pipeline is provided to demonstrate how to run Talos. This pipeline is designed to be run on a local machine, and requires that you have nextflow installed locally, and a docker daemon running. First, as described above, build the docker image:

```bash
-docker build --target talos_none -t talos:6.2.0 .
+docker build --target talos_none -t talos:6.2.1 .
```

Then, run the pipeline:
2 changes: 1 addition & 1 deletion setup.py
@@ -28,7 +28,7 @@ def read_reqs(filename: str) -> list[str]:
    name='talos',
    description='Centre for Population Genomics Variant Prioritisation',
    long_description=readme,
-    version='6.2.0',
+    version='6.2.1',
    author='Matthew Welland, CPG',
    author_email='[email protected], [email protected]',
    package_data={'talos': ['templates/*.jinja', 'example_config.toml']},
60 changes: 8 additions & 52 deletions src/talos/GeneratePanelData.py
@@ -12,7 +12,7 @@

import phenopackets.schema.v2 as pps2
from google.protobuf.json_format import ParseDict
-from networkx import MultiDiGraph
+from networkx import dfs_successors
from obonet import read_obo

from talos.config import config_retrieve
@@ -89,53 +89,6 @@ def set_up_cohort_pmp(cohort: pps2.Cohort) -> tuple[PhenotypeMatchedPanels, set[
    return hpo_dict, all_hpos


-def match_hpo_terms(
-    panel_map: dict[str, set[int]],
-    hpo_tree: MultiDiGraph,
-    hpo_str: str,
-    selections: set[int] | None = None,
-) -> set[int]:
-    """
-    get panels relevant for this HPO using a recursive edge traversal
-    for live terms we recurse on all parents
-    if a term is obsolete we instead check each replacement term
-    relevant usage guide:
-    https://github.com/dhimmel/obonet/blob/main/examples/go-obonet.ipynb
-    Args:
-        panel_map (dict):
-        hpo_tree (): a graph object representing the HPO tree
-        hpo_str (str): the query HPO term
-        selections (set[int]): collected panel IDs so far
-    Returns:
-        set: panel IDs relating to this HPO term
-    """

-    if selections is None:
-        selections = set()

-    # identify identical match and select the panel
-    if hpo_str in panel_map:
-        selections.update(panel_map[hpo_str])

-    # if a node is invalid, recursively call this method for each replacement D:
-    # there are simpler ways, just none that are as fun to write
-    if not hpo_tree.has_node(hpo_str):
-        get_logger().error(f'HPO term was absent from the tree: {hpo_str}')
-        return selections

-    hpo_node = hpo_tree.nodes[hpo_str]
-    if hpo_node.get('is_obsolete', 'false') == 'true':
-        for hpo_term in hpo_node.get('replaced_by', []):
-            selections.update(match_hpo_terms(panel_map, hpo_tree, hpo_term, selections))
-    # search for parent(s), even if the term is obsolete
-    for hpo_term in hpo_node.get('is_a', []):
-        selections.update(match_hpo_terms(panel_map, hpo_tree, hpo_term, selections))
-    return selections


def match_hpos_to_panels(hpo_panel_map: dict[str, set[int]], hpo_file: str, all_hpos: set[str]) -> dict[str, set[int]]:
"""
take the HPO terms from the participant metadata, and match to panels
@@ -147,15 +100,18 @@ def match_hpos_to_panels(hpo_panel_map: dict[str, set[int]], hpo_file: str, all_
    Returns:
        a dictionary linking all HPO terms to a corresponding set of Panel IDs
        a second dictionary linking all HPO terms to their plaintext names
    """

    hpo_graph = read_obo(hpo_file, ignore_obsolete=False)

-    hpo_to_panels = {}
+    hpo_to_panels = defaultdict(set)
    for hpo in all_hpos:
-        panel_ids = match_hpo_terms(panel_map=hpo_panel_map, hpo_tree=hpo_graph, hpo_str=hpo)
-        hpo_to_panels[hpo] = panel_ids
+        # identify all HPO terms back to the ontology root
+        successor_hpo_terms = set(dfs_successors(hpo_graph, hpo))
+
+        for hpo_term in successor_hpo_terms:
+            if hpo_term in hpo_panel_map:
+                hpo_to_panels[hpo].update(hpo_panel_map[hpo_term])

    return hpo_to_panels
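
For reference, a small end-to-end sketch of the replacement loop on a toy graph, reproducing the expectation the removed test used ({1, 2} for a query of HP:4); the terms and panel IDs are invented, and in the real module `defaultdict` is assumed to be imported already:

```python
# Illustrative sketch of the replacement loop on a toy ontology.
from collections import defaultdict

import networkx as nx
from networkx import dfs_successors

graph = nx.MultiDiGraph()
graph.add_edge('HP:4', 'HP:2')  # HP:4 is_a HP:2
graph.add_edge('HP:2', 'HP:1')  # HP:2 is_a HP:1

hpo_panel_map = {'HP:2': {1, 2}}  # panels registered against HPO terms
all_hpos = {'HP:4'}               # terms seen in the participant metadata

hpo_to_panels = defaultdict(set)
for hpo in all_hpos:
    # every term reached by walking child -> parent edges from the query
    successor_hpo_terms = set(dfs_successors(graph, hpo))
    for hpo_term in successor_hpo_terms:
        if hpo_term in hpo_panel_map:
            hpo_to_panels[hpo].update(hpo_panel_map[hpo_term])

print(dict(hpo_to_panels))  # {'HP:4': {1, 2}}
```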

2 changes: 1 addition & 1 deletion src/talos/version.py
@@ -3,4 +3,4 @@
"""

# Do not edit this file manually
-__version__ = '6.2.0'
+__version__ = '6.2.1'
13 changes: 1 addition & 12 deletions test/test_metamist_hpo.py
@@ -5,7 +5,7 @@
import networkx as nx
from obonet import read_obo

-from talos.GeneratePanelData import get_panels, match_hpo_terms, match_hpos_to_panels, match_participants_to_panels
+from talos.GeneratePanelData import get_panels, match_hpos_to_panels, match_participants_to_panels
from talos.models import ParticipantHPOPanels, PhenotypeMatchedPanels


@@ -18,17 +18,6 @@ def test_get_panels(httpx_mock, fake_panelapp_overview):
    assert panels_parsed == {'HP:1': {2}, 'HP:4': {1}, 'HP:6': {2}}


-def test_match_hpo_terms(fake_obo_path):
-    """
-    check that HP tree traversal works
-    this test is kinda limited now that the layer count is constant
-    """
-    obo_parsed = read_obo(fake_obo_path)
-    panel_map = {'HP:2': {1, 2}}
-    assert match_hpo_terms(panel_map=panel_map, hpo_tree=obo_parsed, hpo_str='HP:4') == {1, 2}
-    assert match_hpo_terms(panel_map=panel_map, hpo_tree=obo_parsed, hpo_str='HP:2') == {1, 2}


def test_match_hpos_to_panels(fake_obo_path):
"""
test the hpo-to-panel matching
