Seqr minimiser produces a second pheno-matched JSON (#363)
* seqr minimiser produces a second pheno-matched JSON

* Bump version: 3.0.4 → 3.1.0
MattWellie authored Mar 4, 2024
1 parent eae8bdb commit 074febd
Showing 8 changed files with 41 additions and 17 deletions.
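The substantive change is in the Seqr minimiser: alongside the full minimised JSON it can now write a phenotype-matched companion file. A rough sketch of the output shape implied by the MiniForSeqr/MiniVariant models used below (individual IDs, seqr links and category labels are invented for illustration):

```python
# Illustration only, not the exact model dump: results are keyed by individual,
# then by the variant's seqr_link, each entry carrying its categories and any
# supporting (e.g. compound-het partner) variants.
minimised = {
    'results': {
        'CPGABCDE': {
            '1-123456-A-G': {
                'categories': ['1', 'pm5'],
                'support_vars': ['1-234567-C-T'],
            },
        },
    },
}
# With pheno_match=True the same structure is written again, restricted to
# phenotype-matched variants, and the filename gains a '_pheno' suffix
# (e.g. seqr.json -> seqr_pheno.json).
```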
2 changes: 1 addition & 1 deletion .bumpversion.cfg
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 3.0.4
current_version = 3.1.0
commit = True
tag = False

2 changes: 1 addition & 1 deletion .github/workflows/clinvar_runner.yaml
@@ -32,5 +32,5 @@ jobs:
curl --fail --silent --show-error -X POST \
-H "Authorization: Bearer $TOKEN" \
-H "Content-Type:application/json" \
-d '{"output": "generate_clinvar_${{ steps.date.outputs.date }}", "dataset": "talos", "accessLevel": "full", "repo": "automated-interpretation-pipeline", "commit": "${{ github.sha }}", "cwd": "reanalysis", "script": ["./clinvar_runner.py"], "description": "Generate Latest Clinvar Summaries", "image": "australia-southeast1-docker.pkg.dev/cpg-common/images/cpg_aip:3.0.4", "config": {"workflow": {"sequencing_type": "genome"}, "cohorts": {"talos": {"clinvar_filter": ["victorian clinical genetics services, murdoch childrens research institute"]}}}, "wait": false}' \
-d '{"output": "generate_clinvar_${{ steps.date.outputs.date }}", "dataset": "talos", "accessLevel": "full", "repo": "automated-interpretation-pipeline", "commit": "${{ github.sha }}", "cwd": "reanalysis", "script": ["./clinvar_runner.py"], "description": "Generate Latest Clinvar Summaries", "image": "australia-southeast1-docker.pkg.dev/cpg-common/images/cpg_aip:3.1.0", "config": {"workflow": {"sequencing_type": "genome"}, "cohorts": {"talos": {"clinvar_filter": ["victorian clinical genetics services, murdoch childrens research institute"]}}}, "wait": false}' \
https://server-a2pko7ameq-ts.a.run.app
2 changes: 1 addition & 1 deletion .github/workflows/docker.yaml
@@ -14,7 +14,7 @@ permissions:
contents: read

env:
VERSION: 3.0.4
VERSION: 3.1.0

jobs:
docker:
13 changes: 8 additions & 5 deletions helpers/prepare_aip_cohort.py
@@ -31,10 +31,10 @@

PED_QUERY = gql(
"""
query PedAndSGs($project: String!) {
query PedAndSGs($project: String!, $type: String!) {
project(name: $project) {
pedigree
sequencingGroups(activeOnly: {eq: true}) {
sequencingGroups(type: {eq: $type}, activeOnly: {eq: true}) {
id
sample {
participant {
@@ -218,19 +218,20 @@ def process_pedigree(


def get_pedigree_for_project(
project: str,
project: str, seq_type: str
) -> tuple[list[dict[str, str]], dict[str, str]]:
"""
fetches the project pedigree from sample-metadata
list, one dict per participant
Args:
project (str): project/dataset to use in query
seq_type (str): exome or genome
Returns:
All API returned content
"""
response = query(PED_QUERY, variables={'project': project})
response = query(PED_QUERY, variables={'project': project, 'type': seq_type})
pedigree = response['project']['pedigree']
lookup = {
sg['sample']['participant']['externalId']: [sg['id']]
@@ -281,7 +282,9 @@ def main(

# get the list of all pedigree members as list of dictionaries
logging.info('Pulling all pedigree members')
pedigree_dicts, ext_lookup = get_pedigree_for_project(project=project)
pedigree_dicts, ext_lookup = get_pedigree_for_project(
project=project, seq_type=exome_or_genome
)

# endpoint gives list of tuples e.g. [['A1234567_proband', 'CPGABCDE']]
# parser returns a dictionary, arbitrary # sample IDs per participant
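
A minimal usage sketch of the updated helper (assumes helpers/ is importable as a package and a sample-metadata connection is configured; the dataset name is a placeholder):

```python
# Sketch only: exercising the new seq_type parameter added above.
from helpers.prepare_aip_cohort import get_pedigree_for_project

pedigree_dicts, ext_lookup = get_pedigree_for_project(
    project='my-dataset',
    seq_type='genome',  # the new parameter: 'exome' or 'genome'
)
# Under the hood this now runs PED_QUERY with both variables, so only
# sequencing groups of the requested type are returned:
#   query(PED_QUERY, variables={'project': 'my-dataset', 'type': 'genome'})
```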
@@ -9,12 +9,13 @@
- Individual ID
- Variant ID
- Categories (list)
- Labels (list)
- Support Variants (list)
- Independent (bool)
Also produce a second version of the same, limited to phenotype-matches
"""

import json
import logging
from argparse import ArgumentParser

from reanalysis.models import MiniForSeqr, MiniVariant, ResultData
@@ -33,7 +34,9 @@ def coord_to_string(coord: dict) -> str:
return f"{coord['chrom']}-{coord['pos']}-{coord['ref']}-{coord['alt']}"


def main(input_file: str, output: str, ext_map: str | None = None):
def main(
input_file: str, output: str, ext_map: str | None = None, pheno_match: bool = False
):
"""
reads in the input file, shrinks it, and writes the output file
@@ -42,6 +45,7 @@ def main(input_file: str, output: str, ext_map: str | None = None):
input_file (str):
output (str):
ext_map (str): optional mapping of internal to external IDs for seqr
pheno_match (bool): whether to limit to phenotype-matching variants
"""

with open(input_file, encoding='utf-8') as f:
@@ -65,17 +69,29 @@ def main(input_file: str, output: str, ext_map: str | None = None):
lil_data.results[individual] = {}
for variant in details.variants:
var_data = variant.var_data
if pheno_match and not variant.panels.matched:
continue
lil_data.results[individual][var_data.info['seqr_link']] = MiniVariant(
**{
'categories': variant.categories,
'support_vars': variant.support_vars
# 'independent': variant.independent,
'support_vars': variant.support_vars,
}
)

if pheno_match:
additional_string = 'phenotype-matched'
output = output.replace('.json', '_pheno.json')
else:
additional_string = ''

if not any(lil_data.results.values()):
logging.info(f'No {additional_string} results found')
return
with open(output, 'w', encoding='utf-8') as f:
f.write(MiniForSeqr.model_validate(lil_data).model_dump_json(indent=4))

logging.info(f'Wrote {additional_string} output to {output}')


if __name__ == '__main__':
parser = ArgumentParser()
@@ -86,8 +102,13 @@ def main(input_file: str, output: str, ext_map: str | None = None):
help='mapping of internal to external IDs for seqr',
default=None,
type=str,
required=False,
)
args = parser.parse_args()

main(input_file=args.input_file, output=args.output_file, ext_map=args.external_map)
main(
input_file=args.input_file,
output=args.output_file,
ext_map=args.external_map,
pheno_match=True,
)
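
Taken together, one full result file can now yield both a complete and a phenotype-restricted export. A hedged sketch of driving main() directly, as modified above (file names are placeholders; ext_map is left at its default):

```python
# Usage sketch only: main() called twice against the same full AIP result file.
main(input_file='full_results.json', output='seqr.json')
# -> writes seqr.json containing every reported variant

main(input_file='full_results.json', output='seqr.json', pheno_match=True)
# -> keeps only variants whose panels matched the participant's phenotype and
#    writes seqr_pheno.json; if nothing matches, it logs that and writes no file
```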
2 changes: 1 addition & 1 deletion reanalysis/reanalysis_global.toml
@@ -74,7 +74,7 @@ default_memory = 'highmem'

[images]
gatk = 'australia-southeast1-docker.pkg.dev/cpg-common/images/gatk:4.2.6.1'
aip = 'australia-southeast1-docker.pkg.dev/cpg-common/images/cpg_aip:3.0.4'
aip = 'australia-southeast1-docker.pkg.dev/cpg-common/images/cpg_aip:3.1.0'
vep_110 = "australia-southeast1-docker.pkg.dev/cpg-common/images/vep_110:release_110.1"
cpg_workflows = "australia-southeast1-docker.pkg.dev/cpg-common/images/cpg_workflows:latest"

2 changes: 1 addition & 1 deletion reanalysis/version.py
@@ -3,4 +3,4 @@
"""

# Do not edit this file manually
__version__ = '3.0.4'
__version__ = '3.1.0'
2 changes: 1 addition & 1 deletion setup.py
@@ -31,7 +31,7 @@ def read_reqs(filename: str) -> list[str]:
name='automated-interpretation-pipeline',
description='CPG Variant Prioritisation',
long_description=readme,
version='3.0.4',
version='3.1.0',
author='Matthew Welland, CPG',
author_email=(
'[email protected], '
