Seqr minimiser produces a second pheno-matched JSON (#363)
* seqr minimiser produces a second pheno-matched JSON

* Bump version: 3.0.4 → 3.1.0
MattWellie authored Mar 4, 2024
1 parent eae8bdb commit 074febd
Showing 8 changed files with 41 additions and 17 deletions.
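The substantive change is in the Seqr minimiser: alongside the full minimised JSON it can now write a phenotype-matched companion file. A rough sketch of the output shape implied by the MiniForSeqr/MiniVariant models used below (individual IDs, seqr links and category labels are invented for illustration):

```python
# Illustration only, not the exact model dump: results are keyed by individual,
# then by the variant's seqr_link, each entry carrying its categories and any
# supporting (e.g. compound-het partner) variants.
minimised = {
    'results': {
        'CPGABCDE': {
            '1-123456-A-G': {
                'categories': ['1', 'pm5'],
                'support_vars': ['1-234567-C-T'],
            },
        },
    },
}
# With pheno_match=True the same structure is written again, restricted to
# phenotype-matched variants, and the filename gains a '_pheno' suffix
# (e.g. seqr.json -> seqr_pheno.json).
```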
2 changes: 1 addition & 1 deletion .bumpversion.cfg
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 3.0.4
current_version = 3.1.0
commit = True
tag = False

2 changes: 1 addition & 1 deletion .github/workflows/clinvar_runner.yaml
@@ -32,5 +32,5 @@ jobs:
curl --fail --silent --show-error -X POST \
-H "Authorization: Bearer $TOKEN" \
-H "Content-Type:application/json" \
-d '{"output": "generate_clinvar_${{ steps.date.outputs.date }}", "dataset": "talos", "accessLevel": "full", "repo": "automated-interpretation-pipeline", "commit": "${{ github.sha }}", "cwd": "reanalysis", "script": ["./clinvar_runner.py"], "description": "Generate Latest Clinvar Summaries", "image": "australia-southeast1-docker.pkg.dev/cpg-common/images/cpg_aip:3.0.4", "config": {"workflow": {"sequencing_type": "genome"}, "cohorts": {"talos": {"clinvar_filter": ["victorian clinical genetics services, murdoch childrens research institute"]}}}, "wait": false}' \
-d '{"output": "generate_clinvar_${{ steps.date.outputs.date }}", "dataset": "talos", "accessLevel": "full", "repo": "automated-interpretation-pipeline", "commit": "${{ github.sha }}", "cwd": "reanalysis", "script": ["./clinvar_runner.py"], "description": "Generate Latest Clinvar Summaries", "image": "australia-southeast1-docker.pkg.dev/cpg-common/images/cpg_aip:3.1.0", "config": {"workflow": {"sequencing_type": "genome"}, "cohorts": {"talos": {"clinvar_filter": ["victorian clinical genetics services, murdoch childrens research institute"]}}}, "wait": false}' \
https://server-a2pko7ameq-ts.a.run.app
2 changes: 1 addition & 1 deletion .github/workflows/docker.yaml
@@ -14,7 +14,7 @@ permissions:
contents: read

env:
VERSION: 3.0.4
VERSION: 3.1.0

jobs:
docker:
13 changes: 8 additions & 5 deletions helpers/prepare_aip_cohort.py
@@ -31,10 +31,10 @@

PED_QUERY = gql(
"""
query PedAndSGs($project: String!) {
query PedAndSGs($project: String!, $type: String!) {
project(name: $project) {
pedigree
sequencingGroups(activeOnly: {eq: true}) {
sequencingGroups(type: {eq: $type}, activeOnly: {eq: true}) {
id
sample {
participant {
@@ -218,19 +218,20 @@ def process_pedigree(


def get_pedigree_for_project(
project: str,
project: str, seq_type: str
) -> tuple[list[dict[str, str]], dict[str, str]]:
"""
fetches the project pedigree from sample-metadata
list, one dict per participant
Args:
project (str): project/dataset to use in query
seq_type (str): exome or genome
Returns:
All API returned content
"""
response = query(PED_QUERY, variables={'project': project})
response = query(PED_QUERY, variables={'project': project, 'type': seq_type})
pedigree = response['project']['pedigree']
lookup = {
sg['sample']['participant']['externalId']: [sg['id']]
@@ -281,7 +282,9 @@ def main(

# get the list of all pedigree members as list of dictionaries
logging.info('Pulling all pedigree members')
pedigree_dicts, ext_lookup = get_pedigree_for_project(project=project)
pedigree_dicts, ext_lookup = get_pedigree_for_project(
project=project, seq_type=exome_or_genome
)

# endpoint gives list of tuples e.g. [['A1234567_proband', 'CPGABCDE']]
# parser returns a dictionary, arbitrary # sample IDs per participant
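
A minimal usage sketch of the updated helper (assumes helpers/ is importable as a package and a sample-metadata connection is configured; the dataset name is a placeholder):

```python
# Sketch only: exercising the new seq_type parameter added above.
from helpers.prepare_aip_cohort import get_pedigree_for_project

pedigree_dicts, ext_lookup = get_pedigree_for_project(
    project='my-dataset',
    seq_type='genome',  # the new parameter: 'exome' or 'genome'
)
# Under the hood this now runs PED_QUERY with both variables, so only
# sequencing groups of the requested type are returned:
#   query(PED_QUERY, variables={'project': 'my-dataset', 'type': 'genome'})
```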
@@ -9,12 +9,13 @@
- Individual ID
- Variant ID
- Categories (list)
- Labels (list)
- Support Variants (list)
- Independent (bool)
Also produce a second version of the same, limited to phenotype-matches
"""

import json
import logging
from argparse import ArgumentParser

from reanalysis.models import MiniForSeqr, MiniVariant, ResultData
@@ -33,7 +34,9 @@ def coord_to_string(coord: dict) -> str:
return f"{coord['chrom']}-{coord['pos']}-{coord['ref']}-{coord['alt']}"


def main(input_file: str, output: str, ext_map: str | None = None):
def main(
input_file: str, output: str, ext_map: str | None = None, pheno_match: bool = False
):
"""
reads in the input file, shrinks it, and writes the output file
@@ -42,6 +45,7 @@ def main(input_file: str, output: str, ext_map: str | None = None):
input_file (str):
output (str):
ext_map (str): optional mapping of internal to external IDs for seqr
pheno_match (bool): whether to limit to phenotype-matching variants
"""

with open(input_file, encoding='utf-8') as f:
@@ -65,17 +69,29 @@ def main(input_file: str, output: str, ext_map: str | None = None):
lil_data.results[individual] = {}
for variant in details.variants:
var_data = variant.var_data
if pheno_match and not variant.panels.matched:
continue
lil_data.results[individual][var_data.info['seqr_link']] = MiniVariant(
**{
'categories': variant.categories,
'support_vars': variant.support_vars
# 'independent': variant.independent,
'support_vars': variant.support_vars,
}
)

if pheno_match:
additional_string = 'phenotype-matched'
output = output.replace('.json', '_pheno.json')
else:
additional_string = ''

if not any(lil_data.results.values()):
logging.info(f'No {additional_string} results found')
return
with open(output, 'w', encoding='utf-8') as f:
f.write(MiniForSeqr.model_validate(lil_data).model_dump_json(indent=4))

logging.info(f'Wrote {additional_string} output to {output}')


if __name__ == '__main__':
parser = ArgumentParser()
@@ -86,8 +102,13 @@ def main(input_file: str, output: str, ext_map: str | None = None):
help='mapping of internal to external IDs for seqr',
default=None,
type=str,
required=False,
)
args = parser.parse_args()

main(input_file=args.input_file, output=args.output_file, ext_map=args.external_map)
main(
input_file=args.input_file,
output=args.output_file,
ext_map=args.external_map,
pheno_match=True,
)
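
Taken together, one full result file can now yield both a complete and a phenotype-restricted export. A hedged sketch of driving main() directly, as modified above (file names are placeholders; ext_map is left at its default):

```python
# Usage sketch only: main() called twice against the same full AIP result file.
main(input_file='full_results.json', output='seqr.json')
# -> writes seqr.json containing every reported variant

main(input_file='full_results.json', output='seqr.json', pheno_match=True)
# -> keeps only variants whose panels matched the participant's phenotype and
#    writes seqr_pheno.json; if nothing matches, it logs that and writes no file
```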
2 changes: 1 addition & 1 deletion reanalysis/reanalysis_global.toml
@@ -74,7 +74,7 @@ default_memory = 'highmem'

[images]
gatk = 'australia-southeast1-docker.pkg.dev/cpg-common/images/gatk:4.2.6.1'
aip = 'australia-southeast1-docker.pkg.dev/cpg-common/images/cpg_aip:3.0.4'
aip = 'australia-southeast1-docker.pkg.dev/cpg-common/images/cpg_aip:3.1.0'
vep_110 = "australia-southeast1-docker.pkg.dev/cpg-common/images/vep_110:release_110.1"
cpg_workflows = "australia-southeast1-docker.pkg.dev/cpg-common/images/cpg_workflows:latest"

2 changes: 1 addition & 1 deletion reanalysis/version.py
@@ -3,4 +3,4 @@
"""

# Do not edit this file manually
__version__ = '3.0.4'
__version__ = '3.1.0'
2 changes: 1 addition & 1 deletion setup.py
@@ -31,7 +31,7 @@ def read_reqs(filename: str) -> list[str]:
name='automated-interpretation-pipeline',
description='CPG Variant Prioritisation',
long_description=readme,
version='3.0.4',
version='3.1.0',
author='Matthew Welland, CPG',
author_email=(
'[email protected], '
