diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 8c1440f2..64c2ce02 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 3.0.4 +current_version = 3.1.0 commit = True tag = False diff --git a/.github/workflows/clinvar_runner.yaml b/.github/workflows/clinvar_runner.yaml index 5dcaae74..7e8d0209 100644 --- a/.github/workflows/clinvar_runner.yaml +++ b/.github/workflows/clinvar_runner.yaml @@ -32,5 +32,5 @@ jobs: curl --fail --silent --show-error -X POST \ -H "Authorization: Bearer $TOKEN" \ -H "Content-Type:application/json" \ - -d '{"output": "generate_clinvar_${{ steps.date.outputs.date }}", "dataset": "talos", "accessLevel": "full", "repo": "automated-interpretation-pipeline", "commit": "${{ github.sha }}", "cwd": "reanalysis", "script": ["./clinvar_runner.py"], "description": "Generate Latest Clinvar Summaries", "image": "australia-southeast1-docker.pkg.dev/cpg-common/images/cpg_aip:3.0.4", "config": {"workflow": {"sequencing_type": "genome"}, "cohorts": {"talos": {"clinvar_filter": ["victorian clinical genetics services, murdoch childrens research institute"]}}}, "wait": false}' \ + -d '{"output": "generate_clinvar_${{ steps.date.outputs.date }}", "dataset": "talos", "accessLevel": "full", "repo": "automated-interpretation-pipeline", "commit": "${{ github.sha }}", "cwd": "reanalysis", "script": ["./clinvar_runner.py"], "description": "Generate Latest Clinvar Summaries", "image": "australia-southeast1-docker.pkg.dev/cpg-common/images/cpg_aip:3.1.0", "config": {"workflow": {"sequencing_type": "genome"}, "cohorts": {"talos": {"clinvar_filter": ["victorian clinical genetics services, murdoch childrens research institute"]}}}, "wait": false}' \ https://server-a2pko7ameq-ts.a.run.app diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml index efcfa36c..c61f5ff6 100644 --- a/.github/workflows/docker.yaml +++ b/.github/workflows/docker.yaml @@ -14,7 +14,7 @@ permissions: contents: read env: - VERSION: 3.0.4 + VERSION: 3.1.0 jobs: docker: diff --git a/helpers/prepare_aip_cohort.py b/helpers/prepare_aip_cohort.py index 3ae589b8..9d671a00 100644 --- a/helpers/prepare_aip_cohort.py +++ b/helpers/prepare_aip_cohort.py @@ -31,10 +31,10 @@ PED_QUERY = gql( """ - query PedAndSGs($project: String!) { + query PedAndSGs($project: String!, $type: String!) { project(name: $project) { pedigree - sequencingGroups(activeOnly: {eq: true}) { + sequencingGroups(type: {eq: $type}, activeOnly: {eq: true}) { id sample { participant { @@ -218,7 +218,7 @@ def process_pedigree( def get_pedigree_for_project( - project: str, + project: str, seq_type: str ) -> tuple[list[dict[str, str]], dict[str, str]]: """ fetches the project pedigree from sample-metadata @@ -226,11 +226,12 @@ def get_pedigree_for_project( Args: project (str): project/dataset to use in query + seq_type (str): exome or genome Returns: All API returned content """ - response = query(PED_QUERY, variables={'project': project}) + response = query(PED_QUERY, variables={'project': project, 'type': seq_type}) pedigree = response['project']['pedigree'] lookup = { sg['sample']['participant']['externalId']: [sg['id']] @@ -281,7 +282,9 @@ def main( # get the list of all pedigree members as list of dictionaries logging.info('Pulling all pedigree members') - pedigree_dicts, ext_lookup = get_pedigree_for_project(project=project) + pedigree_dicts, ext_lookup = get_pedigree_for_project( + project=project, seq_type=exome_or_genome + ) # endpoint gives list of tuples e.g. [['A1234567_proband', 'CPGABCDE']] # parser returns a dictionary, arbitrary # sample IDs per participant diff --git a/helpers/minimise_output_for_seqr.py b/reanalysis/minimise_output_for_seqr.py similarity index 73% rename from helpers/minimise_output_for_seqr.py rename to reanalysis/minimise_output_for_seqr.py index 16ebe695..879d8e0b 100644 --- a/helpers/minimise_output_for_seqr.py +++ b/reanalysis/minimise_output_for_seqr.py @@ -9,12 +9,13 @@ - Individual ID - Variant ID - Categories (list) - - Labels (list) - Support Variants (list) - - Independent (bool) + +Also produce a second version of the same, limited to phenotype-matches """ import json +import logging from argparse import ArgumentParser from reanalysis.models import MiniForSeqr, MiniVariant, ResultData @@ -33,7 +34,9 @@ def coord_to_string(coord: dict) -> str: return f"{coord['chrom']}-{coord['pos']}-{coord['ref']}-{coord['alt']}" -def main(input_file: str, output: str, ext_map: str | None = None): +def main( + input_file: str, output: str, ext_map: str | None = None, pheno_match: bool = False +): """ reads in the input file, shrinks it, and writes the output file @@ -42,6 +45,7 @@ def main(input_file: str, output: str, ext_map: str | None = None): input_file (str): output (str): ext_map (str): optional mapping of internal to external IDs for seqr + pheno_match (bool): whether to limit to phenotype-matching variants """ with open(input_file, encoding='utf-8') as f: @@ -65,17 +69,29 @@ def main(input_file: str, output: str, ext_map: str | None = None): lil_data.results[individual] = {} for variant in details.variants: var_data = variant.var_data + if pheno_match and not variant.panels.matched: + continue lil_data.results[individual][var_data.info['seqr_link']] = MiniVariant( **{ 'categories': variant.categories, - 'support_vars': variant.support_vars - # 'independent': variant.independent, + 'support_vars': variant.support_vars, } ) + if pheno_match: + additional_string = 'phenotype-matched' + output = output.replace('.json', '_pheno.json') + else: + additional_string = '' + + if not any(lil_data.results.values()): + logging.info(f'No {additional_string} results found') + return with open(output, 'w', encoding='utf-8') as f: f.write(MiniForSeqr.model_validate(lil_data).model_dump_json(indent=4)) + logging.info(f'Wrote {additional_string} output to {output}') + if __name__ == '__main__': parser = ArgumentParser() @@ -86,8 +102,13 @@ def main(input_file: str, output: str, ext_map: str | None = None): help='mapping of internal to external IDs for seqr', default=None, type=str, - required=False, ) args = parser.parse_args() main(input_file=args.input_file, output=args.output_file, ext_map=args.external_map) + main( + input_file=args.input_file, + output=args.output_file, + ext_map=args.external_map, + pheno_match=True, + ) diff --git a/reanalysis/reanalysis_global.toml b/reanalysis/reanalysis_global.toml index 07704ee2..b4345675 100644 --- a/reanalysis/reanalysis_global.toml +++ b/reanalysis/reanalysis_global.toml @@ -74,7 +74,7 @@ default_memory = 'highmem' [images] gatk = 'australia-southeast1-docker.pkg.dev/cpg-common/images/gatk:4.2.6.1' -aip = 'australia-southeast1-docker.pkg.dev/cpg-common/images/cpg_aip:3.0.4' +aip = 'australia-southeast1-docker.pkg.dev/cpg-common/images/cpg_aip:3.1.0' vep_110 = "australia-southeast1-docker.pkg.dev/cpg-common/images/vep_110:release_110.1" cpg_workflows = "australia-southeast1-docker.pkg.dev/cpg-common/images/cpg_workflows:latest" diff --git a/reanalysis/version.py b/reanalysis/version.py index 25709212..ba583cd5 100644 --- a/reanalysis/version.py +++ b/reanalysis/version.py @@ -3,4 +3,4 @@ """ # Do not edit this file manually -__version__ = '3.0.4' +__version__ = '3.1.0' diff --git a/setup.py b/setup.py index 0c46b3e7..2891270c 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ def read_reqs(filename: str) -> list[str]: name='automated-interpretation-pipeline', description='CPG Variant Prioritisation', long_description=readme, - version='3.0.4', + version='3.1.0', author='Matthew Welland, CPG', author_email=( 'matthew.welland@populationgenomics.org.au, '