From 96348a50440803137611d514f4bf2bf24e153670 Mon Sep 17 00:00:00 2001 From: Shenglai Li Date: Tue, 5 Apr 2022 21:24:00 -0500 Subject: [PATCH] BINF-494: add individual pon workflow (#19) * BINF-494: add individual pon workflow * BINF-494: fix workflow * BINF-494: update base image * BINF-494: add option to skip download some reference for pon * BINF-494: fix prefix * BINF-494: fix download when skipping * BINF-494: fix workflow input source --- example/gatk4.2.4.1_mutect2_tumor_only.json | 18 +- gatk4-mutect2-calling-cwl/Dockerfile | 2 +- .../gpas_gatk4.2.4.1_mutect2_workflow.cwl | 2 + gatk4-mutect2-tumor-only-cwl/Dockerfile | 2 +- ...atk4.2.4.1_mutect2_tumor_only_workflow.cwl | 2 + gatk4-pon-cwl/Dockerfile | 17 + gatk4-pon-cwl/Makefile | 72 ++++ .../gpas_gatk4.2.4.1_mutect2_pon_workflow.cwl | 340 ++++++++++++++++++ .../subworkflow/preparation_workflow.cwl | 101 ++++-- utils-cwl/subworkflow/stage_workflow.cwl | 55 ++- 10 files changed, 562 insertions(+), 49 deletions(-) create mode 100644 gatk4-pon-cwl/Dockerfile create mode 100644 gatk4-pon-cwl/Makefile create mode 100644 gatk4-pon-cwl/gpas_gatk4.2.4.1_mutect2_pon_workflow.cwl diff --git a/example/gatk4.2.4.1_mutect2_tumor_only.json b/example/gatk4.2.4.1_mutect2_tumor_only.json index 4fbedda..0dc9c4b 100644 --- a/example/gatk4.2.4.1_mutect2_tumor_only.json +++ b/example/gatk4.2.4.1_mutect2_tumor_only.json @@ -1,18 +1,14 @@ { "has_normal": [], - "output_prefix": "0708_test_full", + "output_prefix": "pon_test_full", "java_heap": "30G", "chunk_java_heap": "8G", "nthreads": 4, - "reference": {"class": "File", "path": "/mnt/SCRATCH/reference/GRCh38.d1.vd1.fa"}, - "reference_image": {"class": "File", "path": "/mnt/SCRATCH/reference/GRCh38.d1.vd1.fa.img"}, - "common_variant_reference": {"class": "File", "path": "/mnt/SCRATCH/reference/gnomad.genomes.r2.1.1.sites.liftover_grch38.pass.no_decoy.af_only.biallelicvcf.gz"}, + "reference": {"class": "File", "path": "/mnt/SCRATCH/reference/hwf/GRCh38.d1.vd1.fa"}, "intervals": 
{"class": "File", "path": "/mnt/SCRATCH/reference/intervals.bed"}, - "tumor_bam": {"class": "File", "path": "/mnt/SCRATCH/playground/C500.TCGA-HQ-A5ND-01A-11D-A26M-08.4_gdc_realn.bam"}, - "f1r2_tar_gz": true, - "bam_output": true, - "genotype_pon_sites": true, - "germline_resource": {"class": "File", "path": "/mnt/SCRATCH/reference/gnomad.genomes.r2.1.1.sites.liftover_grch38.pass.no_decoy.af_only.vcf.gz"}, - "panel_of_normals": {"class": "File", "path": "/mnt/SCRATCH/reference/gatk4.1.2.tcga.4136.pon.vcf.gz"}, - "call_on_all": true + "input": [{"class": "File", "path": "/mnt/SCRATCH/playground/bam_files/C500.TCGA-HQ-A5ND-01A-11D-A26M-08.4_gdc_realn.bam"}], + "f1r2_tar_gz": false, + "bam_output": false, + "call_on_all": false, + "max_mnp_distance": 0 } \ No newline at end of file diff --git a/gatk4-mutect2-calling-cwl/Dockerfile b/gatk4-mutect2-calling-cwl/Dockerfile index 093b1e6..79ca5ba 100644 --- a/gatk4-mutect2-calling-cwl/Dockerfile +++ b/gatk4-mutect2-calling-cwl/Dockerfile @@ -1,4 +1,4 @@ -FROM quay.io/ncigdc/bio-alpine:py36 +FROM quay.io/ncigdc/bio-alpine:py36-8dee591 ARG WORKFLOW diff --git a/gatk4-mutect2-calling-cwl/gpas_gatk4.2.4.1_mutect2_workflow.cwl b/gatk4-mutect2-calling-cwl/gpas_gatk4.2.4.1_mutect2_workflow.cwl index e73afed..c91f765 100644 --- a/gatk4-mutect2-calling-cwl/gpas_gatk4.2.4.1_mutect2_workflow.cwl +++ b/gatk4-mutect2-calling-cwl/gpas_gatk4.2.4.1_mutect2_workflow.cwl @@ -14,6 +14,7 @@ requirements: inputs: ###CONDITIONAL_INPUTS### has_normal: int[] + pon_calling: int[] ###BIOCLIENT_INPUTS### bioclient_config: File tumor_gdc_id: string @@ -207,6 +208,7 @@ steps: preparation: run: ../utils-cwl/subworkflow/preparation_workflow.cwl in: + pon_calling: pon_calling bioclient_config: bioclient_config has_normal: has_normal tumor_gdc_id: tumor_gdc_id diff --git a/gatk4-mutect2-tumor-only-cwl/Dockerfile b/gatk4-mutect2-tumor-only-cwl/Dockerfile index 093b1e6..79ca5ba 100644 --- a/gatk4-mutect2-tumor-only-cwl/Dockerfile +++ 
b/gatk4-mutect2-tumor-only-cwl/Dockerfile @@ -1,4 +1,4 @@ -FROM quay.io/ncigdc/bio-alpine:py36 +FROM quay.io/ncigdc/bio-alpine:py36-8dee591 ARG WORKFLOW diff --git a/gatk4-mutect2-tumor-only-cwl/gpas_gatk4.2.4.1_mutect2_tumor_only_workflow.cwl b/gatk4-mutect2-tumor-only-cwl/gpas_gatk4.2.4.1_mutect2_tumor_only_workflow.cwl index 898f411..55168fa 100644 --- a/gatk4-mutect2-tumor-only-cwl/gpas_gatk4.2.4.1_mutect2_tumor_only_workflow.cwl +++ b/gatk4-mutect2-tumor-only-cwl/gpas_gatk4.2.4.1_mutect2_tumor_only_workflow.cwl @@ -14,6 +14,7 @@ requirements: inputs: ###CONDITIONAL_INPUTS### has_normal: int[] + pon_calling: int[] ###BIOCLIENT_INPUTS### bioclient_config: File tumor_gdc_id: string @@ -207,6 +208,7 @@ steps: preparation: run: ../utils-cwl/subworkflow/preparation_workflow.cwl in: + pon_calling: pon_calling bioclient_config: bioclient_config has_normal: has_normal tumor_gdc_id: tumor_gdc_id diff --git a/gatk4-pon-cwl/Dockerfile b/gatk4-pon-cwl/Dockerfile new file mode 100644 index 0000000..79ca5ba --- /dev/null +++ b/gatk4-pon-cwl/Dockerfile @@ -0,0 +1,17 @@ +FROM quay.io/ncigdc/bio-alpine:py36-8dee591 + +ARG WORKFLOW + +COPY ./${WORKFLOW} /opt +# TODO: Copy additional directories as needed +COPY ./tools /tools +COPY ./utils-cwl /utils-cwl +COPY ./subworkflows /subworkflows + +RUN make -C /opt init-pip + +WORKDIR /opt + +ENTRYPOINT ["make"] + +CMD ["pack"] \ No newline at end of file diff --git a/gatk4-pon-cwl/Makefile b/gatk4-pon-cwl/Makefile new file mode 100644 index 0000000..4ada277 --- /dev/null +++ b/gatk4-pon-cwl/Makefile @@ -0,0 +1,72 @@ +# quay repo not git repo +REPO = gatk4-pon-cwl +# TODO: Replace me +ENTRY_CWL = "gpas_gatk4.2.4.1_mutect2_pon_workflow.cwl" + +VERSION := $(shell date -u +"%Y%m%dT%H%MZ") +COMMIT_HASH:=$(shell git rev-parse HEAD) + +DOCKER_REPO := quay.io/ncigdc +DOCKER_IMAGE_COMMIT := ${DOCKER_REPO}/${REPO}:${COMMIT_HASH} +DOCKER_IMAGE := ${DOCKER_REPO}/${REPO}:${VERSION} + +.PHONY: version version-* name +name: + @echo ${WORKFLOW_NAME} + 
+version: + @echo --- VERSION: ${VERSION} --- + +version-docker: + @echo ${DOCKER_IMAGE_COMMIT} + @echo ${DOCKER_IMAGE} + +.PHONY: docker-* +docker-login: + @echo + docker login -u="${QUAY_USERNAME}" -p="${QUAY_PASSWORD}" quay.io + + +.PHONY: build build-* clean init init-* lint requirements run version +init: init-pip + +init-pip: + @echo + @echo -- Installing pip packages -- + pip3 install --no-cache-dir cwltool==1.0.20180306163216 + +.PHONY: pack pack-% +pack: + @python -m cwltool --pack "${ENTRY_CWL}" + +run: + @docker run --rm ${DOCKER_IMAGE_COMMIT} pack ENTRY_CWL=/opt/${ENTRY_CWL} + +.PHONY: validate validate-* +validate: + @python -m cwltool --validate "${ENTRY_CWL}" + +validate-docker: + @docker run --rm ${DOCKER_IMAGE_COMMIT} validate ENTRY_CWL=/opt/${ENTRY_CWL} + +.PHONY: build build-* +build: build-docker + +build-docker: + @echo + @echo -- Building docker -- + docker build .. \ + --file ./Dockerfile \ + --build-arg WORKFLOW=${WORKFLOW_NAME} \ + -t "${DOCKER_IMAGE_COMMIT}" + + +.PHONY: publish publish-release +publish: + docker push ${DOCKER_IMAGE_COMMIT} + +publish-staging: publish + +publish-release: publish + docker tag ${DOCKER_IMAGE_COMMIT} ${DOCKER_IMAGE} + docker push ${DOCKER_IMAGE} \ No newline at end of file diff --git a/gatk4-pon-cwl/gpas_gatk4.2.4.1_mutect2_pon_workflow.cwl b/gatk4-pon-cwl/gpas_gatk4.2.4.1_mutect2_pon_workflow.cwl new file mode 100644 index 0000000..29899ce --- /dev/null +++ b/gatk4-pon-cwl/gpas_gatk4.2.4.1_mutect2_pon_workflow.cwl @@ -0,0 +1,340 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 + +class: Workflow + +requirements: + - class: InlineJavascriptRequirement + - class: StepInputExpressionRequirement + - class: MultipleInputFeatureRequirement + - class: SubworkflowFeatureRequirement + - class: ScatterFeatureRequirement + +inputs: +###OPTIONS### + has_normal: int[] + pon_calling: int[] +###BIOCLIENT_INPUTS### + bioclient_config: File + tumor_gdc_id: string + tumor_index_gdc_id: string + reference_gdc_id: string + 
reference_faidx_gdc_id: string + reference_dict_gdc_id: string + upload_bucket: string +###GENERAL_INPUTS### + project_id: string? + experimental_strategy: string? + job_uuid: + type: string + doc: Job id. Serves as a prefix for most outputs. + java_heap: + type: string + default: '32G' + doc: Java option flags for all the java cmd. GDC default is 32G. + chunk_java_heap: + type: string + default: '3G' + doc: Java option flag for multithreading Mutect2 only. GDC default is 3G. + nthreads: int + bam_output: + type: boolean + default: false + doc: If specified, assembled haplotypes will be written to bam. Used for alignment artifacts filtration. GDC default is true. + f1r2_tar_gz: + type: boolean + default: false + doc: If specified, collect F1R2 counts and output files into tar.gz file. Used for Mutect2 filtration. GDC default is true. + usedecoy: + type: boolean + default: false + doc: If specified, it will include all the decoy sequences in the faidx. GDC default is false. + max_mnp_distance: + type: int + default: 0 + +###OPTIONAL_INPUTS### + active_probability_threshold: + type: float? + adaptive_pruning_initial_error_rate: + type: float? + af_of_alleles_not_in_resource: + type: float? + allow_non_unique_kmers_in_ref: + type: boolean? + assembly_region_padding: + type: int? + bam_writer_type: + type: string? + base_quality_score_threshold: + type: int? + callable_depth: + type: int? + disable_adaptive_pruning: + type: boolean? + disable_bam_index_caching: + type: boolean? + disable_sequence_dictionary_validation: + type: boolean? + disable_tool_default_annotations: + type: boolean? + dont_increase_kmer_sizes_for_cycles: + type: boolean? + dont_trim_active_regions: + type: boolean? + dont_use_soft_clipped_bases: + type: boolean? + downsampling_stride: + type: int? + emit_ref_confidence: + type: string? + enable_all_annotations: + type: boolean? + f1r2_max_depth: + type: int? + f1r2_median_mq: + type: int? + f1r2_min_bq: + type: int? + force_active: + type: boolean? 
+ genotype_filtered_alleles: + type: boolean? + genotype_germline_sites: + type: boolean? + genotype_pon_sites: + type: boolean? + gvcf_lod_band: + type: float? + ignore_itr_artifacts: + type: boolean? + initial_tumor_lod: + type: float? + interval_merging_rule: + type: string? + kmer_size: + type: int? + max_assembly_region_size: + type: int? + max_num_haplotypes_in_population: + type: int? + max_population_af: + type: float? + max_prob_propagation_distance: + type: int? + max_reads_per_alignment_start: + type: int? + max_suspicious_reads_per_alignment_start: + type: int? + max_unpruned_variants: + type: int? + min_assembly_region_size: + type: int? + min_base_quality_score: + type: int? + min_dangling_branch_length: + type: int? + min_pruning: + type: int? + minimum_allele_fraction: + type: float? + mitochondria_mode: + type: boolean? + native_pair_hmm_threads: + type: int? + native_pair_hmm_use_double_precision: + type: boolean? + num_pruning_samples: + type: int? + pair_hmm_gap_continuation_penalty: + type: int? + pair_hmm_implementation: + type: string? + pcr_indel_model: + type: string? + pcr_indel_qual: + type: int? + pcr_snv_qual: + type: int? + pedigree: + type: File? + phred_scaled_global_read_mismapping_rate: + type: int? + pruning_lod_threshold: + type: float? + recover_all_dangling_branches: + type: boolean? + showHidden: + type: boolean? + sites_only_vcf_output: + type: boolean? + smith_waterman: + type: string? + tumor_lod_to_emit: + type: float? 
+ +outputs: + individual_pon_vcf_uuid: + type: string + outputSource: uuid_vcf/output + individual_pon_vcf_index_uuid: + type: string + outputSource: uuid_vcf_index/output + +steps: + prepare_file_prefix: + run: ../utils-cwl/make_prefix.cwl + in: + has_normal: has_normal + project_id: project_id + job_id: job_uuid + experimental_strategy: experimental_strategy + out: [output_prefix] + + preparation: + run: ../utils-cwl/subworkflow/preparation_workflow.cwl + in: + has_normal: has_normal + pon_calling: pon_calling + bioclient_config: bioclient_config + tumor_gdc_id: tumor_gdc_id + tumor_index_gdc_id: tumor_index_gdc_id + reference_fa_gdc_id: reference_gdc_id + reference_fai_gdc_id: reference_faidx_gdc_id + reference_dict_gdc_id: reference_dict_gdc_id + out: [tumor_with_index, normal_with_index, reference_with_index, reference_image, germline_resource_with_index, common_biallelic_variants_with_index, panel_of_normal_with_index] + + faidx_to_bed: + run: ../utils-cwl/faidx_to_bed.cwl + in: + ref_fai: + source: preparation/reference_with_index + valueFrom: $(self.secondaryFiles[0]) + usedecoy: usedecoy + out: [output_bed] + + gatk4_pon: + run: ../tools/filter_mutect2/gatk4_multithread_mutect2.cwl + in: + nthreads: nthreads + java_heap: chunk_java_heap + input: + source: [preparation/tumor_with_index, preparation/normal_with_index] + valueFrom: $([self[0], self[1]]) + output_prefix: prepare_file_prefix/output_prefix + reference: preparation/reference_with_index + intervals: faidx_to_bed/output_bed + bam_output: bam_output + f1r2_tar_gz: f1r2_tar_gz + active_probability_threshold: active_probability_threshold + adaptive_pruning_initial_error_rate: adaptive_pruning_initial_error_rate + af_of_alleles_not_in_resource: af_of_alleles_not_in_resource + allow_non_unique_kmers_in_ref: allow_non_unique_kmers_in_ref + assembly_region_padding: assembly_region_padding + bam_writer_type: bam_writer_type + base_quality_score_threshold: base_quality_score_threshold + callable_depth: 
callable_depth + disable_adaptive_pruning: disable_adaptive_pruning + disable_bam_index_caching: disable_bam_index_caching + disable_sequence_dictionary_validation: disable_sequence_dictionary_validation + disable_tool_default_annotations: disable_tool_default_annotations + dont_increase_kmer_sizes_for_cycles: dont_increase_kmer_sizes_for_cycles + dont_trim_active_regions: dont_trim_active_regions + dont_use_soft_clipped_bases: dont_use_soft_clipped_bases + downsampling_stride: downsampling_stride + emit_ref_confidence: emit_ref_confidence + enable_all_annotations: enable_all_annotations + f1r2_max_depth: f1r2_max_depth + f1r2_median_mq: f1r2_median_mq + f1r2_min_bq: f1r2_min_bq + force_active: force_active + genotype_filtered_alleles: genotype_filtered_alleles + genotype_germline_sites: genotype_germline_sites + genotype_pon_sites: genotype_pon_sites + germline_resource: preparation/germline_resource_with_index + gvcf_lod_band: gvcf_lod_band + ignore_itr_artifacts: ignore_itr_artifacts + initial_tumor_lod: initial_tumor_lod + interval_merging_rule: interval_merging_rule + kmer_size: kmer_size + max_assembly_region_size: max_assembly_region_size + max_mnp_distance: max_mnp_distance + max_num_haplotypes_in_population: max_num_haplotypes_in_population + max_population_af: max_population_af + max_prob_propagation_distance: max_prob_propagation_distance + max_reads_per_alignment_start: max_reads_per_alignment_start + max_suspicious_reads_per_alignment_start: max_suspicious_reads_per_alignment_start + max_unpruned_variants: max_unpruned_variants + min_assembly_region_size: min_assembly_region_size + min_base_quality_score: min_base_quality_score + min_dangling_branch_length: min_dangling_branch_length + min_pruning: min_pruning + minimum_allele_fraction: minimum_allele_fraction + mitochondria_mode: mitochondria_mode + native_pair_hmm_threads: native_pair_hmm_threads + native_pair_hmm_use_double_precision: native_pair_hmm_use_double_precision + num_pruning_samples: 
num_pruning_samples + pair_hmm_gap_continuation_penalty: pair_hmm_gap_continuation_penalty + pair_hmm_implementation: pair_hmm_implementation + panel_of_normals: preparation/panel_of_normal_with_index + pcr_indel_model: pcr_indel_model + pcr_indel_qual: pcr_indel_qual + pcr_snv_qual: pcr_snv_qual + pedigree: pedigree + phred_scaled_global_read_mismapping_rate: phred_scaled_global_read_mismapping_rate + pruning_lod_threshold: pruning_lod_threshold + recover_all_dangling_branches: recover_all_dangling_branches + showHidden: showHidden + sites_only_vcf_output: sites_only_vcf_output + smith_waterman: smith_waterman + tumor_lod_to_emit: tumor_lod_to_emit + out: [vcfs, reassembly, f1r2s, stats] + + merge_vcfs: + run: ../tools/filter_mutect2/merge_vcf.cwl + in: + java_heap: java_heap + output_prefix: prepare_file_prefix/output_prefix + vcfs: gatk4_pon/vcfs + out: [mutect2_unfiltered_vcf] + + upload_vcf: + run: ../utils-cwl/bio_client/bio_client_upload_pull_uuid.cwl + in: + config_file: bioclient_config + upload_bucket: upload_bucket + upload_key: + source: [job_uuid, merge_vcfs/mutect2_unfiltered_vcf] + valueFrom: $(self[0])/$(self[1].basename) + local_file: merge_vcfs/mutect2_unfiltered_vcf + out: [output] + + upload_vcf_index: + run: ../utils-cwl/bio_client/bio_client_upload_pull_uuid.cwl + in: + config_file: bioclient_config + upload_bucket: upload_bucket + upload_key: + source: [job_uuid, merge_vcfs/mutect2_unfiltered_vcf] + valueFrom: $(self[0])/$(self[1].secondaryFiles[0].basename) + local_file: + source: merge_vcfs/mutect2_unfiltered_vcf + valueFrom: $(self.secondaryFiles[0]) + out: [output] + + uuid_vcf: + run: ../utils-cwl/emit_json_value.cwl + in: + input: upload_vcf/output + key: + valueFrom: 'did' + out: [output] + + uuid_vcf_index: + run: ../utils-cwl/emit_json_value.cwl + in: + input: upload_vcf_index/output + key: + valueFrom: 'did' + out: [output] \ No newline at end of file diff --git a/utils-cwl/subworkflow/preparation_workflow.cwl 
b/utils-cwl/subworkflow/preparation_workflow.cwl index 5653205..83fd0ae 100644 --- a/utils-cwl/subworkflow/preparation_workflow.cwl +++ b/utils-cwl/subworkflow/preparation_workflow.cwl @@ -12,6 +12,8 @@ requirements: inputs: bioclient_config: type: File + pon_calling: + type: int[] has_normal: type: int[] tumor_gdc_id: @@ -29,41 +31,41 @@ inputs: reference_fai_gdc_id: type: string reference_image_gdc_id: - type: string + type: string? germline_resource_gdc_id: - type: string + type: string? germline_resource_index_gdc_id: - type: string + type: string? common_biallelic_variants_gdc_id: - type: string + type: string? common_biallelic_variants_index_gdc_id: - type: string + type: string? panel_of_normal_gdc_id: - type: string + type: string? panel_of_normal_index_gdc_id: - type: string + type: string? outputs: tumor_with_index: type: File outputSource: stage/tumor_with_index - normal_with_index: - type: File? - outputSource: stage/normal_with_index reference_with_index: type: File outputSource: stage/reference_with_index + normal_with_index: + type: File? + outputSource: stage/normal_with_index reference_image: - type: File - outputSource: reference_image_download/output + type: File? + outputSource: extract_reference_image/input_file germline_resource_with_index: - type: File + type: File? outputSource: stage/germline_ref_with_index common_biallelic_variants_with_index: - type: File + type: File? outputSource: stage/biallelic_ref_with_index panel_of_normal_with_index: - type: File + type: File? 
outputSource: stage/pon_with_index steps: @@ -134,56 +136,113 @@ steps: reference_image_download: run: ../bio_client/bio_client_download.cwl + scatter: pon_calling in: + pon_calling: pon_calling config_file: bioclient_config download_handle: reference_image_gdc_id out: [output] + extract_reference_image: + run: ../extract_from_conditional_array.cwl + in: + input_array: reference_image_download/output + out: [input_file] + germline_resource_download: run: ../bio_client/bio_client_download.cwl + scatter: pon_calling in: + pon_calling: pon_calling config_file: bioclient_config download_handle: germline_resource_gdc_id out: [output] germline_resource_index_download: run: ../bio_client/bio_client_download.cwl + scatter: pon_calling in: + pon_calling: pon_calling config_file: bioclient_config download_handle: germline_resource_index_gdc_id out: [output] + extract_germline_resource: + run: ../extract_from_conditional_array.cwl + in: + input_array: germline_resource_download/output + out: [input_file] + + extract_germline_resource_index: + run: ../extract_from_conditional_array.cwl + in: + input_array: germline_resource_index_download/output + out: [input_file] + common_biallelic_variants_download: run: ../bio_client/bio_client_download.cwl + scatter: pon_calling in: + pon_calling: pon_calling config_file: bioclient_config download_handle: common_biallelic_variants_gdc_id out: [output] common_biallelic_variants_index_download: run: ../bio_client/bio_client_download.cwl + scatter: pon_calling in: + pon_calling: pon_calling config_file: bioclient_config download_handle: common_biallelic_variants_index_gdc_id out: [output] + extract_common_biallelic: + run: ../extract_from_conditional_array.cwl + in: + input_array: common_biallelic_variants_download/output + out: [input_file] + + extract_common_biallelic_index: + run: ../extract_from_conditional_array.cwl + in: + input_array: common_biallelic_variants_index_download/output + out: [input_file] + panel_of_normal_download: run: 
../bio_client/bio_client_download.cwl + scatter: pon_calling in: + pon_calling: pon_calling config_file: bioclient_config download_handle: panel_of_normal_gdc_id out: [output] panel_of_normal_index_download: run: ../bio_client/bio_client_download.cwl + scatter: pon_calling in: + pon_calling: pon_calling config_file: bioclient_config download_handle: panel_of_normal_index_gdc_id out: [output] + extract_panel_of_normal: + run: ../extract_from_conditional_array.cwl + in: + input_array: panel_of_normal_download/output + out: [input_file] + + extract_panel_of_normal_index: + run: ../extract_from_conditional_array.cwl + in: + input_array: panel_of_normal_index_download/output + out: [input_file] + stage: run: ./stage_workflow.cwl in: + pon_calling: pon_calling has_normal: has_normal tumor: tumor_download/output tumor_index: tumor_index_download/output @@ -192,10 +251,10 @@ steps: reference: reference_fa_download/output reference_fai: reference_fai_download/output reference_dict: reference_dict_download/output - germline_ref: germline_resource_download/output - germline_ref_index: germline_resource_index_download/output - biallelic_ref: common_biallelic_variants_download/output - biallelic_ref_index: common_biallelic_variants_index_download/output - pon: panel_of_normal_download/output - pon_index: panel_of_normal_index_download/output + germline_ref: extract_germline_resource/input_file + germline_ref_index: extract_germline_resource_index/input_file + biallelic_ref: extract_common_biallelic/input_file + biallelic_ref_index: extract_common_biallelic_index/input_file + pon: extract_panel_of_normal/input_file + pon_index: extract_panel_of_normal_index/input_file out: [tumor_with_index, normal_with_index, reference_with_index, germline_ref_with_index, biallelic_ref_with_index, pon_with_index] diff --git a/utils-cwl/subworkflow/stage_workflow.cwl b/utils-cwl/subworkflow/stage_workflow.cwl index cd541af..90f549e 100644 --- a/utils-cwl/subworkflow/stage_workflow.cwl +++ 
b/utils-cwl/subworkflow/stage_workflow.cwl @@ -11,6 +11,7 @@ requirements: class: Workflow inputs: + pon_calling: int[] has_normal: int[] tumor: File tumor_index: File @@ -19,32 +20,32 @@ inputs: reference: File reference_fai: File reference_dict: File - germline_ref: File - germline_ref_index: File - biallelic_ref: File - biallelic_ref_index: File - pon: File - pon_index: File + germline_ref: File? + germline_ref_index: File? + biallelic_ref: File? + biallelic_ref_index: File? + pon: File? + pon_index: File? outputs: tumor_with_index: type: File outputSource: make_tumor_bam/output - normal_with_index: - type: File? - outputSource: extract_normal/input_file reference_with_index: type: File outputSource: make_reference/output + normal_with_index: + type: File? + outputSource: extract_normal/input_file pon_with_index: - type: File - outputSource: make_pon/output + type: File? + outputSource: extract_pon/input_file biallelic_ref_with_index: - type: File - outputSource: make_biallelic_ref/output + type: File? + outputSource: extract_biallelic_ref/input_file germline_ref_with_index: - type: File - outputSource: make_germline_ref/output + type: File? 
+ outputSource: extract_germline_ref/input_file steps: standardize_tumor_bai: @@ -112,27 +113,51 @@ steps: make_pon: run: ../make_secondary.cwl + scatter: pon_calling in: + pon_calling: pon_calling parent_file: pon children: source: pon_index valueFrom: $([self]) out: [ output ] + extract_pon: + run: ../extract_from_conditional_array.cwl + in: + input_array: make_pon/output + out: [input_file] + make_biallelic_ref: run: ../make_secondary.cwl + scatter: pon_calling in: + pon_calling: pon_calling parent_file: biallelic_ref children: source: biallelic_ref_index valueFrom: $([self]) out: [ output ] + extract_biallelic_ref: + run: ../extract_from_conditional_array.cwl + in: + input_array: make_biallelic_ref/output + out: [input_file] + make_germline_ref: run: ../make_secondary.cwl + scatter: pon_calling in: + pon_calling: pon_calling parent_file: germline_ref children: source: germline_ref_index valueFrom: $([self]) out: [ output ] + + extract_germline_ref: + run: ../extract_from_conditional_array.cwl + in: + input_array: make_germline_ref/output + out: [input_file] \ No newline at end of file