-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgpas_gatk4.2.4.1_mutect2_pon_workflow.cwl
340 lines (325 loc) · 10.3 KB
/
gpas_gatk4.2.4.1_mutect2_pon_workflow.cwl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
#!/usr/bin/env cwl-runner
# Mutect2 panel-of-normals (PoN) calling workflow: stage inputs, run
# multithreaded Mutect2, merge the per-interval VCFs, upload the result and
# its index, and report the uploaded objects' ids.
cwlVersion: v1.0
class: Workflow

# Optional CWL features enabled for this workflow.
requirements:
  # $(...) JavaScript expressions used in step-input valueFrom fields.
  - class: InlineJavascriptRequirement
  # valueFrom on workflow step inputs.
  - class: StepInputExpressionRequirement
  # Step inputs that list more than one `source`.
  - class: MultipleInputFeatureRequirement
  # Steps that run another workflow (the preparation step runs a file under
  # subworkflow/ — presumably a Workflow; confirm).
  - class: SubworkflowFeatureRequirement
  # No `scatter` appears on any step of this file; kept as declared.
  - class: ScatterFeatureRequirement
# Workflow inputs. Parameters are grouped by the banner comments below; every
# entry under OPTIONAL_INPUTS is nullable and is handed unchanged to the
# `gatk4_pon` step under the same name.
inputs:
  ###OPTIONS###
  # Integer arrays forwarded to the prefix/preparation steps.
  # NOTE(review): presumably on/off switches for the subworkflow — confirm
  # against ../utils-cwl/subworkflow/preparation_workflow.cwl.
  has_normal: int[]
  pon_calling: int[]

  ###BIOCLIENT_INPUTS###
  # Client configuration file plus the object ids to stage and the upload target.
  bioclient_config: File
  tumor_gdc_id: string
  tumor_index_gdc_id: string
  reference_gdc_id: string
  reference_faidx_gdc_id: string
  reference_dict_gdc_id: string
  upload_bucket: string

  ###GENERAL_INPUTS###
  project_id: string?
  experimental_strategy: string?
  job_uuid:
    type: string
    doc: Job id. Served as a prefix for most outputs.
  java_heap:
    type: string
    default: '32G'
    doc: Java option flags for all the java cmd. GDC default is 32G.
  chunk_java_heap:
    type: string
    default: '3G'
    doc: Java option flag for multithreading Mutect2 only. GDC default is 3G.
  nthreads: int
  # NOTE(review): the doc text below says "GDC default is true" while this
  # workflow's default is false — confirm which one is intended here.
  bam_output:
    type: boolean
    default: false
    doc: If specified, assembled haplotypes will be written to bam. Used for alignment artifacts filtration. GDC default is true.
  # NOTE(review): same doc/default mismatch as bam_output — confirm.
  f1r2_tar_gz:
    type: boolean
    default: false
    doc: If specified, collect F1R2 counts and output files into tar.gz file. Used for Mutect2 filtration. GDC default is true.
  usedecoy:
    type: boolean
    default: false
    doc: If specified, it will include all the decoy sequences in the faidx. GDC default is false.
  max_mnp_distance:
    type: int
    default: 0

  ###OPTIONAL_INPUTS###
  active_probability_threshold:
    type: float?
  adaptive_pruning_initial_error_rate:
    type: float?
  af_of_alleles_not_in_resource:
    type: float?
  allow_non_unique_kmers_in_ref:
    type: boolean?
  assembly_region_padding:
    type: int?
  bam_writer_type:
    type: string?
  base_quality_score_threshold:
    type: int?
  callable_depth:
    type: int?
  disable_adaptive_pruning:
    type: boolean?
  disable_bam_index_caching:
    type: boolean?
  disable_sequence_dictionary_validation:
    type: boolean?
  disable_tool_default_annotations:
    type: boolean?
  dont_increase_kmer_sizes_for_cycles:
    type: boolean?
  dont_trim_active_regions:
    type: boolean?
  dont_use_soft_clipped_bases:
    type: boolean?
  downsampling_stride:
    type: int?
  emit_ref_confidence:
    type: string?
  enable_all_annotations:
    type: boolean?
  f1r2_max_depth:
    type: int?
  f1r2_median_mq:
    type: int?
  f1r2_min_bq:
    type: int?
  force_active:
    type: boolean?
  genotype_filtered_alleles:
    type: boolean?
  genotype_germline_sites:
    type: boolean?
  genotype_pon_sites:
    type: boolean?
  gvcf_lod_band:
    type: float?
  ignore_itr_artifacts:
    type: boolean?
  initial_tumor_lod:
    type: float?
  interval_merging_rule:
    type: string?
  kmer_size:
    type: int?
  max_assembly_region_size:
    type: int?
  max_num_haplotypes_in_population:
    type: int?
  max_population_af:
    type: float?
  max_prob_propagation_distance:
    type: int?
  max_reads_per_alignment_start:
    type: int?
  max_suspicious_reads_per_alignment_start:
    type: int?
  max_unpruned_variants:
    type: int?
  min_assembly_region_size:
    type: int?
  min_base_quality_score:
    type: int?
  min_dangling_branch_length:
    type: int?
  min_pruning:
    type: int?
  minimum_allele_fraction:
    type: float?
  mitochondria_mode:
    type: boolean?
  native_pair_hmm_threads:
    type: int?
  native_pair_hmm_use_double_precision:
    type: boolean?
  num_pruning_samples:
    type: int?
  pair_hmm_gap_continuation_penalty:
    type: int?
  pair_hmm_implementation:
    type: string?
  pcr_indel_model:
    type: string?
  pcr_indel_qual:
    type: int?
  pcr_snv_qual:
    type: int?
  pedigree:
    type: File?
  phred_scaled_global_read_mismapping_rate:
    type: int?
  pruning_lod_threshold:
    type: float?
  recover_all_dangling_branches:
    type: boolean?
  showHidden:
    type: boolean?
  sites_only_vcf_output:
    type: boolean?
  smith_waterman:
    type: string?
  tumor_lod_to_emit:
    type: float?
# Workflow outputs: two strings, each taken from the `output` of an
# emit_json_value step that reads the 'did' key of an upload step's result.
outputs:
  # Value extracted for the uploaded merged VCF (step `uuid_vcf`).
  individual_pon_vcf_uuid:
    type: string
    outputSource: uuid_vcf/output
  # Value extracted for the uploaded VCF's first secondary file
  # (step `uuid_vcf_index`).
  individual_pon_vcf_index_uuid:
    type: string
    outputSource: uuid_vcf_index/output
# Step graph:
#   prepare_file_prefix ─┐
#   preparation ─► faidx_to_bed ─► gatk4_pon ─► merge_vcfs ─► upload_vcf ─► uuid_vcf
#                                                          └► upload_vcf_index ─► uuid_vcf_index
steps:
  # Builds the output file prefix from the job id and the optional
  # project / experimental-strategy labels.
  prepare_file_prefix:
    run: ../utils-cwl/make_prefix.cwl
    in:
      has_normal: has_normal
      project_id: project_id
      job_id: job_uuid
      experimental_strategy: experimental_strategy
    out: [output_prefix]

  # Stages the tumor BAM and the reference files by id. Only tumor_with_index,
  # normal_with_index, reference_with_index, germline_resource_with_index and
  # panel_of_normal_with_index are consumed later in this file.
  preparation:
    run: ../utils-cwl/subworkflow/preparation_workflow.cwl
    in:
      has_normal: has_normal
      pon_calling: pon_calling
      bioclient_config: bioclient_config
      tumor_gdc_id: tumor_gdc_id
      tumor_index_gdc_id: tumor_index_gdc_id
      reference_fa_gdc_id: reference_gdc_id
      reference_fai_gdc_id: reference_faidx_gdc_id
      reference_dict_gdc_id: reference_dict_gdc_id
    out: [tumor_with_index, normal_with_index, reference_with_index, reference_image, germline_resource_with_index, common_biallelic_variants_with_index, panel_of_normal_with_index]

  # Produces the interval BED handed to Mutect2.
  faidx_to_bed:
    run: ../utils-cwl/faidx_to_bed.cwl
    in:
      ref_fai:
        source: preparation/reference_with_index
        # First secondary file of the reference.
        # NOTE(review): assumed to be the .fai index — confirm the
        # secondaryFiles order declared by the preparation workflow.
        valueFrom: $(self.secondaryFiles[0])
      usedecoy: usedecoy
    out: [output_bed]

  # Multithreaded Mutect2 call over the BED intervals. Uses the smaller
  # chunk_java_heap, not java_heap.
  gatk4_pon:
    run: ../tools/filter_mutect2/gatk4_multithread_mutect2.cwl
    in:
      nthreads: nthreads
      java_heap: chunk_java_heap
      input:
        # Two sources gathered into one list; the expression rebuilds it as a
        # two-element array [tumor, normal].
        # NOTE(review): presumably normal_with_index is null when calling a
        # PoN — confirm how the tool treats a null entry.
        source: [preparation/tumor_with_index, preparation/normal_with_index]
        valueFrom: $([self[0], self[1]])
      output_prefix: prepare_file_prefix/output_prefix
      reference: preparation/reference_with_index
      intervals: faidx_to_bed/output_bed
      bam_output: bam_output
      f1r2_tar_gz: f1r2_tar_gz
      # Everything below is a same-named pass-through of a workflow input,
      # except germline_resource and panel_of_normals, which come from the
      # preparation step.
      active_probability_threshold: active_probability_threshold
      adaptive_pruning_initial_error_rate: adaptive_pruning_initial_error_rate
      af_of_alleles_not_in_resource: af_of_alleles_not_in_resource
      allow_non_unique_kmers_in_ref: allow_non_unique_kmers_in_ref
      assembly_region_padding: assembly_region_padding
      bam_writer_type: bam_writer_type
      base_quality_score_threshold: base_quality_score_threshold
      callable_depth: callable_depth
      disable_adaptive_pruning: disable_adaptive_pruning
      disable_bam_index_caching: disable_bam_index_caching
      disable_sequence_dictionary_validation: disable_sequence_dictionary_validation
      disable_tool_default_annotations: disable_tool_default_annotations
      dont_increase_kmer_sizes_for_cycles: dont_increase_kmer_sizes_for_cycles
      dont_trim_active_regions: dont_trim_active_regions
      dont_use_soft_clipped_bases: dont_use_soft_clipped_bases
      downsampling_stride: downsampling_stride
      emit_ref_confidence: emit_ref_confidence
      enable_all_annotations: enable_all_annotations
      f1r2_max_depth: f1r2_max_depth
      f1r2_median_mq: f1r2_median_mq
      f1r2_min_bq: f1r2_min_bq
      force_active: force_active
      genotype_filtered_alleles: genotype_filtered_alleles
      genotype_germline_sites: genotype_germline_sites
      genotype_pon_sites: genotype_pon_sites
      germline_resource: preparation/germline_resource_with_index
      gvcf_lod_band: gvcf_lod_band
      ignore_itr_artifacts: ignore_itr_artifacts
      initial_tumor_lod: initial_tumor_lod
      interval_merging_rule: interval_merging_rule
      kmer_size: kmer_size
      max_assembly_region_size: max_assembly_region_size
      max_mnp_distance: max_mnp_distance
      max_num_haplotypes_in_population: max_num_haplotypes_in_population
      max_population_af: max_population_af
      max_prob_propagation_distance: max_prob_propagation_distance
      max_reads_per_alignment_start: max_reads_per_alignment_start
      max_suspicious_reads_per_alignment_start: max_suspicious_reads_per_alignment_start
      max_unpruned_variants: max_unpruned_variants
      min_assembly_region_size: min_assembly_region_size
      min_base_quality_score: min_base_quality_score
      min_dangling_branch_length: min_dangling_branch_length
      min_pruning: min_pruning
      minimum_allele_fraction: minimum_allele_fraction
      mitochondria_mode: mitochondria_mode
      native_pair_hmm_threads: native_pair_hmm_threads
      native_pair_hmm_use_double_precision: native_pair_hmm_use_double_precision
      num_pruning_samples: num_pruning_samples
      pair_hmm_gap_continuation_penalty: pair_hmm_gap_continuation_penalty
      pair_hmm_implementation: pair_hmm_implementation
      panel_of_normals: preparation/panel_of_normal_with_index
      pcr_indel_model: pcr_indel_model
      pcr_indel_qual: pcr_indel_qual
      pcr_snv_qual: pcr_snv_qual
      pedigree: pedigree
      phred_scaled_global_read_mismapping_rate: phred_scaled_global_read_mismapping_rate
      pruning_lod_threshold: pruning_lod_threshold
      recover_all_dangling_branches: recover_all_dangling_branches
      showHidden: showHidden
      sites_only_vcf_output: sites_only_vcf_output
      smith_waterman: smith_waterman
      tumor_lod_to_emit: tumor_lod_to_emit
    # Only `vcfs` is consumed downstream; reassembly, f1r2s and stats are
    # produced but not wired to any later step or workflow output.
    out: [vcfs, reassembly, f1r2s, stats]

  # Merges the Mutect2 VCFs into one unfiltered VCF named with the shared prefix.
  merge_vcfs:
    run: ../tools/filter_mutect2/merge_vcf.cwl
    in:
      java_heap: java_heap
      output_prefix: prepare_file_prefix/output_prefix
      vcfs: gatk4_pon/vcfs
    out: [mutect2_unfiltered_vcf]

  # Uploads the merged VCF under the key "<job_uuid>/<vcf basename>".
  upload_vcf:
    run: ../utils-cwl/bio_client/bio_client_upload_pull_uuid.cwl
    in:
      config_file: bioclient_config
      upload_bucket: upload_bucket
      upload_key:
        source: [job_uuid, merge_vcfs/mutect2_unfiltered_vcf]
        valueFrom: $(self[0])/$(self[1].basename)
      local_file: merge_vcfs/mutect2_unfiltered_vcf
    out: [output]

  # Uploads the merged VCF's first secondary file (presumably its index —
  # confirm against merge_vcf.cwl) under "<job_uuid>/<that file's basename>".
  upload_vcf_index:
    run: ../utils-cwl/bio_client/bio_client_upload_pull_uuid.cwl
    in:
      config_file: bioclient_config
      upload_bucket: upload_bucket
      upload_key:
        source: [job_uuid, merge_vcfs/mutect2_unfiltered_vcf]
        valueFrom: $(self[0])/$(self[1].secondaryFiles[0].basename)
      local_file:
        source: merge_vcfs/mutect2_unfiltered_vcf
        valueFrom: $(self.secondaryFiles[0])
    out: [output]

  # Extracts the 'did' field from the VCF upload result.
  uuid_vcf:
    run: ../utils-cwl/emit_json_value.cwl
    in:
      input: upload_vcf/output
      key:
        # Constant key; there is no source, so valueFrom supplies the literal.
        valueFrom: 'did'
    out: [output]

  # Extracts the 'did' field from the index upload result.
  uuid_vcf_index:
    run: ../utils-cwl/emit_json_value.cwl
    in:
      input: upload_vcf_index/output
      key:
        valueFrom: 'did'
    out: [output]