-
Notifications
You must be signed in to change notification settings - Fork 17
/
Copy pathpipeline-se.cwl
238 lines (238 loc) · 8.01 KB
/
pipeline-se.cwl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
#!/usr/bin/env cwl-runner
# Document header: declares a CWL v1.0 Workflow and the optional CWL
# features it relies on.
cwlVersion: v1.0
class: Workflow
doc: 'ATAC-seq pipeline - reads: SE'
requirements:
  # No step in this file scatters directly; presumably required by the
  # referenced step files -- verify before removing.
  - class: ScatterFeatureRequirement
  # Every step below runs an external .cwl file; presumably those are
  # themselves workflows -- confirm against the step files.
  - class: SubworkflowFeatureRequirement
  # Required for the `valueFrom` field used on the peak_call step's
  # input_bam_format input.
  - class: StepInputExpressionRequirement
# Workflow inputs. Each key is an input parameter name; the nested `type`,
# `doc`, `default` and `secondaryFiles` fields belong to that parameter.
inputs:
  # --- Sequencing data and reference files ---
  input_fastq_files:
    doc: Input FASTQ files (single-end reads)
    type: File[]
  genome_sizes_file:
    doc: Genome sizes tab-delimited file (used in samtools)
    type: File
  default_adapters_file:
    doc: Adapters file
    type: File
  genome_effective_size:
    default: hs
    doc: >-
      Effective genome size used by MACS2. It can be numeric or one of the
      shortcuts: 'hs' for human (2.7e9), 'mm' for mouse (1.87e9), 'ce' for
      C. elegans (9e7) and 'dm' for fruit fly (1.2e8). Default: hs
    type: string
  genome_ref_first_index_file:
    doc: >-
      First index file of Bowtie reference genome with extension 1.ebwt.
      (Note: the rest of the index files MUST be in the same folder)
    type: File
    # Each leading `^` removes one extension from the primary file name
    # before the remainder is appended, so `^^` drops ".1.ebwt" and the
    # sibling index files are looked up next to the primary file.
    secondaryFiles:
      - ^^.2.ebwt
      - ^^.3.ebwt
      - ^^.4.ebwt
      - ^^.rev.1.ebwt
      - ^^.rev.2.ebwt
  as_narrowPeak_file:
    doc: Definition narrowPeak file in AutoSql format (used in bedToBigBed)
    type: File

  # --- Java tool locations and JVM options ---
  trimmomatic_jar_path:
    doc: Trimmomatic Java jar file
    type: string
  # Optional (`string?`): may be omitted by the caller.
  trimmomatic_java_opts:
    doc: >-
      JVM arguments should be a quoted, space separated list
      (e.g. "-Xms128m -Xmx512m")
    type: string?
  picard_jar_path:
    doc: Picard Java jar file
    type: string
  # Optional (`string?`): may be omitted by the caller.
  picard_java_opts:
    doc: >-
      JVM arguments should be a quoted, space separated list
      (e.g. "-Xms128m -Xmx512m")
    type: string?

  # --- Per-step thread counts (all required, no defaults) ---
  nthreads_qc:
    doc: Number of threads required for the 01-qc step
    type: int
  nthreads_trimm:
    doc: Number of threads required for the 02-trim step
    type: int
  nthreads_map:
    doc: Number of threads required for the 03-map step
    type: int
  nthreads_peakcall:
    doc: Number of threads required for the 04-peakcall step
    type: int
  nthreads_quant:
    doc: Number of threads required for the 05-quantification step
    type: int
# Workflow steps. Data flows qc -> trimm -> map -> {peak_call, quant}.
# Under `in`, the key is the step's input name and the value is either a
# workflow input or `<step>/<output>` from an upstream step.
steps:
  # 01: QC on the raw FASTQ files; also emits the custom adapters consumed
  # by the trimming step.
  qc:
    run: 01-qc-se.cwl
    in:
      input_fastq_files: input_fastq_files
      default_adapters_file: default_adapters_file
      nthreads: nthreads_qc
    out:
      - output_count_raw_reads
      - output_diff_counts
      - output_fastqc_report_files
      - output_fastqc_data_files
      - output_custom_adapters

  # 02: Trimming of the raw FASTQ files using the adapters produced by qc.
  trimm:
    run: 02-trim-se.cwl
    in:
      input_fastq_files: input_fastq_files
      input_adapters_files: qc/output_custom_adapters
      trimmomatic_jar_path: trimmomatic_jar_path
      trimmomatic_java_opts: trimmomatic_java_opts
      nthreads: nthreads_trimm
    out:
      - output_data_fastq_trimmed_files
      - output_trimmed_fastq_read_count

  # 03: Mapping of the trimmed FASTQ files against the reference index.
  map:
    run: 03-map-se.cwl
    in:
      input_fastq_files: trimm/output_data_fastq_trimmed_files
      genome_sizes_file: genome_sizes_file
      genome_ref_first_index_file: genome_ref_first_index_file
      picard_jar_path: picard_jar_path
      picard_java_opts: picard_java_opts
      nthreads: nthreads_map
    out:
      - output_data_sorted_dedup_bam_files
      - output_data_sorted_dups_marked_bam_files
      - output_picard_mark_duplicates_files
      - output_pbc_files
      - output_bowtie_log
      - output_preseq_c_curve_files
      - output_percentage_uniq_reads
      - output_read_count_mapped
      - output_percent_mitochondrial_reads

  # 04: Peak calling on the sorted, de-duplicated BAM files from map.
  peak_call:
    run: 04-peakcall-se.cwl
    in:
      input_bam_files: map/output_data_sorted_dedup_bam_files
      # Constant value supplied via valueFrom (no upstream source); this is
      # what needs StepInputExpressionRequirement.
      input_bam_format:
        valueFrom: BAM
      genome_effective_size: genome_effective_size
      input_genome_sizes: genome_sizes_file
      as_narrowPeak_file: as_narrowPeak_file
      nthreads: nthreads_peakcall
    out:
      - output_spp_x_cross_corr
      - output_spp_cross_corr_plot
      - output_read_in_peak_count_within_replicate
      - output_peak_file
      - output_peak_bigbed_file
      - output_peak_summits_file
      - output_extended_peak_file
      - output_peak_xls_file
      - output_filtered_read_count_file
      - output_peak_count_within_replicate

  # 05: Quantification (bigWig signal files) from the same de-duplicated
  # BAM files used for peak calling.
  quant:
    run: 05-quantification.cwl
    in:
      input_bam_files: map/output_data_sorted_dedup_bam_files
      input_genome_sizes: genome_sizes_file
      nthreads: nthreads_quant
    out:
      - bigwig_raw_files
      - bigwig_norm_files
# Workflow outputs. Each entry publishes one step output (`outputSource` is
# `<step>/<output>`); names are prefixed with the producing stage.
outputs:
  # --- qc ---
  qc_fastqc_data_files:
    doc: FastQC data files
    type: File[]
    outputSource: qc/output_fastqc_data_files
  qc_fastqc_report_files:
    doc: FastQC reports in zip format
    type: File[]
    outputSource: qc/output_fastqc_report_files
  qc_count_raw_reads:
    doc: Raw read counts of fastq files after QC
    type: File[]
    outputSource: qc/output_count_raw_reads
  qc_diff_counts:
    doc: >-
      Diff file between number of raw reads and number of reads counted by
      FASTQC
    type: File[]
    outputSource: qc/output_diff_counts

  # --- trimm ---
  trimm_fastq_files:
    doc: FASTQ files after trimming
    type: File[]
    outputSource: trimm/output_data_fastq_trimmed_files
  trimm_raw_counts:
    doc: Raw read counts of fastq files after trimming
    type: File[]
    outputSource: trimm/output_trimmed_fastq_read_count

  # --- map ---
  map_read_count_mapped:
    doc: Read counts of the mapped BAM files
    type: File[]
    outputSource: map/output_read_count_mapped
  map_bowtie_log_files:
    doc: Bowtie log file with mapping stats
    type: File[]
    outputSource: map/output_bowtie_log
  map_preseq_percentage_uniq_reads:
    doc: Preseq percentage of uniq reads
    type: File[]
    outputSource: map/output_percentage_uniq_reads
  map_pbc_files:
    doc: PCR Bottleneck Coefficient files (used to flag samples when pbc<0.5)
    type: File[]
    outputSource: map/output_pbc_files
  # NOTE(review): the name says "dedup" but the source is the
  # dups-marked BAM output, while peak_call and quant consume
  # map/output_data_sorted_dedup_bam_files. Confirm which one is intended
  # before changing; the source is left as-is here.
  map_dedup_bam_files:
    doc: Filtered BAM files (post-processing end point)
    type: File[]
    outputSource: map/output_data_sorted_dups_marked_bam_files
  map_mark_duplicates_files:
    doc: >-
      Summary of duplicates removed with Picard tool MarkDuplicates (for
      multiple reads aligned to the same positions)
    type: File[]
    outputSource: map/output_picard_mark_duplicates_files
  map_preseq_c_curve_files:
    doc: Preseq c_curve output files
    type: File[]
    outputSource: map/output_preseq_c_curve_files
  map_percent_mitochondrial_reads:
    doc: Percentage of mitochondrial reads
    type: File[]
    outputSource: map/output_percent_mitochondrial_reads

  # --- peak_call ---
  peakcall_peak_file:
    doc: Peaks in ENCODE Peak file format
    type: File[]
    outputSource: peak_call/output_peak_file
  peakcall_spp_x_cross_corr:
    doc: SPP strand cross correlation summary
    type: File[]
    outputSource: peak_call/output_spp_x_cross_corr
  peakcall_peak_xls_file:
    doc: Peak calling report file
    type: File[]
    outputSource: peak_call/output_peak_xls_file
  peakcall_peak_summits_file:
    doc: Peaks summits in bedfile format
    type: File[]
    outputSource: peak_call/output_peak_summits_file
  peakcall_peak_count_within_replicate:
    doc: Peak counts within replicate
    type: File[]
    outputSource: peak_call/output_peak_count_within_replicate
  peakcall_spp_x_cross_corr_plot:
    doc: SPP strand cross correlation plot
    type: File[]
    outputSource: peak_call/output_spp_cross_corr_plot
  peakcall_filtered_read_count_file:
    doc: Filtered read count after peak calling
    type: File[]
    outputSource: peak_call/output_filtered_read_count_file
  peakcall_extended_peak_file:
    doc: Extended fragment peaks in ENCODE Peak file format
    type: File[]
    outputSource: peak_call/output_extended_peak_file
  # Wording follows the source name (read_in_peak_count); presumably the
  # number of reads falling inside peaks -- confirm against
  # 04-peakcall-se.cwl.
  peakcall_read_in_peak_count_within_replicate:
    doc: Read-in-peak counts within replicate
    type: File[]
    outputSource: peak_call/output_read_in_peak_count_within_replicate
  peakcall_peak_bigbed_file:
    doc: Peaks in bigBed format
    type: File[]
    outputSource: peak_call/output_peak_bigbed_file

  # --- quant ---
  quant_bigwig_raw_files:
    doc: Raw reads bigWig (signal) files
    type: File[]
    outputSource: quant/bigwig_raw_files
  quant_bigwig_norm_files:
    doc: Normalized reads bigWig (signal) files
    type: File[]
    outputSource: quant/bigwig_norm_files