-
Notifications
You must be signed in to change notification settings - Fork 17
/
Copy pathpipeline-se.cwl
265 lines (265 loc) · 10.2 KB
/
pipeline-se.cwl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
#!/usr/bin/env cwl-runner
# CWL v1.0 Workflow document: ChIP-seq pipeline for single-end (SE) reads,
# treatment samples only (see `doc`).
cwlVersion: v1.0
class: Workflow
doc: 'ChIP-seq pipeline - reads: SE, samples: treatment.'
requirements:
  # Optional CWL features this workflow declares.
  - class: ScatterFeatureRequirement
  - class: SubworkflowFeatureRequirement
  - class: StepInputExpressionRequirement
# Workflow input parameters. Each key is a parameter id; `doc` describes it,
# `type` is its CWL type (a trailing `?` marks it optional, `[]` an array).
inputs:
  # Sample data.
  input_treatment_fastq_files:
    doc: Input treatment fastq files
    type: File[]

  # Reference and annotation files.
  genome_sizes_file:
    doc: Genome sizes tab-delimited file (used in samtools)
    type: File
  genome_effective_size:
    default: hs
    doc: >-
      Effective genome size used by MACS2. It can be numeric or one of the
      shortcuts: 'hs' for human (2.7e9), 'mm' for mouse (1.87e9), 'ce' for
      C. elegans (9e7) and 'dm' for fruitfly (1.2e8). Default: hs
    type: string
  default_adapters_file:
    doc: Adapters file
    type: File
  ENCODE_blacklist_bedfile:
    doc: Bedfile containing ENCODE consensus blacklist regions to be excluded.
    type: File
  genome_ref_first_index_file:
    doc: >-
      First index file of Bowtie reference genome with extension 1.ebwt.
      (Note: the rest of the index files MUST be in the same folder)
    type: File
    # Each leading `^` removes one extension from the primary file's name
    # before the suffix is appended, so `^^` drops the trailing `.1.ebwt`.
    secondaryFiles:
      - ^^.2.ebwt
      - ^^.3.ebwt
      - ^^.4.ebwt
      - ^^.rev.1.ebwt
      - ^^.rev.2.ebwt
  as_narrowPeak_file:
    doc: Definition narrowPeak file in AutoSql format (used in bedToBigBed)
    type: File
  as_broadPeak_file:
    doc: Definition broadPeak file in AutoSql format (used in bedToBigBed)
    type: File

  # Java tool configuration (Trimmomatic, Picard).
  trimmomatic_java_opts:
    doc: JVM arguments should be a quoted, space separated list (e.g. "-Xms128m -Xmx512m")
    type: string?
  trimmomatic_jar_path:
    doc: Trimmomatic Java jar file
    type: string
  picard_java_opts:
    doc: JVM arguments should be a quoted, space separated list (e.g. "-Xms128m -Xmx512m")
    type: string?
  picard_jar_path:
    doc: Picard Java jar file
    type: string

  # Thread counts, one per pipeline stage.
  nthreads_qc:
    doc: Number of threads required for the 01-qc step
    type: int
  nthreads_trimm:
    doc: Number of threads required for the 02-trim step
    type: int
  nthreads_map:
    doc: Number of threads required for the 03-map step
    type: int
  nthreads_peakcall:
    doc: Number of threads required for the 04-peakcall step
    type: int
  nthreads_quant:
    doc: Number of threads required for the 05-quantification step
    type: int
# Workflow outputs. Each entry exposes one output of a step via
# `outputSource: <step id>/<step output id>`.
outputs:
  # Outputs of the qc_treatment step.
  qc_treatment_count_raw_reads:
    doc: Raw read counts of fastq files after QC for treatment
    type: File[]
    outputSource: qc_treatment/output_count_raw_reads
  qc_treatment_fastqc_data_files:
    doc: FastQC data files
    type: File[]
    outputSource: qc_treatment/output_fastqc_data_files
  qc_treatment_fastqc_report_files:
    doc: FastQC report files
    type: File[]
    outputSource: qc_treatment/output_fastqc_report_files
  qc_treatment_diff_counts:
    doc: Diff file between number of raw reads and number of reads counted by FASTQC, for treatment
    type: File[]
    outputSource: qc_treatment/output_diff_counts

  # Outputs of the trimm_treatment step.
  trimm_treatment_fastq_files:
    doc: FASTQ files after trimming step for treatment
    type: File[]
    outputSource: trimm_treatment/output_data_fastq_trimmed_files
  trimm_treatment_raw_counts:
    doc: Raw read counts for fastq files after trimming for treatment
    type: File[]
    outputSource: trimm_treatment/output_trimmed_fastq_read_count

  # Outputs of the map_treatment step.
  map_treatment_mark_duplicates_files:
    doc: Summary of duplicates removed with Picard tool MarkDuplicates (for multiple reads aligned to the same positions) for treatment
    type: File[]
    outputSource: map_treatment/output_picard_mark_duplicates_files
  map_treatment_dedup_bam_files:
    doc: Filtered BAM files (post-processing end point) for treatment
    type: File[]
    outputSource: map_treatment/output_data_sorted_dedup_bam_files
  map_treatment_dups_marked_bam_files:
    doc: Filtered BAM files with duplicates marked (post-processing end point) for treatment
    type: File[]
    outputSource: map_treatment/output_data_sorted_dups_marked_bam_files
  map_treatment_pbc_files:
    doc: PCR Bottleneck Coefficient files (used to flag samples when pbc<0.5) for treatment
    type: File[]
    outputSource: map_treatment/output_pbc_files
  map_treatment_preseq_percentage_uniq_reads:
    doc: Preseq percentage of uniq reads
    type: File[]
    outputSource: map_treatment/output_percentage_uniq_reads
  map_treatment_read_count_mapped:
    doc: Read counts of the mapped BAM files
    type: File[]
    outputSource: map_treatment/output_read_count_mapped
  map_treatment_bowtie_log_files:
    doc: Bowtie log file with mapping stats for treatment
    type: File[]
    outputSource: map_treatment/output_bowtie_log
  map_treatment_preseq_c_curve_files:
    doc: Preseq c_curve output files for treatment
    type: File[]
    outputSource: map_treatment/output_preseq_c_curve_files

  # Outputs of the peak_call_treatment step.
  peak_call_treatment_spp_x_cross_corr:
    doc: SPP strand cross correlation summary
    type: File[]
    outputSource: peak_call_treatment/output_spp_x_cross_corr
  peak_call_treatment_spp_x_cross_corr_plot:
    doc: SPP strand cross correlation plot
    type: File[]
    outputSource: peak_call_treatment/output_spp_cross_corr_plot
  peak_call_treatment_filtered_read_count_file:
    doc: Filtered read count after peak calling
    type: File[]
    outputSource: peak_call_treatment/output_filtered_read_count_file
  peak_call_treatment_narrowpeak_peak_xls_file:
    doc: Peak calling report file
    type: File[]
    outputSource: peak_call_treatment/output_narrowpeak_xls_file
  # NOTE(review): the two outputs below (and their broadPeak counterparts)
  # share the same doc text; confirm whether the `read_in_*` entries should
  # describe read counts rather than peak counts.
  peak_call_treatment_read_in_narrowpeak_count_within_replicate:
    doc: Peak counts within replicate
    type: File[]
    outputSource: peak_call_treatment/output_read_in_narrowpeak_count_within_replicate
  peak_call_treatment_narrowpeak_count:
    doc: Peak counts within replicate
    type: File[]
    outputSource: peak_call_treatment/output_narrowpeak_count
  peak_call_treatment_narrowpeak_file:
    doc: Peaks in narrowPeak file format
    type: File[]
    outputSource: peak_call_treatment/output_narrowpeak_file
  peak_call_treatment_narrowpeak_summits_file:
    doc: Peaks summits in bedfile format
    # Array whose elements are each either null or an array of Files.
    type:
      type: array
      items:
        - 'null'
        - type: array
          items: File
    outputSource: peak_call_treatment/output_narrowpeak_summits_file
  peak_call_treatment_narrowpeak_bigbed_file:
    doc: narrowPeaks in bigBed format
    type: File[]
    outputSource: peak_call_treatment/output_narrowpeak_bigbed_file
  peak_call_treatment_read_in_broadpeak_count_within_replicate:
    doc: Peak counts within replicate
    type: File[]
    outputSource: peak_call_treatment/output_read_in_broadpeak_count_within_replicate
  peak_call_treatment_broadpeak_count:
    doc: Peak counts within replicate
    type: File[]
    outputSource: peak_call_treatment/output_broadpeak_count
  peak_call_treatment_broadpeak_file:
    doc: Peaks in broadPeak file format
    type: File[]
    outputSource: peak_call_treatment/output_broadpeak_file
  peak_call_treatment_broadpeak_bigbed_file:
    doc: broadPeaks in bigBed format
    type: File[]
    outputSource: peak_call_treatment/output_broadpeak_bigbed_file

  # Outputs of the quant step.
  quant_bigwig_raw_files:
    doc: Raw reads bigWig (signal) files
    type: File[]
    outputSource: quant/bigwig_raw_files
  quant_bigwig_rpkm_extended_files:
    doc: Fragment extended reads bigWig (signal) files
    type: File[]
    outputSource: quant/bigwig_rpkm_extended_files
# Workflow steps. Each step runs another CWL document (`run`), maps its input
# ids to workflow inputs or upstream step outputs (`in`), and lists the
# output ids it makes available (`out`).
# Data flow: qc_treatment -> trimm_treatment -> map_treatment, whose
# de-duplicated BAM files feed both peak_call_treatment and quant.
steps:
  qc_treatment:
    run: 01-qc-se.cwl
    in:
      default_adapters_file: default_adapters_file
      input_fastq_files: input_treatment_fastq_files
      nthreads: nthreads_qc
    out:
      - output_count_raw_reads
      - output_diff_counts
      - output_fastqc_report_files
      - output_fastqc_data_files
      # Consumed by trimm_treatment as its adapters input.
      - output_custom_adapters

  trimm_treatment:
    run: 02-trim-se.cwl
    in:
      input_adapters_files: qc_treatment/output_custom_adapters
      input_read1_fastq_files: input_treatment_fastq_files
      trimmomatic_java_opts: trimmomatic_java_opts
      trimmomatic_jar_path: trimmomatic_jar_path
      nthreads: nthreads_trimm
    out:
      - output_data_fastq_trimmed_files
      - output_trimmed_fastq_read_count

  map_treatment:
    run: 03-map-se.cwl
    in:
      input_fastq_files: trimm_treatment/output_data_fastq_trimmed_files
      genome_sizes_file: genome_sizes_file
      ENCODE_blacklist_bedfile: ENCODE_blacklist_bedfile
      genome_ref_first_index_file: genome_ref_first_index_file
      picard_jar_path: picard_jar_path
      picard_java_opts: picard_java_opts
      nthreads: nthreads_map
    out:
      - output_data_sorted_dedup_bam_files
      - output_data_sorted_dups_marked_bam_files
      - output_picard_mark_duplicates_files
      - output_pbc_files
      - output_bowtie_log
      - output_preseq_c_curve_files
      - output_percentage_uniq_reads
      - output_read_count_mapped

  peak_call_treatment:
    run: 04-peakcall.cwl
    in:
      input_bam_files: map_treatment/output_data_sorted_dedup_bam_files
      input_genome_sizes: genome_sizes_file
      genome_effective_size: genome_effective_size
      as_narrowPeak_file: as_narrowPeak_file
      as_broadPeak_file: as_broadPeak_file
      nthreads: nthreads_peakcall
    out:
      - output_spp_x_cross_corr
      - output_spp_cross_corr_plot
      - output_filtered_read_count_file
      - output_read_in_narrowpeak_count_within_replicate
      - output_narrowpeak_count
      - output_narrowpeak_file
      - output_narrowpeak_summits_file
      - output_narrowpeak_bigbed_file
      - output_narrowpeak_xls_file
      - output_read_in_broadpeak_count_within_replicate
      - output_broadpeak_count
      - output_broadpeak_file
      # NOTE(review): not referenced by any workflow outputSource in this
      # file; confirm 04-peakcall.cwl actually declares it.
      - output_broadpeak_summits_file
      - output_broadpeak_bigbed_file

  quant:
    run: 05-quantification.cwl
    in:
      nthreads: nthreads_quant
      input_trt_bam_files: map_treatment/output_data_sorted_dedup_bam_files
      input_genome_sizes: genome_sizes_file
    out:
      - bigwig_raw_files
      - bigwig_rpkm_extended_files