-
Notifications
You must be signed in to change notification settings - Fork 17
/
Copy path03-map-se-with-sjdb.cwl
243 lines (243 loc) · 7.11 KB
/
03-map-se-with-sjdb.cwl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
#!/usr/bin/env cwl-runner
# CWL workflow: mapping stage ("03") of an RNA-seq pipeline for single-end
# (SE) reads. Each input FASTQ is aligned with STAR twice (one run producing
# genome BAM + gene counts, one run producing transcriptome alignments); the
# genome BAMs are then sorted and indexed, and per-sample QC files (mapped
# read counts, preseq c_curve based unique-read percentage, PCR bottleneck
# coefficient) are produced.
class: Workflow
cwlVersion: v1.0
doc: 'RNA-seq 03 mapping - reads: SE'
# Optional CWL features this workflow relies on.
requirements:
  # Steps fan out over the per-sample arrays via `scatter`.
  - class: ScatterFeatureRequirement
  # NOTE(review): needed only if any `run:` target is itself a Workflow;
  # the referenced files are not visible from here — confirm.
  - class: SubworkflowFeatureRequirement
  # Step inputs use `valueFrom`.
  - class: StepInputExpressionRequirement
  # `${...}` JavaScript bodies appear in several `valueFrom` fields.
  - class: InlineJavascriptRequirement
  # NOTE(review): no step input in this file lists more than one source;
  # kept as declared.
  - class: MultipleInputFeatureRequirement
# Workflow inputs. IDs, types and defaults are part of the public interface.
inputs:
  input_fastq_read1_files:
    doc: Input fastq files
    type: File[]
  genome_sizes_file:
    doc: Genome sizes tab-delimited file
    type: File
  # NOTE(review): sjdb_name is not referenced by any step of this workflow;
  # presumably retained for interface parity with sibling workflows — confirm
  # before removing.
  sjdb_name:
    default: ggr.SJ.out.all.tab
    type: string
  STARgenomeDir:
    doc: STAR genome reference/indices files.
    type: Directory
  # NOTE(review): annotation_file is not referenced by any step of this
  # workflow.
  annotation_file:
    doc: GTF annotation file
    type: File
  # NOTE(review): genome_fasta_files is not referenced by any step of this
  # workflow.
  genome_fasta_files:
    doc: >-
      STAR genome generate - Genome FASTA file with all the genome
      sequences in FASTA format
    type: File[]
  # Declared as a string; both STAR steps convert it to an integer with
  # parseInt() before handing it to the tool.
  sjdbOverhang:
    doc: >-
      Length of the genomic sequence around the annotated junction to be
      used in constructing the splice junctions database. Ideally, this
      length should be equal to the ReadLength-1, where ReadLength is the
      length of the reads.
    type: string
  nthreads:
    doc: >-
      Number of threads passed to both STAR steps (runThreadN) and to the
      samtools sort step (nthreads).
    default: 1
    type: int
# Workflow outputs. Every output is an array with one entry per input FASTQ
# (the producing steps are scattered or operate on the per-sample arrays).
outputs:
  star_aligned_unsorted_file:
    doc: STAR mapped unsorted file.
    type: File[]
    outputSource: star_pass2/aligned
  # Produced by samtools sort followed by samtools index, so this is the
  # sorted counterpart of star_aligned_unsorted_file.
  star_aligned_sorted_file:
    doc: STAR mapped file, sorted and indexed with samtools.
    type: File[]
    outputSource: index_star_pass2_bam/indexed_file
  # The producing step is named "..._star1" but it consumes the preseq output
  # computed from the sorted star_pass2 BAMs.
  percentage_uniq_reads_star2:
    doc: Percentage of uniq reads from preseq c_curve output
    type: File[]
    outputSource: percent_uniq_reads_star1/output
  pcr_bottleneck_coef_file:
    doc: PCR Bottleneck Coefficient
    type: File[]
    outputSource: execute_pcr_bottleneck_coef/pbc_file
  star2_readspergene_file:
    doc: STAR pass-2 reads per gene counts file.
    type: File[]?
    outputSource: star_pass2/readspergene
  transcriptome_star_aligned_file:
    doc: STAR pass-2 file of reads aligned to the transcriptome.
    type: File[]
    outputSource: transcriptome_star_pass2/transcriptomesam
  read_count_mapped_star2:
    doc: Read counts of the mapped BAM files after STAR pass2
    type: File[]
    outputSource: mapped_reads_count_star2/output
  # Outer array: one entry per scattered sample; each entry is either null
  # or an array of stat files.
  transcriptome_star_stat_files:
    doc: STAR pass-2 aligned to transcriptome stat files.
    type:
      type: array
      items:
        - 'null'
        - type: array
          items: File
    outputSource: transcriptome_star_pass2/mappingstats
  # Same nested shape as transcriptome_star_stat_files.
  star2_stat_files:
    doc: STAR pass-2 stat files.
    type:
      type: array
      items:
        - 'null'
        - type: array
          items: File
    outputSource: star_pass2/mappingstats
  read_count_transcriptome_mapped_star2:
    doc: Read counts of the mapped to transcriptome BAM files with STAR pass2
    type: File[]
    outputSource: transcriptome_mapped_reads_count_star2/output
# Workflow steps. CWL orders execution by data dependencies, not by the
# order in which steps are listed here.
steps:
  # Derive one sample name per FASTQ by handing the file's basename and an
  # extension pattern to the basename utility.
  basename:
    run: ../utils/basename.cwl
    scatter: file_path
    in:
      file_path:
        source: input_fastq_read1_files
        valueFrom: $(self.basename)
      # Regex matching the extension to strip (do_not_escape_sep is true, so
      # the pattern is passed through unescaped). Both dots are escaped so
      # that only a literal ".fastq.gz" / ".fastq" matches.
      # NOTE(review): ".bz2" inputs are accepted by readFilesCommand below
      # but are not covered by this pattern — confirm whether that matters.
      sep:
        valueFrom: '(\.fastq\.gz|\.fastq)'
      do_not_escape_sep:
        valueFrom: ${return true}
    out:
      - basename

  # Genome alignment with STAR: each FASTQ is paired with its own output
  # prefix (dotproduct scatter). Emits an unsorted BAM, the mapping stats
  # and per-gene read counts (quantMode GeneCounts).
  star_pass2:
    run: ../../workflows/tools/STAR.cwl
    scatterMethod: dotproduct
    scatter:
      - readFilesIn
      - outFileNamePrefix
    in:
      genomeDir: STARgenomeDir
      outFilterIntronMotifs:
        valueFrom: RemoveNoncanonical
      outSAMattributes:
        valueFrom: All
      outFilterMultimapNmax:
        valueFrom: ${return 1}
      outFileNamePrefix:
        source: basename/basename
        valueFrom: $(self + ".star2.")
      outSAMtype:
        valueFrom: $(['BAM', 'Unsorted'])
      runThreadN: nthreads
      # The tool receives a one-element list holding the single-end read file.
      readFilesIn:
        source: input_fastq_read1_files
        valueFrom: ${return [self]}
      quantMode:
        valueFrom: GeneCounts
      # Workflow input is a string; convert explicitly as base 10.
      sjdbOverhang:
        source: sjdbOverhang
        valueFrom: $(parseInt(self, 10))
      # Pick the decompression command from the file extension. `inputs`
      # here is the step input object before valueFrom is applied, so
      # readFilesIn is still the scattered File. ".bz2" maps to "bzcat"
      # (the previous value "bz2" is not an executable command).
      readFilesCommand:
        valueFrom: |-
          ${return inputs.readFilesIn.basename.endsWith(".gz") ? "zcat" : (inputs.readFilesIn.basename.endsWith(".bz2") ? "bzcat" : "cat") }
    out:
      - aligned
      - mappingstats
      - readspergene

  # Second STAR run per FASTQ with quantMode TranscriptomeSAM, writing under
  # the "<sample>.transcriptome.star2." prefix. Alignment filter settings
  # are given explicitly below.
  transcriptome_star_pass2:
    run: ../../workflows/tools/STAR.cwl
    scatterMethod: dotproduct
    scatter:
      - readFilesIn
      - outFileNamePrefix
    in:
      # The tool receives a one-element list holding the single-end read file.
      readFilesIn:
        source: input_fastq_read1_files
        valueFrom: ${return [self]}
      alignSJoverhangMin:
        valueFrom: ${return 8}
      genomeDir: STARgenomeDir
      outFilterType:
        valueFrom: BySJout
      alignSJDBoverhangMin:
        valueFrom: ${return 1}
      outFilterIntronMotifs:
        valueFrom: RemoveNoncanonical
      outSAMattributes:
        valueFrom: All
      outSAMunmapped:
        valueFrom: Within
      outFilterMultimapNmax:
        valueFrom: ${return 20}
      alignIntronMax:
        valueFrom: ${return 1000000}
      outFilterMismatchNoverReadLmax:
        valueFrom: ${return 0.04}
      outFilterMismatchNmax:
        valueFrom: ${return 999}
      alignIntronMin:
        valueFrom: ${return 20}
      runThreadN: nthreads
      alignMatesGapMax:
        valueFrom: ${return 1000000}
      sjdbScore:
        valueFrom: ${return 1}
      outFileNamePrefix:
        source: basename/basename
        valueFrom: $(self + ".transcriptome.star2.")
      quantMode:
        valueFrom: TranscriptomeSAM
      # Workflow input is a string; convert explicitly as base 10.
      sjdbOverhang:
        source: sjdbOverhang
        valueFrom: $(parseInt(self, 10))
      # Same extension-based choice as in star_pass2; ".bz2" maps to "bzcat".
      readFilesCommand:
        valueFrom: |-
          ${return inputs.readFilesIn.basename.endsWith(".gz") ? "zcat" : (inputs.readFilesIn.basename.endsWith(".bz2") ? "bzcat" : "cat") }
    out:
      - transcriptomesam
      - mappingstats

  # Run the preseq c_curve tool on each sorted BAM, paired with its sample
  # name (dotproduct scatter).
  preseq-c-curve:
    run: ../map/preseq-c_curve.cwl
    scatterMethod: dotproduct
    scatter:
      - input_sorted_file
      - output_file_basename
    in:
      input_sorted_file: sort_star_pass2_bam/sorted_file
      output_file_basename: basename/basename
    out:
      - output_file

  # Index each sorted BAM.
  index_star_pass2_bam:
    run: ../map/samtools-index.cwl
    scatter: input_file
    in:
      input_file: sort_star_pass2_bam/sorted_file
    out:
      - indexed_file

  # Sort each unsorted BAM produced by star_pass2.
  sort_star_pass2_bam:
    run: ../map/samtools-sort.cwl
    scatter: input_file
    in:
      nthreads: nthreads
      input_file: star_pass2/aligned
    out:
      - sorted_file

  # Extract read counts from the star_pass2 mapping stats, one per sample.
  mapped_reads_count_star2:
    run: ../map/star-log-read-count.cwl
    scatter: star_log
    in:
      star_log:
        source: star_pass2/mappingstats
    out:
      - output

  # Same read-count extraction for the transcriptome STAR run.
  transcriptome_mapped_reads_count_star2:
    run: ../map/star-log-read-count.cwl
    scatter: star_log
    in:
      star_log: transcriptome_star_pass2/mappingstats
    out:
      - output

  # Unique-read percentage from each preseq c_curve output. Despite the
  # "star1" suffix in the step id, its input derives from star_pass2 BAMs.
  percent_uniq_reads_star1:
    run: ../map/preseq-percent-uniq-reads.cwl
    scatter: preseq_c_curve_outfile
    in:
      preseq_c_curve_outfile: preseq-c-curve/output_file
    out:
      - output

  # PCR bottleneck coefficient. Not scattered: the tool receives the full
  # arrays of sorted BAMs and sample names in one invocation.
  execute_pcr_bottleneck_coef:
    run: ../map/pcr-bottleneck-coef.cwl
    in:
      input_bam_files: sort_star_pass2_bam/sorted_file
      genome_sizes: genome_sizes_file
      input_output_filenames: basename/basename
    out:
      - pbc_file