#!/usr/bin/env nextflow
nextflow.enable.dsl=2
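// Amplicon read pre-processing pipeline: initial QC reports (FastQC/MultiQC),
// optional primer removal with cutadapt (currently disabled), quality filtering
// with DADA2's filterAndTrim(), and final QC reports on the filtered reads.
//
// Example invocation (a sketch; adjust the paths to your data; the fwd/rev primer
// defaults below are placeholders used only by the disabled cutadapt step):
//   nextflow run amplicon.nf --raw_dir ./raw --results_dir ./results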
params.raw_dir = "./raw"
params.fastq_files = "${params.raw_dir}/*_{1,2}.fastq.gz"
params.results_dir = "./results"
params.paired = true
params.fwd_primer = "FWDPRIMER"
params.rev_primer = "REVPRIMER"
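// The FastQC and MultiQC processes are imported twice under different aliases so
// the same modules can produce reports both before and after filtering.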
include {fastQC as fastQC_initial} from "./modules/qc_modules"
include {fastQC as fastQC_final} from "./modules/qc_modules"
include {multiQC as multiQC_initial} from "./modules/qc_modules"
include {multiQC as multiQC_final} from "./modules/qc_modules"
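// Primer removal (Block 2) is currently disabled: the cutadapt process below and
// its call in the workflow are both commented out. Uncomment both to re-enable it.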
/*
process cutadapt {
    tag "Cutadapt on ${sample_id}"
    publishDir "${params.results_dir}/Cutadapt"

    input:
    tuple val(sample_id), path(fastq_in)

    output:
    path("${sample_id}*.fastq.gz")

    script:
    """
    if ${params.paired}; then
        cutadapt \
            -a ${params.fwd_primer} \
            -A ${params.rev_primer} \
            -o ${sample_id}.1.fastq.gz \
            -p ${sample_id}.2.fastq.gz \
            ${fastq_in}
    else
        cutadapt \
            -a ${params.fwd_primer} \
            -o ${sample_id}.fastq.gz \
            ${fastq_in}
    fi
    """
}
*/
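// Block 3 implementation: per-sample quality trimming and filtering with
// dada2::filterAndTrim(), run as an embedded R script.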
process DADA2 {
    tag "DADA2 on ${sample_id}"
    publishDir "${params.results_dir}/DADA2"

    input:
    tuple val(sample_id), path(fastq_in)

    output:
    tuple val(sample_id), path("${sample_id}*.filtered.fastq.gz"), emit: filtered
    path("${sample_id}.trimmed.csv")

    script:
    """
    #!/usr/bin/env Rscript
    library(dada2)
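    # Staged input file names are interpolated as one space-separated string; split them into a vector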
    reads <- unlist(strsplit("${fastq_in}", split = " "))
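    # Filter and trim both mates for paired-end data, or the single read file otherwise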
if ("${params.paired}" == "true") {
out <- filterAndTrim(fwd = file.path(reads[1]),
filt = paste0("${sample_id}", ".R1.filtered.fastq.gz"),
rev = file.path(reads[2]),
filt.rev = paste0("${sample_id}", ".R2.filtered.fastq.gz"),
minLen = 50,
maxN = 0,
maxEE = 2,
truncQ = 2,
trimRight = 0,
rm.phix = TRUE,
compress = TRUE,
multithread = TRUE,
verbose = TRUE)
} else {
out <- filterAndTrim(fwd = file.path(reads[1]),
filt = paste0("${sample_id}", ".R1.filtered.fastq.gz"),
minLen = 50,
maxN = 0,
maxEE = 2,
truncQ = 2,
trimRight = 0,
rm.phix = TRUE,
compress = TRUE,
multithread = TRUE,
verbose = TRUE)
}
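    # Record per-sample read counts before and after filtering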
    write.csv(out, paste0("${sample_id}", ".trimmed.csv"))
    """
}
workflow {
    // file_channel = channel.fromPath(params.fastq_files, checkIfExists: true)
    //     .map {it -> [it.simpleName, it]}
    // file_channel.view()
    reads_ch = channel.fromFilePairs(params.fastq_files, checkIfExists: true)

    // Block 1: Initial Quality Control Reports
    fastqc_ch = fastQC_initial(reads_ch, "FastQC_Initial")
    multiQC_initial(fastqc_ch.collect(), "MultiQC_Initial")

    // Block 2: Primer Removal
    // cutadapt(reads_ch)

    // Block 3: Quality Trimming and Filtering
    DADA2(reads_ch)
    DADA2.out.filtered.view()

    // Block 4: Final Quality Control Reports
    fastqc_final_ch = fastQC_final(DADA2.out.filtered, "FastQC_Final")
    multiQC_final(fastqc_final_ch.collect(), "MultiQC_Final")
    // Block 5: Phylogenetic Tree Generation (not yet implemented)

    // Block 6: Packaging of Results (not yet implemented; a commented sketch follows the workflow block)
}
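
/*
 * A minimal sketch of what Block 6 (Packaging of Results) could look like. This
 * process is an assumption and not part of the original pipeline: it simply tars
 * whatever result files are passed to it into a single archive. Like the cutadapt
 * process above, it is kept commented out until the upstream blocks are wired to
 * feed it.
 *
process packageResults {
    publishDir "${params.results_dir}", mode: 'copy'

    input:
    path(results)

    output:
    path("results.tar.gz")

    script:
    """
    tar -czf results.tar.gz ${results}
    """
}
*/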