Skip to content

Commit

Permalink
Merge pull request #19 from UPHL-BioNGS/dragonflye-update
Browse files Browse the repository at this point in the history
Dragonflye update
  • Loading branch information
erinyoung authored May 5, 2023
2 parents 1316c5c + 09768b3 commit 040a042
Show file tree
Hide file tree
Showing 25 changed files with 299 additions and 102 deletions.
2 changes: 1 addition & 1 deletion .dockstore.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
version: 1.2
version: 1.1.20230425
workflows:
- name: Donut_Falls
subclass: NFL
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/github_actions.config
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ process {
withName:flye{
cpus = 2
}
withName:dragonflye{
cpus = 2
}
withName:masurca{
cpus = 2
}
Expand Down Expand Up @@ -34,6 +37,7 @@ process {
}
withName:reconcile{
cpus = 2
errorStrategy = 'ignore'
}
withName:msa {
cpus = 2
Expand Down
33 changes: 33 additions & 0 deletions .github/workflows/run_workflow_dragonflye.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# CI smoke test for the dragonflye assembler path of Donut Falls.
# Triggered on pull requests and on manual dispatch.
name: Test Donut Falls dragonflye assembly

on: [pull_request, workflow_dispatch]

jobs:
  test:
    runs-on: ubuntu-20.04
    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          lfs: true

      - name: Checkout LFS objects
        run: git lfs checkout

      - name: Install Nextflow
        run: |
          wget -qO- get.nextflow.io | bash
          sudo mv nextflow /usr/local/bin/

      - name: Run Donut Falls
        run: |
          docker --version
          # The reads directory must exist before the download is moved into
          # it; -p keeps this a no-op if the directory is already present.
          mkdir -p reads
          # Reads are fetched from the EBI SRA FTP mirror.
          wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR377/009/ERR3772599/ERR3772599_1.fastq.gz
          mv ERR3772599_1.fastq.gz reads/.
          nextflow run . -profile docker -c .github/workflows/github_actions.config --reads reads --assembler dragonflye
          tree donut_falls
4 changes: 2 additions & 2 deletions .github/workflows/run_workflow_masurca.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,6 @@ jobs:
#wget --quiet https://github.com/rrwick/Unicycler/raw/69e712eb95c4b9f8a46aade467260260a9ce7a91/sample_data/long_reads_high_depth.fastq.gz
wget --quiet https://github.com/rrwick/Unicycler/raw/69e712eb95c4b9f8a46aade467260260a9ce7a91/sample_data/long_reads_low_depth.fastq.gz
#nextflow run . -profile docker -c .github/workflows/github_actions.config --sample_sheet .github/workflows/github_actions_hybrid_sample_sheet.txt --assembler masurca
nextflow run . -profile docker -c .github/workflows/github_actions.config --sample_sheet .github/workflows/github_actions_hybrid_sample_sheet.txt --assembler masurca
#tree donut_falls
tree donut_falls
9 changes: 6 additions & 3 deletions .github/workflows/run_workflow_raven_dir.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,14 @@ jobs:
run: |
docker --version
wget --quiet https://github.com/rrwick/Unicycler/raw/69e712eb95c4b9f8a46aade467260260a9ce7a91/sample_data/long_reads_high_depth.fastq.gz
mkdir reads
mv long_reads_high_depth.fastq.gz reads/.
wget -q https://github.com/nf-core/test-datasets/blob/23f5b889e4736798c8692e9b92810d9a3e37ee97/nanopore/subset15000.fq.gz?raw=true -O reads/nfcore_subset15000.fa.gz
wget -q https://bridges.monash.edu/ndownloader/files/23754659 -O great_dataset.tar.gz
tar -xvf great_dataset.tar.gz
mv reads.fastq.gz reads/.
nextflow run . -profile docker -c .github/workflows/github_actions.config --reads reads --assembler raven
tree donut_falls
8 changes: 3 additions & 5 deletions .github/workflows/run_workflow_raven_polish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,8 @@ jobs:
run: |
docker --version
wget --quiet https://github.com/rrwick/Unicycler/raw/69e712eb95c4b9f8a46aade467260260a9ce7a91/sample_data/short_reads_1.fastq.gz
wget --quiet https://github.com/rrwick/Unicycler/raw/69e712eb95c4b9f8a46aade467260260a9ce7a91/sample_data/short_reads_2.fastq.gz
wget --quiet https://github.com/rrwick/Unicycler/raw/69e712eb95c4b9f8a46aade467260260a9ce7a91/sample_data/long_reads_high_depth.fastq.gz
# This needs a better dataset. Sorry!
#nextflow run . -profile docker -c .github/workflows/github_actions.config --sample_sheet .github/workflows/github_actions_hd_hybrid_sample_sheet.txt --assembler raven
# nextflow run . -profile docker -c .github/workflows/github_actions.config --sample_sheet .github/workflows/github_actions_hd_hybrid_sample_sheet.txt --assembler raven
#tree donut_falls
# tree donut_falls
76 changes: 47 additions & 29 deletions bin/.tests.sh
Original file line number Diff line number Diff line change
@@ -1,60 +1,78 @@
# nextflow run /home/eriny/sandbox/Donut_Falls -profile singularity --reads /home/eriny/sandbox/test_files/donut/combined --illumina /home/eriny/sandbox/test_files/donut/illumina -with-tower -resume
# nextflow run /home/eriny/sandbox/Donut_Falls -profile singularity --reads /home/eriny/sandbox/test_files/donut/combined -with-tower -resume
# nextflow run /home/eriny/sandbox/Donut_Falls -profile singularity --sample_sheet /home/eriny/sandbox/test_files/donut/sample_sheet.csv -resume

echo "$(date): testing only LR with flye"
echo "$(date): testing only LR with flye" && \
nextflow run /home/eriny/sandbox/Donut_Falls \
-profile singularity \
--reads /home/eriny/sandbox/test_files/donut/combined \
--outdir only_nanopore \
--reads /home/eriny/sandbox/test_files/donut/combined \
--outdir only_nanopore \
-with-tower \
-resume

echo "$(date): testing defaults"
-resume && \
echo "$(date): testing sample sheet" && \
nextflow run /home/eriny/sandbox/Donut_Falls \
-profile singularity \
--reads /home/eriny/sandbox/test_files/donut/combined \
--illumina /home/eriny/sandbox/test_files/donut/illumina \
--outdir default \
--sample_sheet /home/eriny/sandbox/test_files/donut/sample_sheet.csv \
--outdir sample_sheet \
--sequencing_summary /home/eriny/sandbox/test_files/donut/sequencing_summary_FAS76150_35058c5c.txt \
-with-tower \
-resume

echo "$(date): testing raven"
-resume && \
echo "$(date): testing raven" && \
nextflow run /home/eriny/sandbox/Donut_Falls \
-profile singularity \
--reads /home/eriny/sandbox/test_files/donut/combined \
--illumina /home/eriny/sandbox/test_files/donut/illumina \
--sample_sheet /home/eriny/sandbox/test_files/donut/sample_sheet.csv \
--assembler raven \
--outdir raven \
-with-tower \
-resume

echo "$(date): testing miniasm"
-resume && \
echo "$(date): testing miniasm" && \
nextflow run /home/eriny/sandbox/Donut_Falls \
-profile singularity \
--reads /home/eriny/sandbox/test_files/donut/combined \
--illumina /home/eriny/sandbox/test_files/donut/illumina \
--sample_sheet /home/eriny/sandbox/test_files/donut/sample_sheet.csv \
--assembler miniasm \
--outdir miniasm \
-with-tower \
-resume

echo "$(date): testing unicycler"
-resume && \
echo "$(date): testing lr_unicycler" && \
nextflow run /home/eriny/sandbox/Donut_Falls \
-profile singularity \
--sample_sheet /home/eriny/sandbox/test_files/donut/sample_sheet.csv \
--assembler lr_unicycler \
--outdir lr_unicycler \
-with-tower \
-resume && \
echo "$(date): testing unicycler" && \
nextflow run /home/eriny/sandbox/Donut_Falls \
-profile singularity \
--reads /home/eriny/sandbox/test_files/donut/combined \
--illumina /home/eriny/sandbox/test_files/donut/illumina \
--sample_sheet /home/eriny/sandbox/test_files/donut/sample_sheet.csv \
--assembler unicycler \
--outdir unicycler \
-with-tower \
-resume


echo "$(date): testing empty"
-resume && \
echo "$(date): testing masurca" && \
nextflow run /home/eriny/sandbox/Donut_Falls \
-profile singularity \
--sample_sheet /home/eriny/sandbox/test_files/donut/sample_sheet.csv \
--assembler masurca \
--outdir masurca \
-with-tower \
-resume && \
echo "$(date): testing empty" && \
nextflow run /home/eriny/sandbox/Donut_Falls \
-profile singularity \
--reads shouldntexist \
--illumina wontexist \
--outdir nonexistent \
--sequencing_summary doesntexit \
-with-tower \
-resume


echo "$(date): testing trycycler" && \
nextflow run /home/eriny/sandbox/Donut_Falls \
-profile singularity \
--sample_sheet /home/eriny/sandbox/test_files/donut/sample_sheet.csv \
--outdir trycycler \
--assembler trycycler \
--trycycler_min_fasta 12 \
-with-tower \
-resume
23 changes: 19 additions & 4 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@ params.remove = 'remove.txt'
params.reads = ''
params.test_wf = false

params.bandage_options = ''
params.busco_options = ''
params.circlator_options = ''
params.dragonflye_options = ''
params.enable_porechop = false
params.fastp_options = ''
params.filtlong_options = '--min_length 1000 --keep_percent 95'
Expand Down Expand Up @@ -54,6 +56,7 @@ params.trycycler_reconcile_options = ''
params.unicycler_options = ''

include { assembly } from './workflows/assembly' addParams(params)
include { copy } from './modules/copy' addParams(params)
include { filter } from './workflows/filter' addParams(params)
include { hybrid } from './workflows/hybrid' addParams(params)
include { nanoplot_summary as nanoplot } from './modules/nanoplot' addParams(params)
Expand Down Expand Up @@ -112,23 +115,24 @@ workflow {



if ( params.assembler == 'flye' || params.assembler == 'raven' || params.assembler == 'miniasm' || params.assembler == 'lr_unicycler' ) {
if ( params.assembler == 'flye' || params.assembler == 'raven' || params.assembler == 'miniasm' || params.assembler == 'lr_unicycler' || params.assembler == 'dragonflye' ) {
if ( params.test_wf ) {
test()
ch_input_files = ch_input_files.mix(test.out.fastq)
}

filter(ch_input_files)
assembly(filter.out.fastq)

ch_fastq = ch_fasta.mix(filter.out.fastq)
ch_illumina = ch_illumina.mix(filter.out.reads)

assembly(filter.out.fastq)
ch_fasta = ch_fasta.mix(assembly.out.fasta)
ch_summary = ch_summary.mix(filter.out.summary)
ch_summary = ch_summary.mix(filter.out.summary).mix(assembly.out.summary)

} else if ( params.assembler == 'unicycler' || params.assembler == 'masurca' ) {
hybrid(ch_input_files.filter{it -> it[2]})
ch_consensus = ch_consensus.mix(hybrid.out.fasta)
ch_summary = ch_summary.mix(hybrid.out.summary)

} else if ( params.assembler == 'trycycler' ) {
if ( params.test_wf ) {
Expand All @@ -143,6 +147,7 @@ workflow {

trycycler(filter.out.fastq, ch_remove.ifEmpty([]))
ch_fasta = ch_fasta.mix(trycycler.out.fasta)
ch_summary = ch_summary.mix(trycycler.out.summary)
}

nanoplot(ch_sequencing_summary)
Expand All @@ -153,4 +158,14 @@ workflow {
ch_input_files.map{it -> tuple (it[0], it[1])},
ch_consensus,
ch_summary.ifEmpty([]))

copy(ch_consensus.map{it -> tuple(it[1])}.collect())
}

workflow.onComplete {
    // End-of-run report: completion time, where the main outputs were
    // published, and whether the run succeeded. Lines are printed in order.
    def report = [
        "Pipeline completed at: $workflow.complete",
        "The multiqc report can be found at ${params.outdir}/multiqc/multiqc_report.html",
        "The consensus fasta files can be found in ${params.outdir}/consensus",
        "The fasta files are from each phase of assembly. polca > polypolish > medaka > unpolished",
        "Execution status: ${ workflow.success ? 'OK' : 'failed' }"
    ]
    report.each { line -> println(line) }
}
23 changes: 23 additions & 0 deletions modules/bandage.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Renders an assembly graph (GFA) as PNG and SVG images with Bandage, and
// keeps an extra copy of the PNG named `<sample>_mqc.png`.
process bandage {
// Declared outputs are copied (not linked) under params.outdir.
publishDir "${params.outdir}", mode: 'copy'
tag "${sample}"
cpus 1
container 'quay.io/biocontainers/bandage:0.8.1--hc9558a2_2'

input:
// sample: sample identifier used in output file names; gfa: graph file to draw.
tuple val(sample), file(gfa)

output:
// NOTE(review): this channel carries the png/svg images, not reads; the
// emit name `fastq` looks copied from another module. Confirm no caller
// depends on it before renaming.
tuple val(sample), path("bandage/${sample}.{png,svg}"), emit: fastq
// Presumably the `_mqc` suffix lets MultiQC pick the image up as custom
// content; verify against the multiqc module.
path "bandage/${sample}_mqc.png", emit: summary

// `shell:` block: !{...} is filled in by Nextflow, $... is left for bash.
shell:
'''
mkdir -p bandage
Bandage image !{gfa} bandage/!{sample}.png !{params.bandage_options}
Bandage image !{gfa} bandage/!{sample}.svg !{params.bandage_options}
cp bandage/!{sample}.png bandage/!{sample}_mqc.png
'''
}
21 changes: 21 additions & 0 deletions modules/copy.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Collects every fasta file it is given into a single `consensus/` directory,
// tidying the text on the way: `_length` and `_circular` become
// space-separated words and the `_polypolish` suffix is removed.
process copy {
  // The whole consensus/ directory is copied under params.outdir.
  publishDir "${params.outdir}", mode: 'copy'
  tag "putting all fasta files in ${params.outdir}/consensus"
  // NOTE(review): the multiqc image is presumably reused only because it
  // ships a shell with sed; confirm before swapping it for something smaller.
  container 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0'

  input:
  // All fasta files at once (the caller passes a collected list).
  file(fasta)

  output:
  path "consensus/"

  // `shell:` block: !{...} is filled in by Nextflow, $... is left for bash.
  shell:
  '''
    mkdir -p consensus
    for fasta in !{fasta}
    do
      # One sed pass applies the three substitutions in the original order;
      # paths are quoted so they are never word-split or glob-expanded.
      sed -e 's/_length/ length/g' -e 's/_circular/ circular/g' -e 's/_polypolish//g' "$fasta" > "consensus/$fasta"
    done
  '''
}
24 changes: 12 additions & 12 deletions modules/dragonflye.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,35 +2,35 @@ process dragonflye {
publishDir "${params.outdir}", mode: 'copy'
tag "${sample}"
cpus 12
container 'staphb/dragonflye:2.9.2'
container 'staphb/dragonflye:1.0.14'

input:
tuple val(sample), file(fastq)

output:
tuple val(sample), file("flye/${sample}/${sample}_flye.fasta"), optional: true, emit: fasta
tuple val(sample), file("flye/${sample}/${sample}_flye.gfa"), optional: true, emit: gfa
path "flye/${sample}/${sample}_assembly_info.csv", emit: summary
path "flye/${sample}/*"
tuple val(sample), file("dragonflye/${sample}/${sample}_dragonflye.fasta"), optional: true, emit: fasta
tuple val(sample), file("dragonflye/${sample}/${sample}_dragonflye.gfa"), optional: true, emit: gfa
path "dragonflye/${sample}/${sample}_assembly_info.tsv", emit: summary
path "dragonflye/${sample}/*"

shell:
'''
mkdir -p dragonflye/!{sample}
mkdir -p dragonflye
dragonflye --version
dragonflye !{params.dragonflye_options} \
--reads !{fastq} \
--cpus !{task.cpus} \
--outdir flye/!{sample} \
--outdir dragonflye/!{sample} \
--prefix !{sample}
# renaming final files
if [ -f "dragonflye/!{sample}/assembly.fasta" ] ; then cp dragonflye/!{sample}/assembly.fasta dragonflye/!{sample}/!{sample}_flye.fasta ; fi
if [ -f "dragonflye/!{sample}/assembly_graph.gfa" ] ; then cp dragonflye/!{sample}/assembly_graph.gfa dragonflye/!{sample}/!{sample}_flye.gfa ; fi
if [ -f "dragonflye/!{sample}/flye-unpolished.gfa" ] ; then cp dragonflye/!{sample}/flye-unpolished.gfa dragonflye/!{sample}/!{sample}_dragonflye.gfa ; fi
if [ -f "dragonflye/!{sample}/flye.fasta" ] ; then cp dragonflye/!{sample}/flye.fasta dragonflye/!{sample}/!{sample}_dragonflye.fasta ; fi
# getting a summary file
head -n 1 dragonflye/!{sample}/assembly_info.txt | tr "\\t" "," | awk '{print "sample," $0}' > flye/!{sample}/!{sample}_assembly_info.csv
tail -n+2 dragonflye/!{sample}/assembly_info.txt | tr "\\t" "," | awk -v sample=!{sample} '{print sample "," $0}' >> flye/!{sample}/!{sample}_assembly_info.csv
head -n 1 dragonflye/!{sample}/flye-info.txt | awk '{print "sample\\t" $0}' > dragonflye/!{sample}/!{sample}_assembly_info.tsv
tail -n+2 dragonflye/!{sample}/flye-info.txt | awk -v sample=!{sample} '{print sample "\\t" $0}' >> dragonflye/!{sample}/!{sample}_assembly_info.tsv
'''
}
1 change: 0 additions & 1 deletion modules/filtlong.nf
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
process filtlong {
publishDir "${params.outdir}", mode: 'copy'
tag "${sample}"
cpus 1
container 'staphb/filtlong:0.2.1'
Expand Down
Loading

0 comments on commit 040a042

Please sign in to comment.