-
Notifications
You must be signed in to change notification settings - Fork 83
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add dorado #256
add dorado #256
Changes from 7 commits
533a7e7
8e16866
1867595
b664520
b6309f2
9ab2f0f
7a8cce9
510adfb
db1eaaf
183fd5c
b3a51bb
16dc59e
04614ea
b884a45
2d70e1a
352d2f9
d3d089d
5738424
510ce56
d2a0e22
5e61982
7922b5f
fa14487
a92abac
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,33 +1,40 @@ | ||
/* | ||
* ------------------------------------------------- | ||
* Nextflow config file for running tests | ||
* ------------------------------------------------- | ||
* Defines bundled input files and everything required | ||
* to run a fast and simple test. Use as follows: | ||
* nextflow run nf-core/nanoseq -profile test_nobc_dx,<docker/singularity> | ||
*/ | ||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
Nextflow config file for running minimal tests | ||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
Defines input files and everything required to run a fast and simple pipeline test. | ||
|
||
Use as follows: | ||
nextflow run nf-core/nanoseq -profile test,<docker/singularity> --outdir <OUTDIR> | ||
|
||
---------------------------------------------------------------------------------------- | ||
*/ | ||
|
||
params { | ||
config_profile_name = 'Test profile' | ||
config_profile_description = 'Minimal test dataset to check pipeline function' | ||
|
||
// Limit resources | ||
max_cpus = 2 | ||
max_memory = 6.GB | ||
max_time = 12.h | ||
// Limit resources so that this can run on GitHub Actions | ||
max_cpus = 2 | ||
max_memory = '6.GB' | ||
max_time = '12.h' | ||
|
||
// Input data to perform demultiplexing | ||
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/3.2/samplesheet/samplesheet_nobc_dx.csv' | ||
fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/chr22_23800000-23980000.fa' | ||
gtf = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/chr22_23800000-23980000.gtf' | ||
run_nanolyse = true | ||
protocol = 'DNA' | ||
// Input data to perform both basecalling and demultiplexing | ||
input = 'https://raw.githubusercontent.com/yuukiiwa/test-datasets/nanoseq/3.2/samplesheet/samplesheet_bc_dx.csv' | ||
fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/hg19_KCMF1.fa' | ||
protocol = 'cDNA' | ||
flowcell = 'FLO-MIN106' | ||
kit = 'SQK-DCS109' | ||
barcode_kit = 'NBD103/NBD104' | ||
input_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/fastq/nondemultiplexed/sample_nobc_dx.fastq.gz' | ||
skip_bigwig = true | ||
skip_bigbed = true | ||
trim_barcodes = true | ||
dorado_model = '[email protected]' | ||
dorado_device = 'cpu' | ||
run_nanolyse = true | ||
skip_quantification = true | ||
skip_fusion_analysis= true | ||
skip_modification_analysis=true | ||
aligner = 'graphmap2' | ||
|
||
// This variable is just for reference and isn't actually required for the tests | ||
// Files are downloaded and staged using the "GetTestData" process | ||
input_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/fast5/barcoded_multi/' | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
/* | ||
* ------------------------------------------------- | ||
* Nextflow config file for running tests | ||
* ------------------------------------------------- | ||
* Defines bundled input files and everything required | ||
* to run a fast and simple test. Use as follows: | ||
* nextflow run nf-core/nanoseq -profile test_bc_nodx,<docker/singularity> | ||
*/ | ||
|
||
params { | ||
config_profile_name = 'Test profile' | ||
config_profile_description = 'Minimal test dataset to check pipeline function' | ||
|
||
// Limit resources so that this can run on Travis | ||
max_cpus = 2 | ||
max_memory = 6.GB | ||
max_time = 12.h | ||
|
||
// Input data to perform basecalling and to skip demultiplexing | ||
input = 'https://raw.githubusercontent.com/yuukiiwa/test-datasets/nanoseq/3.2/samplesheet/samplesheet_bc_nodx.csv' | ||
fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/hg19_KCMF1.fa' | ||
protocol = 'cDNA' | ||
flowcell = 'FLO-MIN106' | ||
kit = 'SQK-DCS108' | ||
dorado_model = '[email protected]' | ||
dorado_device = 'cpu' | ||
skip_bigbed = true | ||
skip_bigwig = true | ||
skip_demultiplexing = true | ||
skip_quantification = true | ||
skip_fusion_analysis= true | ||
skip_modification_analysis=true | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
/* | ||
* ------------------------------------------------- | ||
* Nextflow config file for running tests | ||
* ------------------------------------------------- | ||
* Defines bundled input files and everything required | ||
* to run a fast and simple test. Use as follows: | ||
* nextflow run nf-core/nanoseq -profile test_nobc_dx,<docker/singularity> | ||
*/ | ||
|
||
params { | ||
config_profile_name = 'Test profile' | ||
config_profile_description = 'Minimal test dataset to check pipeline function' | ||
|
||
// Limit resources | ||
max_cpus = 2 | ||
max_memory = 6.GB | ||
max_time = 12.h | ||
|
||
// Input data to perform demultiplexing | ||
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/3.2/samplesheet/samplesheet_nobc_dx.csv' | ||
fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/chr22_23800000-23980000.fa' | ||
gtf = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/chr22_23800000-23980000.gtf' | ||
skip_basecalling = true | ||
run_nanolyse = true | ||
protocol = 'DNA' | ||
barcode_kit = 'NBD103/NBD104' | ||
input_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/fastq/nondemultiplexed/sample_nobc_dx.fastq.gz' | ||
skip_bigwig = true | ||
skip_bigbed = true | ||
skip_quantification = true | ||
skip_fusion_analysis= true | ||
skip_modification_analysis=true | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
/* | ||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
Nextflow config file for running minimal tests | ||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
Defines input files and everything required to run a fast and simple pipeline test. | ||
|
||
Use as follows: | ||
nextflow run nf-core/nanoseq -profile test,<docker/singularity> --outdir <OUTDIR> | ||
|
||
---------------------------------------------------------------------------------------- | ||
*/ | ||
|
||
params { | ||
config_profile_name = 'Test profile' | ||
config_profile_description = 'Minimal test dataset to check pipeline function' | ||
|
||
// Limit resources so that this can run on GitHub Actions | ||
max_cpus = 2 | ||
max_memory = '6.GB' | ||
max_time = '6.h' | ||
|
||
// Input data to perform both basecalling and demultiplexing | ||
input = 'https://raw.githubusercontent.com/yuukiiwa/test-datasets/nanoseq/3.2/samplesheet/samplesheet_bc_dx.csv' | ||
fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/hg19_KCMF1.fa' | ||
protocol = 'cDNA' | ||
flowcell = 'FLO-MIN106' | ||
kit = 'SQK-DCS109' | ||
barcode_kit = 'EXP-NBD103' | ||
trim_barcodes=true | ||
output_demultiplex_fast5 = true | ||
run_nanolyse = true | ||
skip_quantification = true | ||
skip_fusion_analysis= true | ||
skip_modification_analysis=true | ||
|
||
// This variable is just for reference and isn't actually required for the tests | ||
// Files are downloaded and staged using the "GetTestData" process | ||
input_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/fast5/barcoded/' | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
process DORADO { | ||
tag "$meta.id" | ||
label 'process_medium' | ||
|
||
container "docker.io/ontresearch/dorado" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Just to double-check, is it OK to use this license-wise? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. And would this still work with Singularity? |
||
|
||
input: | ||
tuple val(meta), path(pod5_path) | ||
val dorado_device | ||
val dorado_model | ||
|
||
output: | ||
tuple val(meta), path("*.fastq.gz") , emit: fastq | ||
path "versions.yml" , emit: versions | ||
|
||
script: | ||
""" | ||
dorado download --model $dorado_model | ||
dorado basecaller $dorado_model $pod5_path --device $dorado_device --emit-fastq > basecall.fastq | ||
Comment on lines
+18
to
+19
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are there any options a user could theoretically add? Missing |
||
|
||
cat <<-END_VERSIONS > versions.yml | ||
"${task.process}": | ||
dorado: \$(echo \$(dorado --version 2>&1) | sed -r 's/.{81}//') | ||
END_VERSIONS | ||
|
||
gzip basecall.fastq | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should probably go before the emissions, and should the file be forced to be |
||
""" | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
process FAST5_TO_POD5 { | ||
tag "$meta.id" | ||
label 'process_medium' | ||
|
||
conda "conda-forge::r-base=4.0.3 bioconda::bioconductor-bambu=3.0.8 bioconda::bioconductor-bsgenome=1.66.0" | ||
container "docker.io/yuukiiwa/pod5:0.2.4" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same as above. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This could be a biocontainer. |
||
|
||
input: | ||
tuple val(meta), path(input_path) | ||
|
||
output: | ||
tuple val(meta), path("pod5/") , emit: pod5 | ||
yuukiiwa marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
when: | ||
task.ext.when == null || task.ext.when | ||
|
||
script: | ||
output_name = "pod5/converted.pod5" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If this needs to be hardcoded, why not put directly in the command? |
||
""" | ||
pod5 convert fast5 $input_path --output $output_name | ||
|
||
cat <<-END_VERSIONS > versions.yml | ||
"${task.process}": | ||
pod5: \$(echo \$(pod5 --version 2>&1) | sed -r 's/..............//') | ||
END_VERSIONS | ||
""" | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,15 +4,15 @@ process GET_TEST_DATA { | |
container "docker.io/yuukiiwa/git:latest" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Huh, interesting, this isn't how we normally retrieve test data with nf-core (either via URLs, or upstream step in the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what's in the container? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The container contains the |
||
|
||
output: | ||
path "test-datasets/fast5/$barcoded/*" , emit: ch_input_fast5s_path | ||
path "test-datasets/fast5/$barcoded/" , emit: ch_input_fast5_dir_path | ||
path "test-datasets/modification_fast5_fastq/", emit: ch_input_dir_path | ||
path "versions.yml" , emit: versions | ||
|
||
when: | ||
task.ext.when == null || task.ext.when | ||
|
||
script: | ||
barcoded = (workflow.profile.contains('test_bc_nodx') || workflow.profile.contains('rnamod')) ? "nonbarcoded" : "barcoded" | ||
barcoded = (workflow.profile.contains('test_bc_nodx') || workflow.profile.contains('rnamod')) ? "nonbarcoded_multi" : "barcoded_multi" | ||
yuukiiwa marked this conversation as resolved.
Show resolved
Hide resolved
|
||
""" | ||
git clone https://github.com/nf-core/test-datasets.git --branch nanoseq --single-branch | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Harshil Align™️!