Skip to content

Commit

Permalink
Merge pull request #117 from nf-core/support-custom-panels
Browse files Browse the repository at this point in the history
  • Loading branch information
scwatts authored Dec 5, 2024
2 parents f029bec + 786ce02 commit fc1bbf0
Show file tree
Hide file tree
Showing 13 changed files with 221 additions and 63 deletions.
4 changes: 4 additions & 0 deletions .nf-core.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ lint:
- lib/Utils.groovy
- lib/WorkflowMain.groovy
- lib/WorkflowOncoanalyser.groovy
nextflow_config:
- config_defaults:
- params.fastp_umi_length
- params.fastp_umi_skip
bump_version: null
org_path: null
update: null
19 changes: 6 additions & 13 deletions lib/Utils.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -277,33 +277,26 @@ class Utils {
// Apply some required restrictions to targeted mode
if (run_config.mode === Constants.RunMode.TARGETED) {

// Do not allow normal DNA
if (Utils.hasNormalDna(meta)) {
log.error "targeted mode is not compatible with the normal DNA BAM provided for ${meta.group_id}\n\n" +
"The targeted workflow supports only tumor DNA BAMs (and tumor RNA BAMs for TSO500)"
// Do not allow donor DNA
if (Utils.hasDonorDna(meta)) {
log.error "targeted mode is not compatible with the donor DNA BAM provided for ${meta.group_id}\n\n" +
"The targeted workflow supports only tumor and normal DNA BAMs (and tumor RNA BAMs for TSO500)"
Nextflow.exit(1)
}

// Do not allow only tumor RNA
if (Utils.hasTumorRnaBam(meta) && !Utils.hasTumorDna(meta)) {
if (Utils.hasTumorRna(meta) && !Utils.hasTumorDna(meta)) {
log.error "targeted mode is not compatible with only tumor RNA provided for ${meta.group_id}\n\n" +
"The targeted workflow requires tumor DNA and can optionally take tumor RNA, depending on " +
"the configured panel."
Nextflow.exit(1)
}

// Restrict tumor RNA inputs to the TSO500 panel
if (Utils.hasTumorRnaBam(meta) && run_config.panel != 'tso500') {
def panel = run_config.panel.toUpperCase()
log.error "only the TSO500 panel supports tumor RNA analysis but got: ${panel}"
Nextflow.exit(1)
}

}

// Do not allow normal DNA only
if (Utils.hasNormalDna(meta) && !Utils.hasTumorDna(meta)) {
log.error "germline only mode not supported, found only a normal DNA BAM for ${meta.group_id}"
log.error "found only normal DNA input for ${meta.group_id} but germline only analysis is not supported"
Nextflow.exit(1)
}

Expand Down
87 changes: 72 additions & 15 deletions lib/WorkflowMain.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class WorkflowMain {
}
}

if (!params.containsKey('ref_hmf_data_path')) {
if (!params.containsKey('ref_data_hmf_data_path')) {
if (params.genome_version.toString() == '37') {
params.ref_data_hmf_data_path = Constants.HMF_DATA_37_PATH
} else if (params.genome_version.toString() == '38') {
Expand Down Expand Up @@ -65,14 +65,30 @@ class WorkflowMain {
if (run_mode === Constants.RunMode.TARGETED) {

// Attempt to set default panel data path; make no assumption on valid 'panel' value

if (params.containsKey('panel')) {
if (params.panel == 'tso500' && params.genome_version.toString() == '37') {
params.ref_data_panel_data_path = Constants.TSO500_PANEL_37_PATH
} else if (params.panel == 'tso500' && params.genome_version.toString() == '38') {
params.ref_data_panel_data_path = Constants.TSO500_PANEL_38_PATH
}
}

// When fastp UMI is enabled, MarkDups UMI should be as well
if (params.fastp_umi && (!params.containsKey('markdups_umi') || !params.markdups_umi)) {
params.markdups_umi = true
}

// Set the MarkDups UMI duplex delimiter to '_' when the following conditions are met:
// - both fastp and MarkDups UMI processing enabled
// - fastp is using a duplex UMI location type (per_index or per_read)
// - no MarkDups duplex delimiter has been set
def fastp_and_markdups_umi = params.fastp_umi && params.markdups_umi
def fastp_duplex_location = params.containsKey('fastp_umi_location') && (params.fastp_umi_location == 'per_index' || params.fastp_umi_location == 'per_read')
def no_umi_duplex_delim = !params.containsKey('markdups_umi_duplex_delim') || !params.markdups_umi_duplex_delim
if (fastp_and_markdups_umi && fastp_duplex_location && no_umi_duplex_delim) {
params.markdups_umi_duplex_delim = '_'
}

}

def stages = Processes.getRunStages(
Expand All @@ -93,12 +109,18 @@ class WorkflowMain {
}

// Final point to set any default to avoid access to undefined parameters during nf-validation
if (!params.containsKey('panel')) { params.panel = null }
if (!params.containsKey('ref_data_genome_alt')) { params.ref_data_genome_alt = null }
if (!params.containsKey('ref_data_genome_gtf')) { params.ref_data_genome_gtf = null }
if (!params.containsKey('ref_data_hla_slice_bed')) { params.ref_data_hla_slice_bed = null }
if (!params.containsKey('ref_data_panel_data_path')) { params.ref_data_panel_data_path = null }
if (!params.containsKey('ref_data_virusbreakenddb_path')) { params.ref_data_virusbreakenddb_path = null }
if (!params.containsKey('panel')) params.panel = null
if (!params.containsKey('ref_data_genome_alt')) params.ref_data_genome_alt = null
if (!params.containsKey('ref_data_genome_gtf')) params.ref_data_genome_gtf = null
if (!params.containsKey('ref_data_hla_slice_bed')) params.ref_data_hla_slice_bed = null
if (!params.containsKey('ref_data_panel_data_path')) params.ref_data_panel_data_path = null
if (!params.containsKey('ref_data_virusbreakenddb_path')) params.ref_data_virusbreakenddb_path = null

// Additionally set selected parameters to non-null placeholder values ('', 0, or -1) that are treated as unset, to avoid passing null values as inputs
if (!params.containsKey('fastp_umi_location')) params.fastp_umi_location = ''
if (!params.containsKey('fastp_umi_length')) params.fastp_umi_length = 0
if (!params.containsKey('fastp_umi_skip')) params.fastp_umi_skip = -1
if (!params.containsKey('markdups_umi_duplex_delim')) params.markdups_umi_duplex_delim = ''

}

Expand Down Expand Up @@ -191,15 +213,20 @@ class WorkflowMain {

} else if (!Constants.PANELS_DEFINED.contains(params.panel)) {

def panels = Constants.PANELS_DEFINED.join('\n - ')
log.error "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
" The ${params.panel} is not defined. Currently, the available panels are:\n" +
" - ${panels}\n" +
"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
Nextflow.exit(1)
if (params.containsKey('force_panel') && params.force_panel) {
log.warn "provided panel ${params.panel} does not have built-in support but forcing to proceed"
} else {
def panels = Constants.PANELS_DEFINED.join('\n - ')
log.error "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
" The ${params.panel} panel does not have built-in support. Currently, the\n" +
" available supported panels are:\n" +
" - ${panels}\n\n" +
" Please adjust the --panel argument or override with --force_panel.\n" +
"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
Nextflow.exit(1)
}

}

}

if (params.ref_data_genome_alt !== null) {
Expand All @@ -219,8 +246,38 @@ class WorkflowMain {
"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
Nextflow.exit(1)
}

}

// UMI parameters

def fastp_umi_args_set_any = params.fastp_umi_location || params.fastp_umi_length || params.fastp_umi_skip >= 0
if (fastp_umi_args_set_any && !params.fastp_umi) {
log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
" Detected use of fastp UMI parameters but fastp UMI processing has not been enabled.\n" +
" Please review your configuration and set the fastp_umi flag or otherwise adjust\n" +
" accordingly.\n" +
"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
Nextflow.exit(1)
}

def fastp_umi_args_set_all = params.fastp_umi_location && params.fastp_umi_length && params.fastp_umi_skip >= 0
if (params.fastp_umi && !fastp_umi_args_set_all) {
log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
" Refusing to run fastp UMI processing without having any UMI params configured.\n" +
" Please review your configuration and appropriately set all fastp_umi_* parameters.\n" +
"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
Nextflow.exit(1)
}

if (params.markdups_umi_duplex_delim && params.markdups_umi === false) {
log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
" Detected use of MarkDups UMI parameters but MarkDups UMI processing has not been\n" +
" enabled. Please review your configuration and set the markdups_umi flag or\n" +
" otherwise adjust accordingly.\n" +
"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
Nextflow.exit(1)
}

}

Expand Down
15 changes: 13 additions & 2 deletions modules/local/fastp/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ process FASTP {
input:
tuple val(meta), path(reads_fwd), path(reads_rev)
val max_fastq_records
val umi_location
val umi_length
val umi_skip

output:
tuple val(meta), path('*_R1.fastp.fastq.gz'), path('*_R2.fastp.fastq.gz'), emit: fastq
Expand All @@ -21,6 +24,14 @@ process FASTP {
script:
def args = task.ext.args ?: ''

def split_by_lines_arg = max_fastq_records > 0 ? "--split_by_lines ${4 * max_fastq_records}" : ''

def umi_args_list = []
if (umi_location) umi_args_list.add("--umi_loc ${umi_location}")
if (umi_length) umi_args_list.add("--umi_len ${umi_length}")
if (umi_skip >= 0) umi_args_list.add("--umi_skip ${umi_skip}")
def umi_args = umi_args_list ? '--umi ' + umi_args_list.join(' ') : ''

"""
fastp \\
${args} \\
Expand All @@ -30,8 +41,8 @@ process FASTP {
--disable_length_filtering \\
--disable_adapter_trimming \\
--disable_trim_poly_g \\
--split_by_lines ${4 * max_fastq_records} \\
--thread ${task.cpus} \\
${umi_args} \\
${split_by_lines_arg} \\
--out1 ${meta.sample_id}_${meta.library_id}_${meta.lane}_R1.fastp.fastq.gz \\
--out2 ${meta.sample_id}_${meta.library_id}_${meta.lane}_R2.fastp.fastq.gz
Expand Down
11 changes: 10 additions & 1 deletion modules/local/fastp/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,16 @@ input:
pattern: "*.{fastq.gz}"
- max_fastq_records:
type: integer
description: Maximum number of reads per file
description: Maximum number of reads per file (optional)
- umi_location:
type: string
description: UMI location type (optional)
- umi_length:
type: integer
description: UMI length (optional)
- umi_skip:
type: integer
description: UMI base skip (optional)
output:
- meta:
type: map
Expand Down
16 changes: 11 additions & 5 deletions modules/local/markdups/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ process MARKDUPS {
path genome_fai
path genome_dict
path unmap_regions
val has_umis
val umi_enable
val umi_duplex_delim

output:
tuple val(meta), path('*.markdups.bam'), path('*.markdups.bam.bai'), emit: bam
Expand All @@ -29,7 +30,12 @@ process MARKDUPS {

def xmx_mod = task.ext.xmx_mod ?: 0.95

def umi_flags = has_umis ? '-umi_enabled -umi_duplex -umi_duplex_delim +' : ''
def form_consensus_arg = umi_enable ? '' : '-form_consensus'

def umi_args_list = []
if (umi_enable) umi_args_list.add('-umi_enabled')
if (umi_duplex_delim) umi_args_list.add("-umi_duplex -umi_duplex_delim ${umi_duplex_delim}")
def umi_args = umi_args_list ? umi_args_list.join(' ') : ''

"""
markdups \\
Expand All @@ -42,8 +48,8 @@ process MARKDUPS {
-sample ${meta.sample_id} \\
-input_bam ${bams.join(',')} \\
\\
-form_consensus \\
${umi_flags} \\
${form_consensus_arg} \\
${umi_args} \\
\\
-unmap_regions ${unmap_regions} \\
-ref_genome ${genome_fasta} \\
Expand All @@ -68,7 +74,7 @@ process MARKDUPS {
touch ${meta.sample_id}.markdups.bam.bai
touch ${meta.sample_id}.duplicate_freq.tsv
if [[ -n "${has_umis}" ]]; then
if [[ -n "${umi_enable}" ]]; then
touch ${meta.sample_id}.umi_coord_freq.tsv
touch ${meta.sample_id}.umi_edit_distance.tsv
touch ${meta.sample_id}.umi_nucleotide_freq.tsv
Expand Down
7 changes: 5 additions & 2 deletions modules/local/markdups/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,12 @@ input:
type: file
description: Unmapped regions file
pattern: "*.{tsv}"
- has_umis:
- umi_enable:
type: boolean
description: Flag indicating presence of UMIs in reads
description: Flag to enable UMI processing
- umi_duplex_delim:
type: string
description: UMI duplex delimiter
output:
- meta:
type: map
Expand Down
18 changes: 14 additions & 4 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,27 @@ params {
// Workflow mode
mode = null

// Force options
force_genome = false
force_panel = false

// Read processing and alignment options
max_fastq_records = 10000000
fastp_umi = false
markdups_umi = false

// Process configuration
processes_manual = false
processes_include = null
processes_exclude = null

// Reference genome information; iGenomes is effectively disabled but retained for linting
genome = null
force_genome = false
igenomes_base = 's3://ngi-igenomes/igenomes/'
igenomes_ignore = true
hmf_genomes_base = 'https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes'

// Other reference data and config exposed to user on CLI
max_fastq_records = 10000000

isofox_counts = null
isofox_gc_ratios = null
isofox_gene_ids = null
Expand Down Expand Up @@ -266,7 +272,7 @@ plugins {
validation {
// NOTE(SW): entries here generally have conditional defaults or are for internal use only
defaultIgnoreParams = [
"genomes",
'genomes',
'hmf_data_paths',
'panel_data_paths',
'ref_data_genome_fasta',
Expand All @@ -287,6 +293,10 @@ validation {
'ref_data_hmf_data_path',
'ref_data_panel_data_path',
'ref_data_virusbreakenddb_path',
'fastp_umi_length',
'fastp_umi_location',
'fastp_umi_skip',
'markdups_umi_duplex_delim',
]

lenientMode = true
Expand Down
Loading

0 comments on commit fc1bbf0

Please sign in to comment.