
Merge pull request #15 from UMCUGenetics/develop
v1.1.0
rernst authored Aug 31, 2020
2 parents bbd6415 + d726f61 commit daecc28
Showing 8 changed files with 200 additions and 85 deletions.
2 changes: 1 addition & 1 deletion NextflowModules
Submodule NextflowModules updated 98 files
+11 −13 BWA-Mapping/bwa-0.7.17_samtools-1.9/Mapping.nf
+5 −5 BWA/0.7.17/BWASW.nf
+5 −7 BWA/0.7.17/Index.nf
+7 −7 BWA/0.7.17/MEM.nf
+18 −0 ControlFREEC/11.5/AssessSignificance.nf
+36 −0 ControlFREEC/11.5/Freec.nf
+18 −0 ControlFREEC/11.5/MakeGraph.nf
+18 −0 ControlFREEC/11.5/MakeKaryotype.nf
+6 −6 FastQC/0.11.5/FastQC.nf
+5 −5 FastQC/0.11.8/FastQC.nf
+25 −0 Fastp/0.14.1/Fastp.nf
+25 −0 Fastp/0.20.1/Fastp.nf
+33 −0 GATK/3.8-1-0-gf15c1c3ef/BaseRecalibrator.nf
+19 −0 GATK/3.8-1-0-gf15c1c3ef/CatVariants.nf
+26 −6 GATK/3.8-1-0-gf15c1c3ef/CombineVariants.nf
+24 −0 GATK/3.8-1-0-gf15c1c3ef/GenotypeGVCFs.nf
+36 −11 GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf
+11 −12 GATK/3.8-1-0-gf15c1c3ef/IndelRealigner.nf
+10 −11 GATK/3.8-1-0-gf15c1c3ef/RealignerTargetCreator.nf
+5 −5 GATK/3.8-1-0-gf15c1c3ef/SelectVariants.nf
+5 −5 GATK/3.8-1-0-gf15c1c3ef/UnifiedGenotyper.nf
+9 −9 GATK/3.8-1-0-gf15c1c3ef/VariantFiltration.nf
+16 −16 GATK/4.1.3.0/BaseRecalibration.nf
+17 −17 GATK/4.1.3.0/BaseRecalibrationTable.nf
+13 −13 GATK/4.1.3.0/CollectMultipleMetrics.nf
+14 −14 GATK/4.1.3.0/CollectWGSMetrics.nf
+14 −15 GATK/4.1.3.0/CombineGVCFs.nf
+12 −12 GATK/4.1.3.0/GatherBaseRecalibrationTables.nf
+14 −16 GATK/4.1.3.0/GenotypeGVCFs.nf
+16 −16 GATK/4.1.3.0/HaplotypeCaller.nf
+14 −14 GATK/4.1.3.0/MergeVCFs.nf
+14 −15 GATK/4.1.3.0/SamToFastq.nf
+14 −14 GATK/4.1.3.0/SelectVariants.nf
+15 −17 GATK/4.1.3.0/SplitIntervals.nf
+23 −23 GATK/4.1.3.0/SplitNCigarReads.nf
+14 −14 GATK/4.1.3.0/VariantAnnotator.nf
+21 −21 GATK/4.1.3.0/VariantFiltration.nf
+13 −13 HTSeq/0.11.3/Count.nf
+26 −0 Manta/1.6.0/Manta.nf
+9 −12 MultiQC/1.5/MultiQC.nf
+6 −6 MultiQC/1.8/MultiQC.nf
+18 −0 MultiQC/1.9/MultiQC.nf
+5 −6 Picard/2.22.0/CollectHsMetrics.nf
+5 −6 Picard/2.22.0/CollectMultipleMetrics.nf
+18 −0 Picard/2.22.0/CollectWgsMetrics.nf
+5 −7 Picard/2.22.0/CreateSequenceDictionary.nf
+5 −6 Picard/2.22.0/EstimateLibraryComplexity.nf
+12 −12 Picard/2.22.0/IntervalListTools.nf
+20 −0 Preseq/2.0.3/LCExtrap.nf
+0 −20 Preseq/2.0.3/Lc_extrap.nf
+14 −1 README.md
+38 −14 RSeQC/3.0.1/RSeQC.nf
+0 −60 STAR-Fusion/1.8.1/StarFusion.nf
+0 −33 STAR/2.6.0c/AlignReads.nf
+0 −30 STAR/2.6.0c/GenomeGenerate.nf
+31 −27 STAR/2.7.3a/AlignReads.nf
+17 −17 STAR/2.7.3a/GenomeGenerate.nf
+59 −0 STARFusion/1.8.1/STARFusion.nf
+6 −6 Salmon/1.2.1/Index.nf
+21 −21 Salmon/1.2.1/Quant.nf
+11 −12 Salmon/1.2.1/QuantMerge.nf
+8 −8 Sambamba/0.6.8/Index.nf
+10 −10 Sambamba/0.6.8/MarkDup.nf
+10 −11 Sambamba/0.6.8/MergeBams.nf
+5 −5 Sambamba/0.7.0/Flagstat.nf
+18 −0 Sambamba/0.7.0/Index.nf
+10 −11 Sambamba/0.7.0/Markdup.nf
+5 −5 Sambamba/0.7.0/Merge.nf
+5 −5 Sambamba/0.7.0/ViewSort.nf
+6 −6 Sambamba/0.7.0/ViewUnmapped.nf
+5 −5 Samtools/1.10/Flagstat.nf
+10 −10 Samtools/1.10/MPileup.nf
+5 −5 Samtools/1.10/View.nf
+54 −0 SortMeRNA/4.2.0/SortMeRNA.nf
+0 −51 SortMeRNA/4.2.0/SortMeRna.nf
+40 −0 Subread/2.0.0/FeatureCounts.nf
+16 −14 TrimGalore/0.6.5/TrimGalore.nf
+21 −0 UCSC/377/GenePredToBed.nf
+5 −5 UCSC/377/GtfToGenePred.nf
+6 −6 Utils/CreateIntervaList.nf
+29 −0 Utils/MergeFastqLanes.nf
+4 −4 Utils/fastq.nf
+0 −29 Utils/mergeFastqLanes.nf
+8 −8 Utils/template.nf
+23 −20 bash/MergeFastQs.nf
+0 −24 fastp/0.14.1/Fastp.nf
+0 −24 fastp/0.20.1/Fastp.nf
+12 −13 fgbio/1.1.0/CallMolecularConsensusReads.nf
+0 −23 fgbio/1.1.0/DemuxFastqs.nf
+11 −11 fgbio/1.1.0/FilterConsensusReads.nf
+10 −10 fgbio/1.1.0/SortBam.nf
+36 −37 python/CountUmiFamilies.nf
+80 −83 python/MakeUmiBam.nf
+16 −16 snpEff/4.3t/SnpSiftAnnotate.nf
+13 −17 snpEff/4.3t/SnpSiftDbnsfp.nf
+15 −19 snpEff/4.3t/snpEffFilter.nf
+0 −36 subread/2.0.0/FeatureCounts.nf
+0 −21 ucsc/377/genePredToBed/GenePredToBed.nf
7 changes: 6 additions & 1 deletion README.md
@@ -14,7 +14,12 @@ curl -s https://get.nextflow.io | bash

#### Running WES workflow
```bash
nextflow run WES.nf -c WES.config --fastq_path <fastq_dir_path> --outdir <output_dir_path> [-profile slurm|mac]
nextflow run WES.nf -c WES.config --fastq_path <fastq_dir_path> --outdir <output_dir_path> --email <email> [-profile slurm|mac]
```

#### Running WES Fingerprint workflow
```bash
nextflow run WES_Fingerprint.nf -c WES.config --bam_path <bam_dir_path> --outdir <output_dir_path> --email <email> [-profile slurm|mac]
```
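
For illustration, a filled-in version of the new fingerprint command above could look like the following; the directories, run name, and e-mail address are placeholders, not values from this commit:
```bash
# Hypothetical paths and address; substitute your own BAM directory, output directory and recipient.
nextflow run WES_Fingerprint.nf -c WES.config \
    --bam_path /data/runs/run01/bam \
    --outdir /data/analysis/run01 \
    --email user@example.com \
    -profile slurm
```
Both usage lines now include `--email`; the workflow's `onComplete` handler sends the success/failure notification to that address, and the last component of `--outdir` is used as the analysis id.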

#### Create Kinship container
11 changes: 2 additions & 9 deletions WES.config
@@ -48,17 +48,10 @@ process {
}
}

withLabel: BWA_0_7_17_MEM {
withLabel: BWA_0_7_17_Mem {
cpus = 10
memory = '10G'
time = '180m'
}

withLabel: Sambamba_0_7_0_ViewSort {
cpus = 10
memory = '40G'
time = '40m'
clusterOptions = "$params.cluster_options --gres=tmpspace:10G"
time = '3h'
}

withLabel: Sambamba_0_7_0_MarkdupMerge {
130 changes: 63 additions & 67 deletions WES.nf
@@ -4,8 +4,7 @@ nextflow.preview.dsl=2
include extractFastqPairFromDir from './NextflowModules/Utils/fastq.nf'

// Mapping modules
include MEM as BWA_MEM from './NextflowModules/BWA/0.7.17/MEM.nf' params(genome:"$params.genome", optional: '-c 100 -M')
include ViewSort as Sambamba_ViewSort from './NextflowModules/Sambamba/0.7.0/ViewSort.nf'
include BWAMapping from './NextflowModules/BWA-Mapping/bwa-0.7.17_samtools-1.9/Mapping.nf' params(genome_fasta: "$params.genome", optional: '-c 100 -M')
include MarkdupMerge as Sambamba_MarkdupMerge from './NextflowModules/Sambamba/0.7.0/Markdup.nf'

// IndelRealignment modules
@@ -15,7 +14,7 @@ include ViewUnmapped as Sambamba_ViewUnmapped from './NextflowModules/Sambamba/0
include Merge as Sambamba_Merge from './NextflowModules/Sambamba/0.7.0/Merge.nf'

// HaplotypeCaller modules
include IntervalListTools as PICARD_IntervalListTools from './NextflowModules/Picard/2.22.0/IntervalListTools.nf' params(scatter_count:'500')
include IntervalListTools as PICARD_IntervalListTools from './NextflowModules/Picard/2.22.0/IntervalListTools.nf' params(scatter_count:"500", optional: "")
include HaplotypeCaller as GATK_HaplotypeCaller from './NextflowModules/GATK/3.8-1-0-gf15c1c3ef/HaplotypeCaller.nf' params(gatk_path: "$params.gatk_path", genome:"$params.genome", optional: "$params.gatk_hc_options")
include VariantFiltrationSnpIndel as GATK_VariantFiltration from './NextflowModules/GATK/3.8-1-0-gf15c1c3ef/VariantFiltration.nf' params(
gatk_path: "$params.gatk_path", genome:"$params.genome", snp_filter: "$params.gatk_snp_filter", snp_cluster: "$params.gatk_snp_cluster", indel_filter: "$params.gatk_indel_filter"
@@ -50,10 +49,9 @@ if (!ped_file.exists()) {

workflow {
// Mapping
BWA_MEM(fastq_files)
Sambamba_ViewSort(BWA_MEM.out)
BWAMapping(fastq_files)
Sambamba_MarkdupMerge(
Sambamba_ViewSort.out.map{
BWAMapping.out.map{
sample_id, rg_id, bam_file, bai_file -> [sample_id, bam_file]
}.groupTuple()
)
@@ -94,14 +92,12 @@ workflow {
Sambamba_Flagstat(Sambamba_Merge.out)
GetStatsFromFlagstat(Sambamba_Flagstat.out.collect())

MultiQC(
Channel.empty().mix(
FastQC.out.flatten().map{file -> [analysis_id, file]},
PICARD_CollectMultipleMetrics.out.flatten().map{file -> [analysis_id, file]},
PICARD_EstimateLibraryComplexity.out.map{file -> [analysis_id, file]},
PICARD_CollectHsMetrics.out.map{file -> [analysis_id, file]}
).groupTuple()
)
MultiQC(analysis_id, Channel.empty().mix(
FastQC.out.collect(),
PICARD_CollectMultipleMetrics.out.collect(),
PICARD_EstimateLibraryComplexity.out.collect(),
PICARD_CollectHsMetrics.out.collect()
))

TrendAnalysisTool(
GATK_CombineVariants.out.map{id, vcf_file, idx_file -> [id, vcf_file]}
@@ -111,7 +107,7 @@ workflow {
)

//SavePedFile
SavePedFile()
SavePedFile()

// Repository versions
VersionLog()
@@ -146,13 +142,13 @@ process ExonCov {
shell = ['/bin/bash', '-eo', 'pipefail']

input:
tuple analysis_id, sample_id, file(bam_file), file(bai_file)
tuple(analysis_id, sample_id, path(bam_file), path(bai_file))

script:
"""
source ${params.exoncov_path}/venv/bin/activate
python ${params.exoncov_path}/ExonCov.py import_bam --threads ${task.cpus} --overwrite --exon_bed ${params.dxtracks_path}/${params.exoncov_bed} ${analysis_id} ${bam_file}
"""
"""
source ${params.exoncov_path}/venv/bin/activate
python ${params.exoncov_path}/ExonCov.py import_bam --threads ${task.cpus} --overwrite --exon_bed ${params.dxtracks_path}/${params.exoncov_bed} ${analysis_id} ${bam_file}
"""
}

process ExomeDepth {
@@ -162,16 +158,16 @@ process ExomeDepth {
shell = ['/bin/bash', '-eo', 'pipefail']

input:
tuple analysis_id, sample_id, file(bam_file), file(bai_file), refset
tuple(analysis_id, sample_id, path(bam_file), path(bai_file), refset)

output:
tuple sample_id, refset, file("UMCU_${refset}_${sample_id}*.vcf"), file("HC_${refset}_${sample_id}*.vcf"), file("${sample_id}*.xml"), file("UMCU_${refset}_${sample_id}*.log"), file("HC_${refset}_${sample_id}*.log"), file("UMCU_${refset}_${sample_id}*.igv"), file("HC_${refset}_${sample_id}*.igv")
tuple(sample_id, refset, path("UMCU_${refset}_${sample_id}*.vcf"), path("HC_${refset}_${sample_id}*.vcf"), path("${sample_id}*.xml"), path("UMCU_${refset}_${sample_id}*.log"), path("HC_${refset}_${sample_id}*.log"), path("UMCU_${refset}_${sample_id}*.igv"), path("HC_${refset}_${sample_id}*.igv"))

script:
"""
source ${params.exomedepth_path}/venv/bin/activate
python ${params.exomedepth_path}/run_ExomeDepth.py callcnv ./ ${bam_file} ${analysis_id} ${sample_id} ${refset}
"""
"""
source ${params.exomedepth_path}/venv/bin/activate
python ${params.exomedepth_path}/run_ExomeDepth.py callcnv ./ ${bam_file} ${analysis_id} ${sample_id} ${refset}
"""
}


@@ -186,19 +182,19 @@ process Kinship {
shell = ['/bin/bash', '-euo', 'pipefail']

input:
tuple analysis_id, file(vcf_file), file(vcf_index)
tuple(analysis_id, path(vcf_file), path(vcf_index))

output:
tuple analysis_id, file("${analysis_id}.kinship"), file("${analysis_id}.kinship_check.out")
tuple(analysis_id, path("${analysis_id}.kinship"), path("${analysis_id}.kinship_check.out"))

script:
"""
${params.vcftools_path}/vcftools --vcf ${vcf_file} --plink
${params.plink_path}/plink --file out --make-bed --noweb
${params.king_path}/king -b plink.bed --kinship
cp king.kin0 ${analysis_id}.kinship
python ${baseDir}/assets/check_kinship.py ${analysis_id}.kinship ${ped_file} > ${analysis_id}.kinship_check.out
"""
"""
${params.vcftools_path}/vcftools --vcf ${vcf_file} --plink
${params.plink_path}/plink --file out --make-bed --noweb
${params.king_path}/king -b plink.bed --kinship
cp king.kin0 ${analysis_id}.kinship
python ${baseDir}/assets/check_kinship.py ${analysis_id}.kinship ${ped_file} > ${analysis_id}.kinship_check.out
"""
}

process GetStatsFromFlagstat {
@@ -208,15 +204,15 @@ process GetStatsFromFlagstat {
shell = ['/bin/bash', '-euo', 'pipefail']

input:
file(flagstat_files: "*")
path(flagstat_files)

output:
file('run_stats.txt')
path('run_stats.txt')

script:
"""
python ${baseDir}/assets/get_stats_from_flagstat.py ${flagstat_files} > run_stats.txt
"""
"""
python ${baseDir}/assets/get_stats_from_flagstat.py ${flagstat_files} > run_stats.txt
"""
}

process CreateHSmetricsSummary {
@@ -226,15 +222,15 @@ process CreateHSmetricsSummary {
shell = ['/bin/bash', '-euo', 'pipefail']

input:
file(hsmetrics_files: "*")
path(hsmetrics_files)

output:
file('HSMetrics_summary.txt')
path('HSMetrics_summary.txt')

script:
"""
python ${baseDir}/assets/create_hsmetrics_summary.py ${hsmetrics_files} > HSMetrics_summary.txt
"""
"""
python ${baseDir}/assets/create_hsmetrics_summary.py ${hsmetrics_files} > HSMetrics_summary.txt
"""
}

process TrendAnalysisTool {
@@ -244,13 +240,13 @@ process TrendAnalysisTool {
shell = ['/bin/bash', '-eo', 'pipefail']

input:
tuple analysis_id, file(input_files: "*")
tuple(analysis_id, path(input_files))

script:
"""
source ${params.trend_analysis_path}/venv/bin/activate
python ${params.trend_analysis_path}/trend_analysis.py upload processed_data ${analysis_id} .
"""
"""
source ${params.trend_analysis_path}/venv/bin/activate
python ${params.trend_analysis_path}/trend_analysis.py upload processed_data ${analysis_id} .
"""
}

process SavePedFile {
@@ -259,12 +255,12 @@ process SavePedFile {
shell = ['/bin/bash', '-euo', 'pipefail']

output:
file("*.ped")
path("*.ped")

script:
"""
cp ${ped_file} ./
"""
"""
cp ${ped_file} ./
"""
}

process VersionLog {
@@ -274,23 +270,23 @@ process VersionLog {
shell = ['/bin/bash', '-eo', 'pipefail']

output:
file('repository_version.log')
path('repository_version.log')

script:
"""
echo 'DxNextflowWes' > repository_version.log
git --git-dir=${workflow.projectDir}/.git log --pretty=oneline --decorate -n 2 >> repository_version.log
"""
echo 'DxNextflowWes' > repository_version.log
git --git-dir=${workflow.projectDir}/.git log --pretty=oneline --decorate -n 2 >> repository_version.log
echo 'Dx_tracks' >> repository_version.log
git --git-dir=${params.dxtracks_path}/.git log --pretty=oneline --decorate -n 2 >> repository_version.log
echo 'Dx_tracks' >> repository_version.log
git --git-dir=${params.dxtracks_path}/.git log --pretty=oneline --decorate -n 2 >> repository_version.log
echo 'ExonCov' >> repository_version.log
git --git-dir=${params.exoncov_path}/.git log --pretty=oneline --decorate -n 2 >> repository_version.log
echo 'ExonCov' >> repository_version.log
git --git-dir=${params.exoncov_path}/.git log --pretty=oneline --decorate -n 2 >> repository_version.log
echo 'ExomeDepth' >> repository_version.log
git --git-dir=${params.exomedepth_path}/../.git log --pretty=oneline --decorate -n 2 >> repository_version.log
echo 'ExomeDepth' >> repository_version.log
git --git-dir=${params.exomedepth_path}/../.git log --pretty=oneline --decorate -n 2 >> repository_version.log
echo 'TrendAnalysis' >> repository_version.log
git --git-dir=${params.trend_analysis_path}/.git log --pretty=oneline --decorate -n 2 >> repository_version.log
"""
echo 'TrendAnalysis' >> repository_version.log
git --git-dir=${params.trend_analysis_path}/.git log --pretty=oneline --decorate -n 2 >> repository_version.log
"""
}
55 changes: 55 additions & 0 deletions WES_Fingerprint.nf
@@ -0,0 +1,55 @@
#!/usr/bin/env nextflow
nextflow.preview.dsl=2

include extractBamFromDir from './NextflowModules/Utils/bam.nf'

// Fingerprint modules
include UnifiedGenotyper as GATK_UnifiedGenotyper from './NextflowModules/GATK/3.8-1-0-gf15c1c3ef/UnifiedGenotyper.nf' params(gatk_path: "$params.gatk_path", genome:"$params.genome", optional: "--intervals $params.dxtracks_path/$params.fingerprint_target --output_mode EMIT_ALL_SITES")

def bam_files = extractBamFromDir(params.bam_path)
def analysis_id = params.outdir.split('/')[-1]

workflow {
// GATK UnifiedGenotyper (fingerprint)
GATK_UnifiedGenotyper(bam_files)
}

// Workflow completion notification
workflow.onComplete {
// HTML Template
def template = new File("$baseDir/assets/workflow_complete.html")
def binding = [
runName: analysis_id,
workflow: workflow
]
def engine = new groovy.text.GStringTemplateEngine()
def email_html = engine.createTemplate(template).make(binding).toString()

// Send email
if (workflow.success) {
def subject = "WES Fingerprint Workflow Successful: ${analysis_id}"
sendMail(to: params.email, subject: subject, body: email_html)
} else {
def subject = "WES Fingerprint Workflow Failed: ${analysis_id}"
sendMail(to: params.email, subject: subject, body: email_html)
}
}

process VersionLog {
// Custom process to log repository versions
tag {"VersionLog ${analysis_id}"}
label 'VersionLog'
shell = ['/bin/bash', '-eo', 'pipefail']

output:
path('repository_version.log')

script:
"""
echo 'DxNextflowWes' > repository_version.log
git --git-dir=${workflow.projectDir}/.git log --pretty=oneline --decorate -n 2 >> repository_version.log
echo 'Dx_tracks' >> repository_version.log
git --git-dir=${params.dxtracks_path}/.git log --pretty=oneline --decorate -n 2 >> repository_version.log
"""
}
9 changes: 6 additions & 3 deletions assets/check_kinship.py
@@ -27,8 +27,7 @@ def parse_ped(ped_file):
return samples


def check_kinship(kinship_file, samples):
kinship_setting = [0.177, 0.354]
def check_kinship(kinship_file, samples, kinship_setting):
kinship_errors = False
print_kinship('sample_1', 'sample_2', 'kinship', 'related', 'type', 'status') # header
for line in kinship_file:
@@ -83,12 +82,16 @@ def print_kinship(sample_1, sample_2, kinship, fam_status, relation_status, kins
parser.add_argument('ped_file', type=argparse.FileType('r'), help='PED file')
arguments = parser.parse_args()

# settings
kinship_setting = [0.177, 0.354]

# Parse ped file and check kinship
samples = parse_ped(arguments.ped_file)
kinship_errors = check_kinship(arguments.kinship_file, samples)
kinship_errors = check_kinship(arguments.kinship_file, samples, kinship_setting)

# Print summary
if kinship_errors:
print("\n# WARNING: Kinship errors found.")
else:
print("\n# No kinship errors found.")
print("# Used kinship check settings: {0}".format(kinship_setting))
8 changes: 4 additions & 4 deletions run_nextflow_wes.sh
@@ -40,11 +40,11 @@ module load Java/1.8.0_60
if [ \$? -eq 0 ]; then
echo "Nextflow done."
echo "Running Nextflow clean"
/hpc/diaggen/software/tools/nextflow clean -f -k -q
echo "Zip work directory"
zip -r -m -q work.zip work
find work -type f | egrep "\.(command|exitcode)" | zip -@ -q work.zip
echo "Remove work directory"
rm -r work
echo "Creating md5sum"
find -type f -not -iname 'md5sum.txt' -exec md5sum {} \; > md5sum.txt