Skip to content

Commit

Permalink
Merge pull request #261 from TDMedina/release_version
Browse files Browse the repository at this point in the history
WIP updates to the WES pipeline in preparation for future Gearshift deployment.
  • Loading branch information
RoanKanninga authored Oct 31, 2019
2 parents a148d0a + a29d625 commit 1adae1f
Show file tree
Hide file tree
Showing 45 changed files with 1,055 additions and 548 deletions.
Binary file added docs/attachments/NGS_DNA_Beta4.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
11 changes: 9 additions & 2 deletions parameters.csv
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ caddVersion,CADD/v1.3
convadingVersion,CoNVaDING/1.1.6
cutadaptVersion,cutadapt/1.13-${toolChain}-Python-2.7.10
fastqcVersion,FastQC/0.11.5-Java-1.8.0_74
gatkVersion,GATK/3.7-Java-1.8.0_74
gatkVersion,GATK/4.1.2.0-Java-1.8.0_144-unlimited_JCE
gavinPlusVersion,gavin-plus/1.5.0-Java-1.8.0_74
hashdeepVersion,hashdeep/4.4-foss-2015b
iolibVersion,io_lib/1.14.9-${toolChain}
Expand Down Expand Up @@ -39,7 +39,7 @@ vepVersion,VEP/90.5
verifyBamIDVersion,verifyBamID/1.1.3-${toolChain}
xhmmVersion,xhmm/2016-01-04-cc14e528d909-${toolChain}
hpoVersion,90
gatkJar,GenomeAnalysisTK.jar
gatkJar,gatk-package-4.1.2.0-local.jar
gavinPlusJar,gavin-plus-1.5.0-RELEASE.jar
picardJar,picard.jar
sambambaTool,sambamba_${sambambaVer}
Expand Down Expand Up @@ -139,6 +139,9 @@ alignedSortedBamIdx,${fileWithIndexId}.sorted.bam.bai
sampleMergedBam,${sampleNameID}.merged.bam
sampleMergedBai,${sampleNameID}.merged.bai
sampleMergedBamIdx,${sampleNameID}.merged.bam.bai
sampleMergedRecalibratedBam,${sampleNameID}.merged.recalibrated.bam
sampleMergedRecalibratedBai,${sampleNameID}.merged.recalibrated.bai
sampleMergedRecalibratedBamIdx,${sampleNameID}.merged.recalibrated.bam.bai
dedupBam,${projectResultsDir}/alignment/${externalSampleID}.merged.dedup.bam
dedupBamIdx,${projectResultsDir}/alignment/${externalSampleID}.merged.dedup.bam.bai
dedupBamMetrics,${projectResultsDir}/qc/statistics/${externalSampleID}.merged.dedup.bam
Expand Down Expand Up @@ -222,6 +225,8 @@ projectVariantCallsSnpEff_ExAC_GoNL_CADD_Annotated,${projectPrefix}.batch-${batc
projectVariantCallsSnpEff_ExAC_GoNL_CADD_GATK_Annotated,${projectPrefix}.batch-${batchID}.variant.calls.snpeff.exac.gonl.cadd.gatk.vcf
sampleVariantsMergedIndelsVcf,${sampleNameID}.annotated.indels.vcf
sampleVariantsMergedSnpsVcf,${sampleNameID}.annotated.snps.vcf
projectVariantsIndelsOnlyVcf,${projectPrefix}.annotated.indels.vcf
projectVariantsSnpsOnlyVcf,${projectPrefix}.annotated.snps.vcf
projectVariantsMerged,${projectPrefix}.variant.calls.GATK.vcf
projectVariantsMergedIdx,${projectPrefix}.variant.calls.GATK.vcf.idx
projectVariantsMergedSorted,${projectPrefix}.variant.calls.GATK.sorted.vcf
Expand Down Expand Up @@ -252,6 +257,8 @@ projectVariantCallsVEP_Annotated,${projectPrefix}.batch-${batchID}.variant.calls
### 25, 26
sampleVariantsMergedIndelsFilteredVcf,${sampleNameID}.annotated.filtered.indels.vcf
sampleVariantsMergedSnpsFilteredVcf,${sampleNameID}.annotated.filtered.snps.vcf
projectVariantsIndelsOnlyFilteredVcf,${projectPrefix}.annotated.filtered.indels.vcf
projectVariantsSnpsOnlyFilteredVcf,${projectPrefix}.annotated.filtered.snps.vcf
sampleFinalVcf,${intermediateDir}/${externalSampleID}.final.vcf
projectFinalVcf,${projectPrefix}.final.vcf

Expand Down
7 changes: 7 additions & 0 deletions parameters_gearshift.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
queue,ll
prmName,prm01
prmHost,localhost
root,/
appsDir,${root}/apps/
toolChain_max,foss-2018b
toolChain_min,GCCcore-7.3.0
407 changes: 407 additions & 0 deletions parameters_new.csv

Large diffs are not rendered by default.

9 changes: 6 additions & 3 deletions parameters_resources_exome.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
mem_protocols_AnnotateVcf,4gb
mem_protocols_ApplyBaseRecalibration,12gb
mem_protocols_Autotest,4gb
mem_protocols_BaseRecalibrator,10gb
mem_protocols_BwaAlignAndSortSam,13gb
Expand Down Expand Up @@ -28,7 +29,7 @@ mem_protocols_Manta,5gb
mem_protocols_MarkDuplicates,10gb
mem_protocols_MergeBam,2gb
mem_protocols_MergeBatches,6gb
mem_protocols_MergeIndelsAndSnpsSample,4gb
mem_protocols_SplitVcfBySample,4gb
mem_protocols_MergeIndelsAndSnpsProject,4gb
mem_protocols_MultiQC,5gb
mem_protocols_PrepareFastQ,1gb
Expand All @@ -41,6 +42,7 @@ mem_protocols_VcfToTable,1gb
mem_protocols_XHMM,4gb
ppn_protocols_AnnotateVcf,4
ppn_protocols_Autotest,1
ppn_protocols_ApplyBaseRecalibration,4
ppn_protocols_BaseRecalibrator,8
ppn_protocols_BwaAlignAndSortSam,4
ppn_protocols_CartegeniaFiltering,2
Expand Down Expand Up @@ -70,7 +72,7 @@ ppn_protocols_MarkDuplicates,5
ppn_protocols_MergeBam,10
ppn_protocols_MergeBatches,2
ppn_protocols_MergeIndelsAndSnpsProject,1
ppn_protocols_MergeIndelsAndSnpsSample,1
ppn_protocols_SplitVcfBySample,1
ppn_protocols_MultiQC,1
ppn_protocols_PrepareFastQ,4
ppn_protocols_SnpEff,2
Expand All @@ -81,6 +83,7 @@ ppn_protocols_VariantGenotyping,2
ppn_protocols_VcfToTable,1
ppn_protocols_XHMM,1
walltime_protocols_AnnotateVcf,05:59:00
walltime_protocols_ApplyBaseRecalibration,05:59:00
walltime_protocols_Autotest,05:59:00
walltime_protocols_BaseRecalibrator,05:59:00
walltime_protocols_BwaAlignAndSortSam,05:59:00
Expand Down Expand Up @@ -111,7 +114,7 @@ walltime_protocols_MarkDuplicates,05:59:00
walltime_protocols_MergeBam,05:59:00
walltime_protocols_MergeBatches,05:59:00
walltime_protocols_MergeIndelsAndSnpsProject,05:59:00
walltime_protocols_MergeIndelsAndSnpsSample,05:59:00
walltime_protocols_SplitVcfBySample,05:59:00
walltime_protocols_MultiQC,05:59:00
walltime_protocols_PrepareFastQ,05:59:00
walltime_protocols_SnpEff,05:59:00
Expand Down
42 changes: 42 additions & 0 deletions protocols/ApplyBaseRecalibration.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#Parameter mapping
#string groupname
#string tmpName
#string tmpDataDir
#string tempDir
#string intermediateDir
#string projectResultsDir
#string logsDir

#string gatkVersion
#string indexFile

#string project

#string externalSampleID
#string dedupBam

#string mergedBamRecalibratedTable
#string sampleMergedRecalibratedBam


#
# Apply a previously computed base quality score recalibration table to the
# de-duplicated BAM file(s) with GATK ApplyBQSR. The result is written to a
# temporary location first and only moved to its final path afterwards.
#
# Reads:  gatkVersion, sampleMergedRecalibratedBam, intermediateDir, dedupBam,
#         tempDir, indexFile, mergedBamRecalibratedTable
# Writes: ${sampleMergedRecalibratedBam} and the matching .bai index file.
#

#Load GATK module.
module load "${gatkVersion}"
module list

# Make a tmp folder for this step, which will be the output location.
# makeTmpDir is defined outside this protocol; its result is read from MC_tmpFile.
makeTmpDir "${sampleMergedRecalibratedBam}" "${intermediateDir}"
tmpSampleMergedRecalibratedBam="${MC_tmpFile}"

# Create the list of unique BAM files for input.
# Every '--input=<bam>' argument is stored as its own array element, so a path
# is never word-split or glob-expanded when it is handed to gatk.
inputs=()
while IFS= read -r bam
do
	if [[ -n "${bam}" ]]
	then
		inputs+=("--input=${bam}")
	fi
done < <(printf '%s\n' "${dedupBam[@]}" | sort -u)

# Without any input BAM the gatk call cannot succeed; fail with a clear message.
if [[ "${#inputs[@]}" -eq 0 ]]
then
	echo "ERROR: no input BAM file(s) found in dedupBam." >&2
	exit 1
fi

gatk --java-options "-XX:ParallelGCThreads=1 -Djava.io.tmpdir=${tempDir} -Xmx9g" ApplyBQSR \
--reference="${indexFile}" \
"${inputs[@]}" \
--bqsr-recal-file="${mergedBamRecalibratedTable}" \
--output="${tmpSampleMergedRecalibratedBam}"

# Move the BAM and the index that sits next to it (same name, .bai instead of .bam).
mv "${tmpSampleMergedRecalibratedBam}" "${sampleMergedRecalibratedBam}"
mv "${tmpSampleMergedRecalibratedBam%.bam}.bai" "${sampleMergedRecalibratedBam%.bam}.bai"
echo "moved ${tmpSampleMergedRecalibratedBam} ${sampleMergedRecalibratedBam}"
21 changes: 8 additions & 13 deletions protocols/BaseRecalibrator.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,9 @@
#string groupname
#string tmpDataDir
#string gatkVersion
#string gatkJar
#string dbSnp
#string sampleMergedBam
#string sambambaVersion
#string sambambaTool
#string mergedBamRecalibratedTable

module load "${gatkVersion}"
Expand All @@ -36,25 +34,22 @@ array_contains () {
return $in
}

INPUTS=()
for bamFile in "${sampleMergedBam[@]}"
do
array_contains INPUTS "$bamFile" || INPUTS+=("-I $bamFile") # If bamFile does not exist in array add it
array_contains INPUTBAMS "$bamFile" || INPUTBAMS+=("-I $bamFile") # If bamFile does not exist in array add it
array_contains INPUTS "--input=${bamFile}" || INPUTS+=("--input=${bamFile}") # If bamFile does not exist in array add it
done

makeTmpDir "${mergedBamRecalibratedTable}" "${intermediateDir}"
tmpMergedBamRecalibratedTable="${MC_tmpFile}"

"${sambambaTool}" index "${sampleMergedBam}"
sambamba index "${sampleMergedBam}"


java -XX:ParallelGCThreads=7 -Djava.io.tmpdir="${tempDir}" -Xmx9g -jar "${EBROOTGATK}/${gatkJar}" \
-T BaseRecalibrator \
-R "${indexFile}" \
${INPUTS[@]} \
-nct 8 \
-knownSites "${dbSnp}" \
-o "${tmpMergedBamRecalibratedTable}"
gatk --java-options "-XX:ParallelGCThreads=7 -Djava.io.tmpdir=${tempDir} -Xmx9g" BaseRecalibrator \
--reference="${indexFile}" \
${INPUTS[@]} \
--known-sites="${dbSnp}" \
--output="${tmpMergedBamRecalibratedTable}"

mv "${tmpMergedBamRecalibratedTable}" "${mergedBamRecalibratedTable}"
echo "moved ${tmpMergedBamRecalibratedTable} ${mergedBamRecalibratedTable}"
26 changes: 12 additions & 14 deletions protocols/BwaAlignAndSortSam.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,7 @@ set -o pipefail
#string intermediateDir
#string filePrefix
#string alignedSortedBam
#string picardVersion
#string picardJar
#string cutadaptVersion
#string gatkVersion

makeTmpDir "${alignedSam}"
tmpAlignedSam="${MC_tmpFile}"
Expand All @@ -34,7 +32,7 @@ tmpAlignedSortedBam="${MC_tmpFile}"

#Load module BWA
module load "${bwaVersion}"
module load "${picardVersion}"
module load "${gatkVersion}"
module list

READGROUPLINE="@RG\tID:${filePrefix}\tPL:illumina\tLB:${externalSampleID}\tSM:${externalSampleID}"
Expand All @@ -56,11 +54,11 @@ then
"${fastq2}" \
> "${tmpAlignedSam}" &

java -Djava.io.tmpdir="${tempDir}" -Xmx12G -XX:ParallelGCThreads=2 -jar "${EBROOTPICARD}/${picardJar}" SortSam \
INPUT="${tmpAlignedSam}" \
OUTPUT="${tmpAlignedSortedBam}" \
SORT_ORDER=coordinate \
CREATE_INDEX=true
gatk --java-options "-Djava.io.tmpdir=${tempDir} -Xmx12G -XX:ParallelGCThreads=2" SortSam \
--INPUT="${tmpAlignedSam}" \
--OUTPUT="${tmpAlignedSortedBam}" \
--SORT_ORDER coordinate \
--CREATE_INDEX=true

echo "moving ${tmpAlignedSortedBam} ${alignedSortedBam}"
mv "${tmpAlignedSortedBam}" "${alignedSortedBam}"
Expand All @@ -80,11 +78,11 @@ else
"${srBarcodeRecodedFqGz}" \
> "${tmpAlignedSam}" &

java -Djava.io.tmpdir="${tempDir}" -Xmx12G -XX:ParallelGCThreads=2 -jar "${EBROOTPICARD}/${picardJar}" SortSam \
INPUT="${tmpAlignedSam}" \
OUTPUT="${tmpAlignedSortedBam}" \
SORT_ORDER=coordinate \
CREATE_INDEX=true
gatk --java-options "-Djava.io.tmpdir=${tempDir} -Xmx12G -XX:ParallelGCThreads=2" SortSam \
--INPUT="${tmpAlignedSam}" \
--OUTPUT="${tmpAlignedSortedBam}" \
--SORT_ORDER coordinate \
--CREATE_INDEX=true

echo "moving ${tmpAlignedSortedBam} ${alignedSortedBam}"
mv "${tmpAlignedSortedBam}" "${alignedSortedBam}"
Expand Down
17 changes: 8 additions & 9 deletions protocols/CartegeniaFiltering.sh
Original file line number Diff line number Diff line change
Expand Up @@ -66,15 +66,14 @@ then
## make bedfile for only getting the variants of interest out of the big vcf
grep -v '^#' "${outputStep9_1ToSpecTree}" | awk 'BEGIN {OFS="\t"}{print $1,($2-1),$2,"GENE"}' > "${name}.allVariants.bed"

java -jar ${EBROOTGATK}/GenomeAnalysisTK.jar \
-T SelectVariants \
-R "${indexFile}" \
-V "${projectPrefix}.final.vcf" \
-o "${name}.InclAllelesParents.vcf" \
-sn "${child}" \
-sn "${father}" \
-sn "${mother}" \
-L "${name}.allVariants.bed"
gatk SelectVariants \
--reference="${indexFile}" \
--variant="${projectPrefix}.final.vcf" \
--output="${name}.InclAllelesParents.vcf" \
--sample-name="${child}" \
--sample-name="${father}" \
--sample-name="${mother}" \
--intervals="${name}.allVariants.bed"

## removing unnecessary information => keep only GT field
bcftools annotate -x ^FORMAT/GT "${name}.InclAllelesParents.vcf" | awk -v ch=${childPos} -v fa=${fatherPos} -v mo=${motherPos} 'BEGIN {OFS="\t"}{if ($0 !~ /^#/){split($ch,a,"/");split($fa,b,"/");split($mo,c,"/"); print $1,$2,$3,$4,$5,$6,$7,a[1],a[2],b[1],b[2],c[1],c[2],$8}}' | sort -V > "${name}.splittedAlleles.txt"
Expand Down
Loading

0 comments on commit 1adae1f

Please sign in to comment.