-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathgatk_realign.scr
executable file
·106 lines (83 loc) · 2.39 KB
/
gatk_realign.scr
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/bin/sh
#$ -cwd
# -l mem=8G,time=16::
# This is the first step in the pipeline. It performs realignment of the bam file given as input.
GLOBAL="global_config.sh"
if [[ -e $GLOBAL ]]
then
. $GLOBAL
else
echo "Global config file not found. Exiting."
exit 1
fi
USAGE="Usage: $0 -I <Input bam file> -R <Reference fasta> -D <DBSNP file> -L \"#:#-#\""
while getopts I:L:R:D:h o
do case "$o" in
I) INP="$OPTARG";;
L) CHR="$OPTARG";;
R) REF="$OPTARG";;
D) DBSNP="$OPTARG";;
h) echo $USAGE
exit 1;;
esac
done
# Enforcing the need for providing chromosome list as input due to amount of
# time taken to run the step. Realignment will not be permitted without that input.
# To remove the restriction, change this line below.
if [[ $INP == "" || $REF == "" || $DBSNP == "" || $CHR == "" && $SGE_TASK_ID == "" ]]
then
echo $USAGE
exit 1
fi
DATAPATH=`dirname "$INP"`
CHR_NAME=`$SAMTOOLS idxstats $INP | grep chr`
if [[ $CHR == "" ]]
then
CHR="${SGE_TASK_ID}"
fi
if [[ `echo "$CHR" | grep chr` == "" && -n $CHR_NAME ]]
then
CHR="chr${CHR}"
fi
$GATK \
-L $CHR \
-T RealignerTargetCreator \
-I $INP \
-R $REF \
-D $DBSNP \
-o $INP$CHR.forRealigner.intervals 2>&1 > $DATAPATH/realignment$CHR.output
if [[ $? != 0 || `grep "$ERRORMESSAGE" "$DATAPATH"/realignment"$CHR".output "$DATAPATH"/pipeline.output` != "" ]]
then
echo "${CHR}RealignerTargetCreator FAILED"
exit 1
fi
$GATK \
-L $CHR \
-I $INP \
-R $REF \
-D $DBSNP \
-T IndelRealigner \
-compress 0 \
-targetIntervals $INP$CHR.forRealigner.intervals \
-o $INP$CHR.cleaned.bam 2>&1 >> $DATAPATH/realignment$CHR.output
# Need to implement for known-only/lane level cleaning?
rm -f $INP$CHR.forRealigner.intervals
if [[ $? != 0 || `grep "$ERRORMESSAGE" "$DATAPATH"/realignment"$CHR".output "$DATAPATH"/pipeline.output` != "" ]]
then
echo "${CHR}IndelRealigner FAILED"
exit 1
fi
$FIXMATE INPUT=$INP$CHR.cleaned.bam OUTPUT=$INP$CHR.fixed.bam SO=coordinate VALIDATION_STRINGENCY=SILENT 2>&1 >> $DATAPATH/realignment$CHR.output
rm -f $INP$CHR.cleaned.bam
if [[ $? != 0 ]]
then
echo "${CHR}Realigning: Call to FixMate FAILED"
exit 1
fi
$SAMTOOLS index $INP$CHR.fixed.bam 2>&1 >> $DATAPATH/realignment$CHR.output
if [[ $? != 0 ]]
then
echo "${CHR}Realigning: Samtools indexing FAILED"
exit 1
fi
echo "${CHR}: samtools index complete"