forked from jclev-uga/SWEEP
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSWEEP.pl
154 lines (125 loc) · 6.6 KB
/
SWEEP.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#!/usr/bin/env perl
use strict;
use warnings;
use threads;
no strict qw(subs refs);
use FindBin;
use lib ("$FindBin::Bin/../PerlLib");
use File::Basename;
use Cwd;
use Carp;
use Getopt::Long qw(:config no_ignore_case pass_through);
use threads;
use Data::Dumper;
my $usage = <<__EOUSAGE__;
######################################################################################################################
# #
# REQUIRED: #
# -b <string> sorted/indexed bam file of alignments #
# -g <string> indexed genome #
# -o <string> filtered vcf output name #
# #
# OPTIONAL: #
# -s <int> genotypic liklihood filtering stringency: default (0): 0 -> low, 1 -> medium, 2 -> high #
# -d <int> minimum read depth filter per genotype: default (4): 1 - 100 #
# -r <float> minimum ratio of alternate allele to reference allele: default (0): 0 - 2 #
# -w <int> window size in bp: default (100) should be <= read length for optimal quality #
# --no_cleanup does not delete intermediate vcf files: default (FALSE) #
# --ultimate performs ultimate filtering for all homozygous calls #
# checks all reads mapped to base for any alterate allele (will take longer) #
# REQUIRES Biopython with pysam module installed in your path!! #
# -vcf <string> Optional use an existing vcf file as input (bam files still need to be included) #
# -num_genotypes <int> if vcf file is used, enter number of genotypes #
# -h help (usage) #
######################################################################################################################
# #
# #
# To use this script there are certain requirements: #
# #
# (1) Samtools v0.1.9 and Bcftools v0.1.9 must be in your path -> Will not work with later versions #
# (2) To do this easily you can add the path in your shell script #
# (3) alignment Bam files must be sorted and indexed #
# (4) include as many bam files as you need separated with '-b' #
# (5) Example command line: #
# #
# perl SWEEP.pl -b gen1.sorted.bam -b gen2.sorted.bam -g genome.fa -o output.vcf -s 1 -d 5 -r 0.25 #
# #
######################################################################################################################
__EOUSAGE__
;
my @bamfile=();
my $genome;
my $output;
my $stringent=0;
my $filter=4;
my $lh;
my $ratio=0;
my $help_flag;
my $no_cleanup = 0;
my $ultimate = 0;
my $window = 100;
my $contingency = 0;
my $vcf = '';
my$vcfnum = 0;
&GetOptions ( 'h' => \$help_flag,
'b=s@' => \@bamfile,
'g=s' => \$genome,
'o=s' => \$output,
's:i' => \$stringent,
'd:i' => \$filter,
'r:f' => \$ratio,
'w:i' => \$window,
'no_cleanup' => \$no_cleanup,
'ultimate' => \$ultimate,
'vcf=s' => \$vcf,
'num_genotypes:i' => \$vcfnum
);
if (@ARGV) {
die "Error, don't understand arguments: @ARGV ";
}
if ($help_flag) { die $usage; }
open (STDERR, ">&STDOUT"); ## capturing stderr and stdout in a single stdout stream
main: {
if ($stringent == 0) {
$lh = 20; }
if ($stringent == 1) {
$lh = 125; }
if ($stringent == 2) {
$lh = 200; }
my $input_bam = join(' ', @bamfile);
my $length = 0+@bamfile;
my $depth = $filter*$length;
if ($vcf eq "") {
&process_cmd("samtools mpileup -u -f $genome $input_bam | bcftools view -bcv - > temp.raw.bcf");
&process_cmd("bcftools view temp.raw.bcf > prefilter.vcf");
&process_cmd("python scripts/Haplotype.py prefilter.vcf filter.vcf $length $window"); } else {
&process_cmd("python scripts/vcf.py $vcf filter.vcf $vcfnum $window");
}
if ($ultimate) {
&process_cmd("python scripts/Homozygous.py $genome filter.vcf ultimate.vcf '$input_bam'");
&process_cmd("python scripts/FilterDepth.py ultimate.vcf $output $depth $ratio $contingency"); }
unless ($ultimate) {
&process_cmd("python scripts/FindSNP.py filter.vcf stringent.vcf $lh $length");
&process_cmd("python scripts/FilterDepth.py stringent.vcf $output $depth $ratio $contingency"); }
unless ($no_cleanup) {
# unlink "temp.raw.bcf";
unlink "prefilter.vcf";
unlink "filter.vcf";
unlink "stringent.vcf";
}
unless ($no_cleanup) { if ($vcf eq '') { unlink "temp.raw.bcf"; }}
exit(0);
}
####
sub process_cmd {
my ($cmd) = @_;
print "CMD: $cmd\n";
my $start_time = time();
my $ret = system($cmd);
my $end_time = time();
if ($ret) {
die "Error, cmd: $cmd died with ret $ret";
}
print "CMD finished (" . ($end_time - $start_time) . " seconds)\n";
return;
}