forked from jclev-uga/SWEEP
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvcf.py
115 lines (89 loc) · 3.38 KB
/
vcf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import sys
import re
##usage: python haplotypeSNPs.py input.txt output.txt
def main():
infile = open(sys.argv[1], "rU")
outfile = open(sys.argv[2], "w")
num = int(sys.argv[3])
win = int(sys.argv[4])
file = {}
x = 0
for line in infile:
line = line.strip()
if line[0].startswith('#'):
outfile.write(line)
outfile.write('\n')
continue
file[x] = line
x = x + 1
for line in range(x-2):
result = window(file[line],file[line+1],num,win)
if result == 0:
continue
elif result == 1:
outfile.write(file[line])
outfile.write('\n')
elif result == 2:
outfile.write(file[line+1])
outfile.write('\n')
infile.close()
outfile.close()
def window(first,second,num,win):
one=re.findall('(\d+\,\d+\,\d+)', first)
two=re.findall('(\d+\,\d+\,\d+)', second)
first=first.split()
second=second.split()
if first[0] == second[0]:
span = (int(second[1]) - int(first[1]))
if span <= win:
genotype1 = get_genotype(one,len(one))
genotype2 = get_genotype(two,len(two))
if genotype1 == 0 or genotype2 == 0:
return 0
if len(one) < num or len(two) < num:
return 0
if len(one) > num or len(two) > num:
return 0
for x in range(len(one)):
if (genotype1[x] == genotype2[x]):
continue
else:
if genotype1[x] == -1 or genotype2[x] == -1:
continue
if genotype1[x] == 1:
if genotype2[x] == 3:
continue
decision = 1
for i in range(len(one)):
if genotype2[i] != 2:
decision = 0
if decision == 1:
return 1
if genotype2[x] == 1:
if genotype1[x] == 3:
continue
decision = 1
for i in range(len(one)):
if genotype1[i] != 2:
decision = 0
if decision == 1:
return 2
return 0
def get_genotype(line, number):
genotype = []
for x in range(number):
prob = line[x].split(',')
if len(prob) > 3:
return 0
if int(prob[0]) == 0 and int(prob[1]) >= 0 and int(prob[2]) > 0:
genotype.append(1)
if int(prob[0]) > 0 and int(prob[1]) == 0 and int(prob[2]) > 0:
genotype.append(2)
if int(prob[0]) > 0 and int(prob[1]) >= 0 and int(prob[2]) == 0:
genotype.append(3)
if int(prob[0]) == 0 and int(prob[1]) >= 0 and int(prob[2]) == 0:
genotype.append(-1)
if int(prob[0]) > 0 and int(prob[1]) > 0 and int(prob[2]) > 0:
genotype.append(-1)
return genotype
main()