-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathtest.py
67 lines (55 loc) · 1.82 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import sys
def read_fasta(fname):
seqs = []
seq = []
name = ""
namemap = {}
namelist = []
idx = 0
for line in open(fname, "r"):
if line[0] == ">":
if len(seq) > 0:
seqs.append("".join(seq))
namemap[name] = idx
namelist.append(name)
seq = []
idx += 1
name = line.lstrip(">").split()[0].rstrip()
else: seq.append(line.rstrip())
if len(seq) > 0:
seqs.append("".join(seq))
namemap[name] = idx
namelist.append(name)
return seqs, namemap, namelist
base_for = "ACGT"
base_rev = "TGCA"
comp_tab = str.maketrans(base_for, base_rev)
def revcomp(s): return s.translate(comp_tab)[::-1]
correct = 0
incorrect = 0
fname = sys.argv[1]
ksize = int(sys.argv[2])
seqs, namemap, namelist = read_fasta(fname)
with open("B.mtx", "r") as f:
next(f)
next(f)
for line in f.readlines():
tokens = tuple(int(v) for v in line.rstrip().split())
idxQ = tokens[0]
idxT = tokens[1]
if idxQ == idxT: continue
seqQ = seqs[idxQ]
seqT = seqs[idxT]
begQs = (tokens[2], tokens[4])
begTs = (tokens[3], tokens[5])
seedQs = tuple(seqQ[b:b+ksize] for b in begQs)
seedTs = tuple(seqT[b:b+ksize] for b in begTs)
if seedQs[0] != seedTs[0] and seedQs[0] != revcomp(seedTs[0]):
incorrect += 1
sys.stdout.write("{}\t{}\t{}\t{}\tno_match\n".format(idxQ, idxT, begQs[0], begTs[0]))
else: correct += 1
if seedQs[1] != seedTs[1] and seedQs[1] != revcomp(seedTs[1]):
incorrect += 1
sys.stdout.write("{}\t{}\t{}\t{}\tno_match\n".format(idxQ, idxT, begQs[1], begTs[1]))
else: correct += 1
print("correct={}\nincorrect={}\n".format(correct, incorrect))