-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmostFreq.py
38 lines (26 loc) · 861 Bytes
/
mostFreq.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import os
import sys
from collections import Counter
# Read the whitespace-separated tokens of the input file named on the command
# line; the context manager closes the handle as soon as the read finishes.
with open(sys.argv[1]) as infile:
    filedata = infile.read().split()
def mostFreq(text,k):
    """Return every most frequent k-mer in the DNA string ``text``.

    A k-mer is a substring of length ``k``; overlapping occurrences are all
    counted.

    Args:
        text: The string to scan.
        k: Length of the substrings to count; must be positive.

    Returns:
        A list of all k-mers tied for the highest occurrence count. On
        Python 3.7+ (insertion-ordered dicts) they appear in order of first
        occurrence in ``text``. The list is empty when ``text`` is shorter
        than ``k``, since there are no k-mers to count.

    Raises:
        ValueError: If ``k`` is not positive.
    """
    if k <= 0:
        raise ValueError('k must be a positive integer, got %r' % (k,))
    # Count every length-k window of text, overlaps included.
    kmerCounts = Counter(text[i:i+k] for i in range(len(text)-k+1))
    # No windows exist when len(text) < k; max() would raise on the empty
    # sequence, so report "no k-mers" instead.
    if not kmerCounts:
        return []
    maxCount = max(kmerCounts.values())
    return [kmer for kmer, val in kmerCounts.items() if val == maxCount]
# Input layout: the first token is the DNA text, the second is k.
text = filedata[0]
k = int(filedata[1])
mostFreqKmers = mostFreq(text,k)

# Write the space-separated k-mers to an 'ANS_'-prefixed file, then open it.
# Only the file name gets the prefix, so an input path with a directory
# component (e.g. data/in.txt) maps to data/ANS_in.txt instead of a path
# under the nonexistent directory ANS_data/.
in_dir, in_name = os.path.split(sys.argv[1])
fnew = os.path.join(in_dir, 'ANS_' + in_name)
# The context manager closes the file even if the write fails.
with open(fnew, 'w') as fh:
    fh.write(' '.join(mostFreqKmers))
import webbrowser
webbrowser.open(fnew)