-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgeneratengrams.py
71 lines (60 loc) · 1.87 KB
/
generatengrams.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import os
# Largest n-gram order built by ngrams(): it produces one count table for
# each order from 1 up to and including this value.
limit = 3
def ngrams(lines, max_n=None):
    """Count the n-grams of every order from 1 to ``max_n`` in ``lines``.

    Args:
        lines: iterable of token lists (each line must be a ``list`` of
            strings, because it is concatenated with the padding lists).
        max_n: highest n-gram order to build. Defaults to the module-level
            ``limit`` when omitted, which is the historical behaviour.

    Returns:
        A list with ``max_n`` dicts; the dict at index ``n - 1`` maps each
        n-gram of order ``n`` (tokens joined with ``'_'``) to the number of
        times it occurs across all lines.

    Each line is padded with ``n`` ``'<START>'`` tokens in front and ``n``
    ``'<END>'`` tokens behind. Windows are taken from every start position
    except the last one, so the window consisting only of ``'<END>'``
    tokens is never counted while the all-``'<START>'`` window always is.
    """
    if max_n is None:
        max_n = limit

    counts_by_order = []
    for n in range(1, max_n + 1):
        counts = {}
        for line in lines:
            padded = ['<START>'] * n + line + ['<END>'] * n
            # len(padded) - n start positions: deliberately stops one short
            # of the final (all-<END>) window, matching existing models.
            for start in range(len(padded) - n):
                key = '_'.join(padded[start:start + n])
                # dict.get keeps the update O(1); testing against .keys()
                # is a linear list scan on Python 2.
                counts[key] = counts.get(key, 0) + 1
        counts_by_order.append(counts)
    return counts_by_order
def cleanLines(lines):
    """Tokenise raw text lines into lists of lower-cased words, in place.

    Args:
        lines: list of strings, e.g. the result of ``file.readlines()``.

    Returns:
        The same list object, whose items have been replaced by lists of
        lower-cased, whitespace-separated tokens.

    ``str.split()`` with no separator already discards the trailing
    newline, so no character is sliced off first. Slicing ``[:-1]``
    unconditionally would eat the last real character of a final line that
    has no terminating newline.
    """
    # Slice-assign so callers holding a reference to ``lines`` still see the
    # cleaned content.
    lines[:] = [[word.lower() for word in line.split()] for line in lines]
    return lines
def score(uinput, tngramsdict):
    """Score a user utterance against every intent's n-gram tables.

    Args:
        uinput: the raw utterance string; it is lower-cased and split on
            whitespace before its n-grams are built with ``ngrams``.
        tngramsdict: mapping of intent name to the list of per-order
            n-gram count dicts for that intent (the shape ``ngrams``
            returns).

    Returns:
        A list of ``(intent_name, score)`` tuples, one per key of
        ``tngramsdict`` in its iteration order. For each n-gram order the
        score grows by ``overlap / (2 * number of distinct intent n-grams)``
        where ``overlap`` is the number of distinct n-grams shared by the
        utterance and the intent.

    An order with no shared n-gram contributes nothing. This happens, for
    example, when an intent was built from an empty file; previously that
    case raised ``ZeroDivisionError``.
    """
    query_tables = ngrams([uinput.lower().split()])

    scores = []
    for key in tngramsdict:
        intent_tables = tngramsdict[key]
        fscore = 0.0
        for order, query_counts in enumerate(query_tables):
            intent_counts = intent_tables[order]
            # Distinct n-grams present on both sides. The earlier code
            # computed this twice under the names "precision" and "recall".
            overlap = sum(1 for gram in query_counts if gram in intent_counts)
            if not overlap:
                # Nothing in common at this order: skip instead of dividing
                # by zero.
                continue
            # NOTE(review): both terms divide the intent's n-gram count by
            # the overlap, so the utterance's own n-gram count never enters
            # the score. Kept as is to leave existing rankings untouched;
            # confirm whether a true precision term was intended.
            ratio = len(intent_counts) / float(overlap)
            fscore += 1.0 / (ratio + ratio)
        scores.append((key, fscore))
    return scores
def init():
    """Build the n-gram tables for every ``.dat`` file under ``./intents/``.

    Returns:
        A dict keyed by intent name, where the name is the file name with
        its last dot-separated component removed and the remaining
        components concatenated. Each value is the result of ``ngrams`` on
        the file's cleaned lines.
    """
    intents_dir = './intents/'
    tables_by_intent = {}
    for filename in os.listdir(intents_dir):
        if not filename.endswith('.dat'):
            continue
        with open(intents_dir + filename) as handle:
            raw_lines = handle.readlines()
        tokenised = cleanLines(raw_lines)
        name_parts = filename.split('.')[:-1]
        tables_by_intent[''.join(name_parts)] = ngrams(tokenised)
    return tables_by_intent
def ngrammatch(uinput):
    """Score ``uinput`` against all intents found on disk.

    The intent tables are rebuilt through ``init`` on every call and then
    handed to ``score`` together with the utterance.

    Returns:
        Whatever ``score`` returns: a list of ``(intent_name, score)``
        tuples.
    """
    intent_tables = init()
    return score(uinput, intent_tables)