-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapriori.py
147 lines (124 loc) · 3.1 KB
/
apriori.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
items1 = [1,2,3,4,5]
Ts1 = [
[1,3,4],
[2,3,5],
[1,2,3,5],
[2,5]
]
items = range(1,10)
Ts = [
[1,2,5],
[2,4],
[2,3],
[1,2,4],
[1,3],
[2,3],
[1,3],
[1,2,3,5],
[1,2,3]
]
s1 = int(0.5 * len(Ts))
s = 2
print "Transactions: {}. Probability thresh: {}".format(Ts, s)
def init_apriori(Ts, s):
#return F0
F0 = []
counts = [0] * len(items)
for tran in Ts:
for i, item in enumerate(items):
if item in tran:
counts[i] += 1
for i, c in enumerate(counts):
if c >= s:
F0.append([items[i]])
return F0
import copy
def can_prune(candidate, cur_F):
for i, item in enumerate(candidate):
tmp = copy.deepcopy(candidate)
tmp.pop(i)
if tmp not in cur_F:
return True
return False
def gen_apriori_set(cur_F):
C = []
#gen and prune
# gen
for f in cur_F :
for i in items :
tmp = copy.deepcopy(f)
if i not in f:
tmp.append(i)
#tmp = sorted(tmp)
if not can_prune(tmp, cur_F):#prune
C.append(tmp)
return C
def can_join(f1, f2):
assert len(f1) == len(f2)
for i in range(len(f1)-1):
if f1[i] != f2[i]:
return False
if f1[len(f1)-1] < f2[len(f1)-1]:
return True
return False
def join(f1, f2):
assert len(f1) == len(f2)
re = []
for i in range(len(f1)-1):
re.append(f1[i])
re.append(f1[len(f1)-1])
re.append(f2[len(f2) - 1])
return re
def gen_apriori_set1(cur_F):
C = []
#gen and prune
# gen
for f in cur_F :
for f1 in cur_F:
if can_join(f, f1):
tmp = join(f, f1)
#tmp = sorted(tmp)
if not can_prune(tmp, cur_F):#prune
C.append(tmp)
return C
def is_in_transaction(s, tran):
for item in s:
if item not in tran:
return False
return True
def apriori_algo(Ts, s, optim=True):
# init F0
F0 = init_apriori(Ts, s)
print F0
Fs = [F0]
cur_f = F0
while len(cur_f) > 0:
#Gen apriori set K + 1: Ck+1 (Candidate)
if optim :
C = gen_apriori_set1(cur_f) #Candidate
else:
C = gen_apriori_set(cur_f)
print "Candidate: ",C
counts_c = [0]*len(C)
for t in Ts:
for i, candidate in enumerate(C) :
#count(f1)++ | fi in Ck+1
if is_in_transaction(candidate, t):
counts_c[i] += 1
# delete count(fi) < s
for i in range(len(C)):
if counts_c[i] < s:
C[i] = None
cur_f = []
for i in C:
if i is not None :
cur_f.append(i)
if cur_f != [] :
Fs.append(cur_f)
print "cur_f", cur_f
#return {Fi|i:1->k}
return Fs
if __name__ == "__main__":
print apriori_algo(Ts, s, False)
print "------------------------"
print apriori_algo(Ts, s)