# Developed By
# Khashayar Azerakhsh
# SMBH - SmbHosseini.ir
import copy
import math
import random
import time
import matplotlib.pyplot as plt
import pandas
"""
Doc
m is fuzzy partition matrix exponent for controlling the degree of fuzzy overlap. m > 1 and typically m = 2
||*|| Similarity Level (Distance)
Cluster count as C
Xk is k'th sample
V - Center of Cluster
Uik - Similarity level of i'th sample in k'th cluster
U is a Matrix from Uik {0, 1} - sum of columns has to equals to 1
Algorithm Levels
1. initialize m, C, U -> guessing first clusters
2. Calculate Center of Clusters (V)
3. Calculate U from Clusters in level 2
4. if UI+1-UI||€e|| End Algorithm else go to level 2
"""
# ===== Mapper =====
def MapData(file):
    """
    Import the data into a list from the file name passed as an argument.
    The file should contain only the data, separated by spaces (or change the delimiter used in split as required).
    """
    data = []
    with open(str(file), 'r') as f:
        for line in f:
            current = line.split()
            for j in range(len(current)):
                current[j] = float(current[j])
            data.append(current)
    print("Data Imported")
    return data
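# A hypothetical example of the text format MapData expects (one sample per line,
# values separated by spaces):
#   1.2 3.4
#   5.6 7.8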
def MapExcelData(file):
    """
    Import data from Excel or CSV files.
    """
    # If the Excel file has multiple sheets:
    # xl = pandas.ExcelFile(file)
    # print(xl.sheet_names)
    # tmp = xl.parse('Sheet1')
    # CSV files:
    # csv = pandas.read_csv(file, header=None, sep=' ', dtype={0: float, 1: float})
    # Excel files without a header row:
    # data = pandas.read_excel(file, header=None, dtype={0: float, 1: float})
    data = pandas.read_excel(file, dtype={0: float, 1: float})
    # DataFrame.get_values() was removed in newer pandas; values.tolist() returns the same data as nested lists
    DataList = data.values.tolist()
    print("Data Imported")
    return DataList
# Shuffle function for Reducer
def ShuffleData(data):
"""
    Randomise the data, keeping a record of the shuffle order so it can be undone later.
"""
order = list(range(len(data)))
random.shuffle(order)
new_data = [[] for _ in range(len(data))]
for index in range(len(order)):
new_data[index] = data[order[index]]
return new_data, order
# Sort to Original for Reducer
def DeShuffleData(data, order):
"""
    Restore the original order of the data;
    pass the order list returned by ShuffleData() as an argument.
"""
new_data = [[] for _ in range(len(data))]
for index in range(len(order)):
new_data[order[index]] = data[index]
return new_data
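# Step 4 of the doc in code: the loop in FuzzyReducer stops once every
# |U_new[i][j] - U_old[i][j]| is within Epsilon.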
def EndCondition(U, U_old):
"""
    This is the end condition: it triggers when the U matrix
    stops changing significantly between successive iterations.
"""
# used for end condition
Epsilon = 0.00001
for i in range(len(U)):
for j in range(len(U[0])):
if abs(U[i][j] - U_old[i][j]) > Epsilon:
return False
return True
# Similarity Matrix in cluster numbers range
def Initialise_U(data, cluster_number):
"""
    Randomise U such that each row sums to 1. MAX sets the granularity of the random memberships.
"""
# used for randomising U
MAX = 10000.0
U = []
for _ in range(len(data)):
current = []
rand_sum = 0.0
for _ in range(cluster_number):
tmp = random.randint(1, int(MAX))
current.append(tmp)
rand_sum += tmp
for j in range(cluster_number):
current[j] = current[j] / rand_sum
U.append(current)
return U
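# For example, with cluster_number = 3 one initial row of U might look like
# [0.21, 0.48, 0.31] (hypothetical values); each row sums to 1 by construction.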
def Distance(point, center):
"""
    Calculate the Euclidean distance between two points (each given as a list of coordinates).
    Returns -1 if the dimensions do not match.
"""
if len(point) != len(center):
return -1
tmp = 0.0
for i in range(len(point)):
tmp += abs(point[i] - center[i]) ** 2
return math.sqrt(tmp)
# Set U's members to 1 & 0
def normalise_U(U):
"""
    This de-fuzzifies U at the end of the clustering: each point is assigned to
    the cluster whose membership value is maximum.
"""
for i in range(len(U)):
maximum = max(U[i])
for j in range(len(U[0])):
U[i][j] = 0 if U[i][j] != maximum else 1
return U
# Graphical show
def color(cluster_number):
return ["#%06x" % random.randint(0, 0xFFFFFF) for _ in range(cluster_number)]
def animate(data, U, cluster_number, colors):
# plt.close("all")
cluster = [[] for _ in range(cluster_number)]
for i in range(len(U)):
for j in range(cluster_number):
if U[i][j] == 1:
cluster[j].append(data[i])
plt.ion()
plt.figure()
for i in range(cluster_number):
x_list_0 = [x for [x, y] in cluster[i]]
y_list_0 = [y for [x, y] in cluster[i]]
plt.plot(x_list_0, y_list_0, colors[i], marker='o', ls='')
plt.gca().set_aspect('equal', adjustable='box')
plt.axis('equal')
plt.show(block=False)
time.sleep(.5)
def FuzzyReducer(data, cluster_number, m=2):
"""
This is the main function, it would calculate the required center,
and return the final normalised membership matrix U.
"""
# ===== Mapper ======
# initialise the U matrix:
U = Initialise_U(data, cluster_number)
print(f"Cluster Numbers = {str(cluster_number)}")
# Graphical Show
colors = color(cluster_number)
plt.axis([0, 15, 0, 15])
plt.ion()
plt.show(block=False)
# animate(data, normalise_U(copy.deepcopy(U)), cluster_number, colors)
# --------- Reducer -----------
# initialise the loop
while True:
# create a copy of it, to check the end conditions
U_old = copy.deepcopy(U)
# cluster center vector
C = []
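        # Each center coordinate is the u^m-weighted mean of the samples:
        #   v_j[d] = sum_k (U[k][j] ** m) * data[k][d] / sum_k (U[k][j] ** m)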
for j in range(cluster_number):
current_cluster_center = []
for i in range(len(data[0])): # this is the number of dimensions
_sum_num = 0.0
_sum_tmp = 0.0
for k in range(len(data)):
_sum_num += (U[k][j] ** m) * data[k][i]
_sum_tmp += (U[k][j] ** m)
current_cluster_center.append(_sum_num / _sum_tmp)
C.append(current_cluster_center)
# print('Cluster :', C)
# creating a Distance vector, useful in calculating the U matrix.
Distance_matrix = []
for i in range(len(data)):
current = [Distance(data[i], C[j]) for j in range(cluster_number)]
Distance_matrix.append(current)
# print('Distance Matrix : ', Distance_matrix)
# update U vector
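        # Standard FCM membership update:
        #   U[i][j] = 1 / sum_k (D[i][j] / D[i][k]) ** (2 / (m - 1))
        # where D[i][j] = Distance_matrix[i][j] is the distance of sample i to center j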
for j in range(cluster_number):
for i in range(len(data)):
tmp = 0.0
for k in range(cluster_number):
tmp += (Distance_matrix[i][j] / Distance_matrix[i][k]) ** (2 / (m - 1))
U[i][j] = 1 / tmp
# print('Update U : ', U)
# Show graphical Change in every single update
# animate(data, normalise_U(copy.deepcopy(U)), cluster_number, colors)
if EndCondition(U, U_old):
print("finished clustering")
break
animate(data, normalise_U(copy.deepcopy(U)), cluster_number, colors)
# animate(data, U, cluster_number, colors)
# U = normalise_U(U)
# print("normalised U")
return U
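# A minimal helper sketch, not part of the original pipeline: turning the returned membership
# matrix U into hard cluster labels (the name HardLabels is illustrative).
def HardLabels(U):
    """Return, for every sample, the index of the cluster with the highest membership."""
    return [row.index(max(row)) for row in U]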
def Print_Matrix(matrix):
    """
    Print the matrix, one row per line.
    """
    for row in matrix:
        print(row)
# main section
if __name__ == '__main__':
# data = MapData("Blobs_Data_set.txt")
# data = MapData("Circle_Data_set.txt")
# data = MapData("Moons_Data_set.txt")
# data = MapData("DG.txt")
data = MapExcelData("ExcelData.xlsx")
# Shuffle Data And Save Order
data, order = ShuffleData(data)
print('Shuffled Data :')
Print_Matrix(data)
start = time.time()
# call the FuzzyReducer
final_location = FuzzyReducer(data, 2, 2)
# DeShuffle Data
final_location = DeShuffleData(final_location, order)
Print_Matrix(final_location)
print("time elapsed=", time.time() - start)