-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
af87eb6
commit 938961d
Showing
3 changed files
with
179 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
|
||
|
||
import random as rd | ||
import numpy as np | ||
|
||
|
||
|
||
# Five 2-D sample points used as the demo input for K_clustering below.
data = np.array([[0, 2], [0, 0], [1, 0], [5, 0], [5, 2]])
class K_clustering():
    """Seed-based nearest-centre clustering over sample indices.

    ``category`` holds one list of sample indices per class; the first index
    of each list is the seed sample that defines the class centre.

    NOTE: ``clustering`` performs a single assignment pass against the seed
    centres. Centres are not re-estimated afterwards, so this is not a full
    iterative k-means.
    """

    def __init__(self, k):
        """Store the number of classes ``k`` and start with no classes."""
        self.k = k
        self.category = []
        # Samples most recently handed to ``clustering``/``initial_category``;
        # ``centre_G`` falls back to this when no data is passed explicitly.
        self._data = None

    def initial_category(self, data):
        """Randomly pick ``k`` distinct samples as the class seeds.

        Replaces any previous content of ``category`` so repeated calls do
        not accumulate seeds. Raises ``ValueError`` (from ``random.sample``)
        when ``k`` exceeds the number of samples.
        """
        self._data = np.asarray(data)
        self.category = [[i] for i in rd.sample(range(len(data)), self.k)]

    def centre_G(self, G, data=None):
        """Return the mean of the samples whose indices are listed in ``G``.

        ``data`` defaults to the samples last given to ``clustering`` or
        ``initial_category``; ``ValueError`` is raised when neither is
        available.
        """
        if data is None:
            data = self._data
        if data is None:
            raise ValueError("no data available: pass data or call clustering() first")
        data = np.asarray(data, dtype=float)
        return data[list(G)].sum(axis=0) / len(G)

    def distance(self, X, Y):
        """Return the Euclidean distance between two points (not indices)."""
        diff = np.asarray(X, dtype=float) - np.asarray(Y, dtype=float)
        # sqrt of the squared sum; ``** 1 / 2`` would parse as ``(x ** 1) / 2``.
        return np.sqrt(np.sum(diff ** 2))

    def clustering(self, data):
        """Assign every non-seed sample to the class with the nearest centre.

        The first ``k`` samples are used as seeds (the book's choice); call
        ``initial_category`` separately if random seeds are wanted instead.
        The result is stored in ``self.category``. Ties go to the
        lowest-numbered class. Raises ``ValueError`` when ``k`` is not in
        ``1..len(data)``.
        """
        data = np.asarray(data)
        if not 1 <= self.k <= len(data):
            raise ValueError("k must be between 1 and the number of samples")
        self._data = data

        self.category = [[i] for i in range(self.k)]
        seeds = {G[0] for G in self.category}
        # Centres are computed once, from the seed-only classes.
        centres = [self.centre_G(G) for G in self.category]

        for i, x in enumerate(data):
            if i in seeds:
                continue  # seeds already belong to their own class
            dists = [self.distance(x, c) for c in centres]
            self.category[int(np.argmin(dists))].append(i)
|
||
|
||
|
||
# Demo: split the sample points into two classes and show the index lists.
cluster = K_clustering(k=2)
cluster.clustering(data)
print(cluster.category)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
|
||
|
||
|
||
import numpy as np | ||
|
||
# Six 2-D sample points available as raw input for agglomerative_clustering.clustering.
train_data = np.array([[1, 2], [3, 4], [5, 6], [0, 1], [2, 2], [1, 6]])
|
||
class agglomerative_clustering():
    """Bottom-up (agglomerative) clustering using the minimum pairwise distance.

    Clusters are lists of sample indices. ``clustering`` and ``clustering_D``
    return the whole merge history: the ``n`` singleton clusters followed by
    every merged cluster in creation order, so the last entry contains all
    samples. The history is effectively a binary tree flattened into a list.
    """

    def __init__(self):
        """Create the clusterer; ``category`` is kept for compatibility."""
        # The clustering methods return their result instead of storing it here.
        self.category = []

    def distance_D(self, X):
        """Return the matrix of pairwise Euclidean distances between samples.

        ``X`` is a sequence of ``n`` samples; the result is an ``n x n`` float
        array whose entry ``[i][j]`` is the distance between ``X[i]`` and
        ``X[j]``.
        """
        X = np.asarray(X, dtype=float)
        n = len(X)
        if n == 0:
            return np.zeros((0, 0))
        X = X.reshape(n, -1)  # flatten each sample so one axis holds its features
        diff = X[:, None, :] - X[None, :, :]
        return np.sqrt(np.sum(diff ** 2, axis=-1))

    def distance_i_j(self, Di, Dj, D):
        """Return the distance between two clusters of sample indices.

        The cluster distance is the smallest ``D[i][j]`` over ``i`` in ``Di``
        and ``j`` in ``Dj`` (e.g. ``Di=[1, 3]``, ``Dj=[2, 5]``). Returns
        ``inf`` when either cluster is empty.
        """
        return min((D[i][j] for i in Di for j in Dj), default=float("inf"))

    def clustering(self, data):
        """Cluster raw samples; see ``clustering_D`` for the result format."""
        return self.clustering_D(self.distance_D(data))

    def clustering_D(self, D):
        """Cluster from a precomputed ``n x n`` distance matrix ``D``.

        Repeatedly merges the two closest current clusters until one cluster
        holds every sample. When several pairs are equally close, the pair
        that comes last in ascending ``(i, j)`` order is merged. Returns the
        merge history described on the class; for ``n <= 1`` that is just the
        singleton list.
        """
        G = [[i] for i in range(len(D))]
        # Positions in G of the clusters that have not been merged away yet;
        # stays sorted because every new cluster gets the largest position.
        active = list(range(len(G)))

        while len(active) > 1:
            best_dist = None
            best_pair = None
            for pos, i in enumerate(active):
                for j in active[pos + 1:]:
                    d = self.distance_i_j(G[i], G[j], D)
                    # ``<=`` keeps the last of several equally close pairs.
                    if best_dist is None or d <= best_dist:
                        best_dist, best_pair = d, (i, j)

            i, j = best_pair
            G.append(G[i] + G[j])
            active = [idx for idx in active if idx != i and idx != j]
            active.append(len(G) - 1)

        return G
|
||
# Demo: cluster directly from a given 5 x 5 distance matrix.
cluster = agglomerative_clustering()

D = np.array([[0, 7, 2, 9, 3], [7, 0, 5, 4, 6], [2, 5, 0, 8, 1], [9, 4, 8, 0, 5], [3, 6, 1, 5, 0]])

G = cluster.clustering_D(D)

print(G)  # Success! At least it matches the book.
|
||
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
# Scratch check: min() over a dict iterates its keys, so it yields the smallest key.
my_dict = {3: [1, 2], 4: [5, 6]}

# Print 1.5, 2.5, ..., 9.5.
for s in [i + 0.5 for i in range(1, 10)]:
    print(s)

min_key = min(my_dict)
min_value = my_dict[min_key]
print(min_value)