Skip to content

Commit

Permalink
12/3
Browse files Browse the repository at this point in the history
  • Loading branch information
BlackTea-c committed Dec 4, 2023
1 parent af87eb6 commit 938961d
Show file tree
Hide file tree
Showing 3 changed files with 179 additions and 4 deletions.
63 changes: 63 additions & 0 deletions 聚类/K均值聚类.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@


import random as rd
import numpy as np



# Five 2-D sample points used by the k-means demo further down.
data = np.array([
    [0, 2],
    [0, 0],
    [1, 0],
    [5, 0],
    [5, 2],
])
class K_clustering:
    """K-means clustering that reports clusters as lists of sample indices.

    After ``clustering(data)`` has run, ``self.category`` holds ``k`` lists;
    list ``j`` contains the (ascending) row indices of ``data`` assigned to
    cluster ``j``.
    """

    def __init__(self, k):
        # Number of clusters to produce.
        self.k = k
        # Current clusters, each a list of sample indices.
        self.category = []
        # Samples seen by the most recent clustering() call; lets centre_G()
        # work without relying on a module-level variable.
        self._data = None

    def initial_category(self, data):
        """Pick ``k`` distinct random samples as singleton seed clusters.

        The previous seeds are discarded, so calling this repeatedly never
        accumulates more than ``k`` clusters.
        """
        self.category = [[i] for i in rd.sample(range(len(data)), self.k)]

    def centre_G(self, G, data=None):
        """Return the centroid (mean point) of the samples indexed by ``G``.

        Args:
            G: iterable of sample indices belonging to one cluster.
            data: the sample matrix; defaults to the data passed to the most
                recent ``clustering()`` call.

        Raises:
            ValueError: if no data was given and ``clustering()`` has not
                been called yet.
        """
        if data is None:
            data = getattr(self, "_data", None)
        if data is None:
            raise ValueError(
                "no data available: pass `data` or call clustering() first"
            )
        return np.asarray(data, dtype=float)[list(G)].mean(axis=0)

    def distance(self, X, Y):
        """Return the Euclidean distance between two points (not indices)."""
        diff = np.asarray(X, dtype=float) - np.asarray(Y, dtype=float)
        # Explicit sqrt: ``s**1/2`` parses as ``(s**1)/2``, which is not a root.
        return np.sqrt(np.sum(diff ** 2))

    def clustering(self, data, random_init=False, max_iter=100):
        """Run k-means on ``data`` and store the result in ``self.category``.

        Args:
            data: 2-D array-like, one sample per row.
            random_init: when False (default) the first ``k`` samples seed the
                clusters, which reproduces the textbook example for ``k=2``;
                when True the seeds come from ``initial_category``.
            max_iter: upper bound on assign/update rounds, as a safety net.

        Raises:
            ValueError: if ``k`` is not between 1 and the number of samples.
        """
        data = np.asarray(data, dtype=float)
        n = len(data)
        if not 1 <= self.k <= n:
            raise ValueError(
                f"k must be between 1 and the number of samples ({n}), "
                f"got {self.k}"
            )
        self._data = data

        if random_init:
            self.initial_category(data)
        else:
            self.category = [[i] for i in range(self.k)]

        # Centres of the seed clusters are the seed points themselves.
        centres = [self.centre_G(G, data) for G in self.category]

        for _ in range(max_iter):
            # Assignment step: every sample joins its nearest centre
            # (ties go to the lowest cluster index).
            groups = [[] for _ in centres]
            for i in range(n):
                dists = [self.distance(data[i], c) for c in centres]
                groups[dists.index(min(dists))].append(i)

            converged = groups == self.category
            self.category = groups
            if converged:
                break

            # Update step: recompute centres; a cluster that lost all its
            # members keeps its previous centre instead of becoming NaN.
            centres = [
                self.centre_G(G, data) if G else old
                for G, old in zip(groups, centres)
            ]



# Demo: split the sample points above into two clusters and show the result.
cluster = K_clustering(k=2)
cluster.clustering(data)
print(cluster.category)
112 changes: 112 additions & 0 deletions 聚类/层次聚类.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@



import numpy as np

# Six 2-D sample points available for the hierarchical-clustering demo.
train_data = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
    [0, 1],
    [2, 2],
    [1, 6],
])

class agglomerative_clustering:
    """Bottom-up (agglomerative) clustering with single linkage.

    The result is the full merge history: a list that starts with one
    singleton cluster per sample and then appends every merged cluster in the
    order it was formed, ending with the cluster that contains all samples.
    NOTE: this history is effectively a binary tree flattened into a list.
    """

    def __init__(self):
        self.category = []

    def distance_D(self, X):
        """Return the pairwise Euclidean distance matrix of the samples in X.

        Args:
            X: array-like with one sample per row.

        Returns:
            A float array of shape ``(len(X), len(X))``.
        """
        X = np.asarray(X, dtype=float)
        n = len(X)
        if n == 0:
            return np.zeros((0, 0))
        rows = X.reshape(n, -1)
        # Broadcasting yields every pairwise difference at once.
        diff = rows[:, None, :] - rows[None, :, :]
        return np.sum(diff ** 2, axis=-1) ** 0.5

    def distance_i_j(self, Di, Dj, D):
        """Return the single-linkage distance between two clusters.

        Args:
            Di, Dj: lists of sample indices, e.g. ``[1, 3]`` and ``[2, 5]``.
            D: sample-to-sample distance matrix, indexed as ``D[i][j]``.

        Returns:
            The smallest ``D[i][j]`` with ``i`` in ``Di`` and ``j`` in ``Dj``
            (``inf`` when either cluster is empty). No arbitrary upper bound
            is applied, so large distances are reported correctly.
        """
        return min(
            (D[i][j] for i in Di for j in Dj),
            default=float("inf"),
        )

    def clustering(self, data):
        """Cluster raw samples: build the distance matrix, then merge.

        Returns the same merge history as ``clustering_D``.
        """
        return self.clustering_D(self.distance_D(data))

    def clustering_D(self, D):
        """Cluster from a precomputed distance matrix ``D``.

        Repeatedly merges the two closest live clusters (single linkage)
        until one cluster holds every sample. When several pairs share the
        minimum distance, the last pair in ``(i, j)`` index order is merged.

        Returns:
            The merge history described in the class docstring. An empty
            matrix yields ``[]`` and a single sample yields ``[[0]]``.
        """
        n = len(D)
        G = [[i] for i in range(n)]  # G = [[0], [1], [2], ...]
        # Positions in G of clusters not yet absorbed by a merge. New
        # clusters are appended, so this list stays in ascending order.
        live = list(range(n))

        while G and len(G[-1]) < n:
            best = None
            pair = None
            for pos, i in enumerate(live):
                for j in live[pos + 1:]:
                    d = self.distance_i_j(G[i], G[j], D)
                    # "<=" so that, among ties, the last pair wins.
                    if best is None or d <= best:
                        best, pair = d, (i, j)

            i, j = pair
            G.append(G[i] + G[j])
            live = [idx for idx in live if idx != i and idx != j]
            live.append(len(G) - 1)

        return G

# Demo: textbook example 14.1 supplies the distance matrix directly.
cluster = agglomerative_clustering()

D = np.array([
    [0, 7, 2, 9, 3],
    [7, 0, 5, 4, 6],
    [2, 5, 0, 8, 1],
    [9, 4, 8, 0, 5],
    [3, 6, 1, 5, 0],
])

G = cluster.clustering_D(D)

print(G)  # Success: the result at least agrees with the textbook.






8 changes: 4 additions & 4 deletions 草稿.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Scratch file: small experiments with dict minimum lookup and float ranges.
my_dict = {
    3: [1, 2],
    4: [5, 6],
}

# Print 1.5, 2.5, ..., 9.5.
for s in (whole + 0.5 for whole in range(1, 10)):
    print(s)

# min() over a dict iterates its keys, so this finds the smallest key.
min_key = min(my_dict)
min_value = my_dict[min_key]
print(min_value)

0 comments on commit 938961d

Please sign in to comment.