From 0de24871ae6f5f8731d1322be5d63d036994cee4 Mon Sep 17 00:00:00 2001
From: BlackTea-c <2286554510@qq.com>
Date: Fri, 1 Dec 2023 11:49:05 +0800
Subject: [PATCH] 2023/12/1

---
 .idea/Mygithub.iml    |   2 +-
 .idea/misc.xml        |   2 +-
 README.md             |  37 +-----
 .../Boosting Tree.py" | 123 ++++++++++++++++++
 4 files changed, 130 insertions(+), 34 deletions(-)
 create mode 100644 "\346\217\220\345\215\207\346\226\271\346\263\225/Boosting Tree.py"

diff --git a/.idea/Mygithub.iml b/.idea/Mygithub.iml
index 36a6c2f..4bffeec 100644
--- a/.idea/Mygithub.iml
+++ b/.idea/Mygithub.iml
@@ -4,7 +4,7 @@
-
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index b9629c6..74e9fe5 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,4 +1,4 @@
-
+
\ No newline at end of file
diff --git a/README.md b/README.md
index b2c81ae..8d96faf 100644
--- a/README.md
+++ b/README.md
@@ -1,37 +1,10 @@
-# Perceptron Tutorial
+# Li Hang, "Statistical Learning Methods" (2nd edition): code reproductions and derivations of the related formulas
+# The code comments are written in detail.

-## Introduction to the Perceptron
-The perceptron is one of the simplest forms of artificial neural network, used for binary classification problems.
+Reference repo: lihang-code

-## How the Perceptron Works
-### General idea
-- The perceptron receives several inputs (x1, x2, ..., xn), each with a corresponding weight (w1, w2, ..., wn).
-- The inputs are multiplied by their weights and summed, and a bias term is added.
-- The result is passed through an activation function (usually a step function) to produce the final classification.
-### Mathematical formulation
-The perceptron can be written as follows:
-- Linear combination of inputs and weights: \( \sum_{i=1}^{n} w_i \cdot x_i + b \)
-- Step function: \( f(x) = \begin{cases} 1, & \text{if } \sum_{i=1}^{n} w_i \cdot x_i + b > 0 \\ 0, & \text{otherwise} \end{cases} \)
+Bug fixes

-### Weight update rule
-The perceptron learning rule uses a simple weight update, applying stochastic gradient descent to adjust the weights and bias so as to minimize the error.
-In each training round, for every input sample (xi) with true label (yi), the update is:
-- \( w_i = w_i + \alpha \cdot (y_i - \hat{y_i}) \cdot x_i \), where \(\alpha\) is the learning rate, \(\hat{y_i}\) is the predicted value, and \(y_i\) is the true value.
-- \( b = b + \alpha \cdot (y_i - \hat{y_i}) \)
-
-### Dual form
-The perceptron also has a dual form, in which the weight updates are computed from inner (dot) products of the input data.
-The updates can be written as:
-- \( w_i = w_i + \alpha \cdot (y_i - \hat{y_i}) \cdot x_i \)
-- \( b = b + \alpha \cdot (y_i - \hat{y_i}) \cdot 1 \), where \(x_i\) is the input and \(1\) stands in for the bias input, combined through the inner product.
-
-## Summary
-The perceptron is one of the simplest neural networks; by adjusting its weights and bias it can learn simple binary classification tasks.
-
-## File descriptions
-eg1.py            the primal form
-eg_dual_form.py   the dual form (essentially just computes the Gram matrix)
+# 2023/11/30  Boosting methods, example 8.1: running lihang-code gives a final score = 0.4 while the book gets 1.0. The cause is that lihang-code gets G3(x) wrong (it should be positive); the key is the comparison `if weight_error_positive <= weight_error_nagetive:` on line 60 of that code, which should be `<=`.
\ No newline at end of file
diff --git "a/\346\217\220\345\215\207\346\226\271\346\263\225/Boosting Tree.py" "b/\346\217\220\345\215\207\346\226\271\346\263\225/Boosting Tree.py"
new file mode 100644
index 0000000..6d71955
--- /dev/null
+++ "b/\346\217\220\345\215\207\346\226\271\346\263\225/Boosting Tree.py"
@@ -0,0 +1,123 @@
+
+
+import numpy as np
+
+
+class AdaBoost:
+    def __init__(self, X, y, tol=0.05, max_iter=10):
+        # training data: instances
+        self.X = X
+        # training data: labels
+        self.y = y
+        # stopping condition for training: right_rate > self.tol
+        self.tol = tol
+        # maximum number of iterations
+        self.max_iter = max_iter
+        # initialize the sample weights w
+        self.w = np.full((X.shape[0]), 1 / X.shape[0])
+        self.G = []  # weak classifiers
+
+    def build_stump(self):  # decision stump
+        """
+        Select the best split threshold by minimizing the weighted classification error.
+        best_stump['dim']    index of the chosen feature
+        best_stump['thresh'] threshold on the chosen feature
+        best_stump['ineq']   direction flag of the stump: 'lt' or 'rt'
+        """
+        m, n = np.shape(self.X)
+        # minimal weighted classification error found so far
+        e_min = np.inf
+        # predictions of the best stump found so far
+        sign = None
+        # best classification stump
+        best_stump = {}
+        for i in range(n):
+            range_min = self.X[:, i].min()  # min and max value of each feature
+            range_max = self.X[:, i].max()
+            step_size = (range_max - range_min) / n
+            # candidate thresholds: n + 2 evenly spaced values across the feature range
+            for j in range(-1, int(n) + 1):
+                thresh_val = range_min + j * step_size
+                # compute the error of the left and the right branch
+                for inequal in ['lt', 'rt']:
+                    predict_vals = self.base_estimator(self.X, i, thresh_val,
+                                                       inequal)
+                    err_arr = np.array(np.ones(m))
+                    err_arr[predict_vals.T == self.y.T] = 0
+                    weighted_error = np.dot(self.w, err_arr)
+                    if weighted_error < e_min:
+                        e_min = weighted_error
+                        sign = predict_vals
+                        best_stump['dim'] = i
+                        best_stump['thresh'] = thresh_val
+                        best_stump['ineq'] = inequal
+        return best_stump, sign, e_min
+
+    def updata_w(self, alpha, predict):
+        """
+        Update the sample weights w.
+        """
+        # the following 2 lines update the sample weights according to formulas 8.4 and 8.5
+        P = self.w * np.exp(-alpha * self.y * predict)
+        self.w = P / P.sum()
+
+    @staticmethod
+    def base_estimator(X, dimen, threshVal, threshIneq):
+        """
+        Compute the predictions of a single weak classifier (decision stump).
+        """
+        ret_array = np.ones(np.shape(X)[0])  # prediction array
+        # assign -1 to one side of the threshold over all samples at once
+        if threshIneq == 'lt':
+            ret_array[X[:, dimen] <= threshVal] = -1.0
+        else:
+            ret_array[X[:, dimen] > threshVal] = -1.0
+        return ret_array
+
+    def fit(self):
+        """
+        Fit the classifier to the training data.
+        """
+        G = 0
+        for i in range(self.max_iter):
+            best_stump, sign, error = self.build_stump()  # best split threshold for the current iteration
+            alpha = 1 / 2 * np.log((1 - error) / error)  # coefficient of this round's weak classifier
+            # weight of the weak classifier
+            best_stump['alpha'] = alpha
+            # store the weak classifier
+            self.G.append(best_stump)
+            # the following 3 lines evaluate the current ensemble (the weighted sum of all weak classifiers so far)
+            G += alpha * sign
+            y_predict = np.sign(G)
+            error_rate = np.sum(
+                np.abs(y_predict - self.y)) / 2 / self.y.shape[0]
+            if error_rate < self.tol:  # stop once the stopping condition is met
+                print("number of iterations:", i + 1)
+                break
+            else:
+                self.updata_w(alpha, y_predict)  # otherwise update the sample weights and keep iterating
+
+    def predict(self, X):
+        """
+        Predict labels for new data.
+        """
+        m = np.shape(X)[0]
+        G = np.zeros(m)
+        for i in range(len(self.G)):
+            stump = self.G[i]
+            # accumulate the weighted vote of every weak classifier
+            _G = self.base_estimator(X, stump['dim'], stump['thresh'],
+                                     stump['ineq'])
+            alpha = stump['alpha']
+            G += alpha * _G
+        y_predict = np.sign(G)
+        return y_predict.astype(int)
+
+    def score(self, X, y):
+        """Evaluate the prediction accuracy on the given data."""
+        y_predict = self.predict(X)
+        error_rate = np.sum(np.abs(y_predict - y)) / 2 / y.shape[0]
+        return 1 - error_rate
+
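
A minimal usage sketch for the AdaBoost class added by this patch, for example appended under an if __name__ == "__main__" guard at the bottom of "Boosting Tree.py". The data values are an assumption based on Example 8.1 of the book (x = 0, 1, ..., 9 with the labels below), not something this patch provides, and the resulting score depends on the threshold search in build_stump (it only tries n + 2 candidate thresholds per feature).

if __name__ == "__main__":
    # Example 8.1 data (assumed from the book): 10 one-dimensional samples, labels in {-1, +1}
    X = np.arange(10).reshape(10, 1)
    y = np.array([1, 1, 1, -1, -1, -1, 1, 1, 1, -1])

    clf = AdaBoost(X, y, tol=0.05, max_iter=10)
    clf.fit()
    print("training score:", clf.score(X, y))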