From 0de24871ae6f5f8731d1322be5d63d036994cee4 Mon Sep 17 00:00:00 2001
From: BlackTea-c <2286554510@qq.com>
Date: Fri, 1 Dec 2023 11:49:05 +0800
Subject: [PATCH] 2023/12/1
---
.idea/Mygithub.iml | 2 +-
.idea/misc.xml | 2 +-
README.md | 37 +-----
.../Boosting Tree.py" | 123 ++++++++++++++++++
4 files changed, 130 insertions(+), 34 deletions(-)
create mode 100644 "\346\217\220\345\215\207\346\226\271\346\263\225/Boosting Tree.py"
diff --git a/.idea/Mygithub.iml b/.idea/Mygithub.iml
index 36a6c2f..4bffeec 100644
--- a/.idea/Mygithub.iml
+++ b/.idea/Mygithub.iml
@@ -4,7 +4,7 @@
-
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index b9629c6..74e9fe5 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,4 +1,4 @@
-
+
\ No newline at end of file
diff --git a/README.md b/README.md
index b2c81ae..8d96faf 100644
--- a/README.md
+++ b/README.md
@@ -1,37 +1,10 @@
-# Perceptron Tutorial
+# Li Hang, "Statistical Learning Methods" (统计学习方法, 2nd edition): code reproductions and related formula derivations
+# Code comments will be written in detail.
-## Introduction to the Perceptron
-The perceptron is one of the simplest forms of artificial neural network, used for binary classification problems.
+Reference repo: lihang-code
-## How the Perceptron Works
-### General Idea
-- The perceptron receives multiple inputs (x1, x2, ..., xn), each with a corresponding weight (w1, w2, ..., wn).
-- The inputs are multiplied by their weights and summed, and a bias term is added.
-- The result is passed through an activation function (usually a step function), which outputs the final classification.
-### Mathematical Formulation
-The perceptron can be expressed mathematically as follows:
-- Linear combination of inputs and weights: \( \sum_{i=1}^{n} w_i \cdot x_i + b \)
-- Step function: \( f(x) = \begin{cases} 1, & \text{if } \sum_{i=1}^{n} w_i \cdot x_i + b > 0 \\ 0, & \text{otherwise} \end{cases} \)
+Bug fixes
-### Weight Update Algorithm
-The perceptron learning rule uses a simple weight update scheme: the weights and bias are adjusted by stochastic gradient descent to minimize the error.
-In each training round, for every input sample (xi) with true label (yi), the update rule is:
-- \( w_i = w_i + \alpha \cdot (y_i - \hat{y_i}) \cdot x_i \), where \(\alpha\) is the learning rate, \(\hat{y_i}\) is the predicted value, and \(y_i\) is the true value.
-- \( b = b + \alpha \cdot (y_i - \hat{y_i}) \)
-
-### Dual Form
-The perceptron also has a dual form, in which the weight updates are computed using inner products (dot products) of the input data and labels.
-The weights can be updated with the following formulas:
-- \( w_i = w_i + \alpha \cdot (y_i - \hat{y_i}) \cdot x_i \)
-- \( b = b + \alpha \cdot (y_i - \hat{y_i}) \cdot 1 \), where \(x_i\) and \(1\) are the input data and the bias input, entering through the inner product.
-
-
-
-## Summary
-The perceptron is one of the simplest forms of neural network; by adjusting its weights and bias it can learn simple binary classification tasks.
-
-## File Overview
-eg1.py  the primal problem
-eg_dual_form.py  the dual form (essentially just computes the Gram matrix)
+# 2023/11/30 Boosting methods (提升方法), eg8.1: running lihang-code gives a final score of 0.4, while the book reports 1.0. The cause is that lihang-code gets G3(x) wrong (it should be the positive direction); the key is the comparison `if weight_error_positive <= weight_error_nagetive:` at line 60 of that code, which should use `<=`.
\ No newline at end of file
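
For reference, example 8.1 in the book arrives at the final classifier

    f(x) = 0.4236*G1(x) + 0.6496*G2(x) + 0.7514*G3(x)

with G1(x) = +1 for x < 2.5 (else -1), G2(x) = +1 for x < 8.5 (else -1), and G3(x) = +1 for x > 5.5 (else -1). G3 is the positive-direction stump mentioned above, and sign(f(x)) classifies all ten training points correctly, which is why the expected score is 1.0 rather than 0.4.
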
diff --git "a/\346\217\220\345\215\207\346\226\271\346\263\225/Boosting Tree.py" "b/\346\217\220\345\215\207\346\226\271\346\263\225/Boosting Tree.py"
new file mode 100644
index 0000000..6d71955
--- /dev/null
+++ "b/\346\217\220\345\215\207\346\226\271\346\263\225/Boosting Tree.py"
@@ -0,0 +1,123 @@
+
+
+
+
+
+import numpy as np
+
+
+class AdaBoost:
+    def __init__(self, X, y, tol=0.05, max_iter=10):
+        # training data: feature instances
+        self.X = X
+        # training data: labels (+1 / -1)
+        self.y = y
+        # stopping criterion: training stops once the ensemble error rate falls below self.tol
+        self.tol = tol
+        # maximum number of boosting iterations
+        self.max_iter = max_iter
+        # initialize sample weights w uniformly
+        self.w = np.full((X.shape[0]), 1 / X.shape[0])
+        self.G = []  # list of weak classifiers (decision stumps)
+
+    def build_stump(self):  # decision stump
+        """
+        Select the best classification threshold, minimizing the weighted classification error.
+        best_stump['dim']     dimension (feature index) of the chosen feature
+        best_stump['thresh']  threshold for that feature
+        best_stump['ineq']    direction flag of the stump: 'lt' or 'rt'
+        """
+        m, n = np.shape(self.X)
+        # minimum weighted classification error found so far
+        e_min = np.inf
+        # predictions of the best stump (the class assigned on each side of the threshold)
+        sign = None
+        # best classification stump
+        best_stump = {}
+        for i in range(n):
+            range_min = self.X[:, i].min()  # min and max of each feature
+            range_max = self.X[:, i].max()
+            n_steps = 10  # number of candidate threshold steps across the feature range
+            step_size = (range_max - range_min) / n_steps
+            for j in range(-1, n_steps + 1):
+                thresh_val = range_min + j * step_size
+                # evaluate both stump directions ('lt' and 'rt') at this threshold
+                for inequal in ['lt', 'rt']:
+                    predict_vals = self.base_estimator(self.X, i, thresh_val,
+                                                       inequal)
+                    err_arr = np.array(np.ones(m))
+                    err_arr[predict_vals.T == self.y.T] = 0
+                    weighted_error = np.dot(self.w, err_arr)
+                    if weighted_error < e_min:
+                        e_min = weighted_error
+                        sign = predict_vals
+                        best_stump['dim'] = i
+                        best_stump['thresh'] = thresh_val
+                        best_stump['ineq'] = inequal
+        return best_stump, sign, e_min
+
+    def update_w(self, alpha, predict):
+        """
+        Update the sample weights w.
+        """
+        # update the sample weights according to formulas 8.4 and 8.5:
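+        #   (8.4)  w_{m+1,i} = w_{m,i} * exp(-alpha_m * y_i * Gm(x_i)) / Z_m
+        #   (8.5)  Z_m = sum_i w_{m,i} * exp(-alpha_m * y_i * Gm(x_i))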
+        P = self.w * np.exp(-alpha * self.y * predict)
+        self.w = P / P.sum()
+
+    @staticmethod
+    def base_estimator(X, dimen, threshVal, threshIneq):
+        """
+        Compute the predictions of a single weak classifier (decision stump).
+        """
+        ret_array = np.ones(np.shape(X)[0])  # prediction array, initialized to +1
+        # samples on the chosen side of the threshold are assigned the -1 class
+        if threshIneq == 'lt':
+            ret_array[X[:, dimen] <= threshVal] = -1.0
+        else:
+            ret_array[X[:, dimen] > threshVal] = -1.0
+        return ret_array
+
+    def fit(self):
+        """
+        Fit the model to the training data.
+        """
+        G = 0
+        for i in range(self.max_iter):
+            best_stump, sign, error = self.build_stump()  # best stump for the current iteration
+            alpha = 1 / 2 * np.log((1 - error) / error)  # coefficient of this round's weak classifier
+            # weight of the weak classifier
+            best_stump['alpha'] = alpha
+            # store the weak classifier
+            self.G.append(best_stump)
+            # the next three lines evaluate the current ensemble (the weighted sum of all weak classifiers so far)
+            G += alpha * sign
+            y_predict = np.sign(G)
+            error_rate = np.sum(
+                np.abs(y_predict - self.y)) / 2 / self.y.shape[0]
+            if error_rate < self.tol:  # stopping criterion met, exit the loop
+                print("number of iterations:", i + 1)
+                break
+            else:
+                self.update_w(alpha, sign)  # otherwise update the sample weights with the current weak classifier's predictions and continue iterating
+
+    def predict(self, X):
+        """
+        Predict labels for new data.
+        """
+        m = np.shape(X)[0]
+        G = np.zeros(m)
+        for i in range(len(self.G)):
+            stump = self.G[i]
+            # accumulate the weighted vote of each weak classifier
+            _G = self.base_estimator(X, stump['dim'], stump['thresh'],
+                                     stump['ineq'])
+            alpha = stump['alpha']
+            G += alpha * _G
+        y_predict = np.sign(G)
+        return y_predict.astype(int)
+
+    def score(self, X, y):
+        """Evaluate classification accuracy on the given data."""
+        y_predict = self.predict(X)
+        error_rate = np.sum(np.abs(y_predict - y)) / 2 / y.shape[0]
+        return 1 - error_rate
+
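
A minimal usage sketch, assuming the AdaBoost class defined above is available in the current namespace (for example, pasted into the same script); the data are those of example 8.1 in the book:

    import numpy as np

    X = np.arange(10).reshape(10, 1)                  # x = 0, 1, ..., 9
    y = np.array([1, 1, 1, -1, -1, -1, 1, 1, 1, -1])  # labels from example 8.1

    clf = AdaBoost(X, y, tol=0.05, max_iter=10)
    clf.fit()
    print(clf.predict(X))
    print(clf.score(X, y))  # the book's three-stump classifier attains 1.0 on these ten points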