diff --git "a/\346\217\220\345\215\207\346\226\271\346\263\225/1.jpg" "b/\346\217\220\345\215\207\346\226\271\346\263\225/1.jpg" new file mode 100644 index 0000000..15b4655 Binary files /dev/null and "b/\346\217\220\345\215\207\346\226\271\346\263\225/1.jpg" differ diff --git "a/\346\217\220\345\215\207\346\226\271\346\263\225/2.jpg" "b/\346\217\220\345\215\207\346\226\271\346\263\225/2.jpg" new file mode 100644 index 0000000..1a47766 Binary files /dev/null and "b/\346\217\220\345\215\207\346\226\271\346\263\225/2.jpg" differ diff --git "a/\346\217\220\345\215\207\346\226\271\346\263\225/3.jpg" "b/\346\217\220\345\215\207\346\226\271\346\263\225/3.jpg" new file mode 100644 index 0000000..8eb523f Binary files /dev/null and "b/\346\217\220\345\215\207\346\226\271\346\263\225/3.jpg" differ diff --git "a/\346\217\220\345\215\207\346\226\271\346\263\225/Boosting Tree eg8.2.py" "b/\346\217\220\345\215\207\346\226\271\346\263\225/Boosting Tree eg8.2.py" new file mode 100644 index 0000000..fb8b57a --- /dev/null +++ "b/\346\217\220\345\215\207\346\226\271\346\263\225/Boosting Tree eg8.2.py" @@ -0,0 +1,110 @@ + + + + + +import numpy as np + + + +X_train=np.array([[1],[2],[3],[4],[5],[6],[7],[8],[9],[10]]) +y_train=np.array([5.56,5.70,5.91,6.40,6.80,7.05,8.90,8.70,9.00,9.05]) +class AdaBoost: + def __init__(self, tol=0.1, max_iter=10): + # 训练中止条件 平方损失误差要求 + self.tol = tol + # 最大迭代次数 + self.max_iter = max_iter + # 初始化样本权重w + self.G = [] # 弱分类器,(s,c1,c2) + + def build_stump(self,X,y): #决策树桩 + #m, n = np.shape(self.X) #记录X 样本个数m 样本的特征维数n + """最开始输入的是(xi,yi) i=1-N + 然后计算出m(s),找出min的; 切分点s的learning_rate为1,从1.5开始 + 得到最优的s之后得到T1,所以此时f1=T1,计算残差r2i,T2就拟合r2i得到下一个s + 如此往复""" + best_m_s=10000 #初始化m_s,无穷大 + best_s=0 #初始化s + best_c1=0 + best_c2=0 + for s in [i+0.5 for i in range(1,10)]: + #计算s划分出的两个区域R1,R2对应的c1,c2(c1,c2为使得平方误差最小的值,则ci=1/Ni * sum(yi)) + c1_,c2_=0,0 + N1,N2=0,0 + for i in range(len(y)): + if X[i][0]=self.tol: + #print(X,res) + #print(self.build_stump(X,res)) + self.G.append(self.build_stump(X,res)) + res,res_square=self.residual(self.G,X,y) + print("epoch:",epoch,"train_loss:",res_square) + else: + break + + + def predict(self,x): + pass + def score(self,X,y): + pass + + + +Ada=AdaBoost(max_iter=100,tol=0.05) +Ada.fit(X_train,y_train) + +#完成哈哈哈哈 简单~ + + diff --git "a/\346\217\220\345\215\207\346\226\271\346\263\225/Boosting Tree.py" "b/\346\217\220\345\215\207\346\226\271\346\263\225/Boosting Tree.py" deleted file mode 100644 index 6d71955..0000000 --- "a/\346\217\220\345\215\207\346\226\271\346\263\225/Boosting Tree.py" +++ /dev/null @@ -1,123 +0,0 @@ - - - - - -import numpy as np - - -class AdaBoost: - def __init__(self, X, y, tol=0.05, max_iter=10): - # 训练数据 实例 - self.X = X - # 训练数据 标签 - self.y = y - # 训练中止条件 right_rate>self.tol - self.tol = tol - # 最大迭代次数 - self.max_iter = max_iter - # 初始化样本权重w - self.w = np.full((X.shape[0]), 1 / X.shape[0]) - self.G = [] # 弱分类器 - - def build_stump(self): #决策树桩 - """ - 以带权重的分类误差最小为目标,选择最佳分类阈值 - best_stump['dim'] 合适的特征所在维度 - best_stump['thresh'] 合适特征的阈值 - best_stump['ineq'] 树桩分类的标识lt,rt - """ - m, n = np.shape(self.X) - # 分类误差 - e_min = np.inf - # 小于分类阈值的样本属于的标签类别 - sign = None - # 最优分类树桩 - best_stump = {} - for i in range(n): - range_min = self.X[:, i].min() # 求每一种特征的最大最小值 - range_max = self.X[:, i].max() - step_size = (range_max - range_min) / n - for j in range(-1, int(n) + 1): - thresh_val = range_min + j * step_size - # 计算左子树和右子树的误差 - for inequal in ['lt', 'rt']: - predict_vals = self.base_estimator(self.X, i, thresh_val, - inequal) - err_arr = 
np.array(np.ones(m)) - err_arr[predict_vals.T == self.y.T] = 0 - weighted_error = np.dot(self.w, err_arr) - if weighted_error < e_min: - e_min = weighted_error - sign = predict_vals - best_stump['dim'] = i - best_stump['thresh'] = thresh_val - best_stump['ineq'] = inequal - return best_stump, sign, e_min - - def updata_w(self, alpha, predict): - """ - 更新样本权重w - """ - # 以下2行根据公式8.4 8.5 更新样本权重 - P = self.w * np.exp(-alpha * self.y * predict) - self.w = P / P.sum() - - @staticmethod - def base_estimator(X, dimen, threshVal, threshIneq): - """ - 计算单个弱分类器(决策树桩)预测输出 - """ - ret_array = np.ones(np.shape(X)[0]) # 预测矩阵 - # 左叶子 ,整个矩阵的样本进行比较赋值 - if threshIneq == 'lt': - ret_array[X[:, dimen] <= threshVal] = -1.0 - else: - ret_array[X[:, dimen] > threshVal] = -1.0 - return ret_array - - def fit(self): - """ - 对训练数据进行学习 - """ - G = 0 - for i in range(self.max_iter): - best_stump, sign, error = self.build_stump() # 获取当前迭代最佳分类阈值 - alpha = 1 / 2 * np.log((1 - error) / error) # 计算本轮弱分类器的系数 - # 弱分类器权重 - best_stump['alpha'] = alpha - # 保存弱分类器 - self.G.append(best_stump) - # 以下3行计算当前总分类器(之前所有弱分类器加权和)分类效率 - G += alpha * sign - y_predict = np.sign(G) - error_rate = np.sum( - np.abs(y_predict - self.y)) / 2 / self.y.shape[0] - if error_rate < self.tol: # 满足中止条件 则跳出循环 - print("迭代次数:", i + 1) - break - else: - self.updata_w(alpha, y_predict) # 若不满足,更新权重,继续迭代 - - def predict(self, X): - """ - 对新数据进行预测 - """ - m = np.shape(X)[0] - G = np.zeros(m) - for i in range(len(self.G)): - stump = self.G[i] - # 遍历每一个弱分类器,进行加权 - _G = self.base_estimator(X, stump['dim'], stump['thresh'], - stump['ineq']) - alpha = stump['alpha'] - G += alpha * _G - y_predict = np.sign(G) - return y_predict.astype(int) - - def score(self, X, y): - """对训练效果进行评价""" - y_predict = self.predict(X) - error_rate = np.sum(np.abs(y_predict - y)) / 2 / y.shape[0] - return 1 - error_rate - diff --git "a/\346\217\220\345\215\207\346\226\271\346\263\225/README.md" "b/\346\217\220\345\215\207\346\226\271\346\263\225/README.md" new file mode 100644 index 0000000..1b1cb25 --- /dev/null +++ "b/\346\217\220\345\215\207\346\226\271\346\263\225/README.md" @@ -0,0 +1,7 @@ +# 笔记如下 + +![1](1.jpg) + +![1](2.jpg) + +![1](3.jpg) \ No newline at end of file diff --git "a/\350\215\211\347\250\277.py" "b/\350\215\211\347\250\277.py" index adf5c6b..f9b21be 100644 --- "a/\350\215\211\347\250\277.py" +++ "b/\350\215\211\347\250\277.py" @@ -1,4 +1,5 @@ -print(1>2) \ No newline at end of file +for s in [i+0.5 for i in range(1,10)]: + print(s) \ No newline at end of file
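
Note: as a quick sanity check on the new "Boosting Tree eg8.2.py", the sketch below replays the first boosting round that its build_stump docstring describes, computing m(s) for every candidate split s. It is a minimal standalone sketch, not part of the patch itself; the reference values (minimum at s = 6.5, with region means c1 ≈ 6.24 and c2 ≈ 8.91) come from Example 8.2, which the file name suggests this script follows.

import numpy as np

# Same data as X_train / y_train in "Boosting Tree eg8.2.py"
x = np.arange(1, 11)
y = np.array([5.56, 5.70, 5.91, 6.40, 6.80, 7.05, 8.90, 8.70, 9.00, 9.05])

# One pass of build_stump: m(s) = sum_{xi < s}(yi - c1)^2 + sum_{xi >= s}(yi - c2)^2,
# where c1, c2 are the means of the two regions split at s.
for s in [i + 0.5 for i in range(1, 10)]:
    r1, r2 = y[x < s], y[x >= s]
    m_s = ((r1 - r1.mean()) ** 2).sum() + ((r2 - r2.mean()) ** 2).sum()
    print(f"s={s}: m(s)={m_s:.2f}")  # minimum expected at s=6.5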