From 0de24871ae6f5f8731d1322be5d63d036994cee4 Mon Sep 17 00:00:00 2001
From: BlackTea-c <2286554510@qq.com>
Date: Fri, 1 Dec 2023 11:49:05 +0800
Subject: [PATCH] 2023/12/1

---
 .idea/Mygithub.iml    |   2 +-
 .idea/misc.xml        |   2 +-
 README.md             |  37 +-----
 .../Boosting Tree.py" | 123 ++++++++++++++++++
 4 files changed, 130 insertions(+), 34 deletions(-)
 create mode 100644 "\346\217\220\345\215\207\346\226\271\346\263\225/Boosting Tree.py"

diff --git a/.idea/Mygithub.iml b/.idea/Mygithub.iml
index 36a6c2f..4bffeec 100644
--- a/.idea/Mygithub.iml
+++ b/.idea/Mygithub.iml
@@ -4,7 +4,7 @@
-
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index b9629c6..74e9fe5 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,4 +1,4 @@
-
+
\ No newline at end of file
diff --git a/README.md b/README.md
index b2c81ae..8d96faf 100644
--- a/README.md
+++ b/README.md
@@ -1,37 +1,10 @@
-# Perceptron Tutorial
+# Li Hang, "Statistical Learning Methods" (2nd edition): code reproductions and derivations of the related formulas
+# The code comments are written in detail.

-## Introduction to the Perceptron
-The perceptron is one of the simplest forms of artificial neural network, used for binary classification problems.
+Reference repo: lihang-code

-## How the Perceptron Works
-### General idea
-- The perceptron receives several inputs (x1, x2, ..., xn), each with a corresponding weight (w1, w2, ..., wn).
-- The inputs are multiplied by their weights and summed, and a bias term is added.
-- The result is passed through an activation function (usually a step function) to produce the final classification.
-### Mathematical formulation
-The perceptron can be written as follows:
-- Linear combination of inputs and weights: \( \sum_{i=1}^{n} w_i \cdot x_i + b \)
-- Step function: \( f(x) = \begin{cases} 1, & \text{if } \sum_{i=1}^{n} w_i \cdot x_i + b > 0 \\ 0, & \text{otherwise} \end{cases} \)
+Bug fixes

-### Weight update rule
-The perceptron learning rule uses a simple weight update, applying stochastic gradient descent to adjust the weights and bias so as to minimize the error.
-In each training round, for every input sample (xi) with true label (yi), the update is:
-- \( w_i = w_i + \alpha \cdot (y_i - \hat{y_i}) \cdot x_i \), where \(\alpha\) is the learning rate, \(\hat{y_i}\) is the predicted value, and \(y_i\) is the true value.
-- \( b = b + \alpha \cdot (y_i - \hat{y_i}) \)
-
-### Dual form
-The perceptron also has a dual form, in which the weight updates are computed from inner (dot) products of the input data.
-The updates can be written as:
-- \( w_i = w_i + \alpha \cdot (y_i - \hat{y_i}) \cdot x_i \)
-- \( b = b + \alpha \cdot (y_i - \hat{y_i}) \cdot 1 \), where \(x_i\) is the input and \(1\) stands in for the bias input, combined through the inner product.
-
-## Summary
-The perceptron is one of the simplest neural networks; by adjusting its weights and bias it can learn simple binary classification tasks.
-
-## File descriptions
-eg1.py            the primal form
-eg_dual_form.py   the dual form (essentially just computes the Gram matrix)
+# 2023/11/30  Boosting methods, example 8.1: running lihang-code gives a final score = 0.4 while the book gets 1.0. The cause is that lihang-code gets G3(x) wrong (it should be positive); the key is the comparison `if weight_error_positive <= weight_error_nagetive:` on line 60 of that code, which should be `<=`.
\ No newline at end of file
diff --git "a/\346\217\220\345\215\207\346\226\271\346\263\225/Boosting Tree.py" "b/\346\217\220\345\215\207\346\226\271\346\263\225/Boosting Tree.py"
new file mode 100644
index 0000000..6d71955
--- /dev/null
+++ "b/\346\217\220\345\215\207\346\226\271\346\263\225/Boosting Tree.py"
@@ -0,0 +1,123 @@
+
+
+import numpy as np
+
+
+class AdaBoost:
+    def __init__(self, X, y, tol=0.05, max_iter=10):
+        # training data: instances
+        self.X = X
+        # training data: labels
+        self.y = y
+        # stopping condition for training: right_rate > self.tol
+        self.tol = tol
+        # maximum number of iterations
+        self.max_iter = max_iter
+        # initialize the sample weights w
+        self.w = np.full((X.shape[0]), 1 / X.shape[0])
+        self.G = []  # weak classifiers
+
+    def build_stump(self):  # decision stump
+        """
+        Select the best split threshold by minimizing the weighted classification error.
+        best_stump['dim']    index of the chosen feature
+        best_stump['thresh'] threshold on the chosen feature
+        best_stump['ineq']   direction flag of the stump: 'lt' or 'rt'
+        """
+        m, n = np.shape(self.X)
+        # minimal weighted classification error found so far
+        e_min = np.inf
+        # predictions of the best stump found so far
+        sign = None
+        # best classification stump
+        best_stump = {}
+        for i in range(n):
+            range_min = self.X[:, i].min()  # min and max value of each feature
+            range_max = self.X[:, i].max()
+            step_size = (range_max - range_min) / n
+            # candidate thresholds: n + 2 evenly spaced values across the feature range
+            for j in range(-1, int(n) + 1):
+                thresh_val = range_min + j * step_size
+                # compute the error of the left and the right branch
+                for inequal in ['lt', 'rt']:
+                    predict_vals = self.base_estimator(self.X, i, thresh_val,
+                                                       inequal)
+                    err_arr = np.array(np.ones(m))
+                    err_arr[predict_vals.T == self.y.T] = 0
+                    weighted_error = np.dot(self.w, err_arr)
+                    if weighted_error < e_min:
+                        e_min = weighted_error
+                        sign = predict_vals
+                        best_stump['dim'] = i
+                        best_stump['thresh'] = thresh_val
+                        best_stump['ineq'] = inequal
+        return best_stump, sign, e_min
+
+    def updata_w(self, alpha, predict):
+        """
+        Update the sample weights w.
+        """
+        # the following 2 lines update the sample weights according to formulas 8.4 and 8.5
+        P = self.w * np.exp(-alpha * self.y * predict)
+        self.w = P / P.sum()
+
+    @staticmethod
+    def base_estimator(X, dimen, threshVal, threshIneq):
+        """
+        Compute the predictions of a single weak classifier (decision stump).
+        """
+        ret_array = np.ones(np.shape(X)[0])  # prediction array
+        # assign -1 to one side of the threshold over all samples at once
+        if threshIneq == 'lt':
+            ret_array[X[:, dimen] <= threshVal] = -1.0
+        else:
+            ret_array[X[:, dimen] > threshVal] = -1.0
+        return ret_array
+
+    def fit(self):
+        """
+        Fit the classifier to the training data.
+        """
+        G = 0
+        for i in range(self.max_iter):
+            best_stump, sign, error = self.build_stump()  # best split threshold for the current iteration
+            alpha = 1 / 2 * np.log((1 - error) / error)  # coefficient of this round's weak classifier
+            # weight of the weak classifier
+            best_stump['alpha'] = alpha
+            # store the weak classifier
+            self.G.append(best_stump)
+            # the following 3 lines evaluate the current ensemble (the weighted sum of all weak classifiers so far)
+            G += alpha * sign
+            y_predict = np.sign(G)
+            error_rate = np.sum(
+                np.abs(y_predict - self.y)) / 2 / self.y.shape[0]
+            if error_rate < self.tol:  # stop once the stopping condition is met
+                print("number of iterations:", i + 1)
+                break
+            else:
+                self.updata_w(alpha, y_predict)  # otherwise update the sample weights and keep iterating
+
+    def predict(self, X):
+        """
+        Predict labels for new data.
+        """
+        m = np.shape(X)[0]
+        G = np.zeros(m)
+        for i in range(len(self.G)):
+            stump = self.G[i]
+            # accumulate the weighted vote of every weak classifier
+            _G = self.base_estimator(X, stump['dim'], stump['thresh'],
+                                     stump['ineq'])
+            alpha = stump['alpha']
+            G += alpha * _G
+        y_predict = np.sign(G)
+        return y_predict.astype(int)
+
+    def score(self, X, y):
+        """Evaluate the prediction accuracy on the given data."""
+        y_predict = self.predict(X)
+        error_rate = np.sum(np.abs(y_predict - y)) / 2 / y.shape[0]
+        return 1 - error_rate
+
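
A minimal usage sketch for the AdaBoost class added by this patch, for example appended under an if __name__ == "__main__" guard at the bottom of "Boosting Tree.py". The data values are an assumption based on Example 8.1 of the book (x = 0, 1, ..., 9 with the labels below), not something this patch provides, and the resulting score depends on the threshold search in build_stump (it only tries n + 2 candidate thresholds per feature).

if __name__ == "__main__":
    # Example 8.1 data (assumed from the book): 10 one-dimensional samples, labels in {-1, +1}
    X = np.arange(10).reshape(10, 1)
    y = np.array([1, 1, 1, -1, -1, -1, 1, 1, 1, -1])

    clf = AdaBoost(X, y, tol=0.05, max_iter=10)
    clf.fit()
    print("training score:", clf.score(X, y))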