Merge pull request dmlc#254 from lihang00/master
Python: add more params in sklearn wrapper.
tqchen committed Apr 24, 2015
2 parents 1d5b4e1 + c6d2e16 commit f28a7a0
Showing 1 changed file with 48 additions and 17 deletions.
wrapper/xgboost.py (65 changes: 48 additions & 17 deletions)
@@ -781,31 +781,58 @@ class XGBModel(BaseEstimator):
         Number of boosted trees to fit.
     silent : boolean
         Whether to print messages while running boosting.
+    objective : string
+        Specify the learning task and the corresponding learning objective.
+    nthread : int
+        Number of parallel threads used to run xgboost.
+    gamma : float
+        Minimum loss reduction required to make a further partition on a leaf node of the tree.
+    min_child_weight : int
+        Minimum sum of instance weight (hessian) needed in a child.
+    max_delta_step : int
+        Maximum delta step we allow each tree's weight estimation to be.
+    subsample : float
+        Subsample ratio of the training instances.
+    colsample_bytree : float
+        Subsample ratio of columns when constructing each tree.
+    base_score : float
+        The initial prediction score of all instances (global bias).
+    seed : int
+        Random number seed.
     """
-    def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100, silent=True, objective="reg:linear"):
+    def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100, silent=True, objective="reg:linear",
+                 nthread=-1, gamma=0, min_child_weight=1, max_delta_step=0, subsample=1, colsample_bytree=1,
+                 base_score=0.5, seed=0):
         if not SKLEARN_INSTALLED:
             raise Exception('sklearn needs to be installed in order to use this module')
         self.max_depth = max_depth
         self.learning_rate = learning_rate
-        self.silent = silent
         self.n_estimators = n_estimators
+        self.silent = silent
         self.objective = objective
+
+        self.nthread = nthread
+        self.gamma = gamma
+        self.min_child_weight = min_child_weight
+        self.max_delta_step = max_delta_step
+        self.subsample = subsample
+        self.colsample_bytree = colsample_bytree
+
+        self.base_score = base_score
+        self.seed = seed
+
         self._Booster = Booster()
 
-    def get_params(self, deep=True):
-        return {'max_depth': self.max_depth,
-                'learning_rate': self.learning_rate,
-                'n_estimators': self.n_estimators,
-                'silent': self.silent,
-                'objective': self.objective
-                }
-
     def get_xgb_params(self):
-        return {'eta': self.learning_rate,
-                'max_depth': self.max_depth,
-                'silent': 1 if self.silent else 0,
-                'objective': self.objective
-                }
+        xgb_params = self.get_params()
+
+        xgb_params['silent'] = 1 if self.silent else 0
+
+        if self.nthread <= 0:
+            xgb_params.pop('nthread', None)
+        return xgb_params
 
     def fit(self, X, y):
         trainDmatrix = DMatrix(X, label=y)
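
A note on the get_xgb_params change in this hunk: with the hand-written get_params override removed, the class relies on scikit-learn's BaseEstimator.get_params, which collects the keyword arguments declared in __init__ from the same-named instance attributes. get_xgb_params then only converts silent to 0/1 and pops nthread when it is non-positive, letting xgboost fall back to its own thread default. A rough sketch of the resulting behavior, not part of the diff, assuming this wrapper module is importable:

    from xgboost import XGBModel  # wrapper/xgboost.py on the import path

    model = XGBModel()                 # all defaults, including nthread=-1
    params = model.get_xgb_params()
    # silent has been converted to an int for xgboost:
    assert params['silent'] == 1
    # nthread was popped because nthread <= 0, so xgboost chooses
    # the thread count itself:
    assert 'nthread' not in params
    # every other __init__ argument passes through under its
    # scikit-learn name (note: 'learning_rate', not the old 'eta' key):
    assert params['learning_rate'] == 0.1
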
@@ -818,8 +845,12 @@ def predict(self, X):
 
 
 class XGBClassifier(XGBModel, ClassifierMixin):
-    def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100, silent=True, objective="binary:logistic"):
-        super(XGBClassifier, self).__init__(max_depth, learning_rate, n_estimators, silent, objective)
+    def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100, silent=True, objective="binary:logistic",
+                 nthread=-1, gamma=0, min_child_weight=1, max_delta_step=0, subsample=1, colsample_bytree=1,
+                 base_score=0.5, seed=0):
+        super(XGBClassifier, self).__init__(max_depth, learning_rate, n_estimators, silent, objective,
+                                            nthread, gamma, min_child_weight, max_delta_step, subsample, colsample_bytree,
+                                            base_score, seed)
 
     def fit(self, X, y, sample_weight=None):
         y_values = list(np.unique(y))
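
For reference, a minimal usage sketch of the widened constructor, assuming wrapper/xgboost.py is on the import path and numpy plus scikit-learn are installed (the data is made up for illustration):

    import numpy as np
    from xgboost import XGBClassifier

    X = np.random.rand(100, 4)           # hypothetical feature matrix
    y = (X[:, 0] > 0.5).astype(int)      # hypothetical binary labels

    # The newly exposed knobs can now be set directly on the
    # scikit-learn-style estimator instead of a raw param dict.
    clf = XGBClassifier(max_depth=4, learning_rate=0.05, n_estimators=200,
                        nthread=2, gamma=0.1, min_child_weight=2,
                        subsample=0.8, colsample_bytree=0.8, seed=42)
    clf.fit(X, y)
    preds = clf.predict(X)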
