From 29e76c7ac00609845fd5abc6f07b2018e448ca08 Mon Sep 17 00:00:00 2001
From: HangLi
Date: Thu, 23 Apr 2015 11:34:59 -0700
Subject: [PATCH 1/4] add more params in sklearn wrapper.

---
 wrapper/xgboost.py | 52 ++++++++++++++++++++++++++++++++--------------
 1 file changed, 36 insertions(+), 16 deletions(-)

diff --git a/wrapper/xgboost.py b/wrapper/xgboost.py
index 5bb6377c56c1..fac4da1861f3 100644
--- a/wrapper/xgboost.py
+++ b/wrapper/xgboost.py
@@ -753,37 +753,53 @@ class XGBModel(BaseEstimator):
     ----------
     max_depth : int
         Maximum tree depth for base learners.
+    min_child_weight : int
+        minimum sum of instance weight(hessian) needed in a child.
     learning_rate : float
         Boosting learning rate (xgb's "eta")
     n_estimators : int
         Number of boosted trees to fit.
     silent : boolean
         Whether to print messages while running boosting.
+    objective : string
+        Specify the learning task and the corresponding learning objective.
+    subsample : float
+        Subsample ratio of the training instance.
+    colsample_bytree : float
+        Subsample ratio of columns when constructing each tree.
+    eval_metric : string
+        Evaluation metrics for validation data.
+    nthread : int
+        Number of parallel threads used to run xgboost.
+    seed : int
+        Random number seed.
     """
-    def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100, silent=True, objective="reg:linear"):
+    def __init__(self, max_depth=3, min_child_weight=1, learning_rate=0.1, n_estimators=100,
+                 silent=True, objective="reg:linear", subsample=1, colsample_bytree=1, eval_metric='error',
+                 nthread=-1, seed=0):
         if not SKLEARN_INSTALLED:
             raise Exception('sklearn needs to be installed in order to use this module')
         self.max_depth = max_depth
+        self.min_child_weight = min_child_weight
         self.learning_rate = learning_rate
         self.silent = silent
         self.n_estimators = n_estimators
         self.objective = objective
+        self.subsample = subsample
+        self.colsample_bytree = colsample_bytree
+        self.eval_metric = eval_metric
+        self.nthread = nthread
+        self.seed = seed
         self._Booster = Booster()

-    def get_params(self, deep=True):
-        return {'max_depth': self.max_depth,
-                'learning_rate': self.learning_rate,
-                'n_estimators': self.n_estimators,
-                'silent': self.silent,
-                'objective': self.objective
-                }
-
     def get_xgb_params(self):
-        return {'eta': self.learning_rate,
-                'max_depth': self.max_depth,
-                'silent': 1 if self.silent else 0,
-                'objective': self.objective
-                }
+        xgb_params = self.get_params()
+
+        xgb_params['silent'] = 1 if self.silent else 0
+
+        if self.nthread <= 0:
+            xgb_params.pop('nthread', None)
+        return xgb_params

     def fit(self, X, y):
         trainDmatrix = DMatrix(X, label=y)
@@ -796,8 +812,12 @@ def predict(self, X):


 class XGBClassifier(XGBModel, ClassifierMixin):
-    def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100, silent=True, objective="binary:logistic"):
-        super(XGBClassifier, self).__init__(max_depth, learning_rate, n_estimators, silent, objective)
+    def __init__(self, max_depth=3, min_child_weight=1, learning_rate=0.1, n_estimators=100,
+                 silent=True, objective="binary:logistic", subsample=1, colsample_bytree=1, eval_metric='error',
+                 nthread=-1, seed=0):
+        super(XGBClassifier, self).__init__(max_depth, min_child_weight, learning_rate, n_estimators,
+                                            silent, objective, subsample, colsample_bytree, eval_metric,
+                                            nthread, seed)

     def fit(self, X, y, sample_weight=None):
         y_values = list(np.unique(y))
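
Note: a minimal usage sketch of what this patch changes, assuming the patched
wrapper/xgboost.py is importable as `xgboost` (illustrative only, not part of
the patch):

    from xgboost import XGBModel

    model = XGBModel(max_depth=4, learning_rate=0.05, nthread=-1)

    # get_params() is now inherited from sklearn's BaseEstimator, which reads
    # the explicit __init__ keywords, so the hand-written override was dropped.
    print(model.get_params())

    # get_xgb_params() reuses that dict, maps silent to 0/1, and drops a
    # non-positive nthread so the native booster falls back to its own default.
    print(model.get_xgb_params())
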
From fcb833373b3d2c027ed2fb13f12b53d31e1a14f1 Mon Sep 17 00:00:00 2001
From: HangLi
Date: Thu, 23 Apr 2015 16:25:31 -0700
Subject: [PATCH 2/4] reorder parameters

---
 wrapper/xgboost.py | 35 +++++++++++++++++++----------------
 1 file changed, 19 insertions(+), 16 deletions(-)

diff --git a/wrapper/xgboost.py b/wrapper/xgboost.py
index fac4da1861f3..5c8199c4d978 100644
--- a/wrapper/xgboost.py
+++ b/wrapper/xgboost.py
@@ -753,8 +753,6 @@ class XGBModel(BaseEstimator):
     ----------
     max_depth : int
         Maximum tree depth for base learners.
-    min_child_weight : int
-        minimum sum of instance weight(hessian) needed in a child.
     learning_rate : float
         Boosting learning rate (xgb's "eta")
     n_estimators : int
@@ -763,33 +761,38 @@ class XGBModel(BaseEstimator):
         Whether to print messages while running boosting.
     objective : string
         Specify the learning task and the corresponding learning objective.
+
+    nthread : int
+        Number of parallel threads used to run xgboost.
+    min_child_weight : int
+        minimum sum of instance weight(hessian) needed in a child.
     subsample : float
         Subsample ratio of the training instance.
     colsample_bytree : float
         Subsample ratio of columns when constructing each tree.
     eval_metric : string
         Evaluation metrics for validation data.
-    nthread : int
-        Number of parallel threads used to run xgboost.
     seed : int
         Random number seed.
     """
-    def __init__(self, max_depth=3, min_child_weight=1, learning_rate=0.1, n_estimators=100,
-                 silent=True, objective="reg:linear", subsample=1, colsample_bytree=1, eval_metric='error',
-                 nthread=-1, seed=0):
+    def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100, silent=True, objective="reg:linear",
+                 nthread=-1, min_child_weight=1, subsample=1, colsample_bytree=1,
+                 eval_metric='error', seed=0):
         if not SKLEARN_INSTALLED:
             raise Exception('sklearn needs to be installed in order to use this module')
         self.max_depth = max_depth
-        self.min_child_weight = min_child_weight
         self.learning_rate = learning_rate
-        self.silent = silent
         self.n_estimators = n_estimators
+        self.silent = silent
         self.objective = objective
+
+        self.nthread = nthread
+        self.min_child_weight = min_child_weight
         self.subsample = subsample
         self.colsample_bytree = colsample_bytree
         self.eval_metric = eval_metric
-        self.nthread = nthread
         self.seed = seed
+
         self._Booster = Booster()

     def get_xgb_params(self):
@@ -812,12 +815,12 @@ def predict(self, X):


 class XGBClassifier(XGBModel, ClassifierMixin):
-    def __init__(self, max_depth=3, min_child_weight=1, learning_rate=0.1, n_estimators=100,
-                 silent=True, objective="binary:logistic", subsample=1, colsample_bytree=1, eval_metric='error',
-                 nthread=-1, seed=0):
-        super(XGBClassifier, self).__init__(max_depth, min_child_weight, learning_rate, n_estimators,
-                                            silent, objective, subsample, colsample_bytree, eval_metric,
-                                            nthread, seed)
+    def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100, silent=True, objective="binary:logistic",
+                 nthread=-1, min_child_weight=1, subsample=1, colsample_bytree=1,
+                 eval_metric='error', seed=0):
+        super(XGBClassifier, self).__init__(max_depth, learning_rate, n_estimators, silent, objective,
+                                            nthread, min_child_weight, subsample, colsample_bytree,
+                                            eval_metric, seed)

     def fit(self, X, y, sample_weight=None):
         y_values = list(np.unique(y))
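
Note: keeping every booster option as an explicit __init__ keyword (here
regrouped to mirror xgboost's own parameter docs) is what lets sklearn's
model-selection tooling clone and tune the estimator. A sketch using the
sklearn module path of the time; the data and grid values are arbitrary:

    import numpy as np
    from sklearn.grid_search import GridSearchCV  # pre-0.18 sklearn path
    from xgboost import XGBClassifier  # the patched wrapper/xgboost.py

    X = np.random.rand(100, 10)
    y = np.random.randint(2, size=100)

    # GridSearchCV drives get_params()/set_params(), which BaseEstimator
    # derives from the explicit keyword arguments added in this series.
    search = GridSearchCV(XGBClassifier(nthread=1, seed=0),
                          param_grid={'max_depth': [2, 4],
                                      'min_child_weight': [1, 5]})
    search.fit(X, y)
    print(search.best_params_)
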
From 0058ebac9a9085d334d77f402cd7ed2f197e46d2 Mon Sep 17 00:00:00 2001
From: HangLi
Date: Fri, 24 Apr 2015 08:50:22 -0700
Subject: [PATCH 3/4] add more params

---
 wrapper/xgboost.py | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/wrapper/xgboost.py b/wrapper/xgboost.py
index 5c8199c4d978..7919686f629a 100644
--- a/wrapper/xgboost.py
+++ b/wrapper/xgboost.py
@@ -764,20 +764,27 @@ class XGBModel(BaseEstimator):

     nthread : int
         Number of parallel threads used to run xgboost.
+    gamma : float
+        Minimum loss reduction required to make a further partition on a leaf node of the tree.
     min_child_weight : int
-        minimum sum of instance weight(hessian) needed in a child.
+        Minimum sum of instance weight(hessian) needed in a child.
+    max_delta_step : int
+        Maximum delta step we allow each tree's weight estimation to be.
     subsample : float
         Subsample ratio of the training instance.
     colsample_bytree : float
         Subsample ratio of columns when constructing each tree.
+
+    base_score:
+        The initial prediction score of all instances, global bias.
     eval_metric : string
         Evaluation metrics for validation data.
     seed : int
         Random number seed.
     """
     def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100, silent=True, objective="reg:linear",
-                 nthread=-1, min_child_weight=1, subsample=1, colsample_bytree=1,
-                 eval_metric='error', seed=0):
+                 nthread=-1, gamma=0, min_child_weight=1, max_delta_step=0, subsample=1, colsample_bytree=1,
+                 base_score=0.5, eval_metric='error', seed=0):
         if not SKLEARN_INSTALLED:
             raise Exception('sklearn needs to be installed in order to use this module')
         self.max_depth = max_depth
@@ -787,9 +794,13 @@ def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100, silent=True
         self.objective = objective

         self.nthread = nthread
+        self.gamma = gamma
         self.min_child_weight = min_child_weight
+        self.max_delta_step = max_delta_step
         self.subsample = subsample
         self.colsample_bytree = colsample_bytree
+
+        self.base_score = base_score
         self.eval_metric = eval_metric
         self.seed = seed

@@ -816,11 +827,11 @@ def predict(self, X):

 class XGBClassifier(XGBModel, ClassifierMixin):
     def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100, silent=True, objective="binary:logistic",
-                 nthread=-1, min_child_weight=1, subsample=1, colsample_bytree=1,
-                 eval_metric='error', seed=0):
+                 nthread=-1, gamma=0, min_child_weight=1, max_delta_step=0, subsample=1, colsample_bytree=1,
+                 base_score=0.5, eval_metric='error', seed=0):
         super(XGBClassifier, self).__init__(max_depth, learning_rate, n_estimators, silent, objective,
-                                            nthread, min_child_weight, subsample, colsample_bytree,
-                                            eval_metric, seed)
+                                            nthread, gamma, min_child_weight, max_delta_step, subsample, colsample_bytree,
+                                            base_score, eval_metric, seed)

     def fit(self, X, y, sample_weight=None):
         y_values = list(np.unique(y))
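
Note: the three new keywords (gamma, max_delta_step, base_score) pass through
get_xgb_params() to the native booster exactly like the earlier ones. A short
sketch with illustrative values, not recommendations:

    from xgboost import XGBClassifier  # the patched wrapper/xgboost.py

    clf = XGBClassifier(
        gamma=1.0,         # require a loss reduction of at least 1.0 per split
        max_delta_step=1,  # cap each tree's weight update; can help with
                           # heavily imbalanced logistic problems
        base_score=0.1,    # global bias: start from a 0.1 prior instead of 0.5
        seed=0,
    )
    print(clf.get_xgb_params())  # the new keys appear alongside the old ones
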
""" def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100, silent=True, objective="reg:linear", nthread=-1, gamma=0, min_child_weight=1, max_delta_step=0, subsample=1, colsample_bytree=1, - base_score=0.5, eval_metric='error', seed=0): + base_score=0.5, seed=0): if not SKLEARN_INSTALLED: raise Exception('sklearn needs to be installed in order to use this module') self.max_depth = max_depth @@ -801,7 +799,6 @@ def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100, silent=True self.colsample_bytree = colsample_bytree self.base_score = base_score - self.eval_metric = eval_metric self.seed = seed self._Booster = Booster() @@ -828,10 +825,10 @@ def predict(self, X): class XGBClassifier(XGBModel, ClassifierMixin): def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100, silent=True, objective="binary:logistic", nthread=-1, gamma=0, min_child_weight=1, max_delta_step=0, subsample=1, colsample_bytree=1, - base_score=0.5, eval_metric='error', seed=0): + base_score=0.5, seed=0): super(XGBClassifier, self).__init__(max_depth, learning_rate, n_estimators, silent, objective, nthread, gamma, min_child_weight, max_delta_step, subsample, colsample_bytree, - base_score, eval_metric, seed) + base_score, seed) def fit(self, X, y, sample_weight=None): y_values = list(np.unique(y))