Skip to content

Commit

Permalink
working with the xgboost libraries.
Browse files Browse the repository at this point in the history
TODO:
- check whether the results are times or probabilities
- integrate into the patient-similarity notebook
  • Loading branch information
Raul committed Jul 4, 2022
1 parent f439d32 commit 28a2732
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 8 deletions.
2 changes: 1 addition & 1 deletion xgbse/_debiased_bce.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ def fit(
)
self.feature_importances_ = self.bst.get_score()
# predicting and encoding leaves
self.encoder = OneHotEncoder()
self.encoder = OneHotEncoder(handle_unknown="ignore")
leaves = self.bst.predict(
dtrain, pred_leaf=True, iteration_range=(0, self.bst.best_iteration + 1)
)
Expand Down
8 changes: 4 additions & 4 deletions xgbse/_kaplan_neighbors.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,11 +504,11 @@ def predict(self, X, return_ci=False, return_interval_probs=False, enable_catego
leaves = self.bst.predict(
d_matrix, pred_leaf=True, iteration_range=(0, self.bst.best_iteration + 1)
)

leaves_index = [x for x in leaves if x in self._train_survival.index]
# searching for kaplan meier curves in leaves
preds_df = self._train_survival.loc[leaves].reset_index(drop=True)
upper_ci = self._train_upper_ci.loc[leaves].reset_index(drop=True)
lower_ci = self._train_lower_ci.loc[leaves].reset_index(drop=True)
preds_df = self._train_survival.loc[leaves_index].reset_index(drop=True)
upper_ci = self._train_upper_ci.loc[leaves_index].reset_index(drop=True)
lower_ci = self._train_lower_ci.loc[leaves_index].reset_index(drop=True)

if return_ci and return_interval_probs:
raise ValueError(
Expand Down
11 changes: 9 additions & 2 deletions xgbse/_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def fit(self, X, y, **kwargs):

return self

def predict(self, X, return_ci=False, ci_width=0.683, return_interval_probs=False):
def predict(self, X, return_ci=False, ci_width=0.683, return_interval_probs=False, enable_categorical=False):

"""
Predicts survival as given by the base estimator. A survival function, its upper and lower
Expand All @@ -73,6 +73,13 @@ def predict(self, X, return_ci=False, ci_width=0.683, return_interval_probs=Fals
ci_width (Float): width of confidence interval
enable_categorical: boolean, optional
.. versionadded:: 1.3.0
.. note:: This parameter is experimental
Experimental support of specializing for categorical features. Do not set
to True unless you are interested in development. Also, JSON/UBJSON
serialization format is required.
Returns:
([(pd.DataFrame, np.array, np.array), pd.DataFrame]):
preds_df: A dataframe of survival probabilities
Expand All @@ -92,7 +99,7 @@ def predict(self, X, return_ci=False, ci_width=0.683, return_interval_probs=Fals
for estimator in self.estimators_:

temp_preds = estimator.predict(
X, return_interval_probs=return_interval_probs
X, return_interval_probs=return_interval_probs, enable_categorical=enable_categorical
)
preds_list.append(temp_preds)

Expand Down
2 changes: 1 addition & 1 deletion xgbse/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def approx_brier_score(y_true, survival, aggregate="mean"):
# adding censoring distribution survival at event
event_time_windows = _match_times_to_windows(times, survival.columns)
scoring_df["cens_at_event"] = censoring_dist[event_time_windows].iloc[0].values

# TODO: something breaks when this is used with sklearn.model_selection.cross_val_score
# list of window results
window_results = []

Expand Down

0 comments on commit 28a2732

Please sign in to comment.