Commit cd048c6

Extended documentation
perellonieto committed May 2, 2024
1 parent 851ad1d commit cd048c6
Showing 8 changed files with 198 additions and 484 deletions.
102 changes: 102 additions & 0 deletions examples/xmpl_quickstart.py
@@ -0,0 +1,102 @@
"""
=============================
Quickstart
=============================
This example shows a simple comparison of the expected calibration error of
an uncalibrated classifier against its calibrated counterpart.
"""
# Author: Miquel Perello Nieto <[email protected]>
# License: new BSD

print(__doc__)

##############################################################################
# First choose a classifier

from sklearn.naive_bayes import GaussianNB

clf = GaussianNB()

##############################################################################
# And a dataset

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(
n_samples=100000, n_features=20, n_informative=4, n_redundant=4,
random_state=42
)

X_train, X_test, Y_train, Y_test = train_test_split(X, y)

##############################################################################
# After training we can check the classifier's accuracy on the test set

clf.fit(X_train, Y_train)

n_correct = sum(clf.predict(X_test) == Y_test)
n_test = Y_test.shape[0]

print(f"The classifier gets {n_correct} correct "
f"predictions out of {n_test}")

##############################################################################
# We can assess the confidence expected calibration error (conf-ECE)

from pycalib.metrics import conf_ECE

scores = clf.predict_proba(X_test)
cece = conf_ECE(Y_test, scores, bins=15)

print(f"The classifier gets a confidence expected "
f"calibration error of {cece:0.2f}")
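
##############################################################################
# To make the metric concrete, here is a hand-rolled estimate of the same
# kind of quantity (an illustrative sketch, not pycalib's implementation;
# the exact binning may differ): bin the winning-class confidences into 15
# equal-width bins and average |bin accuracy - mean bin confidence| weighted
# by the bin counts.

import numpy as np

confidences = scores.max(axis=1)
predictions = scores.argmax(axis=1)
bin_ids = np.minimum((confidences * 15).astype(int), 14)

ece_manual = 0.0
for b in range(15):
    mask = bin_ids == b
    if mask.any():
        bin_acc = (predictions[mask] == Y_test[mask]).mean()
        ece_manual += mask.mean() * abs(bin_acc - confidences[mask].mean())

print(f"Hand-rolled conf-ECE estimate: {ece_manual:0.2f}")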

##############################################################################
# Let's look at its reliability diagram: the empirical accuracy in each score
# bin plotted against the mean confidence, where deviations from the diagonal
# indicate miscalibration

from pycalib.visualisations import plot_reliability_diagram

plot_reliability_diagram(labels=Y_test, scores=scores, show_histogram=True,
show_bars=True, show_gaps=True)

##############################################################################
# We can see how calibration can improve the conf-ECE

from pycalib.models import IsotonicCalibration
cal = IsotonicCalibration()
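
##############################################################################
# Isotonic calibration fits a monotone (non-decreasing) map from the raw
# scores to corrected probabilities. To illustrate the idea only (using plain
# scikit-learn here, not pycalib's internal implementation), such a map for
# the positive-class scores can be fitted like this:

from sklearn.isotonic import IsotonicRegression

iso = IsotonicRegression(out_of_bounds='clip')
iso.fit(clf.predict_proba(X_train)[:, 1], Y_train)
print(iso.predict(scores[:5, 1]))  # remapped positive-class probabilities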

##############################################################################
# Now we can put together a probabilistic classifier with the chosen calibration
# method

from pycalib.models import CalibratedModel

cal_clf = CalibratedModel(base_estimator=clf, calibrator=cal,
fit_estimator=False)

##############################################################################
# Now we can fit the calibrated model: with fit_estimator=False the already
# trained classifier is kept as is and only the calibrator is fitted.

cal_clf.fit(X_train, Y_train)
n_correct = sum(cal_clf.predict(X_test) == Y_test)

print(f"The calibrated classifier gets {n_correct} "
f"correct predictions out of {n_test}")

scores_cal = cal_clf.predict_proba(X_test)
cece = conf_ECE(Y_test, scores_cal, bins=15)

print(f"The calibrated classifier gets a confidence "
f"expected calibration error of {cece:0.2f}")

##############################################################################
# Finally, let's look at the reliability diagram of the calibrated classifier

plot_reliability_diagram(labels=Y_test, scores=scores_cal, show_histogram=True,
show_bars=True, show_gaps=True)
7 changes: 5 additions & 2 deletions pycalib/metrics.py
@@ -723,6 +723,7 @@ def full_ECE(y_true, probs, bins=15, power=1):

return s


# TODO: Speed up computation.
def _label_resampling(probs):
c = probs.cumsum(axis=1)
@@ -732,11 +733,13 @@ def _label_resampling(probs):
y[range(len(probs)), choices] = 1
return y


# Faster version of the previous _label_resampling function
def get_one_hot(targets, nb_classes):
res = np.eye(nb_classes)[np.array(targets).reshape(-1)]
return res.reshape(list(targets.shape)+[nb_classes])
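
# For example (illustrative, not part of the commit):
# get_one_hot(np.array([0, 2, 1]), 3) returns the rows of np.eye(3) indexed
# by the targets, i.e. [[1, 0, 0], [0, 0, 1], [0, 1, 0]].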


def _label_resampling_v2(probs):
c = probs.cumsum(axis=1)
u = np.random.rand(len(c), 1)
@@ -745,7 +748,6 @@ def _label_resampling_v2(probs):
return y



# TODO: Speed up computation.
def _score_sampling(probs, samples=10000, ece_function=None):

@@ -760,7 +762,8 @@ def _score_sampling(probs, samples=10000, ece_function=None):


# This uses all available CPUs, reducing the time by roughly that factor
def _score_sampling_v2(probs, samples=10000, ece_function=None, processes=None):
def _score_sampling_v2(probs, samples=10000, ece_function=None,
processes=None):

probs = np.array(probs)

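The comment above says _score_sampling_v2 spreads the sampling over all
available CPUs, but its body is collapsed in this diff. A minimal sketch of
that kind of parallel pattern (assumed from the visible pieces, not the
actual collapsed implementation): reuse the resampling trick shown in
_label_resampling_v2 and map it over a process pool.

from multiprocessing import Pool

import numpy as np

def _one_ece_sample(args):
    probs, ece_function = args
    # Draw one resampled one-hot label matrix from the predicted
    # probabilities (same cumsum trick as _label_resampling_v2), then
    # score it with the given ECE function.
    c = probs.cumsum(axis=1)
    u = np.random.rand(len(c), 1)
    y = np.zeros_like(probs)
    y[np.arange(len(c)), (u < c).argmax(axis=1)] = 1
    return ece_function(y, probs)

def score_sampling_parallel(probs, samples=10000, ece_function=None,
                            processes=None):
    probs = np.array(probs)
    with Pool(processes=processes) as pool:  # processes=None uses all CPUs
        return np.array(pool.map(_one_ece_sample,
                                 [(probs, ece_function)] * samples))
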
3 changes: 1 addition & 2 deletions pycalib/models/__init__.py
@@ -3,5 +3,4 @@
LogisticCalibration,
SigmoidCalibration,
BinningCalibration,
CalibratedModel,
CalibratedClassifierCV)
CalibratedModel)