Commit cd048c6

Extended documentation
perellonieto committed May 2, 2024
1 parent 851ad1d commit cd048c6
Showing 8 changed files with 198 additions and 484 deletions.
102 changes: 102 additions & 0 deletions examples/xmpl_quickstart.py
@@ -0,0 +1,102 @@
"""
=============================
Quickstart
=============================
This example shows a simple comparison of the expected calibration error of
an uncalibrated classifier against its calibrated counterpart.
"""
# Author: Miquel Perello Nieto <[email protected]>
# License: new BSD

print(__doc__)

##############################################################################
# First choose a classifier

from sklearn.naive_bayes import GaussianNB

clf = GaussianNB()

##############################################################################
# And a dataset

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(
n_samples=100000, n_features=20, n_informative=4, n_redundant=4,
random_state=42
)

X_train, X_test, Y_train, Y_test = train_test_split(X, y)

##############################################################################
# After training we can check the classifier's accuracy on the test set

clf.fit(X_train, Y_train)

n_correct = sum(clf.predict(X_test) == Y_test)
n_test = Y_test.shape[0]

print(f"The classifier gets {n_correct} correct "
f"predictions out of {n_test}")

##############################################################################
# We can assess the confidence expected calibration error (conf-ECE)

from pycalib.metrics import conf_ECE

scores = clf.predict_proba(X_test)
cece = conf_ECE(Y_test, scores, bins=15)

print(f"The classifier gets a confidence expected "
f"calibration error of {cece:0.2f}")
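
##############################################################################
# To make the metric concrete, here is a hand-rolled estimate of the same
# kind of quantity (an illustrative sketch, not pycalib's implementation;
# the exact binning may differ): bin the winning-class confidences into 15
# equal-width bins and average |bin accuracy - mean bin confidence| weighted
# by the bin counts.

import numpy as np

confidences = scores.max(axis=1)
predictions = scores.argmax(axis=1)
bin_ids = np.minimum((confidences * 15).astype(int), 14)

ece_manual = 0.0
for b in range(15):
    mask = bin_ids == b
    if mask.any():
        bin_acc = (predictions[mask] == Y_test[mask]).mean()
        ece_manual += mask.mean() * abs(bin_acc - confidences[mask].mean())

print(f"Hand-rolled conf-ECE estimate: {ece_manual:0.2f}")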

##############################################################################
# Let's look at its reliability diagram: the empirical accuracy in each score
# bin plotted against the mean confidence, where deviations from the diagonal
# indicate miscalibration

from pycalib.visualisations import plot_reliability_diagram

plot_reliability_diagram(labels=Y_test, scores=scores, show_histogram=True,
show_bars=True, show_gaps=True)

##############################################################################
# We can see how calibration can improve the conf-ECE

from pycalib.models import IsotonicCalibration
cal = IsotonicCalibration()
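
##############################################################################
# Isotonic calibration fits a monotone (non-decreasing) map from the raw
# scores to corrected probabilities. To illustrate the idea only (using plain
# scikit-learn here, not pycalib's internal implementation), such a map for
# the positive-class scores can be fitted like this:

from sklearn.isotonic import IsotonicRegression

iso = IsotonicRegression(out_of_bounds='clip')
iso.fit(clf.predict_proba(X_train)[:, 1], Y_train)
print(iso.predict(scores[:5, 1]))  # remapped positive-class probabilities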

##############################################################################
# Now we can put together a probabilistic classifier with the chosen calibration
# method

from pycalib.models import CalibratedModel

cal_clf = CalibratedModel(base_estimator=clf, calibrator=cal,
fit_estimator=False)

##############################################################################
# Now we can fit the calibrated model: with fit_estimator=False the already
# trained classifier is kept as is and only the calibrator is fitted.

cal_clf.fit(X_train, Y_train)
n_correct = sum(cal_clf.predict(X_test) == Y_test)

print(f"The calibrated classifier gets {n_correct} "
f"correct predictions out of {n_test}")

scores_cal = cal_clf.predict_proba(X_test)
cece = conf_ECE(Y_test, scores_cal, bins=15)

print(f"The calibrated classifier gets a confidence "
f"expected calibration error of {cece:0.2f}")

##############################################################################
# Finally, let's look at the reliability diagram of the calibrated classifier

plot_reliability_diagram(labels=Y_test, scores=scores_cal, show_histogram=True,
show_bars=True, show_gaps=True)
7 changes: 5 additions & 2 deletions pycalib/metrics.py
@@ -723,6 +723,7 @@ def full_ECE(y_true, probs, bins=15, power=1):

return s


# TODO: Speed up computation.
def _label_resampling(probs):
c = probs.cumsum(axis=1)
@@ -732,11 +733,13 @@ def _label_resampling(probs):
y[range(len(probs)), choices] = 1
return y


# Faster version of the previous _label_resampling function
def get_one_hot(targets, nb_classes):
res = np.eye(nb_classes)[np.array(targets).reshape(-1)]
return res.reshape(list(targets.shape)+[nb_classes])
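
# For example (illustrative, not part of the commit):
# get_one_hot(np.array([0, 2, 1]), 3) returns the rows of np.eye(3) indexed
# by the targets, i.e. [[1, 0, 0], [0, 0, 1], [0, 1, 0]].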


def _label_resampling_v2(probs):
c = probs.cumsum(axis=1)
u = np.random.rand(len(c), 1)
@@ -745,7 +748,6 @@ def _label_resampling_v2(probs):
return y



# TODO: Speed up computation.
def _score_sampling(probs, samples=10000, ece_function=None):

@@ -760,7 +762,8 @@ def _score_sampling(probs, samples=10000, ece_function=None):


# This uses all available CPUs, reducing the time by roughly that factor
def _score_sampling_v2(probs, samples=10000, ece_function=None, processes=None):
def _score_sampling_v2(probs, samples=10000, ece_function=None,
processes=None):

probs = np.array(probs)

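The comment above says _score_sampling_v2 spreads the sampling over all
available CPUs, but its body is collapsed in this diff. A minimal sketch of
that kind of parallel pattern (assumed from the visible pieces, not the
actual collapsed implementation): reuse the resampling trick shown in
_label_resampling_v2 and map it over a process pool.

from multiprocessing import Pool

import numpy as np

def _one_ece_sample(args):
    probs, ece_function = args
    # Draw one resampled one-hot label matrix from the predicted
    # probabilities (same cumsum trick as _label_resampling_v2), then
    # score it with the given ECE function.
    c = probs.cumsum(axis=1)
    u = np.random.rand(len(c), 1)
    y = np.zeros_like(probs)
    y[np.arange(len(c)), (u < c).argmax(axis=1)] = 1
    return ece_function(y, probs)

def score_sampling_parallel(probs, samples=10000, ece_function=None,
                            processes=None):
    probs = np.array(probs)
    with Pool(processes=processes) as pool:  # processes=None uses all CPUs
        return np.array(pool.map(_one_ece_sample,
                                 [(probs, ece_function)] * samples))
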
3 changes: 1 addition & 2 deletions pycalib/models/__init__.py
@@ -3,5 +3,4 @@
LogisticCalibration,
SigmoidCalibration,
BinningCalibration,
CalibratedModel,
CalibratedClassifierCV)
CalibratedModel)