diff --git a/examples/xmpl_quickstart.py b/examples/xmpl_quickstart.py
new file mode 100644
index 0000000..f56f759
--- /dev/null
+++ b/examples/xmpl_quickstart.py
@@ -0,0 +1,102 @@
+"""
+=============================
+Quickstart
+=============================
+
+This example shows a simple comparison of the expected calibration error of a
+non-calibrated classifier against its calibrated counterpart.
+"""
+# Author: Miquel Perello Nieto
+# License: new BSD
+
+print(__doc__)
+
+##############################################################################
+# First choose a classifier
+
+from sklearn.naive_bayes import GaussianNB
+
+clf = GaussianNB()
+
+##############################################################################
+# And a dataset
+
+from sklearn.datasets import make_classification
+from sklearn.model_selection import train_test_split
+
+X, y = make_classification(
+    n_samples=100000, n_features=20, n_informative=4, n_redundant=4,
+    random_state=42
+)
+
+X_train, X_test, Y_train, Y_test = train_test_split(X, y)
+
+##############################################################################
+# We can check the classifier's accuracy after training
+
+clf.fit(X_train, Y_train)
+
+n_correct = sum(clf.predict(X_test) == Y_test)
+n_test = Y_test.shape[0]
+
+print(f"The classifier gets {n_correct} correct "
+      f"predictions out of {n_test}")
+
+##############################################################################
+# We can assess the confidence expected calibration error (conf-ECE)
+
+from pycalib.metrics import conf_ECE
+
+scores = clf.predict_proba(X_test)
+cece = conf_ECE(Y_test, scores, bins=15)
+
+print(f"The classifier gets a confidence expected "
+      f"calibration error of {cece:0.2f}")
+
+##############################################################################
+# Let's look at its reliability diagram
+
+from pycalib.visualisations import plot_reliability_diagram
+
+plot_reliability_diagram(labels=Y_test, scores=scores, show_histogram=True,
+                         show_bars=True, show_gaps=True)
+
+##############################################################################
+# We can see how calibration can improve the conf-ECE
+
+from pycalib.models import IsotonicCalibration
+
+cal = IsotonicCalibration()
+
+##############################################################################
+# Now we can put together a probabilistic classifier with the chosen
+# calibration method
+
+from pycalib.models import CalibratedModel
+
+cal_clf = CalibratedModel(base_estimator=clf, calibrator=cal,
+                          fit_estimator=False)
+
+##############################################################################
+# Now we can fit the calibrated model. With fit_estimator=False only the
+# calibrator is trained; the classifier above is kept as it is.
+
+cal_clf.fit(X_train, Y_train)
+n_correct = sum(cal_clf.predict(X_test) == Y_test)
+
+print(f"The calibrated classifier gets {n_correct} "
+      f"correct predictions out of {n_test}")
+
+scores_cal = cal_clf.predict_proba(X_test)
+cece = conf_ECE(Y_test, scores_cal, bins=15)
+
+print(f"The calibrated classifier gets a confidence "
+      f"expected calibration error of {cece:0.2f}")
+
+##############################################################################
+# Finally, the reliability diagram of the calibrated classifier.
+
+from pycalib.visualisations import plot_reliability_diagram
+
+plot_reliability_diagram(labels=Y_test, scores=scores_cal, show_histogram=True,
+                         show_bars=True, show_gaps=True)
diff --git a/pycalib/metrics.py b/pycalib/metrics.py
index c4f522e..3c8d000 100644
--- a/pycalib/metrics.py
+++ b/pycalib/metrics.py
@@ -723,6 +723,7 @@ def full_ECE(y_true, probs, bins=15, power=1):
 
     return s
 
+
 # TODO: Speed up computation.
 def _label_resampling(probs):
     c = probs.cumsum(axis=1)
@@ -732,11 +733,13 @@ def _label_resampling(probs):
     y[range(len(probs)), choices] = 1
     return y
 
+
 # Speed up of the previous label_resampling function
 def get_one_hot(targets, nb_classes):
     res = np.eye(nb_classes)[np.array(targets).reshape(-1)]
     return res.reshape(list(targets.shape)+[nb_classes])
 
+
 def _label_resampling_v2(probs):
     c = probs.cumsum(axis=1)
     u = np.random.rand(len(c), 1)
@@ -745,7 +748,6 @@ def _label_resampling_v2(probs):
     return y
 
 
-
 # TODO: Speed up computation.
 def _score_sampling(probs, samples=10000, ece_function=None):
@@ -760,7 +762,8 @@ def _score_sampling(probs, samples=10000, ece_function=None):
 
 
 # This uses all available CPUS reducing the time by this factor
-def _score_sampling_v2(probs, samples=10000, ece_function=None, processes=None):
+def _score_sampling_v2(probs, samples=10000, ece_function=None,
+                       processes=None):
 
     probs = np.array(probs)
diff --git a/pycalib/models/__init__.py b/pycalib/models/__init__.py
index 3fc1b34..2b26c9f 100644
--- a/pycalib/models/__init__.py
+++ b/pycalib/models/__init__.py
@@ -3,5 +3,4 @@
                           LogisticCalibration,
                           SigmoidCalibration,
                           BinningCalibration,
-                          CalibratedModel,
-                          CalibratedClassifierCV)
+                          CalibratedModel)
diff --git a/pycalib/models/calibrators.py b/pycalib/models/calibrators.py
index 63efe91..eb07caf 100644
--- a/pycalib/models/calibrators.py
+++ b/pycalib/models/calibrators.py
@@ -1,8 +1,11 @@
 import numpy as np
 
+from copy import deepcopy
+
 from scipy.special import expit
 
 from sklearn.base import clone
+
 from sklearn.utils import check_X_y, indexable
 from sklearn.linear_model import LogisticRegression
 from sklearn.calibration import _SigmoidCalibration
@@ -21,6 +24,8 @@
 
 from betacal import BetaCalibration
 
+from typing import Literal
+
 
 class _DummyCalibration(BaseEstimator, RegressorMixin):
     """Dummy Calibration model. The purpose of this class is to give
@@ -89,6 +94,30 @@ def log_encode(x):
 
 
 class LogisticCalibration(LogisticRegression):
+    """Probability calibration with logistic regression (Platt scaling).
+
+    Parameters
+    ----------
+    C : float, default=1.0
+        Inverse of the regularization strength, as in scikit-learn's
+        LogisticRegression.
+    solver : str, default='lbfgs'
+        Solver used to fit the logistic regression.
+    multi_class : str, default='multinomial'
+        Multi-class strategy of the logistic regression.
+    log_transform : bool, default=True
+        Whether to log-transform the classifier scores before fitting the
+        logistic regression.
+
+    Attributes
+    ----------
+    classes_ : array, shape (n_classes)
+        The class labels.
+
+    References
+    ----------
+    .. [1] Probabilistic Outputs for Support Vector Machines and Comparisons
+        to Regularized Likelihood Methods, J. Platt, (1999)
+    """
     def __init__(self, C=1.0, solver='lbfgs', multi_class='multinomial',
                  log_transform=True):
         self.C_grid = C
@@ -144,15 +173,44 @@ def predict(self, *args, **kwargs):
 
 
 class BinningCalibration(BaseEstimator, RegressorMixin):
-    def __init__(self, n_bins=10, strategy='uniform', alpha=1.0):
-        '''
-        alpha : Laplace smoothing (x + a)/(N + 2a)
-        n_bins: Number of bins
-        stragegy:
-                - uniform: for equal width bins
-                - quantile: for equal frequency bins
-                - kmeans: for each bin with same nearest center to a 1D k-means
-        '''
+    """Probability calibration based on binning.
+
+    Parameters
+    ----------
+    n_bins : integer or list of integers
+        If an integer, the number of bins used to partition the score space
+        when estimating the fraction of positives during training.
+        If a list of integers, one BinningCalibration model is fitted per
+        number of bins, and the calibrator that performs best on the
+        validation set is selected as the final calibrator.
+
+    strategy : str {'uniform', 'quantile', 'kmeans'}
+        If 'uniform': bins of equal width.
+        If 'quantile': bins of equal frequency.
+        If 'kmeans': each bin groups the scores closest to one centre of a
+        1D k-means clustering.
+
+    alpha : float
+        Laplace smoothing, (x + alpha) / (N + 2 * alpha).
+
+    Attributes
+    ----------
+    classes_ : array, shape (n_classes)
+        The class labels.
+
+    References
+    ----------
+    .. [1] Obtaining calibrated probability estimates from decision trees
+        and naive Bayesian classifiers, B. Zadrozny & C. Elkan, ICML 2001
+    """
+    _STRATEGIES = Literal["uniform", "quantile", "kmeans"]
+
+    def __init__(self, n_bins=10, strategy: _STRATEGIES = 'uniform',
+                 alpha=1.0):
         self.strategy = strategy
         self.n_bins = n_bins
         self.n_bins_grid = n_bins
@@ -225,19 +283,22 @@ def predict(self, scores, *args, **kwargs):
 
 
 class CalibratedModel(BaseEstimator, ClassifierMixin):
+    ''' A calibrated model (classifier + calibrator).
+
+    Parameters
+    ----------
+    base_estimator : instance BaseEstimator
+        The classifier whose output decision function needs to be calibrated
+        to offer more accurate predict_proba outputs. If fit_estimator=False,
+        the classifier must have been fit already on data.
+
+    calibrator : instance BaseEstimator
+        The calibrator to use.
+
+    fit_estimator : bool, default=True
+        Whether to fit the base_estimator during fit. If False, only the
+        calibrator is fitted and the classifier is assumed to be already
+        trained.
+    '''
     def __init__(self, base_estimator=None, calibrator=None,
                  fit_estimator=True):
-        ''' Initialize a Calibrated model (classifier + calibrator)
-
-        Parameters
-        ----------
-        base_estimator : estimator
-            Classifier instance
-        calibrator : estimator
-            Calibrator instance
-        '''
         self.calibrator = clone(calibrator)
-        self.base_estimator = clone(base_estimator)
+        self.base_estimator = deepcopy(base_estimator)
         self.fit_estimator = fit_estimator
         self.binary = False
@@ -329,455 +390,3 @@ def predict(self, X):
 
         check_is_fitted(self, ["calibrator"])
         return np.argmax(self.predict_proba(X), axis=1)
-
-
-class CalibratedClassifierCV(BaseEstimator, ClassifierMixin):
-    """Probability calibration with isotonic regression, sigmoid or beta.
-
-    With this class, the base_estimator is fit on the train set of the
-    cross-validation generator and the test set is used for calibration.
-    The probabilities for each of the folds are then averaged
-    for prediction. In case cv="prefit" is passed to __init__,
-    it is assumed that base_estimator has been
-    fitted already and all data is used for calibration.
Note that - data for fitting the classifier and for calibrating it must be disjoint. - - Read more in the :ref:`User Guide `. - - Parameters - ---------- - base_estimator : instance BaseEstimator - The classifier whose output decision function needs to be calibrated - to offer more accurate predict_proba outputs. If cv=prefit, the - classifier must have been fit already on data. - - method : None, 'sigmoid', 'isotonic', 'beta', 'beta_am' or 'beta_ab' - The method to use for calibration. Can be 'sigmoid' which - corresponds to Platt's method, 'isotonic' which is a - non-parameteric approach or 'beta', 'beta_am' or 'beta_ab' which - correspond to three different beta calibration methods. It is - not advised to use isotonic calibration with too few calibration - samples ``(<<1000)`` since it tends to overfit. - Use beta models in this case. - - cv : integer, cross-validation generator, iterable or "prefit", optional - Determines the cross-validation splitting strategy. - Possible inputs for cv are: - - - None, to use the default 3-fold cross-validation, - - integer, to specify the number of folds. - - An object to be used as a cross-validation generator. - - An iterable yielding train/test splits. - - For integer/None inputs, if ``y`` is binary or multiclass, - :class:`StratifiedKFold` used. If ``y`` is neither binary nor - multiclass, :class:`KFold` is used. - - Refer :ref:`User Guide ` for the various - cross-validation strategies that can be used here. - - If "prefit" is passed, it is assumed that base_estimator has been - fitted already and all data is used for calibration. - - Attributes - ---------- - classes_ : array, shape (n_classes) - The class labels. - - calibrated_classifiers_: list (len() equal to cv or 1 if cv == "prefit") - The list of calibrated classifiers, one for each cross-validation fold, - which has been fitted on all but the validation fold and calibrated - on the validation fold. - - References - ---------- - .. [1] Obtaining calibrated probability estimates from decision trees - and naive Bayesian classifiers, B. Zadrozny & C. Elkan, ICML 2001 - - .. [2] Transforming Classifier Scores into Accurate Multiclass - Probability Estimates, B. Zadrozny & C. Elkan, (KDD 2002) - - .. [3] Probabilistic Outputs for Support Vector Machines and Comparisons to - Regularized Likelihood Methods, J. Platt, (1999) - - .. [4] Predicting Good Probabilities with Supervised Learning, - A. Niculescu-Mizil & R. Caruana, ICML 2005 - """ - def __init__(self, base_estimator=None, method=None, cv=3, - score_type=None): - self.base_estimator = base_estimator - self.method = method - self.cv = cv - self.score_type = score_type - - def fit(self, X, y, sample_weight=None): - """Fit the calibrated model - - Parameters - ---------- - X : array-like, shape (n_samples, n_features) - Training data. - - y : array-like, shape (n_samples,) - Target values. - - sample_weight : array-like, shape = [n_samples] or None - Sample weights. If None, then samples are equally weighted. - - Returns - ------- - self : object - Returns an instance of self. 
- """ - X, y = check_X_y(X, y, accept_sparse=['csc', 'csr', 'coo'], - force_all_finite=False) - X, y = indexable(X, y) - lb = LabelBinarizer().fit(y) - self.classes_ = lb.classes_ - - # Check that each cross-validation fold can have at least one - # example per class - n_folds = self.cv if isinstance(self.cv, int) \ - else self.cv.n_folds if hasattr(self.cv, "n_folds") else None - if n_folds and \ - np.any([np.sum(y == class_) < n_folds for class_ in self.classes_]): - raise ValueError("Requesting %d-fold cross-validation but provided" - " less than %d examples for at least one class." - % (n_folds, n_folds)) - - self.calibrated_classifiers_ = [] - if self.base_estimator is None: - # we want all classifiers that don't expose a random_state - # to be deterministic (and we don't want to expose this one). - base_estimator = LinearSVC(random_state=0) - else: - base_estimator = self.base_estimator - - if self.cv == "prefit": - calibrated_classifier = _CalibratedClassifier( - base_estimator, method=self.method, - score_type=self.score_type) - if sample_weight is not None: - calibrated_classifier.fit(X, y, sample_weight) - else: - calibrated_classifier.fit(X, y) - self.calibrated_classifiers_.append(calibrated_classifier) - else: - cv = check_cv(self.cv, y, classifier=True) - fit_parameters = signature(base_estimator.fit).parameters - estimator_name = type(base_estimator).__name__ - if (sample_weight is not None - and "sample_weight" not in fit_parameters): - warnings.warn("%s does not support sample_weight. Samples" - " weights are only used for the calibration" - " itself." % estimator_name) - base_estimator_sample_weight = None - else: - base_estimator_sample_weight = sample_weight - for train, test in cv.split(X, y): - this_estimator = clone(base_estimator) - if base_estimator_sample_weight is not None: - this_estimator.fit( - X[train], y[train], - sample_weight=base_estimator_sample_weight[train]) - else: - this_estimator.fit(X[train], y[train]) - - calibrated_classifier = _CalibratedClassifier( - this_estimator, method=self.method, - score_type=self.score_type) - if sample_weight is not None: - calibrated_classifier.fit(X[test], y[test], - sample_weight[test]) - else: - calibrated_classifier.fit(X[test], y[test]) - self.calibrated_classifiers_.append(calibrated_classifier) - - return self - - def predict_proba(self, X): - """Posterior probabilities of classification - - This function returns posterior probabilities of classification - according to each class on an array of test vectors X. - - Parameters - ---------- - X : array-like, shape (n_samples, n_features) - The samples. - - Returns - ------- - C : array, shape (n_samples, n_classes) - The predicted probas. - """ - check_is_fitted(self, ["classes_", "calibrated_classifiers_"]) - X = check_array(X, accept_sparse=['csc', 'csr', 'coo'], - force_all_finite=False) - # Compute the arithmetic mean of the predictions of the calibrated - # classfiers - mean_proba = np.zeros((X.shape[0], len(self.classes_))) - for calibrated_classifier in self.calibrated_classifiers_: - proba = calibrated_classifier.predict_proba(X) - mean_proba += proba - - mean_proba /= len(self.calibrated_classifiers_) - - return mean_proba - - def calibrate_scores(self, df): - """Posterior probabilities of classification - - This function returns posterior probabilities of classification - according to each class on an array of test vectors X. - - Parameters - ---------- - X : array-like, shape (n_samples, n_features) - The samples. 
- - Returns - ------- - C : array, shape (n_samples, n_classes) - The predicted probas. - """ - check_is_fitted(self, ["classes_", "calibrated_classifiers_"]) - # Compute the arithmetic mean of the predictions of the calibrated - # classifiers - df = df.reshape(-1, 1) - mean_proba = np.zeros((len(df), len(self.classes_))) - for calibrated_classifier in self.calibrated_classifiers_: - proba = calibrated_classifier.calibrate_scores(df) - mean_proba += proba - - mean_proba /= len(self.calibrated_classifiers_) - - return mean_proba - - def predict(self, X): - """Predict the target of new samples. Can be different from the - prediction of the uncalibrated classifier. - - Parameters - ---------- - X : array-like, shape (n_samples, n_features) - The samples. - - Returns - ------- - C : array, shape (n_samples,) - The predicted class. - """ - check_is_fitted(self, ["classes_", "calibrated_classifiers_"]) - return self.classes_[np.argmax(self.predict_proba(X), axis=1)] - - -class _CalibratedClassifier(object): - """Probability calibration with isotonic regression or sigmoid. - - It assumes that base_estimator has already been fit, and trains the - calibration on the input set of the fit function. Note that this class - should not be used as an estimator directly. Use CalibratedClassifierCV - with cv="prefit" instead. - - Parameters - ---------- - base_estimator : instance BaseEstimator - The classifier whose output decision function needs to be calibrated - to offer more accurate predict_proba outputs. No default value since - it has to be an already fitted estimator. - - method : 'sigmoid' | 'isotonic' | 'beta' | 'beta_am' | 'beta_ab' - The method to use for calibration. Can be 'sigmoid' which - corresponds to Platt's method, 'isotonic' which is a - non-parameteric approach based on isotonic regression or 'beta', - 'beta_am' or 'beta_ab' which correspond to beta calibration methods. - - References - ---------- - .. [1] Obtaining calibrated probability estimates from decision trees - and naive Bayesian classifiers, B. Zadrozny & C. Elkan, ICML 2001 - - .. [2] Transforming Classifier Scores into Accurate Multiclass - Probability Estimates, B. Zadrozny & C. Elkan, (KDD 2002) - - .. [3] Probabilistic Outputs for Support Vector Machines and Comparisons to - Regularized Likelihood Methods, J. Platt, (1999) - - .. [4] Predicting Good Probabilities with Supervised Learning, - A. Niculescu-Mizil & R. 
Caruana, ICML 2005 - """ - def __init__(self, base_estimator, method='beta', - score_type=None): - self.base_estimator = base_estimator - self.method = method - self.score_type = score_type - - def _preproc(self, X): - n_classes = len(self.classes_) - if self.score_type is None: - if hasattr(self.base_estimator, "decision_function"): - df = self.base_estimator.decision_function(X) - if df.ndim == 1: - df = df[:, np.newaxis] - elif hasattr(self.base_estimator, "predict_proba"): - df = self.base_estimator.predict_proba(X) - if n_classes == 2: - df = df[:, 1:] - else: - raise RuntimeError('classifier has no decision_function or ' - 'predict_proba method.') - else: - if self.score_type == "sigmoid": - df = self.base_estimator.decision_function(X) - df = expit(df) - if df.ndim == 1: - df = df[:, np.newaxis] - else: - if hasattr(self.base_estimator, self.score_type): - df = getattr(self.base_estimator, self.score_type)(X) - if self.score_type == "decision_function": - if df.ndim == 1: - df = df[:, np.newaxis] - elif self.score_type == "predict_proba": - if n_classes == 2: - df = df[:, 1:] - else: - raise RuntimeError('classifier has no ' + self.score_type - + 'method.') - - idx_pos_class = np.arange(df.shape[1]) - - return df, idx_pos_class - - def fit(self, X, y, sample_weight=None): - """Calibrate the fitted model - - Parameters - ---------- - X : array-like, shape (n_samples, n_features) - Training data. - - y : array-like, shape (n_samples,) - Target values. - - sample_weight : array-like, shape = [n_samples] or None - Sample weights. If None, then samples are equally weighted. - - Returns - ------- - self : object - Returns an instance of self. - """ - lb = LabelBinarizer() - Y = lb.fit_transform(y) - self.classes_ = lb.classes_ - - df, idx_pos_class = self._preproc(X) - self.calibrators_ = [] - - for k, this_df in zip(idx_pos_class, df.T): - if self.method is None: - calibrator = _DummyCalibration() - elif self.method == 'isotonic': - calibrator = IsotonicRegression(out_of_bounds='clip') - elif self.method == 'sigmoid': - calibrator = _SigmoidCalibration() - # TODO Remove BetaCalibration - elif self.method == 'beta': - calibrator = BetaCalibration(parameters="abm") - elif self.method == 'beta_am': - calibrator = BetaCalibration(parameters="am") - elif self.method == 'beta_ab': - calibrator = BetaCalibration(parameters="ab") - else: - raise ValueError('method should be None, "sigmoid", ' - '"isotonic", "beta", "beta2" or "beta05". ' - 'Got %s.' % self.method) - calibrator.fit(this_df, Y[:, k], sample_weight) - self.calibrators_.append(calibrator) - - return self - - def predict_proba(self, X): - """Posterior probabilities of classification - - This function returns posterior probabilities of classification - according to each class on an array of test vectors X. - - Parameters - ---------- - X : array-like, shape (n_samples, n_features) - The samples. - - Returns - ------- - C : array, shape (n_samples, n_classes) - The predicted probas. Can be exact zeros. - """ - n_classes = len(self.classes_) - proba = np.zeros((X.shape[0], n_classes)) - - df, idx_pos_class = self._preproc(X) - for k, this_df, calibrator in \ - zip(idx_pos_class, df.T, self.calibrators_): - if n_classes == 2: - k += 1 - proba[:, k] = calibrator.predict(this_df) - - # Normalize the probabilities - if n_classes == 2: - proba[:, 0] = 1. - proba[:, 1] - else: - proba /= np.sum(proba, axis=1)[:, np.newaxis] - - # XXX : for some reason all probas can be 0 - proba[np.isnan(proba)] = 1. 
/ n_classes - - # Deal with cases where the predicted probability minimally exceeds 1.0 - proba[(1.0 < proba) & (proba <= 1.0 + 1e-5)] = 1.0 - - return proba - - def calibrate_scores(self, df): - """Posterior probabilities of classification - - This function returns posterior probabilities of classification - according to each class on an array of test vectors X. - - Parameters - ---------- - X : array-like, shape (n_samples, n_features) - The samples. - - Returns - ------- - C : array, shape (n_samples, n_classes) - The predicted probas. Can be exact zeros. - """ - n_classes = len(self.classes_) - proba = np.zeros((len(df), n_classes)) - idx_pos_class = [0] - - for k, this_df, calibrator in \ - zip(idx_pos_class, df.T, self.calibrators_): - if n_classes == 2: - k += 1 - pro = calibrator.predict(this_df) - if np.any(np.isnan(pro)): - pro[np.isnan(pro)] = calibrator.predict(this_df[np.isnan( - pro)] + 1e-300) - proba[:, k] = pro - - # Normalize the probabilities - if n_classes == 2: - proba[:, 0] = 1. - proba[:, 1] - else: - proba /= np.sum(proba, axis=1)[:, np.newaxis] - - # XXX : for some reason all probas can be 0 - proba[np.isnan(proba)] = 1. / n_classes - - # Deal with cases where the predicted probability minimally exceeds 1.0 - proba[(1.0 < proba) & (proba <= 1.0 + 1e-5)] = 1.0 - return proba diff --git a/pycalib/stats.py b/pycalib/stats.py index d9102cc..48a28d2 100644 --- a/pycalib/stats.py +++ b/pycalib/stats.py @@ -13,13 +13,16 @@ def compute_friedmanchisquare(table: pd.DataFrame) -> TestResult: """ Compute Friedman test for repeated samples + Example: - n wine judges each rate k different wines. Are any of the k wines ranked consistently higher or lower than the others? + Our Calibration case: - n datasets each rate k different calibration methods. Are any of the k calibration methods ranked consistently higher or lower than the others? + This will output a statistic and a p-value SciPy does the following: - k: is the number of parameters passed to the function diff --git a/pycalib/tests/test_metrics.py b/pycalib/tests/test_metrics.py index b758ee6..b406456 100644 --- a/pycalib/tests/test_metrics.py +++ b/pycalib/tests/test_metrics.py @@ -139,7 +139,6 @@ def test_conf_mce(self): mce = MCE(Y, S, bins=2) self.assertAlmostEqual(mce, 0.4 - 1/4) - def test_calibrated_p_ece(self): p = np.random.rand(5000, 3) p /= p.sum(axis=1)[:, None] @@ -154,13 +153,13 @@ def test_uncalibrated_p_ece(self): p = np.random.rand(1000, 3) p /= p.sum(axis=1)[:, None] y = np.eye(3)[np.random.choice([0, 1, 2], size=p.shape[0])] - uncalibrated_pECE = pECE(y, p, samples=1000, ece_function=classwise_ECE) + uncalibrated_pECE = pECE(y, p, samples=1000, + ece_function=classwise_ECE) self.assertLess(uncalibrated_pECE, 0.04) uncalibrated_pECE = pECE(y, p, samples=1000, ece_function=conf_ECE) self.assertLess(uncalibrated_pECE, 0.04) - def main(): unittest.main() diff --git a/pycalib/visualisations/barycentric.py b/pycalib/visualisations/barycentric.py index 4cf95e1..61e9ae3 100644 --- a/pycalib/visualisations/barycentric.py +++ b/pycalib/visualisations/barycentric.py @@ -97,6 +97,7 @@ def draw_pdf_contours(dist, **kwargs): draw_func_contours(dist.pdf, **kwargs) +# TODO Speed up function. 
def draw_func_contours(func, labels=None, nlevels=200, subdiv=8, fig=None, ax=None, grid=True, **kwargs): ''' @@ -128,7 +129,7 @@ def draw_func_contours(func, labels=None, nlevels=200, subdiv=8, fig=None, # contour = ax.tricontourf(trimesh, z, nlevels, **kwargs) # contour = ax.tricontourf(trimesh, z, nlevels, extend='both') is_nan = ~np.isfinite(z) - #z[is_nan] = 0 + # z[is_nan] = 0 nan_id = np.where(is_nan)[0] triangles_mask = np.zeros(trimesh.triangles.shape[0]) for ni in nan_id: diff --git a/pycalib/visualisations/plot.py b/pycalib/visualisations/plot.py index 00e4646..7f91883 100644 --- a/pycalib/visualisations/plot.py +++ b/pycalib/visualisations/plot.py @@ -386,9 +386,9 @@ def plot_reliability_diagram(labels, scores, legend=None, if show_histogram: divider = make_axes_locatable(ax1) ax2 = divider.append_axes("bottom", size="20%", pad=0.1, - sharex=ax1) + sharex=ax1) - #ax2 = fig.add_subplot(spec[n_columns + i], + # ax2 = fig.add_subplot(spec[n_columns + i], # label='{}'.format(i)) for j, score in enumerate(scores_list): ax1.set_xticklabels([]) @@ -545,8 +545,7 @@ def plot_binary_reliability_diagram_gaps(y_true, p_pred, n_bins=15, title=None, ax.set_xticklabels([]) divider = make_axes_locatable(ax) - ax2 = divider.append_axes("bottom", size="20%", pad=0.1, - sharex=ax) + ax2 = divider.append_axes("bottom", size="20%", pad=0.1, sharex=ax) ax2.hist(p_pred, range=(0, 1), bins=n_bins, @@ -783,7 +782,6 @@ def plot_df_to_heatmap(df, title=None, figsize=None, annotate=True, return fig - def plot_calibration_map(scores_set, prob, legend_set, original_first=False, alpha=1, **kwargs): fig_calibration_map = plt.figure('calibration_map')
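
For reference, the snippet below sketches how the calibration API documented in this patch fits together (BinningCalibration, CalibratedModel and conf_ECE). It is a minimal sketch, assuming the classes behave as their new docstrings describe; the dataset and classifier are placeholders and are not part of the patch.

import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

from pycalib.metrics import conf_ECE
from pycalib.models import BinningCalibration, CalibratedModel

X, y = make_classification(n_samples=5000, n_features=20, n_informative=4,
                           random_state=0)
X_train, X_cal, y_train, y_cal = train_test_split(X, y, random_state=0)

# A classifier fitted beforehand; with fit_estimator=False only the
# calibrator is trained on the data passed to fit().
clf = GaussianNB().fit(X_train, y_train)

# n_bins may be a list: one BinningCalibration is fitted per value and the
# best one on the validation split is kept (see the class docstring).
cal = BinningCalibration(n_bins=[10, 15, 20], strategy='quantile', alpha=1.0)

cal_clf = CalibratedModel(base_estimator=clf, calibrator=cal,
                          fit_estimator=False)
cal_clf.fit(X_cal, y_cal)

print("conf-ECE before:", conf_ECE(y_cal, clf.predict_proba(X_cal), bins=15))
print("conf-ECE after:", conf_ECE(y_cal, cal_clf.predict_proba(X_cal), bins=15))

Passing a list to n_bins exercises the model-selection behaviour described in the BinningCalibration docstring, and fit_estimator=False mirrors the quickstart example, where the classifier is already trained.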
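The metrics.py hunk above adds get_one_hot as a vectorised speed-up of _label_resampling. The sketch below re-derives the underlying technique: one label is drawn per row by comparing a uniform variate with the row-wise cumulative probabilities, and the result is one-hot encoded with np.eye. The function name and the exact indexing are illustrative, not copied from the library.

import numpy as np

def sample_labels(probs, rng=None):
    """Draw one label per row of an (n_samples, n_classes) probability
    matrix and return it one-hot encoded."""
    rng = np.random.default_rng(rng)
    c = probs.cumsum(axis=1)                  # row-wise CDF
    u = rng.random((len(c), 1))               # one uniform draw per row
    choices = (u > c).sum(axis=1)             # first bin whose CDF exceeds u
    choices = np.minimum(choices, probs.shape[1] - 1)  # guard against rounding
    return np.eye(probs.shape[1])[choices]    # one-hot, as in get_one_hot

probs = np.array([[0.7, 0.2, 0.1],
                  [0.1, 0.1, 0.8]])
print(sample_labels(probs, rng=0))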