
Commit

Merge branch 'fix/multiclass' into 'dev'
Fix Issues with Multiclass Scoring

See merge request cdd/QSPRpred!45
martin-sicho committed Feb 20, 2023
2 parents be5d260 + 3e1a125 commit f6e9f95
Showing 3 changed files with 128 additions and 110 deletions.
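For context on the fix in the diff below: the previous default multiclass scorer was obtained via metrics.get_scorer('roc_auc_ovr_weighted')._score_func, which drops the scorer's bound keyword arguments and therefore raises "multi_class must be in ('ovo', 'ovr')" on multiclass probabilities. A minimal standalone sketch of the problem and of the wrapper the merge request uses instead (made-up data, outside the QSPRpred classes):

import numpy as np
from sklearn import metrics

# Made-up multiclass targets and class probabilities (rows sum to 1).
y_true = np.array([0, 1, 2, 1, 0, 2])
y_proba = np.array([[0.7, 0.2, 0.1],
                    [0.1, 0.8, 0.1],
                    [0.2, 0.2, 0.6],
                    [0.3, 0.5, 0.2],
                    [0.6, 0.3, 0.1],
                    [0.1, 0.3, 0.6]])

try:
    # The bare score function behind the scorer loses multi_class/average,
    # so it refuses multiclass input.
    metrics.get_scorer('roc_auc_ovr_weighted')._score_func(y_true, y_proba)
except ValueError as err:
    print(err)  # multi_class must be in ('ovo', 'ovr')

# The fix binds the multiclass arguments explicitly, as the new default scorer does.
multiclass_auc = lambda y_t, y_p: metrics.roc_auc_score(
    y_t, y_p, multi_class='ovr', average='weighted')
print(multiclass_auc(y_true, y_proba))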
86 changes: 83 additions & 3 deletions qsprpred/models/interfaces.py
@@ -9,6 +9,7 @@

import numpy as np
import pandas as pd
from sklearn import metrics

from qsprpred.data.data import QSPRDataset, MoleculeTable
from qsprpred.data.utils.descriptorcalculator import DescriptorsCalculator
@@ -86,6 +87,40 @@ def __str__(self):
"""Return the name of the model and the underlying class as the identifier."""

return f"{self.name} ({self.model.__class__.__name__ if self.model else self.alg.__class__.__name__ if self.alg else 'None'})"

# Adding scoring functions available for hyperparam optimization:
@property
def _needs_proba_to_score(self):
if self.task == ModelTasks.CLASSIFICATION:
return ['average_precision', 'neg_brier_score', 'neg_log_loss', 'roc_auc',
'roc_auc_ovo', 'roc_auc_ovo_weighted', 'roc_auc_ovr', 'roc_auc_ovr_weighted']
elif self.task == ModelTasks.REGRESSION:
return []

@property
def _needs_discrete_to_score(self):
if self.task == ModelTasks.CLASSIFICATION:
return ['accuracy','balanced_accuracy', 'top_k_accuracy', 'f1', 'f1_micro',
'f1_macro', 'f1_weighted', 'f1_samples', 'precision', 'precision_micro',
'precision_macro', 'precision_weighted', 'precision_samples', 'recall',
'recall_micro', 'recall_macro', 'recall_weighted', 'recall_samples']
elif self.task == ModelTasks.REGRESSION:
return []

@property
def _supported_scoring(self):
if self.task == ModelTasks.CLASSIFICATION:
return ['average_precision', 'neg_brier_score', 'neg_log_loss', 'roc_auc',
'roc_auc_ovo', 'roc_auc_ovo_weighted', 'roc_auc_ovr', 'roc_auc_ovr_weighted',
'accuracy', 'balanced_accuracy', 'top_k_accuracy', 'f1', 'f1_micro',
'f1_macro', 'f1_weighted', 'f1_samples', 'precision', 'precision_micro',
'precision_macro', 'precision_weighted', 'precision_samples', 'recall',
'recall_micro', 'recall_macro', 'recall_weighted', 'recall_samples']
elif self.task == ModelTasks.REGRESSION:
return ['explained_variance', 'max_error', 'neg_mean_absolute_error', 'neg_mean_squared_error',
'neg_root_mean_squared_error', 'neg_mean_squared_log_error', 'neg_median_absolute_error',
'r2', 'neg_mean_poisson_deviance', 'neg_mean_gamma_deviance', 'neg_mean_absolute_percentage_error',
'd2_absolute_error_score', 'd2_pinball_score', 'd2_tweedie_score']

@property
def task(self):
@@ -97,6 +132,16 @@ def task(self):
"""
return self.data.task if self.data else self.metaInfo['task']

@property
def nClasses(self):
"""
The number of classes of the model, taken from the data set or deserialized from file if the model is loaded without data.
Returns:
int: number of classes of the model if the task is classification, otherwise 0
"""
return self.data.nClasses if self.data else self.metaInfo['nClasses']

@property
def targetProperty(self):
"""
@@ -262,7 +307,8 @@ def save(self):

self.metaInfo['name'] = self.name
self.metaInfo['task'] = str(self.task)
self.metaInfo['th'] = self.data.th
self.metaInfo['th'] = self.data.th if self.data else self.metaInfo['th']
self.metaInfo['nClasses'] = self.nClasses
self.metaInfo['target_property'] = self.targetProperty
self.metaInfo['parameters_path'] = self.saveParams(self.parameters).replace(f"{self.baseDir}/", '')
self.metaInfo['feature_calculator_path'] = self.saveDescriptorCalculator().replace(f"{self.baseDir}/", '') if self.featureCalculator else None
@@ -430,15 +476,18 @@ def predictMols(self, mols : List[str], use_probas : bool = False):
predictions = self.predict(dataset)
if (isclass(self.alg) and self.alg.__name__ == 'PLSRegression') or (type(self.alg).__name__ == 'PLSRegression'):
predictions = predictions[:, 0]
if self.task == ModelTasks.CLASSIFICATION:
predictions = predictions.astype(int)
else:
predictions = self.predictProba(dataset)

if failed_indices:
predictions = list(predictions)
dim = 1 if len(predictions.shape) == 1 else predictions.shape[1]
ret = []
predictions = list(predictions)
for idx, pred in enumerate(mols):
if idx in failed_indices:
ret.append(None)
ret.append([np.nan] * dim if dim > 1 else np.nan)
else:
ret.append(predictions.pop(0))
return np.array(ret)
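As an aside, a minimal standalone sketch of the padding behaviour introduced above for molecules that fail standardization: the result keeps one row per input molecule, with NaN placeholders whose width matches the prediction output (illustrative helper, not part of the QSPRpred API):

import numpy as np

def pad_failed(predictions, n_mols, failed_indices):
    """Re-insert NaN placeholders for inputs that could not be predicted."""
    # width of one prediction row: 1 for a single output, n_classes for probabilities
    dim = 1 if len(predictions.shape) == 1 else predictions.shape[1]
    remaining = list(predictions)
    ret = []
    for idx in range(n_mols):
        if idx in failed_indices:
            ret.append([np.nan] * dim if dim > 1 else np.nan)
        else:
            ret.append(remaining.pop(0))
    return np.array(ret)

# three input molecules, the second failed; class probabilities for the other two
print(pad_failed(np.array([[0.9, 0.1], [0.2, 0.8]]), n_mols=3, failed_indices={1}))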
@@ -465,3 +514,34 @@ def cleanFiles(self):

if os.path.exists(self.outDir):
shutil.rmtree(self.outDir)

def get_scoring_func(self, scoring):
"""Get scoring function from sklearn.metrics.
Args:
scoring (Union[str, Callable]): metric name from sklearn.metrics or
user-defined scoring function.
Raises:
ValueError: If the scoring function is currently not supported by
GridSearch and BayesOptimization.
Returns:
score_func (Callable): scorer function from sklearn.metrics (`str` as input)
or user-defined function (`callable` as input)
"""
if all([scoring not in self._supported_scoring, isinstance(scoring, str)]):
raise ValueError("Scoring function %s not supported. Supported scoring functions are: %s"
% (scoring, self._supported_scoring))
elif callable(scoring):
return scoring
elif scoring is None:
if self.data.task == ModelTasks.REGRESSION:
scorer = metrics.get_scorer('explained_variance')
elif self.data.nClasses > 2: # multiclass
return lambda y_true, y_pred : metrics.roc_auc_score(y_true, y_pred, multi_class='ovr', average='weighted')
else:
scorer = metrics.get_scorer('roc_auc')
else:
scorer = metrics.get_scorer(scoring)
return scorer._score_func
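A rough usage sketch of the resolution order implemented above (string name, user callable, or None default). resolve_scoring is a hypothetical standalone mirror of the method, shown only to illustrate the behaviour for a binary classifier:

from typing import Callable, Optional, Union

from sklearn import metrics

SUPPORTED = ['balanced_accuracy', 'roc_auc', 'f1_weighted']  # illustrative subset

def resolve_scoring(scoring: Optional[Union[str, Callable]]) -> Callable:
    """Hypothetical mirror of get_scoring_func for a binary classification task."""
    if callable(scoring):
        return scoring  # user-defined function is used as-is
    if scoring is None:
        return metrics.get_scorer('roc_auc')._score_func  # binary default
    if scoring not in SUPPORTED:
        raise ValueError(f"Scoring function {scoring} not supported.")
    return metrics.get_scorer(scoring)._score_func  # plain metric behind the sklearn scorer

score_func = resolve_scoring('balanced_accuracy')
print(score_func([0, 1, 1, 0], [0, 1, 0, 0]))  # 0.75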
118 changes: 23 additions & 95 deletions qsprpred/models/models.py
@@ -35,10 +35,6 @@ class QSPRsklearn(QSPRModel):
def __init__(self, base_dir: str, alg=None, data: QSPRDataset = None,
name: str = None, parameters: dict = None, autoload: bool = True):
super().__init__(base_dir, alg, data, name, parameters, autoload)
# Adding scoring functions available for hyperparam optimization:
self._supported_scoring = [
'average_precision', 'neg_brier_score', 'neg_log_loss', 'roc_auc',
'roc_auc_ovo', 'roc_auc_ovo_weighted', 'roc_auc_ovr', 'roc_auc_ovr_weighted']
# initialize models with defined parameters
if self.data and (type(self.model) in [SVC, SVR]):
logger.warning("parameter max_iter set to 10000 to avoid training getting stuck. \
@@ -195,13 +191,14 @@ def gridSearch(self, search_space_gs, scoring=None, n_jobs=1):
self.model = self.model.set_params(**grid.best_params_)
self.save()

def bayesOptimization(self, search_space_bs, n_trials, scoring=None, n_jobs=1):
def bayesOptimization(self, search_space_bs, n_trials, scoring=None, th=0.5, n_jobs=1):
"""Bayesian optimization of hyperparameters using optuna.
Arguments:
search_space_bs (dict): search space for bayesian optimization
n_trials (int): number of trials for bayes optimization
scoring (Optional[str, Callable]): scoring function for the optimization.
th (float): threshold for scoring if `scoring in self._needs_discrete_to_score`.
n_jobs (int): the number of parallel trials
Example of search_space_bs for scikit-learn's MLPClassifier:
@@ -229,7 +226,7 @@ def bayesOptimization(self, search_space_bs, n_trials, scoring=None, n_jobs=1):

study = optuna.create_study(direction='maximize')
logger.info('Bayesian optimization started: %s' % datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
study.optimize(lambda trial: self.objective(trial, scoring, search_space_bs), n_trials, n_jobs=n_jobs)
study.optimize(lambda trial: self.objective(trial, scoring, th, search_space_bs), n_trials, n_jobs=n_jobs)
logger.info('Bayesian optimization ended: %s' % datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

trial = study.best_trial
@@ -239,12 +236,13 @@
self.model = self.model.set_params(**trial.params)
self.save()

def objective(self, trial, scoring, search_space_bs):
def objective(self, trial, scoring, th, search_space_bs):
"""Objective for bayesian optimization.
Arguments:
trial (int): current trial number
scoring (Optional[str]): scoring function for the objective.
th (float): threshold for scoring if `scoring in self._needs_discrete_to_score`.
search_space_bs (dict): search space for bayes optimization
"""
bayesian_params = {}
@@ -267,49 +265,12 @@ def objective(self, trial, scoring, search_space_bs):
self.model.set_params(**bayesian_params)

y, y_ind = self.data.getTargetProperties()
if scoring in self._needs_discrete_to_score:
y = np.where(y > th, 1, 0)
score_func = self.get_scoring_func(scoring)
try:
score = score_func(y, self.evaluate(save=False))
except ValueError:
logger.exception(
"Only one class present in y_true. ROC AUC score is not defined in that case. Score set to -1.")
score = -1
score = score_func(y, self.evaluate(save=False))
return score

def get_scoring_func(self, scoring):
"""Get scoring function from sklearn.metrics.
Args:
scoring (Union[str, Callable]): metric name from sklearn.metrics or
user-defined scoring function.
Raises:
ValueError: If the scoring function is currently not supported by
GridSearch and BayesOptimization.
Returns:
score_func (Callable): scorer function from sklearn.metrics (`str` as input)
or user-defined function (`callable` as input)
"""
# TODO: to add support for more scoring functions we will need to ensure that
# the cross validation returns the correct input for the scoring function.
# It's possible to inspect that by calling `str(scorer)` and checking the attributes.
if all([scoring not in self._supported_scoring, isinstance(scoring, str)]):
raise ValueError("Scoring function %s not supported. Supported scoring functions are: %s"
% (scoring, self._supported_scoring))
elif callable(scoring):
return scoring
elif scoring is None:
if self.data.task == ModelTasks.REGRESSION:
scorer = metrics.get_scorer('explained_variance')
elif self.data.nClasses > 2: # multiclass
scorer = metrics.get_scorer('roc_auc_ovr_weighted')
else:
scorer = metrics.get_scorer('roc_auc')
else:
scorer = metrics.get_scorer(scoring)
return scorer._score_func

def loadModel(self, alg: Union[Type, BaseEstimator] = None, params: dict = None):
if alg is not None and isinstance(alg, BaseEstimator):
if params:
@@ -396,9 +357,6 @@ def __init__(self,
self.optimal_epochs = -1
self.n_class = max(1, self.data.nClasses) if self.data else self.metaInfo['n_class']
self.n_dim = self.data.X.shape[1] if self.data else self.metaInfo['n_dim']
self._supported_scoring = [
'average_precision', 'neg_brier_score', 'neg_log_loss', 'roc_auc',
'roc_auc_ovo', 'roc_auc_ovo_weighted', 'roc_auc_ovr', 'roc_auc_ovr_weighted']
self.patience = patience
self.tol = tol

@@ -562,7 +520,7 @@ def evaluate(self, save=True, ES_val_size=0.1):
else:
return cvs

def gridSearch(self, search_space_gs, scoring=None, ES_val_size=0.1):
def gridSearch(self, search_space_gs, scoring=None, th=0.5, ES_val_size=0.1):
"""Optimization of hyperparameters using gridSearch.
Arguments:
@@ -574,6 +532,7 @@ def gridSearch(self, search_space_gs, scoring=None, ES_val_size=0.1):
neurons_hx (int) ~ number of neurons in other hidden layers
extra_layer (bool) ~ whether to add extra (3rd) hidden layer
scoring (Optional[str, Callable]): scoring function for the grid search.
th (float): threshold for scoring if `scoring in self._needs_discrete_to_score`.
ES_val_size (float): validation set size for early stopping in CV
"""
self.model = self.loadModel(self.alg)
@@ -608,6 +567,8 @@ def gridSearch(self, search_space_gs, scoring=None, ES_val_size=0.1):
y_pred = self.model.predict(valid_loader)
if self.data.nClasses == 2:
y_pred = y_pred[:, 1]
if scoring in self._needs_discrete_to_score:
y = np.where(y > th, 1, 0)
fold_scores.append(score_func(y_test, y_pred))
os.remove('%s_temp.log' % self.outPrefix)
param_score = np.mean(fold_scores)
@@ -622,13 +583,14 @@ def gridSearch(self, search_space_gs, scoring=None, ES_val_size=0.1):
self.model.set_params(**self.parameters)
self.save()

def bayesOptimization(self, search_space_bs, n_trials, scoring=None, n_jobs=1):
def bayesOptimization(self, search_space_bs, n_trials, scoring=None, th=0.5, n_jobs=1):
"""Bayesian optimization of hyperparameters using optuna.
arguments:
Arguments:
search_space_bs (dict): search space for bayesian optimization
n_trials (int): number of trials for bayes optimization
scoring (Optional[str, Callable]): scoring function for the optimization.
th (float): threshold for scoring if `scoring in self._needs_discrete_to_score`.
n_jobs (int): the number of parallel trials
"""
print('Bayesian optimization can take a while for some hyperparameter combinations')
@@ -642,7 +604,7 @@ def bayesOptimization(self, search_space_bs, n_trials, scoring=None, n_jobs=1):

study = optuna.create_study(direction='maximize')
logger.info('Bayesian optimization started: %s' % datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
study.optimize(lambda trial: self.objective(trial, scoring, search_space_bs), n_trials, n_jobs=n_jobs)
study.optimize(lambda trial: self.objective(trial, scoring, th, search_space_bs), n_trials, n_jobs=n_jobs)
logger.info('Bayesian optimization ended: %s' % datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

trial = study.best_trial
@@ -653,11 +615,13 @@ def bayesOptimization(self, search_space_bs, n_trials, scoring=None, n_jobs=1):
self.model.set_params(**self.parameters)
self.save()

def objective(self, trial, scoring, search_space_bs):
def objective(self, trial, scoring, th, search_space_bs):
"""Objective for bayesian optimization.
arguments:
Arguments:
trial (int): current trial number
scoring (Optional[str]): scoring function for the objective.
th (float): threshold for scoring if `scoring in self._needs_discrete_to_score`.
search_space_bs (dict): search space for bayes optimization
"""
bayesian_params = {}
@@ -679,13 +643,10 @@ def objective(self, trial, scoring, search_space_bs):
self.model.set_params(**bayesian_params)

y, y_ind = self.data.getTargetProperties()
if scoring in self._needs_discrete_to_score:
y = np.where(y > th, 1, 0)
score_func = self.get_scoring_func(scoring)
try:
score = score_func(y, self.evaluate(save=False))
except ValueError:
logger.exception(
"Only one class present in y_true. ROC AUC score is not defined in that case. Score set to -1.")
score = -1
score = score_func(y, self.evaluate(save=False))
return score

def saveModel(self) -> str:
@@ -713,36 +674,3 @@ def predictProba(self, X: Union[pd.DataFrame, np.ndarray, QSPRDataset]):

loader = self.model.get_dataloader(X)
return self.model.predict(loader)

def get_scoring_func(self, scoring):
"""Get scoring function from sklearn.metrics.
Args:
scoring (Union[str, Callable]): metric name from sklearn.metrics or
user-defined scoring function.
Raises:
ValueError: If the scoring function is currently not supported by
GridSearch and BayesOptimization.
Returns:
score_func (Callable): scorer function from sklearn.metrics (`str` as input)
or user-defined function (`callable` as input)
"""
if all([scoring not in self._supported_scoring, isinstance(scoring, str)]):
raise ValueError("Scoring function %s not supported. Supported scoring functions are: %s"
% (scoring, self._supported_scoring))
elif callable(scoring):
return scoring
elif scoring is None:
if self.data.task == ModelTasks.REGRESSION:
scorer = metrics.get_scorer('explained_variance')
elif self.data.nClasses > 2: # multiclass
# Calling metrics.get_scorer('roc_auc_ovr_weighted') in this context
# raises the error `multi_class must be in ('ovo', 'ovr')` so let's avoid it
scorer = metrics.get_scorer('roc_auc_ovr_weighted')
else:
scorer = metrics.get_scorer('roc_auc')
else:
scorer = metrics.get_scorer(scoring)
return scorer._score_func
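To illustrate the new th argument threaded through gridSearch, bayesOptimization and objective above: metrics listed in _needs_discrete_to_score compare class labels rather than probabilities, so the continuous target property is binarized at the threshold before scoring. A small standalone sketch with made-up values (not the QSPRpred API):

import numpy as np
from sklearn import metrics

needs_discrete_to_score = ['accuracy', 'balanced_accuracy', 'f1']  # subset for illustration

scoring, th = 'accuracy', 0.5
y = np.array([0.1, 0.7, 0.4, 0.9])      # continuous target property values
y_pred = np.array([0, 1, 1, 1])         # discrete cross-validation predictions

if scoring in needs_discrete_to_score:
    y = np.where(y > th, 1, 0)          # binarize the target at the threshold

score_func = metrics.get_scorer(scoring)._score_func
print(score_func(y, y_pred))            # 0.75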

0 comments on commit f6e9f95
