Merge pull request #35 from UBC-MDS/susannah_compare_f1
ensured consistency across compare_f1 and ensemble_compare_f1 and tes…
musiccabin authored Jan 18, 2025
2 parents c0e4b6c + 47823bc commit eab5292
Showing 7 changed files with 143 additions and 198 deletions.
60 changes: 29 additions & 31 deletions src/compare_classifiers/compare_f1.py
@@ -1,8 +1,11 @@
from compare_classifiers.error_handling.check_valid_estimators import check_valid_estimators
from compare_classifiers.error_handling.check_valid_X import check_valid_X
from compare_classifiers.error_handling.check_valid_y import check_valid_y

import pandas as pd
from sklearn.model_selection import cross_val_score
import time
from sklearn.model_selection import cross_validate

def compare_f1(estimators, X, y):
def compare_f1(estimators, X_train, y_train):
"""
Show cross validation results, including fit time and f1 scores for each estimator.
@@ -32,34 +35,29 @@ def compare_f1(estimators, X, y):
>>> compare_f1(estimators, X, y)
"""

# Check if estimators is valid or raise errors
check_valid_estimators(estimators, 'first')

# Check if X_train is valid or raise errors
check_valid_X(X_train, 'second')

# Check if y_train is valid or raise errors
check_valid_y(y_train, 'third')

labels = [e[0] for e in estimators]
classifiers = [e[1] for e in estimators]

results = []
for cls in classifiers:
cv_results = cross_validate(cls, X_train, y_train, cv=5, scoring='f1_macro', return_train_score=True)

for name, estimator in estimators:
try:
start_time = time.time()

cv_results = cross_val_score(estimator, X, y, scoring='f1', cv=5)
fit_time = time.time() - start_time

test_score = cv_results.mean()

estimator.fit(X, y)
train_score = cross_val_score(estimator, X, y, scoring='f1', cv=5).mean()

results.append({
'Estimator': name,
'Fit Time': fit_time,
'Test Score (F1)': test_score,
'Train Score (F1)': train_score
})

except Exception as e:
print(f"Error with estimator {name}: {e}")
results.append({
'Estimator': name,
'Fit Time': None,
'Test Score (F1)': None,
'Train Score (F1)': None
})
results_df = pd.DataFrame({
'model': labels[classifiers.index(cls)],
'fit_time': cv_results['fit_time'].mean(),
'test_f1_score': cv_results['test_score'].mean(),
'train_f1_score': cv_results['train_score'].mean()
}, index=[0])

results.append(results_df)

return pd.DataFrame(results)
return pd.concat(results, ignore_index=True)
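
Note: a minimal usage sketch of the revised compare_f1 signature shown above, assuming the package is importable as in the updated tests; the synthetic data and estimator choices below are illustrative, not taken from this PR:

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

from compare_classifiers.compare_f1 import compare_f1

# Illustrative binary-classification data (assumption; not part of the PR)
X_train, y_train = make_classification(n_samples=200, n_features=10, random_state=42)

# (name, estimator) tuples, as expected by compare_f1
estimators = [
    ('rf', RandomForestClassifier(n_estimators=10, random_state=42)),
    ('logreg', LogisticRegression(max_iter=1000, random_state=42)),
]

# Per the new implementation, returns one row per estimator with columns:
# model, fit_time, test_f1_score, train_f1_score
results = compare_f1(estimators, X_train, y_train)
print(results)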
11 changes: 8 additions & 3 deletions src/compare_classifiers/confusion_matrices.py
@@ -29,8 +29,11 @@ def confusion_matrices(estimators, X_train, X_test, y_train, y_test):
Returns:
--------
None
Displays confusion matrices for each estimator using the provided training data.
fig : matplotlib.figure.Figure
The figure object containing all the subplots (axes) for the confusion matrices. This object manages the layout and rendering of the entire plot.
axes : numpy.ndarray or list of matplotlib.axes.Axes
A 2D array (or list) of axes objects where the confusion matrices are plotted. Each element represents an individual subplot (axis) within the grid.
Example:
--------
@@ -57,11 +60,13 @@ def confusion_matrices(estimators, X_train, X_test, y_train, y_test):

labels = [e[0] for e in estimators]
classifiers = [e[1] for e in estimators]

# Fit each estimator
for cls in classifiers:
cls.fit(X_train, y_train)

# Plot confusion matrices in a single column
fig, axes = plt.subplots(nrows=len(classifiers), ncols=1, figsize=(5*len(classifiers),5*len(classifiers)))

for cls, ax in zip(classifiers, axes.flatten()):
ConfusionMatrixDisplay(
confusion_matrix=confusion_matrix(y_test[:50], cls.predict(X_test)[:50], labels=cls.classes_),
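Note: a minimal sketch of how the updated confusion_matrices return values might be used; the (fig, axes) return follows the revised docstring above (the return statement itself is outside the shown hunk), and the data and estimators here are illustrative assumptions:

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

from compare_classifiers.confusion_matrices import confusion_matrices

# Illustrative data split (assumption; not part of the PR)
X, y = make_classification(n_samples=300, n_features=8, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

estimators = [
    ('rf', RandomForestClassifier(n_estimators=10, random_state=42)),
    ('knn5', KNeighborsClassifier(n_neighbors=5)),
]

# Per the revised docstring, the function returns the figure and its axes
fig, axes = confusion_matrices(estimators, X_train, X_test, y_train, y_test)
fig.savefig('confusion_matrices.png')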
91 changes: 14 additions & 77 deletions tests/test_compare_f1.py
@@ -1,15 +1,14 @@
import sys
import os
from compare_classifiers.compare_f1 import compare_f1

import pandas as pd
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../src/compare_classifiers')))
from compare_f1 import compare_f1
import pytest

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.exceptions import NotFittedError

import pytest


@pytest.fixture
@@ -26,7 +25,7 @@ def estimators():
('svm', make_pipeline(StandardScaler(), LinearSVC(random_state=42)))
]

def test_compare_f1_basic(synthetic_data, estimators):
def test_compare_f1(synthetic_data, estimators):
X, y = synthetic_data

# Test the function with valid input (estimators and dataset)
@@ -36,78 +35,16 @@ def test_compare_f1_basic(synthetic_data, estimators):
assert isinstance(result, pd.DataFrame)

# Check that the DataFrame has the correct columns
assert set(result.columns) == {'Estimator', 'Fit Time', 'Test Score (F1)', 'Train Score (F1)'}
assert set(result.columns) == {'model', 'fit_time', 'test_f1_score', 'train_f1_score'}

# Check that each row corresponds to an estimator
assert len(result) == len(estimators)
assert result.shape[0] == len(estimators)

# Ensure that all rows have non-null values for Fit Time, Test Score, and Train Score
for index, row in result.iterrows():
assert row['Estimator'] in ['rf', 'svm']
assert row['Fit Time'] is not None
assert row['Test Score (F1)'] is not None
assert row['Train Score (F1)'] is not None

def test_compare_f1_with_invalid_estimator(synthetic_data):
X, y = synthetic_data

# Test with an invalid estimator that raises an error
invalid_estimators = [
('invalid', None) # Invalid estimator (None)
]

result = compare_f1(invalid_estimators, X, y)

# Check that the result has None for this invalid estimator
assert len(result) == 1
assert result['Estimator'][0] == 'invalid'
assert result['Fit Time'][0] is None
assert result['Test Score (F1)'][0] is None
assert result['Train Score (F1)'][0] is None

def test_compare_f1_with_no_estimators(synthetic_data):
X, y = synthetic_data

# Test with an empty estimator list
result = compare_f1([], X, y)

# Check that the result is an empty DataFrame
assert result.empty

def test_compare_f1_with_empty_data(synthetic_data):
X, y = synthetic_data

# Test with empty dataset (X or y)
result = compare_f1([('rf', RandomForestClassifier(n_estimators=10, random_state=42))], [], y)

# The function should handle empty X gracefully
assert result is not None
assert len(result) == 1
assert result['Estimator'][0] == 'rf'
assert result['Fit Time'][0] is None
assert result['Test Score (F1)'][0] is None
assert result['Train Score (F1)'][0] is None

def test_compare_f1_with_unfitted_estimator(synthetic_data):
X, y = synthetic_data

# Test with an estimator that does not support fitting in the usual way (e.g., LinearSVC without scaling)
unfitted_estimators = [
('svm_unfitted', LinearSVC(random_state=42)) # Not using a pipeline with StandardScaler
]

result = compare_f1(unfitted_estimators, X, y)

# The result should be calculated even if it's not fitted correctly yet
assert len(result) == 1
assert result['Estimator'][0] == 'svm_unfitted'
assert result['Fit Time'][0] is not None
assert result['Test Score (F1)'][0] is not None
assert result['Train Score (F1)'][0] is not None

def test_compare_f1_with_no_data():
# Test with no data (empty input)
result = compare_f1([], [], [])

# The result should be an empty DataFrame
assert result.empty
assert row['model'] in ['rf', 'svm']
assert row['fit_time'] is not None
assert row['test_f1_score'] is not None
assert row['train_f1_score'] is not None
assert 0 <= row['test_f1_score'] <= 1 # Verify the range of test_f1_score
assert 0 <= row['train_f1_score'] <= 1 # Verify the range of train_f1_score
28 changes: 6 additions & 22 deletions tests/test_confusion_matrices.py
@@ -17,28 +17,12 @@

model_dict = models()
knn5 = model_dict['knn5']
knn5_and_mnb = [
('knn5', knn5),
('mnp', model_dict['mnp'])
]
two_pipes = [
('pipe_rf', model_dict['pipe_rf']),
('pipe_svm', model_dict['pipe_svm'])
]
multi_ind = [
('logreg', model_dict['logreg']),
('gb', model_dict['gb']),
('svm', model_dict['svm']),
('rf', model_dict['rf']),
('knn5', knn5)
]
multi_pipe = [
('pipe_svm', model_dict['pipe_svm']),
('pipe_rf', model_dict['pipe_rf']),
('pipe_knn5', model_dict['pipe_knn5']),
('pipe_gb', model_dict['pipe_gb']),
('pipe_mnp', model_dict['pipe_mnp'])
]
knn5 = model_dict['knn5']
knn5_and_mnb = model_dict['knn5_and_mnb']
two_pipes = model_dict['two_pipes']
multi_ind = model_dict['multi_ind']
multi_pipe = model_dict['multi_pipe']


def test_individual_success():
"""When estimators is a list of individual Classifiers, returns the plot containing one confusion matrix for each estimator."""
30 changes: 27 additions & 3 deletions tests/test_data.py
@@ -46,7 +46,7 @@ def models():
"""Create models as estimators for function tests.
Note: Please use individual classifiers with X_train_ss and X_test_ss and pipeline with X_train and X_test_rs"""

# valid data
# create valid classifiers
rf = RandomForestClassifier(n_estimators=10, random_state=seed)
svm = SVC(kernel='rbf', decision_function_shape='ovr', random_state=seed)
logreg = LogisticRegression(multi_class='multinomial', solver='lbfgs', random_state=seed)
@@ -59,9 +59,33 @@ def models():
pipe_gb = make_pipeline(RobustScaler(), gb)
pipe_mnp = make_pipeline(RobustScaler(), mnp)

# invalid data
# create lists of valid estimators
knn5_and_mnb = [
('knn5', knn5),
('mnp', mnp)
]
two_pipes = [
('pipe_rf', pipe_rf),
('pipe_svm', pipe_svm)
]
multi_ind = [
('logreg', logreg),
('gb', gb),
('svm', svm),
('rf', rf),
('knn5', knn5)
]
multi_pipe = [
('pipe_svm', pipe_svm),
('pipe_rf', pipe_rf),
('pipe_knn5', pipe_knn5),
('pipe_gb', pipe_gb),
('pipe_mnp', pipe_mnp)
]

# create invalid estimators
rfr = RandomForestRegressor()
pipe_regressor = make_pipeline(RobustScaler(), rfr)

return {'rf': rf, 'svm': svm, 'logreg': logreg, 'gb': gb, 'knn5': knn5, 'mnp': mnp, 'pipe_svm': pipe_svm, 'pipe_rf': pipe_rf, 'pipe_knn5': pipe_knn5, 'pipe_gb': pipe_gb, 'pipe_mnp': pipe_mnp, 'rfr': rfr, 'pipe_regressor': pipe_regressor}
return {'knn5': knn5, 'knn5_and_mnb': knn5_and_mnb, 'two_pipes': two_pipes, 'multi_ind': multi_ind, 'multi_pipe': multi_pipe, 'rfr': rfr, 'pipe_regressor': pipe_regressor}
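
Note: with models() now returning the prebuilt estimator lists, a test module can consume them directly; a sketch, assuming models is importable from test_data within the test suite (the import path is an assumption):

from test_data import models  # import path is an assumption

model_dict = models()

# Prebuilt (name, estimator) lists come straight from the shared fixture dict
two_pipes = model_dict['two_pipes']
multi_ind = model_dict['multi_ind']

assert len(two_pipes) == 2
assert all(len(entry) == 2 for entry in multi_ind)  # each entry is a (name, estimator) tuple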

