From ff41bacd0d2f43961bf4ad290f4816892cedd6ff Mon Sep 17 00:00:00 2001
From: Bryan Lee
Date: Sat, 11 Jan 2025 01:32:14 -0800
Subject: [PATCH 1/4] adding scoring_df docstring

---
 .../compare_classifiers/scoring_df.py         | 24 +++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 src/compare_classifiers/compare_classifiers/scoring_df.py

diff --git a/src/compare_classifiers/compare_classifiers/scoring_df.py b/src/compare_classifiers/compare_classifiers/scoring_df.py
new file mode 100644
index 0000000..3a3b289
--- /dev/null
+++ b/src/compare_classifiers/compare_classifiers/scoring_df.py
@@ -0,0 +1,24 @@
+def scoring_df(pipeline, X, y):
+    """
+    Evaluates the performance and timing of a scikit-learn pipeline. For each model
+    in the pipeline, the following metrics are calculated:
+    - F1 Score
+    - Accuracy
+    - Precision
+    - Recall
+
+    Parameters
+    ----------
+    - pipeline (Pipeline): A scikit-learn pipeline containing one or more models or transformers.
+    - X (array-like or DataFrame): Feature matrix for training and testing.
+    - y (array-like or DataFrame): Target vector for training and testing.
+
+    Returns:
+    - pd.DataFrame: A DataFrame containing performance metrics (F1 Score, Accuracy, Precision, Recall).
+    >>> pipeline = Pipeline([
+    ...     ('scaler', StandardScaler()),
+    ...     ('svc', SVC(kernel='linear', random_state=42)),
+    ...     ('random_forest', RandomForestClassifier(n_estimators=100))
+    ... ])
+    >>> scoring_df(pipeline, X_train, y_train)
+    """
\ No newline at end of file

From 5d03d32739a2717e510628f01c01de980a3cdaf0 Mon Sep 17 00:00:00 2001
From: Bryan Lee
Date: Sat, 11 Jan 2025 01:51:50 -0800
Subject: [PATCH 2/4] contributing md fixed

---
 CONTRIBUTING.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 91e73be..a6994f1 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -46,13 +46,15 @@ Ready to contribute? Here's how to set up `compare_classifiers` for local develo
 2. Install `compare_classifiers` using `poetry`:
 
     ```console
-    $ poetry install
+    $ poetry install compare_classifiers
+    ```
 
 3. Use `git` (or similar) to create a branch for local development and make your changes:
 
    ```console
    $ git checkout -b name-of-your-bugfix-or-feature
+   ```
 
 4. When you're done making changes, check that your changes conform to any code formatting requirements and pass any tests.

From 028904d090c3ce0b67d9fd50775c46bea57bbf75 Mon Sep 17 00:00:00 2001
From: Bryan Lee
Date: Sat, 11 Jan 2025 15:12:32 -0800
Subject: [PATCH 3/4] making changes to docstring as recommended

---
 .../compare_classifiers/compare_f1.py         | 39 +++++++++++++++++++
 .../compare_classifiers/scoring_df.py         | 24 ------------
 2 files changed, 39 insertions(+), 24 deletions(-)
 create mode 100644 src/compare_classifiers/compare_classifiers/compare_f1.py
 delete mode 100644 src/compare_classifiers/compare_classifiers/scoring_df.py

diff --git a/src/compare_classifiers/compare_classifiers/compare_f1.py b/src/compare_classifiers/compare_classifiers/compare_f1.py
new file mode 100644
index 0000000..d8b3a54
--- /dev/null
+++ b/src/compare_classifiers/compare_classifiers/compare_f1.py
@@ -0,0 +1,39 @@
+def compare_f1(estimators, X, y):
+    """
+    Evaluates the performance and timing of a scikit-learn pipeline.
+    For each model in the list, the following metrics are calculated:
+    - F1 Score
+    - Accuracy
+    - Precision
+    - Recall
+
+    Parameters
+    ----------
+    - estimators : list or pandas series
+      A scikit-learn pipeline containing one or more models or transformers.
+
+    - X : Pandas Data frame
+      Feature matrix for training and testing.
+
+    - y : list or pandas series
+      Target vector for training and testing.
+
+    Returns:
+    --------
+    - pandas DataFrame
+      A DataFrame containing performance metrics (F1 Score, Accuracy,
+      Precision, Recall).
+
+    Example:
+    --------
+    >>> models = [('lr', LogisticRegression()), ('rf', RandomForestClassifier())]
+    ... # X_train = ... # feature matrix for training
+    ... # y_train = ... # target vector for training
+    >>> compare_f1(models, X_train, y_train)
+    """
+
+# Example usage:
+# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) # generating training and testing split
+# estimators = [('lr', LogisticRegression()), ('rf', RandomForestClassifier())] # generating list of models to score
+# result = compare_f1(estimators, X_train, y_train)
+# print(result)
\ No newline at end of file
diff --git a/src/compare_classifiers/compare_classifiers/scoring_df.py b/src/compare_classifiers/compare_classifiers/scoring_df.py
deleted file mode 100644
index 3a3b289..0000000
--- a/src/compare_classifiers/compare_classifiers/scoring_df.py
+++ /dev/null
@@ -1,24 +0,0 @@
-def scoring_df(pipeline, X, y):
-    """
-    Evaluates the performance and timing of a scikit-learn pipeline. For each model
-    in the pipeline, the following metrics are calculated:
-    - F1 Score
-    - Accuracy
-    - Precision
-    - Recall
-
-    Parameters
-    ----------
-    - pipeline (Pipeline): A scikit-learn pipeline containing one or more models or transformers.
-    - X (array-like or DataFrame): Feature matrix for training and testing.
-    - y (array-like or DataFrame): Target vector for training and testing.
-
-    Returns:
-    - pd.DataFrame: A DataFrame containing performance metrics (F1 Score, Accuracy, Precision, Recall).
-    >>> pipeline = Pipeline([
-    ...     ('scaler', StandardScaler()),
-    ...     ('svc', SVC(kernel='linear', random_state=42)),
-    ...     ('random_forest', RandomForestClassifier(n_estimators=100))
-    ... ])
-    >>> scoring_df(pipeline, X_train, y_train)
-    """
\ No newline at end of file

From 75c6ad455d34c40e0d9ff188090e6a97518ef1c1 Mon Sep 17 00:00:00 2001
From: Susannah Sun
Date: Sat, 11 Jan 2025 16:18:15 -0800
Subject: [PATCH 4/4] make wording consistent across all function docstrings

---
 .../compare_classifiers/compare_f1.py         | 43 ++++++++----------
 .../ensemble_compare_f1.py                    |  2 +-
 2 files changed, 18 insertions(+), 27 deletions(-)

diff --git a/src/compare_classifiers/compare_classifiers/compare_f1.py b/src/compare_classifiers/compare_classifiers/compare_f1.py
index d8b3a54..f4e2e67 100644
--- a/src/compare_classifiers/compare_classifiers/compare_f1.py
+++ b/src/compare_classifiers/compare_classifiers/compare_f1.py
@@ -1,39 +1,30 @@
 def compare_f1(estimators, X, y):
     """
-    Evaluates the performance and timing of a scikit-learn pipeline.
-    For each model in the list, the following metrics are calculated:
-    - F1 Score
-    - Accuracy
-    - Precision
-    - Recall
+    Show cross validation results, including fit time and f1 scores for each estimator.
 
     Parameters
     ----------
-    - estimators : list or pandas series
-      A scikit-learn pipeline containing one or more models or transformers.
+    estimators : list of tuples
+      A list of (name, estimator) tuples, consisting of the individual estimators to be processed through the voting or stacking classifying ensemble. Each tuple contains a string, the name/label of the estimator, and a model, the estimator itself, which implements
+      the scikit-learn API (`fit`, `predict`, etc.).
 
-    - X : Pandas Data frame
-      Feature matrix for training and testing.
+    X : Pandas data frame
+      Data frame containing training data along with n features.
 
-    - y : list or pandas series
-      Target vector for training and testing.
+    y : Pandas series
+      Target class labels for data in X.
 
     Returns:
     --------
-    - pandas DataFrame
-      A DataFrame containing performance metrics (F1 Score, Accuracy,
-      Precision, Recall).
+    Pandas data frame
+      A data frame showing cross validation results on training data, with 3 columns: fit_time, test_score, train_score and 1 row for each estimator.
 
     Example:
     --------
-    >>> models = [('lr', LogisticRegression()), ('rf', RandomForestClassifier())]
-    ... # X_train = ... # feature matrix for training
-    ... # y_train = ... # target vector for training
-    >>> compare_f1(models, X_train, y_train)
+    >>> estimators = [
+    ...     ('rf', RandomForestClassifier(n_estimators=10, random_state=42)),
+    ...     ('svm', make_pipeline(StandardScaler(), LinearSVC(random_state=42)))
+    ... ]
+    >>> compare_f1(estimators, X, y)
     """
-
-# Example usage:
-# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) # generating training and testing split
-# estimators = [('lr', LogisticRegression()), ('rf', RandomForestClassifier())] # generating list of models to score
-# result = compare_f1(estimators, X_train, y_train)
-# print(result)
\ No newline at end of file
+    pass
\ No newline at end of file
diff --git a/src/compare_classifiers/compare_classifiers/ensemble_compare_f1.py b/src/compare_classifiers/compare_classifiers/ensemble_compare_f1.py
index 761b050..ab1cbea 100644
--- a/src/compare_classifiers/compare_classifiers/ensemble_compare_f1.py
+++ b/src/compare_classifiers/compare_classifiers/ensemble_compare_f1.py
@@ -25,7 +25,7 @@ def ensemble_compare_f1(estimators, X_train, y_train):
 ...     ('rf', RandomForestClassifier(n_estimators=10, random_state=42)),
 ...     ('svm', make_pipeline(StandardScaler(), LinearSVC(random_state=42)))
 ... ]
-    ensemble_compare_f1(estimators, X, y)
+    >>> ensemble_compare_f1(estimators, X, y)
     """
 # ...existing code...
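
For reference, the docstring finalized in PATCH 4/4 implies a fairly small body for compare_f1. Below is a minimal sketch of how it could be satisfied, assuming scikit-learn's cross_validate and pandas; the f1_macro scorer and the per-fold averaging are illustrative assumptions, not the package's confirmed implementation.

import pandas as pd
from sklearn.model_selection import cross_validate


def compare_f1(estimators, X, y):
    # One row of averaged cross-validation results per (name, estimator) pair,
    # using the fit_time, test_score and train_score columns named in the docstring.
    rows = {}
    for name, model in estimators:
        cv = cross_validate(
            model,
            X,
            y,
            scoring="f1_macro",  # assumption: macro-averaged F1
            return_train_score=True,
        )
        rows[name] = {
            "fit_time": cv["fit_time"].mean(),
            "test_score": cv["test_score"].mean(),
            "train_score": cv["train_score"].mean(),
        }
    return pd.DataFrame.from_dict(rows, orient="index")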