Merge pull request #77 from UBC-MDS/test_summary_doc

Test summary docs and readme updated
UBC-MDS · Feb 2, 2025 · 902db6c · 902db6c
2 parents bc43463 + 90e2795
commit 902db6c
Show file tree

Hide file tree

Showing 2 changed files with 31 additions and 57 deletions.
diff --git a/README.md b/README.md
@@ -20,10 +20,10 @@ $ pip install tidylinreg
 
 The `tidylinreg` package fits a linear model to a dataset, and can be used to carry out regression. 
 `tidylinreg` computes and returns a list of summary statistics of the fitted linear model, including standard error, confidence intervals, and p-values.
-These summary statistics are ouput as a [Pandas DataFrame](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html). This is advantageous as it allows for fast and convenient manipulation of large regression models,
+These summary statistics are output as a [Pandas DataFrame](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html). This is advantageous as it allows for fast and convenient manipulation of large regression models,
 where, for example, insignificant parameters can easily be filtered out!
 
-## Functions
+## Functions
 
 `tidylinreg` is built around the `LinearModel` object, which offers three useful methods:
 
@@ -32,10 +32,10 @@ where, for example, insignificant parameters can easily be filtered out!
     the object must be fitted to the data before anything else!
     - Please be advised that at the current state of development, `fit` only accepts continuous regressors. If your data is categorical,
     first transform it into dummy variables with an encoding technique such as [One-Hot Encoding](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OneHotEncoder.html)
-    - Watch out for collinearity! `tidlinreg` will let you know if there is any linear dependence in your data
+    - Watch out for collinearity! `tidylinreg` will let you know if there is any linear dependence in your data
     before fitting.
     provided by Scikit-Learn.
-    - For convenience, the intercept is automatically included into the regression model. No need to modify your data to accomodate this!
+    - For convenience, the intercept is automatically included into the regression model. No need to modify your data to accommodate this!
 - `predict`:
     - Predict the response using given test regressor data. Remember to fit the model first!
 - `summary`:
@@ -87,7 +87,7 @@ Once `tidylinreg` is installed, you can import the `LinearModel` object to begin
     my_linear_model.summary(ci=True)
     ```
 
-    The default significance level is 0.05, giving 95% confidence intervals. We can change this by modifying the `alpha` arguument. For example, if we want wider 99% confidence intervals, we can set `alpha` to 0.01:
+    The default significance level is 0.05, giving 95% confidence intervals. We can change this by modifying the `alpha` argument. For example, if we want wider 99% confidence intervals, we can set `alpha` to 0.01:
 
     ```python
     my_linear_model.summary(ci=True, alpha=0.01)

diff --git a/tests/test_summary.py b/tests/test_summary.py
@@ -4,83 +4,64 @@
 import statsmodels.api as sm
 from tidylinreg.tidylinreg import LinearModel
 
-
 @pytest.fixture
 def linear_model():
-    """Fixture for initializing a LinearModel instance."""
+    """Return a LinearModel instance."""
     return LinearModel()
 
-
 @pytest.fixture
 def example_data():
-    """Fixture for creating example data."""
+    """Generate sample data with a linear relationship and noise."""
     np.random.seed(524)
     X = pd.DataFrame({'x': [-2, -1, 0, 1, 2]})
     y = 3 * X.squeeze() + 2 + np.random.normal(0, 0.5, X.shape[0])
     return X, y
 
-
 def test_summary_no_fit(linear_model):
-    """Test that summary raises an error if the model is not fitted."""
+    """Ensure summary() raises an error if model is not fitted."""
     with pytest.raises(ValueError):
         linear_model.summary()
 
-
 def test_summary_basic(linear_model, example_data):
-    """Test the basic functionality of the summary method."""
+    """Verify summary() returns a DataFrame with expected columns."""
     X, y = example_data
     linear_model.fit(X, y)
     summary_df = linear_model.summary()
 
-    assert isinstance(summary_df, pd.DataFrame), "Summary output should be a DataFrame."
-    assert all(
-        col in summary_df.columns
-        for col in ["Parameter", "Estimate", "Std. Error", "T-Statistic", "P-Value"]
-    ), "Summary should contain the expected columns."
-
+    assert isinstance(summary_df, pd.DataFrame)
+    assert all(col in summary_df.columns for col in 
+               ["Parameter", "Estimate", "Std. Error", "T-Statistic", "P-Value"])
 
 def test_summary_with_ci(linear_model, example_data):
-    """Test summary output when confidence intervals are requested."""
+    """Ensure summary() includes confidence intervals when requested."""
     X, y = example_data
     linear_model.fit(X, y)
     summary_df = linear_model.summary(ci=True, alpha=0.05)
 
-    assert isinstance(summary_df, pd.DataFrame), "Summary output should be a DataFrame."
-    assert all(
-        col in summary_df.columns
-        for col in ["Parameter", "Estimate", "Std. Error", "T-Statistic", "P-Value", "CI Lower", "CI Upper"]
-    ), "Summary should include confidence intervals when requested."
-
+    assert isinstance(summary_df, pd.DataFrame)
+    assert all(col in summary_df.columns for col in 
+               ["Parameter", "Estimate", "Std. Error", "T-Statistic", "P-Value", "CI Lower", "CI Upper"])
+
 def test_summary_output_without_ci(example_data):
-    """
-    Test if summary outputs the correct DataFrame structure without confidence intervals.
-    """
+    """Check that summary() excludes confidence intervals when ci=False."""
     X, y = example_data
     model = LinearModel()
     model.fit(X, y)
     model.get_std_error()
     model.get_test_statistic()
     summary = model.summary(ci=False)
 
-    assert isinstance(summary, pd.DataFrame), "Summary should return a DataFrame."
-
-    assert "Parameter" in summary.columns, "Summary should include 'Parameter' column."
-    assert "Estimate" in summary.columns, "Summary should include 'Estimate' column."
-    assert "Std. Error" in summary.columns, "Summary should include 'Std. Error' column."
-    assert "T-Statistic" in summary.columns, "Summary should include 't-value' column."
-    assert "P-Value" in summary.columns, "Summary should include 'p-value' column."
-    assert "CI Lower" not in summary.columns, "Confidence intervals should not be included when ci=False."
-    assert "CI Upper" not in summary.columns, "Confidence intervals should not be included when ci=False."
-
-
+    assert isinstance(summary, pd.DataFrame)
+    assert "CI Lower" not in summary.columns
+    assert "CI Upper" not in summary.columns
 
 def test_summary_invalid_alpha(linear_model, example_data):
-    """Test that an error is raised for invalid alpha values."""
+    """Ensure summary() raises errors for invalid alpha values."""
     X, y = example_data
     linear_model.fit(X, y)
- 
+
     with pytest.raises(TypeError):
-        linear_model.summary(ci=True, alpha="hello")    
+        linear_model.summary(ci=True, alpha="hello")
     with pytest.raises(ValueError):
         linear_model.summary(ci=True, alpha=0)
     with pytest.raises(ValueError):
@@ -90,23 +71,16 @@ def test_summary_invalid_alpha(linear_model, example_data):
     with pytest.raises(ValueError):
         linear_model.summary(ci=True, alpha=1.1)
 
-
 def test_summary_values(linear_model, example_data):
-    """Test that the summary values are correct."""
+    """Verify summary() estimates match those from statsmodels."""
     X, y = example_data
     linear_model.fit(X, y)
     summary_df = linear_model.summary(ci=True, alpha=0.05)
-
-    # Use statsmodels to calculate expected values
-
+
     X_with_const = sm.add_constant(X)
     model = sm.OLS(y, X_with_const).fit()
-
-    # Replace expected values with those from statsmodels
-    expected_params = model.params.values  # Intercept and slope
-    expected_pvalues = model.pvalues  # P-values for parameters
-
-    # Check parameter estimates
-    assert np.allclose(summary_df["Estimate"].values, expected_params, atol=0.1), \
-        f"Parameter estimates are incorrect: {summary_df['Estimate'].values}."
-
+
+    expected_params = model.params.values
+    expected_pvalues = model.pvalues
+
+    assert np.allclose(summary_df["Estimate"].values, expected_params, atol=0.1)