From 8edf6a063a7091e6d51b375580c12d34764976ed Mon Sep 17 00:00:00 2001 From: "Benjamin T. Vincent" Date: Tue, 19 Nov 2024 13:55:10 +0000 Subject: [PATCH 1/3] fix failing doctest, use posterior expectation for r2 score --- causalpy/pymc_models.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/causalpy/pymc_models.py b/causalpy/pymc_models.py index bbd93110..ea9c245f 100644 --- a/causalpy/pymc_models.py +++ b/causalpy/pymc_models.py @@ -54,7 +54,7 @@ class PyMCModel(pm.Model): ... "chains": 2, ... "draws": 2000, ... "progressbar": False, - ... "random_seed": rng, + ... "random_seed": 42, ... } ... ) >>> model.fit(X, y) @@ -63,8 +63,8 @@ class PyMCModel(pm.Model): >>> model.predict(X_new) Inference data... >>> model.score(X, y) - r2 0.390344 - r2_std 0.081135 + r2 0.19157 + r2_std 0.11238 dtype: float64 """ @@ -123,7 +123,6 @@ def predict(self, X): # Ensure random_seed is used in sample_prior_predictive() and # sample_posterior_predictive() if provided in sample_kwargs. random_seed = self.sample_kwargs.get("random_seed", None) - self._data_setter(X) with self: # sample with new input data post_pred = pm.sample_posterior_predictive( @@ -137,18 +136,19 @@ def predict(self, X): def score(self, X, y) -> pd.Series: """Score the Bayesian :math:`R^2` given inputs ``X`` and outputs ``y``. + Note that the score is based on a comparison of the observed data ``y`` and the + model's expected value of the data, `mu`. + .. caution:: The Bayesian :math:`R^2` is not the same as the traditional coefficient of determination, https://en.wikipedia.org/wiki/Coefficient_of_determination. 
""" - yhat = self.predict(X) - yhat = az.extract( - yhat, group="posterior_predictive", var_names="y_hat" - ).T.values + mu = self.predict(X) + mu = az.extract(mu, group="posterior_predictive", var_names="mu").T.values # Note: First argument must be a 1D array - return r2_score(y.flatten(), yhat) + return r2_score(y.flatten(), mu) def calculate_impact(self, y_true, y_pred): pre_data = xr.DataArray(y_true, dims=["obs_ind"]) From 55159b85aee40c98fe007581885025f9a1f6e3af Mon Sep 17 00:00:00 2001 From: "Benjamin T. Vincent" Date: Tue, 19 Nov 2024 14:13:02 +0000 Subject: [PATCH 2/3] re-order operations in docstring --- causalpy/pymc_models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/causalpy/pymc_models.py b/causalpy/pymc_models.py index ea9c245f..91eb9457 100644 --- a/causalpy/pymc_models.py +++ b/causalpy/pymc_models.py @@ -59,13 +59,13 @@ class PyMCModel(pm.Model): ... ) >>> model.fit(X, y) Inference data... - >>> X_new = rng.normal(loc=0, scale=1, size=(20,2)) - >>> model.predict(X_new) - Inference data... >>> model.score(X, y) r2 0.19157 r2_std 0.11238 dtype: float64 + >>> X_new = rng.normal(loc=0, scale=1, size=(20,2)) + >>> model.predict(X_new) + Inference data... """ def __init__(self, sample_kwargs: Optional[Dict[str, Any]] = None): From cdf2ee530af562e1001a55b7010f105b4920914e Mon Sep 17 00:00:00 2001 From: "Benjamin T. Vincent" Date: Thu, 21 Nov 2024 21:10:42 +0000 Subject: [PATCH 3/3] try to make doctests not care about exact numerical result --- causalpy/pymc_models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/causalpy/pymc_models.py b/causalpy/pymc_models.py index 91eb9457..32470dd9 100644 --- a/causalpy/pymc_models.py +++ b/causalpy/pymc_models.py @@ -59,9 +59,9 @@ class PyMCModel(pm.Model): ... ) >>> model.fit(X, y) Inference data... - >>> model.score(X, y) - r2 0.19157 - r2_std 0.11238 + >>> model.score(X, y) # doctest: +ELLIPSIS + r2 ... + r2_std ... 
dtype: float64 >>> X_new = rng.normal(loc=0, scale=1, size=(20,2)) >>> model.predict(X_new)