diff --git a/pymc_experimental/linearmodel.py b/pymc_experimental/linearmodel.py
index c2efbf33..f0e48876 100644
--- a/pymc_experimental/linearmodel.py
+++ b/pymc_experimental/linearmodel.py
@@ -67,8 +67,6 @@ def build_model(self, X: pd.DataFrame, y: pd.Series):
         """
         cfg = self.model_config
 
-        # The model is built with placeholder data.
-        # Actual data will be set by _data_setter when fitting or evaluating the model.
         # Data array size can change but number of dimensions must stay the same.
         with pm.Model() as self.model:
             x = pm.MutableData("x", np.zeros((1,)), dims="observation")
@@ -94,6 +92,8 @@ def build_model(self, X: pd.DataFrame, y: pd.Series):
                 dims="observation",
             )
 
+        self._data_setter(X, y)
+
     def _data_setter(self, X: pd.DataFrame, y: Optional[Union[pd.DataFrame, pd.Series]] = None):
         with self.model:
             pm.set_data({"x": X.squeeze()})
diff --git a/pymc_experimental/model_builder.py b/pymc_experimental/model_builder.py
index e61e38bc..6e2a256e 100644
--- a/pymc_experimental/model_builder.py
+++ b/pymc_experimental/model_builder.py
@@ -299,8 +299,8 @@ def sample_model(self, **kwargs):
         with self.model:
             sampler_args = {**self.sampler_config, **kwargs}
             idata = pm.sample(**sampler_args)
-            idata.extend(pm.sample_prior_predictive())
-            idata.extend(pm.sample_posterior_predictive(idata))
+            idata.extend(pm.sample_prior_predictive(), join="right")
+            idata.extend(pm.sample_posterior_predictive(idata), join="right")
 
         idata = self.set_idata_attrs(idata)
         return idata
@@ -595,23 +595,22 @@ def sample_prior_predictive(
             Prior predictive samples for each input X_pred
         """
         if y_pred is None:
-            y_pred = np.zeros(len(X_pred))
+            y_pred = pd.Series(np.zeros(len(X_pred)), name=self.output_var)
         if samples is None:
             samples = self.sampler_config.get("draws", 500)
 
         if self.model is None:
             self.build_model(X_pred, y_pred)
-
-        self._data_setter(X_pred, y_pred)
-        if self.model is not None:
-            with self.model:  # sample with new input data
-                prior_pred: az.InferenceData = pm.sample_prior_predictive(samples, **kwargs)
-            self.set_idata_attrs(prior_pred)
-            if extend_idata:
-                if self.idata is not None:
-                    self.idata.extend(prior_pred)
-                else:
-                    self.idata = prior_pred
+        else:
+            self._data_setter(X_pred, y_pred)
+        with self.model:  # sample with new input data
+            prior_pred: az.InferenceData = pm.sample_prior_predictive(samples, **kwargs)
+            self.set_idata_attrs(prior_pred)
+            if extend_idata:
+                if self.idata is not None:
+                    self.idata.extend(prior_pred, join="right")
+                else:
+                    self.idata = prior_pred
 
         prior_predictive_samples = az.extract(prior_pred, "prior_predictive", combined=combined)
 
@@ -641,7 +640,7 @@ def sample_posterior_predictive(self, X_pred, extend_idata, combined, **kwargs):
         with self.model:  # sample with new input data
             post_pred = pm.sample_posterior_predictive(self.idata, **kwargs)
             if extend_idata:
-                self.idata.extend(post_pred)
+                self.idata.extend(post_pred, join="right")
 
         posterior_predictive_samples = az.extract(
             post_pred, "posterior_predictive", combined=combined
diff --git a/pymc_experimental/tests/test_linearmodel.py b/pymc_experimental/tests/test_linearmodel.py
index ef870ff1..1a169c4a 100644
--- a/pymc_experimental/tests/test_linearmodel.py
+++ b/pymc_experimental/tests/test_linearmodel.py
@@ -36,6 +36,15 @@
     sklearn_available = False
 
 
+@pytest.fixture(scope="module")
+def toy_actual_params():
+    return {
+        "intercept": 3,
+        "slope": 5,
+        "obs_error": 0.5,
+    }
+
+
 @pytest.fixture(scope="module")
 def toy_X():
     x = np.linspace(start=0, stop=1, num=100)
@@ -44,9 +53,10 @@
 
 
 @pytest.fixture(scope="module")
-def toy_y(toy_X):
-    y = 5 * toy_X["input"] + 3
-    y = y + np.random.normal(0, 1, size=len(toy_X))
+def toy_y(toy_X, toy_actual_params):
+    y = toy_actual_params["slope"] * toy_X["input"] + toy_actual_params["intercept"]
+    rng = np.random.default_rng(427)
+    y = y + rng.normal(0, toy_actual_params["obs_error"], size=len(toy_X))
     y = pd.Series(y, name="output")
     return y
 
@@ -54,13 +64,13 @@ def toy_y(toy_X):
 @pytest.fixture(scope="module")
 def fitted_linear_model_instance(toy_X, toy_y):
     sampler_config = {
-        "draws": 500,
-        "tune": 300,
+        "draws": 50,
+        "tune": 30,
         "chains": 2,
         "target_accept": 0.95,
     }
     model = LinearModel(sampler_config=sampler_config)
-    model.fit(toy_X, toy_y)
+    model.fit(toy_X, toy_y, random_seed=312)
     return model
 
 
@@ -101,6 +111,23 @@
     assert isinstance(post_pred, xr.DataArray)
 
 
+def test_parameter_fit(toy_X, toy_y, toy_actual_params):
+    """Check that the fit model recovered the data-generating parameters."""
+    # Fit the model with a sufficient number of samples
+    sampler_config = {
+        "draws": 500,
+        "tune": 300,
+        "chains": 2,
+        "target_accept": 0.95,
+    }
+    model = LinearModel(sampler_config=sampler_config)
+    model.fit(toy_X, toy_y, random_seed=312)
+    fit_params = model.idata.posterior.mean()
+    np.testing.assert_allclose(fit_params["intercept"], toy_actual_params["intercept"], rtol=0.1)
+    np.testing.assert_allclose(fit_params["slope"], toy_actual_params["slope"], rtol=0.1)
+    np.testing.assert_allclose(fit_params["σ_model_fmc"], toy_actual_params["obs_error"], rtol=0.1)
+
+
 def test_predict(fitted_linear_model_instance):
     model = fitted_linear_model_instance
     X_pred = pd.DataFrame({"input": np.random.uniform(low=0, high=1, size=100)})
diff --git a/pymc_experimental/tests/test_model_builder.py b/pymc_experimental/tests/test_model_builder.py
index a0b24346..3f769e54 100644
--- a/pymc_experimental/tests/test_model_builder.py
+++ b/pymc_experimental/tests/test_model_builder.py
@@ -41,11 +41,13 @@ def toy_y(toy_X):
     return y
 
 
-@pytest.fixture(scope="module")
-def fitted_model_instance(toy_X, toy_y):
+def get_unfitted_model_instance(X, y):
+    """Creates an unfitted model instance to which idata can be copied in
+    and then used as a fitted model instance. That way a fitted model
+    can be used multiple times without having to run `fit` multiple times."""
     sampler_config = {
-        "draws": 100,
-        "tune": 100,
+        "draws": 20,
+        "tune": 10,
         "chains": 2,
         "target_accept": 0.95,
     }
@@ -57,7 +59,30 @@ def fitted_model_instance(toy_X, toy_y):
     model = test_ModelBuilder(
         model_config=model_config, sampler_config=sampler_config, test_parameter="test_paramter"
     )
-    model.fit(toy_X)
+    # Do the things that `model.fit` does except sample to create idata.
+    model._generate_and_preprocess_model_data(X, y.values.flatten())
+    model.build_model(X, y)
+    return model
+
+
+@pytest.fixture(scope="module")
+def fitted_model_instance_base(toy_X, toy_y):
+    """Because fitting takes a relatively long time, this is intended to
+    be used only once and then have new instances created and fit data patched in
+    for tests that use a fitted model instance. Tests should use
+    `fitted_model_instance` instead of this."""
+    model = get_unfitted_model_instance(toy_X, toy_y)
+    model.fit(toy_X, toy_y)
+    return model
+
+
+@pytest.fixture
+def fitted_model_instance(toy_X, toy_y, fitted_model_instance_base):
+    """Get a fitted model instance. A new instance is created and fit data is
+    patched in, so tests using this fixture can modify the model object without
+    affecting other tests."""
+    model = get_unfitted_model_instance(toy_X, toy_y)
+    model.idata = fitted_model_instance_base.idata.copy()
     return model
 
 
@@ -131,8 +156,8 @@ def _generate_and_preprocess_model_data(
     @staticmethod
     def get_default_sampler_config() -> Dict:
         return {
-            "draws": 1_000,
-            "tune": 1_000,
+            "draws": 10,
+            "tune": 10,
             "chains": 3,
             "target_accept": 0.95,
         }
@@ -142,6 +167,9 @@ def test_save_input_params(fitted_model_instance):
     assert fitted_model_instance.idata.attrs["test_paramter"] == '"test_paramter"'
 
 
+@pytest.mark.skipif(
+    sys.platform == "win32", reason="Permissions for temp files not granted on windows CI."
+)
 def test_save_load(fitted_model_instance):
     temp = tempfile.NamedTemporaryFile(mode="w", encoding="utf-8", delete=False)
     fitted_model_instance.save(temp.name)
@@ -193,9 +221,6 @@ def test_fit_no_y(toy_X):
     assert "posterior" in model_builder.idata.groups()
 
 
-@pytest.mark.skipif(
-    sys.platform == "win32", reason="Permissions for temp files not granted on windows CI."
-)
 def test_predict(fitted_model_instance):
     x_pred = np.random.uniform(low=0, high=1, size=100)
     prediction_data = pd.DataFrame({"input": x_pred})
@@ -220,6 +245,41 @@ def test_sample_posterior_predictive(fitted_model_instance, combined):
     assert np.issubdtype(pred[fitted_model_instance.output_var].dtype, np.floating)
 
 
+@pytest.mark.parametrize("group", ["prior_predictive", "posterior_predictive"])
+@pytest.mark.parametrize("extend_idata", [True, False])
+def test_sample_xxx_extend_idata_param(fitted_model_instance, group, extend_idata):
+    output_var = fitted_model_instance.output_var
+    idata_prev = fitted_model_instance.idata[group][output_var]
+
+    # Since coordinates are provided, the dimension must match
+    n_pred = 100  # Must match toy_x
+    x_pred = np.random.uniform(0, 1, n_pred)
+
+    prediction_data = pd.DataFrame({"input": x_pred})
+    if group == "prior_predictive":
+        prediction_method = fitted_model_instance.sample_prior_predictive
+    else:  # group == "posterior_predictive":
+        prediction_method = fitted_model_instance.sample_posterior_predictive
+
+    pred = prediction_method(prediction_data["input"], combined=False, extend_idata=extend_idata)
+
+    pred_unstacked = pred[output_var].values
+    idata_now = fitted_model_instance.idata[group][output_var].values
+
+    if extend_idata:
+        # After sampling, data in the model should be the same as the predictions
+        np.testing.assert_array_equal(idata_now, pred_unstacked)
+        # Data in the model should NOT be the same as before
+        if idata_now.shape == idata_prev.values.shape:
+            assert np.sum(np.abs(idata_now - idata_prev.values) < 1e-5) <= 2
+    else:
+        # After sampling, data in the model should be the same as it was before
+        np.testing.assert_array_equal(idata_now, idata_prev.values)
+        # Data in the model should NOT be the same as the predictions
+        if idata_now.shape == pred_unstacked.shape:
+            assert np.sum(np.abs(idata_now - pred_unstacked) < 1e-5) <= 2
+
+
 def test_model_config_formatting():
     model_config = {
         "a": {