From f67ff8bc22fcc72ead96eeeaef04173ed53650cb Mon Sep 17 00:00:00 2001 From: Ricardo Vieira Date: Fri, 30 Jun 2023 11:54:11 +0200 Subject: [PATCH] Use "unobserved" as imputed variable suffix instead of "missing" --- pymc/model.py | 2 +- tests/backends/test_arviz.py | 4 +-- tests/test_model.py | 60 ++++++++++++++++++++--------------- tests/test_model_graph.py | 6 ++-- tests/tuning/test_starting.py | 6 ++-- 5 files changed, 43 insertions(+), 35 deletions(-) diff --git a/pymc/model.py b/pymc/model.py index adf6dafbfed..e2cf8fade08 100644 --- a/pymc/model.py +++ b/pymc/model.py @@ -1378,7 +1378,7 @@ def make_obs_var( self.observed_RVs.append(observed_rv) # Register FreeRV corresponding to unobserved components - self.register_rv(unobserved_rv, f"{name}_missing", transform=transform) + self.register_rv(unobserved_rv, f"{name}_unobserved", transform=transform) # Register Deterministic that combines observed and missing # Note: This can widely increase memory consumption during sampling for large datasets diff --git a/tests/backends/test_arviz.py b/tests/backends/test_arviz.py index 55364f62646..4f6d312e296 100644 --- a/tests/backends/test_arviz.py +++ b/tests/backends/test_arviz.py @@ -338,10 +338,10 @@ def test_missing_data_model(self): ) # make sure that data is really missing - assert "y_missing" in model.named_vars + assert "y_unobserved" in model.named_vars test_dict = { - "posterior": ["x", "y_missing"], + "posterior": ["x", "y_unobserved"], "observed_data": ["y_observed"], "log_likelihood": ["y_observed"], } diff --git a/tests/test_model.py b/tests/test_model.py index 9306b129022..f4d2bbe78d1 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -357,13 +357,13 @@ def test_missing_data(self): gf = m.logp_dlogp_function() gf._extra_are_set = True - assert m["x2_missing"].type == gf._extra_vars_shared["x2_missing"].type + assert m["x2_unobserved"].type == gf._extra_vars_shared["x2_unobserved"].type # The dtype of the merged observed/missing deterministic should match the RV dtype assert m.deterministics[0].type.dtype == x2.type.dtype point = m.initial_point(random_seed=None).copy() - del point["x2_missing"] + del point["x2_unobserved"] res = [gf(DictToArrayBijection.map(Point(point, model=m))) for i in range(5)] @@ -566,7 +566,7 @@ def test_make_obs_var(): assert masked_output != fake_distribution assert not isinstance(masked_output, RandomVariable) # Ensure it has missing values - assert {"testing_inputs_missing"} == {v.name for v in fake_model.value_vars} + assert {"testing_inputs_unobserved"} == {v.name for v in fake_model.value_vars} assert {"testing_inputs", "testing_inputs_observed"} == { v.name for v in fake_model.observed_RVs } @@ -1221,7 +1221,7 @@ def test_missing_basic(self, missing_data): with pytest.warns(ImputationWarning): _ = pm.Normal("y", x, 1, observed=missing_data) - assert "y_missing" in model.named_vars + assert "y_unobserved" in model.named_vars test_point = model.initial_point() assert not np.isnan(model.compile_logp()(test_point)) @@ -1238,7 +1238,7 @@ def test_missing_with_predictors(self): with pytest.warns(ImputationWarning): y = pm.Normal("y", x * predictors, 1, observed=data) - assert "y_missing" in model.named_vars + assert "y_unobserved" in model.named_vars test_point = model.initial_point() assert not np.isnan(model.compile_logp()(test_point)) @@ -1278,17 +1278,19 @@ def test_interval_missing_observations(self): with pytest.warns(ImputationWarning): theta2 = pm.Normal("theta2", mu=theta1, observed=obs2) - assert isinstance(model.rvs_to_transforms[model["theta1_missing"]], IntervalTransform) + assert isinstance( + model.rvs_to_transforms[model["theta1_unobserved"]], IntervalTransform + ) assert model.rvs_to_transforms[model["theta1_observed"]] is None prior_trace = pm.sample_prior_predictive(random_seed=rng, return_inferencedata=False) assert set(prior_trace.keys()) == { "theta1", "theta1_observed", - "theta1_missing", + "theta1_unobserved", "theta2", "theta2_observed", - "theta2_missing", + "theta2_unobserved", } # Make sure the observed + missing combined deterministics have the @@ -1303,14 +1305,16 @@ def test_interval_missing_observations(self): # Make sure the missing parts of the combined deterministic matches the # sampled missing and observed variable values assert ( - np.mean(prior_trace["theta1"][:, obs1.mask] - prior_trace["theta1_missing"]) == 0.0 + np.mean(prior_trace["theta1"][:, obs1.mask] - prior_trace["theta1_unobserved"]) + == 0.0 ) assert ( np.mean(prior_trace["theta1"][:, ~obs1.mask] - prior_trace["theta1_observed"]) == 0.0 ) assert ( - np.mean(prior_trace["theta2"][:, obs2.mask] - prior_trace["theta2_missing"]) == 0.0 + np.mean(prior_trace["theta2"][:, obs2.mask] - prior_trace["theta2_unobserved"]) + == 0.0 ) assert ( np.mean(prior_trace["theta2"][:, ~obs2.mask] - prior_trace["theta2_observed"]) @@ -1326,18 +1330,22 @@ def test_interval_missing_observations(self): ) assert set(trace.varnames) == { "theta1", - "theta1_missing", - "theta1_missing_interval__", + "theta1_unobserved", + "theta1_unobserved_interval__", "theta2", - "theta2_missing", + "theta2_unobserved", } # Make sure that the missing values are newly generated samples and that # the observed and deterministic match - assert np.all(0 < trace["theta1_missing"].mean(0)) - assert np.all(0 < trace["theta2_missing"].mean(0)) - assert np.isclose(np.mean(trace["theta1"][:, obs1.mask] - trace["theta1_missing"]), 0) - assert np.isclose(np.mean(trace["theta2"][:, obs2.mask] - trace["theta2_missing"]), 0) + assert np.all(0 < trace["theta1_unobserved"].mean(0)) + assert np.all(0 < trace["theta2_unobserved"].mean(0)) + assert np.isclose( + np.mean(trace["theta1"][:, obs1.mask] - trace["theta1_unobserved"]), 0 + ) + assert np.isclose( + np.mean(trace["theta2"][:, obs2.mask] - trace["theta2_unobserved"]), 0 + ) # Make sure that the observed values are unchanged assert np.allclose(np.var(trace["theta1"][:, ~obs1.mask], 0), 0.0) @@ -1378,7 +1386,7 @@ def test_missing_logp1(self): with pytest.warns(ImputationWarning): x = pm.Gamma("x", 1, 1, observed=[1, 1, 1, np.nan]) - logp_val = m2.compile_logp()({"x_missing_log__": np.array([0])}) + logp_val = m2.compile_logp()({"x_unobserved_log__": np.array([0])}) assert logp_val == -4.0 def test_missing_logp2(self): @@ -1394,7 +1402,7 @@ def test_missing_logp2(self): "theta2", mu=theta1, observed=np.array([np.nan, np.nan, 2, np.nan, 4]) ) m_missing_logp = m_missing.compile_logp()( - {"theta1_missing": [2, 4], "theta2_missing": [0, 1, 3]} + {"theta1_unobserved": [2, 4], "theta2_unobserved": [0, 1, 3]} ) assert m_logp == m_missing_logp @@ -1407,7 +1415,7 @@ def test_missing_multivariate_separable(self): a=[1, 2, 3], observed=np.array([[0.3, 0.3, 0.4], [np.nan, np.nan, np.nan]]), ) - assert (m_miss["x_missing"].owner.op, pm.Dirichlet) + assert (m_miss["x_unobserved"].owner.op, pm.Dirichlet) assert (m_miss["x_observed"].owner.op, pm.Dirichlet) with pm.Model() as m_unobs: @@ -1415,7 +1423,7 @@ def test_missing_multivariate_separable(self): inp_vals = simplex.forward(np.array([[0.3, 0.3, 0.4]])).eval() np.testing.assert_allclose( - m_miss.compile_logp(jacobian=False)({"x_missing_simplex__": inp_vals}), + m_miss.compile_logp(jacobian=False)({"x_unobserved_simplex__": inp_vals}), m_unobs.compile_logp(jacobian=False)({"x_simplex__": inp_vals}) * 2, ) @@ -1428,12 +1436,12 @@ def test_missing_multivariate_unseparable(self): observed=np.array([[0.3, 0.3, np.nan], [np.nan, np.nan, 0.4]]), ) - assert isinstance(m_miss["x_missing"].owner.op, PartialObservedRV) + assert isinstance(m_miss["x_unobserved"].owner.op, PartialObservedRV) assert isinstance(m_miss["x_observed"].owner.op, PartialObservedRV) inp_values = np.array([0.3, 0.3, 0.4]) np.testing.assert_allclose( - m_miss.compile_logp()({"x_missing": [0.4, 0.3, 0.3]}), + m_miss.compile_logp()({"x_unobserved": [0.4, 0.3, 0.3]}), st.dirichlet.logpdf(inp_values, [1, 2, 3]) * 2, ) @@ -1451,7 +1459,7 @@ def test_missing_vector_parameter(self): assert np.all(x_draws[:, 0] < 0) assert np.all(x_draws[:, 1] > 0) assert np.isclose( - m.compile_logp()({"x_missing": np.array([-10, 10, -10, 10])}), + m.compile_logp()({"x_unobserved": np.array([-10, 10, -10, 10])}), st.norm(scale=0.1).logpdf(0) * 6, ) @@ -1470,7 +1478,7 @@ def test_missing_symmetric(self): x_obs_rv = m["x_observed"] x_obs_vv = m.rvs_to_values[x_obs_rv] - x_unobs_rv = m["x_missing"] + x_unobs_rv = m["x_unobserved"] x_unobs_vv = m.rvs_to_values[x_unobs_rv] logp = transformed_conditional_logp( @@ -1506,7 +1514,7 @@ def test_symbolic_random_variable(self): observed=data, ) np.testing.assert_almost_equal( - model.compile_logp()({"x_missing": [0] * 3}), + model.compile_logp()({"x_unobserved": [0] * 3}), st.norm.logcdf(0) * 10, ) diff --git a/tests/test_model_graph.py b/tests/test_model_graph.py index 9c2e1caa37c..0ad38737e7d 100644 --- a/tests/test_model_graph.py +++ b/tests/test_model_graph.py @@ -145,13 +145,13 @@ def model_with_imputations(): compute_graph = { "a": set(), - "L_missing": {"a"}, + "L_unobserved": {"a"}, "L_observed": {"a"}, - "L": {"L_missing", "L_observed"}, + "L": {"L_unobserved", "L_observed"}, } plates = { "": {"a"}, - "2": {"L_missing"}, + "2": {"L_unobserved"}, "10": {"L_observed"}, "12": {"L"}, } diff --git a/tests/tuning/test_starting.py b/tests/tuning/test_starting.py index 4e7a3540ee2..cfdb891891b 100644 --- a/tests/tuning/test_starting.py +++ b/tests/tuning/test_starting.py @@ -146,9 +146,9 @@ def test_find_MAP_issue_4488(): y = pm.Deterministic("y", x + 1) map_estimate = find_MAP() - assert not set.difference({"x_missing", "x_missing_log__", "y"}, set(map_estimate.keys())) - np.testing.assert_allclose(map_estimate["x_missing"], 0.2, rtol=1e-4, atol=1e-4) - np.testing.assert_allclose(map_estimate["y"], [2.0, map_estimate["x_missing"][0] + 1]) + assert not set.difference({"x_unobserved", "x_unobserved_log__", "y"}, set(map_estimate.keys())) + np.testing.assert_allclose(map_estimate["x_unobserved"], 0.2, rtol=1e-4, atol=1e-4) + np.testing.assert_allclose(map_estimate["y"], [2.0, map_estimate["x_unobserved"][0] + 1]) def test_find_MAP_warning_non_free_RVs():