From 74b2d3096708848a28fde9e23634f164de085761 Mon Sep 17 00:00:00 2001 From: kleeman Date: Fri, 30 Dec 2022 13:35:42 -0800 Subject: [PATCH] sparse GP uses block diagonal LDLT --- include/albatross/src/models/sparse_gp.hpp | 56 +++++++++++++++------- tests/test_sparse_gp.cc | 2 +- 2 files changed, 40 insertions(+), 18 deletions(-) diff --git a/include/albatross/src/models/sparse_gp.hpp b/include/albatross/src/models/sparse_gp.hpp index fa118f5f..68d20ddb 100644 --- a/include/albatross/src/models/sparse_gp.hpp +++ b/include/albatross/src/models/sparse_gp.hpp @@ -10,8 +10,8 @@ * WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE. */ -#ifndef INCLUDE_ALBATROSS_MODELS_SPARSE_GP_H_ -#define INCLUDE_ALBATROSS_MODELS_SPARSE_GP_H_ +#ifndef ALBATROSS_SRC_MODELS_SPARSE_GP_HPP +#define ALBATROSS_SRC_MODELS_SPARSE_GP_HPP namespace albatross { @@ -74,7 +74,7 @@ template struct SparseGPFit {}; template struct Fit> { std::vector train_features; - Eigen::SerializableLDLT train_covariance; + BlockDiagonalLDLT train_covariance; Eigen::MatrixXd sigma_R; Eigen::Matrix permutation_indices; Eigen::VectorXd information; @@ -83,7 +83,7 @@ template struct Fit> { Fit(){}; Fit(const std::vector &features_, - const Eigen::SerializableLDLT &train_covariance_, + const BlockDiagonalLDLT &train_covariance_, const Eigen::MatrixXd sigma_R_, const Eigen::Matrix permutation_indices_, const Eigen::VectorXd &information_, Eigen::Index numerical_rank_) @@ -351,12 +351,12 @@ class SparseGaussianProcessRegression // Sigma = (B^T B)^-1 // Eigen::ColPivHouseholderQR - compute_sigma_qr(const Eigen::SerializableLDLT &K_uu_ldlt, + compute_sigma_qr(const BlockDiagonalLDLT &K_uu_ldlt, const BlockDiagonalLDLT &A_ldlt, const Eigen::MatrixXd &K_fu) const { Eigen::MatrixXd B(A_ldlt.rows() + K_uu_ldlt.rows(), K_uu_ldlt.rows()); B.topRows(A_ldlt.rows()) = A_ldlt.sqrt_solve(K_fu); - B.bottomRows(K_uu_ldlt.rows()) = K_uu_ldlt.sqrt_transpose(); + B.bottomRows(K_uu_ldlt.rows()) = K_uu_ldlt.sqrt_transpose().to_dense(); return B.colPivHouseholderQr(); }; @@ -371,7 +371,12 @@ class SparseGaussianProcessRegression auto u = inducing_point_strategy_(this->covariance_function_, features); ALBATROSS_ASSERT(!u.empty() && "Empty inducing points!"); - const auto K_uu_ldlt = compute_kuu_ldlt(&u); + // Compute K_uu = P_cols.T * D * P_cols + // where D is a block diagonal. Instead of storing the permutation + // we can just use D and reorder the inducing points + const auto structured_K_uu_ldlt = compute_kuu_ldlt(&u); + const auto &K_uu_ldlt = structured_K_uu_ldlt.matrix; + u = structured_K_uu_ldlt.P_cols * u; BlockDiagonalLDLT A_ldlt; Eigen::MatrixXd K_fu; @@ -401,14 +406,23 @@ class SparseGaussianProcessRegression new_fit.train_features = new_inducing_points; - const Eigen::MatrixXd K_zz = - this->covariance_function_(new_inducing_points, Base::threads_.get()); - new_fit.train_covariance = Eigen::SerializableLDLT(K_zz); + auto K_zz = this->covariance_function_(new_fit.train_features, + Base::threads_.get()); + K_zz.diagonal() += + inducing_nugget_.value * Eigen::VectorXd::Ones(K_zz.rows()); + const auto block_diag_K_zz = linalg::infer_block_diagonal_matrix(K_zz); + // Store only the inner block diagonal representation and reorder + // the inducing points to match; + new_fit.train_covariance = block_diag_K_zz.matrix.ldlt(); + + const auto &P = block_diag_K_zz.P_cols; + new_fit.train_features = P * new_fit.train_features; + JointDistribution prediction(P * prediction_.mean, + P * prediction_.covariance * P.transpose()); // We're going to need to take the sqrt of the new covariance which // could be extremely small, so here we add a small nugget to avoid // numerical instability - JointDistribution prediction(prediction_); prediction.covariance.diagonal() += Eigen::VectorXd::Constant( cast::to_index(prediction.size()), 1, details::DEFAULT_NUGGET); new_fit.information = new_fit.train_covariance.solve(prediction.mean); @@ -439,6 +453,7 @@ class SparseGaussianProcessRegression // We can then compute and store the QR decomposition of B // as we do in a normal fit. const Eigen::SerializableLDLT C_ldlt(prediction.covariance); + // think about this, probably a better way: const Eigen::MatrixXd sigma_inv_sqrt = C_ldlt.sqrt_solve(K_zz); const auto B_qr = sigma_inv_sqrt.colPivHouseholderQr(); @@ -529,7 +544,12 @@ class SparseGaussianProcessRegression auto u = inducing_point_strategy_(this->covariance_function_, dataset.features); - const auto K_uu_ldlt = compute_kuu_ldlt(&u); + // Compute K_uu = P_cols.T * D * P_cols + // where D is a block diagonal. Instead of storing the permutation + // we can just use D and reorder the inducing points + const auto structured_K_uu_ldlt = compute_kuu_ldlt(&u); + const auto &K_uu_ldlt = structured_K_uu_ldlt.matrix; + u = structured_K_uu_ldlt.P_cols * u; BlockDiagonalLDLT A_ldlt; Eigen::MatrixXd K_fu; @@ -599,13 +619,14 @@ class SparseGaussianProcessRegression private: template - auto + Structured compute_kuu_ldlt(std::vector *inducing_features) const { auto K_uu = this->covariance_function_(*inducing_features, Base::threads_.get()); K_uu.diagonal() += inducing_nugget_.value * Eigen::VectorXd::Ones(K_uu.rows()); - return Eigen::SerializableLDLT(K_uu); + const auto block_diag_K_uu = linalg::infer_block_diagonal_matrix(K_uu); + return structured::ldlt(block_diag_K_uu); } // This method takes care of a lot of the common book keeping required to @@ -619,7 +640,7 @@ class SparseGaussianProcessRegression const std::vector &inducing_features, const std::vector &out_of_order_features, const MarginalDistribution &out_of_order_targets, - const Eigen::SerializableLDLT &K_uu_ldlt, BlockDiagonalLDLT *A_ldlt, + const BlockDiagonalLDLT &K_uu_ldlt, BlockDiagonalLDLT *A_ldlt, Eigen::MatrixXd *K_fu, Eigen::VectorXd *y) const { ALBATROSS_ASSERT(A_ldlt != nullptr); @@ -660,7 +681,8 @@ class SparseGaussianProcessRegression // Q_ff = K_fu K_uu^-1 K_uf // = K_fu K_uu^-T/2 K_uu^-1/2 K_uf // = P^T P - const Eigen::MatrixXd P = K_uu_ldlt.sqrt_solve(K_fu->transpose()); + const Eigen::MatrixXd P = + K_uu_ldlt.sqrt_solve(K_fu->transpose(), Base::threads_.get()); // We only need the diagonal blocks of Q_ff to get A BlockDiagonal Q_ff_diag; @@ -756,4 +778,4 @@ auto sparse_gp_from_covariance(CovFunc covariance_function, } // namespace albatross -#endif /* INCLUDE_ALBATROSS_MODELS_SPARSE_GP_H_ */ +#endif // ALBATROSS_SRC_MODELS_SPARSE_GP_HPP diff --git a/tests/test_sparse_gp.cc b/tests/test_sparse_gp.cc index eb57bb79..07b3dedf 100644 --- a/tests/test_sparse_gp.cc +++ b/tests/test_sparse_gp.cc @@ -355,7 +355,7 @@ TYPED_TEST(SparseGaussianProcessTest, test_rebase_inducing_points) { auto high_res_pred = high_res_fit.predict_with_measurement_noise(test_features).joint(); // Increasing the inducing points shouldn't change much - EXPECT_LT((high_res_pred.mean - full_pred.mean).norm(), 1e-6); + EXPECT_LT((high_res_pred.mean - full_pred.mean).norm(), 1e-5); const auto low_high_res_fit = rebase_inducing_points(low_res_fit, high_res_features);