From 8090c4779107af053157e75199c4b5230349f1f7 Mon Sep 17 00:00:00 2001 From: Matt Peddie Date: Thu, 8 Aug 2024 01:04:54 +1000 Subject: [PATCH 01/12] Make BlockDiagonalLDLT serialiseable --- BUILD.bazel | 1 + include/albatross/serialize/GP | 1 + .../albatross/src/cereal/block_diagonal.hpp | 27 +++++++++++++++++++ 3 files changed, 29 insertions(+) create mode 100644 include/albatross/src/cereal/block_diagonal.hpp diff --git a/BUILD.bazel b/BUILD.bazel index a7f054c4..431088f1 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -48,6 +48,7 @@ cc_library( "include/albatross/serialize/Ransac", "include/albatross/src/cereal/ThreadPool.hpp", "include/albatross/src/cereal/block_utils.hpp", + "include/albatross/src/cereal/block_diagonal.hpp", "include/albatross/src/cereal/dataset.hpp", "include/albatross/src/cereal/distribution.hpp", "include/albatross/src/cereal/eigen.hpp", diff --git a/include/albatross/serialize/GP b/include/albatross/serialize/GP index f54633c5..2ecee09c 100644 --- a/include/albatross/serialize/GP +++ b/include/albatross/serialize/GP @@ -18,6 +18,7 @@ #include "../src/cereal/block_utils.hpp" #include "../src/cereal/serializable_ldlt.hpp" +#include "../src/cereal/block_diagonal.hpp" #include "../src/cereal/gp.hpp" #include "../src/cereal/representations.hpp" #include "../src/cereal/serializable_spqr.hpp" diff --git a/include/albatross/src/cereal/block_diagonal.hpp b/include/albatross/src/cereal/block_diagonal.hpp new file mode 100644 index 00000000..964602bd --- /dev/null +++ b/include/albatross/src/cereal/block_diagonal.hpp @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2019 Swift Navigation Inc. + * Contact: Swift Navigation + * + * This source is subject to the license found in the file 'LICENSE' which must + * be distributed together with this source. All other rights reserved. 
+ * + * THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, + * EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef ALBATROSS_SRC_CEREAL_BLOCK_DIAGONAL_HPP_ +#define ALBATROSS_SRC_CEREAL_BLOCK_DIAGONAL_HPP_ + +namespace cereal { + +template +inline void serialize(Archive &archive, + const albatross::BlockDiagonalLDLT &ldlt, + const std::uint32_t version) { + archive(cereal::make_nvp("blocks", ldlt.blocks)); +} + +} // namespace cereal + +#endif /* ALBATROSS_SRC_CEREAL_BLOCK_DIAGONAL_HPP_ */ From 31a0b21ad09f5305e352177db2cc4f917c27dd9e Mon Sep 17 00:00:00 2001 From: Matt Peddie Date: Thu, 8 Aug 2024 01:05:35 +1000 Subject: [PATCH 02/12] Factor out common sparse GP stuff --- BUILD.bazel | 1 + include/albatross/SparseGP | 1 + .../albatross/src/models/sparse_common.hpp | 92 +++++++++++++++++++ include/albatross/src/models/sparse_gp.hpp | 71 -------------- 4 files changed, 94 insertions(+), 71 deletions(-) create mode 100644 include/albatross/src/models/sparse_common.hpp diff --git a/BUILD.bazel b/BUILD.bazel index 431088f1..71536037 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -130,6 +130,7 @@ cc_library( "include/albatross/src/models/null_model.hpp", "include/albatross/src/models/ransac.hpp", "include/albatross/src/models/ransac_gp.hpp", + "include/albatross/src/models/sparse_common.hpp", "include/albatross/src/models/sparse_gp.hpp", "include/albatross/src/samplers/callbacks.hpp", "include/albatross/src/samplers/ensemble.hpp", diff --git a/include/albatross/SparseGP b/include/albatross/SparseGP index 2a26aab2..925bb506 100644 --- a/include/albatross/SparseGP +++ b/include/albatross/SparseGP @@ -23,6 +23,7 @@ #include #include #include +#include #include #endif diff --git a/include/albatross/src/models/sparse_common.hpp b/include/albatross/src/models/sparse_common.hpp new file mode 100644 index 00000000..b5267b99 --- /dev/null +++ 
b/include/albatross/src/models/sparse_common.hpp @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2024 Swift Navigation Inc. + * Contact: Swift Navigation + * + * This source is subject to the license found in the file 'LICENSE' which must + * be distributed together with this source. All other rights reserved. + * + * THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, + * EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef INCLUDE_ALBATROSS_MODELS_SPARSE_COMMON_H_ +#define INCLUDE_ALBATROSS_MODELS_SPARSE_COMMON_H_ + +namespace albatross { + +namespace details { + +constexpr double DEFAULT_NUGGET = 1e-8; + +inline std::string measurement_nugget_name() { return "measurement_nugget"; } + +inline std::string inducing_nugget_name() { return "inducing_nugget"; } + +static constexpr double cSparseRNugget = 1.e-10; + +} // namespace details + +struct UniformlySpacedInducingPoints { + + UniformlySpacedInducingPoints(std::size_t num_points_ = 10) + : num_points(num_points_) {} + + template + std::vector operator()(const CovarianceFunction &cov ALBATROSS_UNUSED, + const std::vector &features) const { + double min = *std::min_element(features.begin(), features.end()); + double max = *std::max_element(features.begin(), features.end()); + return linspace(min, max, num_points); + } + + std::size_t num_points; +}; + +struct StateSpaceInducingPointStrategy { + + template ::value, + int> = 0> + auto operator()(const CovarianceFunction &cov, + const std::vector &features) const { + return cov.state_space_representation(features); + } + + template ::value, + int> = 0> + auto + operator()(const CovarianceFunction &cov ALBATROSS_UNUSED, + const std::vector &features ALBATROSS_UNUSED) const + ALBATROSS_FAIL( + CovarianceFunction, + "Covariance function is missing state_space_representation method, " + "be sure _ssr_impl has been defined for the types concerned"); 
+}; + +struct SPQRImplementation { + using QRType = Eigen::SPQR>; + + static std::unique_ptr compute(const Eigen::MatrixXd &m, + ThreadPool *threads) { + return SPQR_create(m.sparseView(), threads); + } +}; + +struct DenseQRImplementation { + using QRType = Eigen::ColPivHouseholderQR; + + static std::unique_ptr compute(const Eigen::MatrixXd &m, + ThreadPool *threads + __attribute__((unused))) { + return std::make_unique(m); + } +}; + + +} // namespace albatross + +#endif // INCLUDE_ALBATROSS_MODELS_SPARSE_COMMON_H_ \ No newline at end of file diff --git a/include/albatross/src/models/sparse_gp.hpp b/include/albatross/src/models/sparse_gp.hpp index 36234e4e..90985cb4 100644 --- a/include/albatross/src/models/sparse_gp.hpp +++ b/include/albatross/src/models/sparse_gp.hpp @@ -15,81 +15,10 @@ namespace albatross { -namespace details { - -constexpr double DEFAULT_NUGGET = 1e-8; - -inline std::string measurement_nugget_name() { return "measurement_nugget"; } - -inline std::string inducing_nugget_name() { return "inducing_nugget"; } - -static constexpr double cSparseRNugget = 1.e-10; - -} // namespace details - template class SparseGaussianProcessRegression; -struct UniformlySpacedInducingPoints { - - UniformlySpacedInducingPoints(std::size_t num_points_ = 10) - : num_points(num_points_) {} - - template - std::vector operator()(const CovarianceFunction &cov ALBATROSS_UNUSED, - const std::vector &features) const { - double min = *std::min_element(features.begin(), features.end()); - double max = *std::max_element(features.begin(), features.end()); - return linspace(min, max, num_points); - } - - std::size_t num_points; -}; - -struct StateSpaceInducingPointStrategy { - - template ::value, - int> = 0> - auto operator()(const CovarianceFunction &cov, - const std::vector &features) const { - return cov.state_space_representation(features); - } - - template ::value, - int> = 0> - auto - operator()(const CovarianceFunction &cov ALBATROSS_UNUSED, - const std::vector &features 
ALBATROSS_UNUSED) const - ALBATROSS_FAIL( - CovarianceFunction, - "Covariance function is missing state_space_representation method, " - "be sure _ssr_impl has been defined for the types concerned"); -}; - -struct SPQRImplementation { - using QRType = Eigen::SPQR>; - - static std::unique_ptr compute(const Eigen::MatrixXd &m, - ThreadPool *threads) { - return SPQR_create(m.sparseView(), threads); - } -}; - -struct DenseQRImplementation { - using QRType = Eigen::ColPivHouseholderQR; - - static std::unique_ptr compute(const Eigen::MatrixXd &m, - ThreadPool *threads - __attribute__((unused))) { - return std::make_unique(m); - } -}; - template struct SparseGPFit {}; template struct Fit> { From 0af3b22f687b11513d98af0f1efed17bcf575085 Mon Sep 17 00:00:00 2001 From: Matt Peddie Date: Thu, 8 Aug 2024 01:06:09 +1000 Subject: [PATCH 03/12] Introduce PIC approximation --- BUILD.bazel | 2 + doc/src/crappy-pic.rst | 164 ++++ doc/src/sparse-gp-details.rst | 141 +++ include/albatross/SparseGP | 1 + include/albatross/src/cereal/gp.hpp | 24 + include/albatross/src/models/pic_gp.hpp | 1054 +++++++++++++++++++++++ tests/test_pic_gp.cc | 404 +++++++++ 7 files changed, 1790 insertions(+) create mode 100644 doc/src/crappy-pic.rst create mode 100644 include/albatross/src/models/pic_gp.hpp create mode 100644 tests/test_pic_gp.cc diff --git a/BUILD.bazel b/BUILD.bazel index 71536037..422058cc 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -128,6 +128,7 @@ cc_library( "include/albatross/src/models/gp.hpp", "include/albatross/src/models/least_squares.hpp", "include/albatross/src/models/null_model.hpp", + "include/albatross/src/models/pic_gp.hpp", "include/albatross/src/models/ransac.hpp", "include/albatross/src/models/ransac_gp.hpp", "include/albatross/src/models/sparse_common.hpp", @@ -230,6 +231,7 @@ swift_cc_test( "tests/test_models.cc", "tests/test_models.h", "tests/test_parameter_handling_mixin.cc", + "tests/test_pic_gp.cc", "tests/test_prediction.cc", "tests/test_radial.cc", 
"tests/test_random_utils.cc", diff --git a/doc/src/crappy-pic.rst b/doc/src/crappy-pic.rst new file mode 100644 index 00000000..0005dc14 --- /dev/null +++ b/doc/src/crappy-pic.rst @@ -0,0 +1,164 @@ +################################################# +PIC math scratchpad +################################################# + +.. _crappy-pic: + +------------------------------------------------------------------- +Relationship between :math:`\sigma_\cancel{B}` and :math:`\sigma_f` +------------------------------------------------------------------- + +It seems like what we need first of all is some way to get the PITC posterior variance based only on :math:`\cancel{B}` using only the full PITC posterior and calculations that scale only with :math:`|B|`. I'm not totally finished working out how :math:`(\sigma^2_{*})^{\cancel{B} \cancel{B}}` fits into the whole posterior :math:`(\sigma^2_*)^{PIC}` + +Observe that since :math:`\mathbf{\Lambda}` is a block-diagonal matrix, its inverse is too, so when it's in the middle of a quadratic form, the cross terms disappear. As far as I can tell, this is the only place we can additively separate :math:`\mathbf{Q}_{\cancel{B} \cancel{B}}` and :math:`\mathbf{K}_{B B}` without explicit cross terms. Start with the usual application of Woodbury's lemma to the PITC posterior covariance: + +.. math:: + + \newcommand{Lam}{\mathbf{\Lambda}} + \newcommand{Kuu}{\mathbf{K}_{uu}} + \newcommand{Kuf}{\mathbf{K}_{uf}} + \newcommand{Kfu}{\mathbf{K}_{fu}} + (\sigma_*^2) &= K_* - \mathbf{Q}_{* f} \left(\mathbf{K}_{fu} \mathbf{K}_{uu}^{-1} \mathbf{K}_{uf} + \mathbf{\Lambda} \right)^{-1} \mathbf{Q}_{f *} \\ + &= K_* - \mathbf{K}_{*u} \Kuu^{-1} \Kuf \left(\Kfu \Kuu^{-1} \Kuf + \Lam\right)^{-1} \Kfu \Kuu^{-1} \mathbf{K}_{u*} \\ + &= K_* - \mathbf{K}_{*u} \Kuu^{-1} \mathbf{K}_{u*} + \mathbf{K}_{*u} \left( \Kuu + \Kuf \Lam^{-1} \Kfu \right)^{-1} \mathbf{K}_{u*} + +Now break up :math:`\Kuf` and :math:`\Lam`: + +.. 
math:: + + \Kuf &= \begin{bmatrix} \mathbf{K}_{u \cancel{B}} & \mathbf{K}_{u B} \end{bmatrix} \\ + \Lam &= \begin{bmatrix} \Lam_{\cancel{B}} & \mathbf{0} \\ \mathbf{0} & \Lam_{B} \end{bmatrix} \\ + +so that + +.. math:: + + \Kuf \Lam^{-1} \Kfu &= \begin{bmatrix} \mathbf{K}_{u \cancel{B}} & \mathbf{K}_{u B} \end{bmatrix} + \begin{bmatrix} \Lam_{\cancel{B}}^{-1} & \mathbf{0} \\ \mathbf{0} & \Lam_{B}^{-1} \end{bmatrix} + \begin{bmatrix} \mathbf{K}_{\cancel{B} u} \\ \mathbf{K}_{B u} \end{bmatrix} \\ + &= \mathbf{K}_{u \cancel{B}} \Lam_{\cancel{B}}^{-1} \mathbf{K}_{\cancel{B} u} + + \mathbf{K}_{u B} \Lam_{B}^{-1} \mathbf{K}_{B u} \\ + \mathbf{K}_{u \cancel{B}} \Lam_{\cancel{B}}^{-1} \mathbf{K}_{\cancel{B} u} &= \Kuf \Lam^{-1} \Kfu - \mathbf{K}_{u B} \Lam_{B}^{-1} \mathbf{K}_{B u} + +This last one is the inverse term in the training-data dependent part of the PITC posterior covariance using only the training points in :math:`\cancel{B}`, which I think is what we want to combine with the dense posterior from the training points in :math:`B`: + +.. math:: + (\sigma_*^2)^{\cancel{B} \cancel{B}} = K_* - \mathbf{K}_{*u} \Kuu^{-1} \mathbf{K}_{u*} + \mathbf{K}_{*u} \left( \Kuu + \mathbf{K}_{u \cancel{B}} \Lam_{\cancel{B}}^{-1} \mathbf{K}_{\cancel{B} u} \right)^{-1} \mathbf{K}_{u*} + +which we can write in terms of only the full PITC prior and the prior for group :math:`B`: + +.. math:: + (\sigma_*^2)^{\cancel{B} \cancel{B}} = K_* - \mathbf{K}_{*u} \Kuu^{-1} \mathbf{K}_{u*} + \mathbf{K}_{*u} \left( \Kuu + \Kuf \Lam^{-1} \Kfu - \mathbf{K}_{u B} \Lam_{B}^{-1} \mathbf{K}_{B u} \right)^{-1} \mathbf{K}_{u*} + +If you set :math:`B = \emptyset`, removing the correction for the training points in block :math:`B` entirely, you have the original PITC posterior covariance based on the full set of training points :math:`f`, as expected. 
Now we only have to compute :math:`\Kuf \Lam^{-1} \Kfu` (the term that scales with the full training set :math:`f`) once, and nothing that scales with :math:`|\cancel{B}|`. This still involves an :math:`O(M^3)` decomposition for each group :math:`B`. I have been assuming our group size :math:`|B|` is significantly smaller than the number of inducing points :math:`M`, so it's not great, but an interesting thing happens if we use the "downdate" form of Woodbury's lemma in the opposite direction to the usual way: + +.. math:: + \left(A - B C^{-1} B^T \right)^{-1} &= A^{-1} + A^{-1} B \left(C - B^T A^{-1} B\right)^{-1} B^T A^{-1} \\ + +Letting :math:`\mathbf{A} = \Kuu + \Kuf \Lam^{-1} \Kfu` (playing the part of :math:`A` in Woodbury's lemma): + +.. math:: + + (\sigma_*^2)^{\cancel{B} \cancel{B}} &= K_* - \mathbf{K}_{*u} \Kuu^{-1} \mathbf{K}_{u*} + \mathbf{K}_{*u} \left( \mathbf{A}^{-1} + \mathbf{A}^{-1} \mathbf{K}_{u B} \left(\Lam_{B} - \mathbf{K}_{B u} \mathbf{A}^{-1} \mathbf{K}_{u B}\right)^{-1} \mathbf{K}_{B u} \mathbf{A}^{-1} \right) \mathbf{K}_{u*} \\ + &= K_* - \mathbf{K}_{*u} \Kuu^{-1} \mathbf{K}_{u*} + \mathbf{K}_{*u} \mathbf{A}^{-1} \mathbf{K}_{u*} + \mathbf{K}_{*u} \mathbf{A}^{-1} \mathbf{K}_{u B} \left(\Lam_{B} - \mathbf{K}_{B u} \mathbf{A}^{-1} \mathbf{K}_{u B}\right)^{-1} \mathbf{K}_{B u} \mathbf{A}^{-1} \mathbf{K}_{u*} \\ + +I find the use of Woodbury's lemma to downdate a bit uncomfortable -- does this work with real symmetric matrices? But we would only need to decompose :math:`\mathbf{A}` (and also :math:`\Kuu`) at a cost of :math:`O(M^3)` once; the inversion to be calculated per group costs :math:`O(|B|^3)`. I also don't immediately spot a slick QR decomposition way to do this per-group term; it's pretty symmetrical. The only thing that jumps out is precomputing :math:`\mathbf{K}_{B u} \mathbf{A}^{-\frac{1}{2}}`. 
+ +---------------------------------------------- +Derivation of Woodbury's Lemma for Differences +---------------------------------------------- + +This is specifically for symmetric matrices, to convince myself the downdate formula is legit. We would like to compute a rank-:math:`k` downdate to the inverse of a matrix :math:`A`. + +.. math:: + \left(A - B C^{-1} B^T \right)^{-1} &= A^{-1} + A^{-1} B \left(C - B^T A^{-1} B\right)^{-1} B^T A^{-1} \\ + +To be extra clear, let's say :math:`A \in \mathbb{R}^{n \times n}`, :math:`B \in \mathbb{R}^{n \times k}`, :math:`C \in \mathbb{R}^{k \times k}`. We put the relevant matrices into a block matrix :math:`M \in \mathbb{R}^{m \times m}` for :math:`m = n + k`, and label the conforming blocks of its inverse: + +.. math:: + M &= \begin{pmatrix} A & B \\ B^T & C\end{pmatrix} \\ + M^{-1} &= \begin{pmatrix} W & X \\ X^T & Y\end{pmatrix} \\ + M M^{-1} &= \begin{pmatrix} \mathbf{I} & \mathbf{0} \\ \mathbf{0} & \mathbf{I} \end{pmatrix} \\ + M^{-1} M &= \begin{pmatrix} \mathbf{I} & \mathbf{0} \\ \mathbf{0} & \mathbf{I} \end{pmatrix} + +The blocks of :math:`M M^{-1}` yield the following equations: + +.. math:: + AW + BX^T &= \mathbf{I} \\ + AX + BY &= \mathbf{0} \\ + B^T W + CX^T &= \mathbf{0} \\ + B^T X + CY &= \mathbf{I} \\ + +Rearrange the middle two: + +.. math:: + X &= -A^{-1} B Y \\ + X^T &= -C^{-1} B^T W + +If we do the same for :math:`M^{-1} M`: + +.. math:: + X &= -W B C^{-1} \\ + X^T &= -Y B^T A^{-1} + +These blocks are equal: + +.. math:: + C^{-1} B^T W &= Y B^T A^{-1} \\ + A^{-1} B Y &= W B C^{-1} \\ + +Now use the middle two equations from :math:`M M^{-1}` and plug them into the first and last equations respectively: + +.. 
math:: + W - A^{-1} B C^{-1} B^T W &= A^{-1} \\ + \left(I - A^{-1} B C^{-1} B^T\right) W &= A^{-1} \\ + -C^{-1} B^T A^{-1} B Y + Y &= C^{-1} \\ + \left(I - C^{-1} B^T A^{-1} B\right)Y &= C^{-1} \\ + +Assuming :math:`\left(I - A^{-1} B C^{-1} B^T\right)` and :math:`\left(I - C^{-1} B^T A^{-1} B\right)` are invertible, rearrange: + +.. math:: + W &= \left(I - A^{-1} B C^{-1} B^T\right)^{-1} A^{-1} \\ + &= \left(A - B C^{-1} B^T\right)^{-1} \\ + Y &= \left(I - C^{-1} B^T A^{-1} B\right)^{-1} C^{-1} \\ + &= \left(C - B^T A^{-1} B\right)^{-1} + +Now use equality of the off-diagonal blocks from the two ways to multiply :math:`M` and :math:`M^{-1}` above (don't you wish Sphinx would number equations?) and substitute: + +.. math:: + C^{-1} B^T \left(A - B C^{-1} B^T\right)^{-1} &= \left(C - B^T A^{-1} B\right)^{-1} B^T A^{-1} \\ + \left(A - B C^{-1} B^T\right)^{-1} B C^{-1} &= A^{-1} B \left(C - B^T A^{-1} B\right)^{-1} \\ + +Let's look just at the term :math:`\left(A - B C^{-1} B^T\right)` and right-multiply by :math:`-A^{-1}`: + +.. math:: + \left(A - B C^{-1} B^T\right) \left(-A^{-1}\right) &= -\mathbf{I} + B C^{-1} B^T A^{-1} \\ + \mathbf{I} + \left(A - B C^{-1} B^T\right) \left(-A^{-1}\right) &= B C^{-1} B^T A^{-1} + +Now let's return to the previous equation and right-multiply by :math:`B^T A^{-1}`: + +.. math:: + \left(A - B C^{-1} B^T\right)^{-1} B C^{-1} B^T A^{-1} &= A^{-1} B \left(C - B^T A^{-1} B\right)^{-1} B^T A^{-1} \\ + +Substitute the previous result for :math:`B C^{-1} B^T A^{-1}`: + +.. 
math:: + \left(A - B C^{-1} B^T\right)^{-1} \left( \mathbf{I} + \left(A - B C^{-1} B^T\right) \left(-A^{-1}\right) \right) &= A^{-1} B \left(C - B^T A^{-1} B\right)^{-1} B^T A^{-1} \\ + \left(A - B C^{-1} B^T\right)^{-1} - A^{-1} &= A^{-1} B \left(C - B^T A^{-1} B\right)^{-1} B^T A^{-1} \\ + \left(A - B C^{-1} B^T \right)^{-1} &= A^{-1} + A^{-1} B \left(C - B^T A^{-1} B\right)^{-1} B^T A^{-1} \blacksquare + +which is the difference form of Woodbury's lemma, or a rank-:math:`k` downdate of :math:`A^{-1}`. The main assumption seems to be that :math:`\left(I - A^{-1} B C^{-1} B^T\right)` and :math:`\left(I - C^{-1} B^T A^{-1} B\right)` are invertible. + +------------------------ +Blockwise inversion of S +------------------------ + +I tried to invert :math:`\mathbf{S}` blockwise; it didn't work out but I'm leaving it in here just to look back at: + +.. math:: + \newcommand{VV}{\mathbf{V}} + \mathbf{U} &= \mathbf{Q}_{* \cancel{B}} \mathbf{S}^{-1}_{\cancel{B} B} \VV_{B *} \\ + &= \mathbf{Q}_{* \cancel{B}} \left( \mathbf{Q}_{\cancel{B} \cancel{B}}^{-1} \mathbf{Q}_{\cancel{B} B} \left(\mathbf{Q}_{B B} - \mathbf{Q}_{B \cancel{B}} \mathbf{Q}_{\cancel{B} \cancel{B}}^{-1} \mathbf{Q}_{\cancel{B} B}\right)^{-1} \right) \VV_{B *} + + +How are we going to avoid the :math:`O(|\cancel{B}|^3)` inversion for :math:`\mathbf{Q}_{\cancel{B} \cancel{B}}`? Also we are going to get cross-terms with both :math:`\mathbf{Q}_{* \cancel{B}}` and :math:`\mathbf{Q}_{B B}` involved, but maybe they are OK because they are only :math:`O(|\cancel{B}| |B|)`? \ No newline at end of file diff --git a/doc/src/sparse-gp-details.rst b/doc/src/sparse-gp-details.rst index ce5bab0a..92f94773 100644 --- a/doc/src/sparse-gp-details.rst +++ b/doc/src/sparse-gp-details.rst @@ -338,3 +338,144 @@ However, we found that while the posterior mean predictions were numerically sta You should be able to find this implementation and details using git history. 
+-------------------- +PIC +-------------------- + +The PIC approximation is introduced in: + + [5] Local and global sparse Gaussian process approximations + Edward Snelson and Zoubin Ghahramani + https://proceedings.mlr.press/v2/snelson07a/snelson07a.pdf + +without any details of the covariance calculation. + +Mean +^^^^ + +.. math:: + \newcommand{Lam}{\mathbf{\Lambda}} + \newcommand{Kuu}{\mathbf{K}_{uu}} + \newcommand{Kuf}{\mathbf{K}_{uf}} + \newcommand{Kfu}{\mathbf{K}_{fu}} + \newcommand{VV}{\mathbf{V}} + \mu^{PIC} &= \tilde{\mathbf{K}}^{PIC}_{*f} \left[ \tilde{\mathbf{K}}^{PITC}_{ff} \right]^{-1} y \\ + &= \begin{bmatrix} \mathbf{Q}_{* \cancel{B}} & \mathbf{Q}_{* B} + \mathbf{V}_{* B}\end{bmatrix} \begin{bmatrix} \mathbf{Q}_{\cancel{B} \cancel{B}} & \mathbf{Q}_{\cancel{B} B} \\ \mathbf{Q}_{B \cancel{B}} & \mathbf{Q}_{B B} \end{bmatrix}^{-1} \begin{bmatrix} y_{\cancel{B}} \\ y_{B} \end{bmatrix} + +Breaking down :math:`\mathbf{K}_{* B} = \mathbf{Q}_{* B} + \VV_{* B}` we can write the PIC mean as a correction to the PITC mean: + +.. math:: + \mu^{PIC} &= \mu^{PITC} + \underbrace{\begin{bmatrix} \mathbf{0} & \VV_{* B} \end{bmatrix} \begin{bmatrix} \mathbf{Q}_{\cancel{B} \cancel{B}} & \mathbf{Q}_{\cancel{B} B} \\ \mathbf{Q}_{B \cancel{B}} & \mathbf{Q}_{B B} \end{bmatrix}^{-1} \begin{bmatrix} y_{\cancel{B}} \\ y_{B} \end{bmatrix} }_{\Delta \mu^{PIC}}\\ + +The inverse of the PITC term can be had via Woodbury's lemma and the QR decomposition. + +.. 
math:: + \Delta \mu^{PIC} &=\begin{bmatrix} \mathbf{0} & \VV_{* B} \end{bmatrix} \begin{bmatrix} \mathbf{Q}_{\cancel{B} \cancel{B}} & \mathbf{Q}_{\cancel{B} B} \\ \mathbf{Q}_{B \cancel{B}} & \mathbf{Q}_{B B} \end{bmatrix}^{-1} \begin{bmatrix} y_{\cancel{B}} \\ y_{B} \end{bmatrix} \\ + &= \begin{bmatrix} \mathbf{0} & \VV_{* B} \end{bmatrix} \left(\Kfu \Kuu^{-1} \Kuf + \Lam\right)^{-1} y \\ + &= \begin{bmatrix} \mathbf{0} & \VV_{* B} \end{bmatrix} \left( \Lam^{-1} - \Lam^{-1} \Kfu \left(\Kuu + \Kuf \Lam^{-1} \Kfu\right)^{-1} \Kuf \Lam^{-1} \right) y + +Assume we have already computed the PITC approximation as described above. We already have access to the information vector :math:`v = P R^{-1} Q^T_1 \Lam^{-\frac{1}{2}} y` by the QR decomposition of :math:`B = \begin{bmatrix} \Lam^{-\frac{1}{2}} \Kfu \\ L^T_{uu}\end{bmatrix}`. We can mimic the solution of the PITC approximation, but since we are premultiplying by the block-diagonal matrix :math:`\Lam^{-1}`, we can split this into per-block components: + +.. math:: + \Delta \mu^{PIC} &= \begin{bmatrix} \mathbf{0} & \VV_{* B} \end{bmatrix} \Lam^{-1} \left(y - \Kfu v\right) \\ + &= \VV_{* B} \Lam^{-1}_{B} \left(y_{B} - \mathbf{K}_{B u} v \right) + +Covariance +^^^^^^^^^^ + +The way I started partitioning the covariance term into blocks is as follows: + +.. 
math:: + + (\sigma_*^2)^{PIC} &= K_* - \tilde{\mathbf{K}}^{PIC}_{*f} \left[ \tilde{\mathbf{K}}^{PITC}_{ff} \right]^{-1} \tilde{\mathbf{K}}^{PIC}_{f*} + \sigma^2 \\ + &= K_* - \begin{bmatrix} \mathbf{Q}_{* \cancel{B}} & \mathbf{K}_{* B} \end{bmatrix} \left(\mathbf{Q}_{ff} - \mathtt{blkdiag}(\mathbf{K}_{ff} - \mathbf{Q}_{ff})\right)^{-1} \begin{bmatrix} \mathbf{Q}_{\cancel{B} *} \\ \mathbf{K}_{B *} \end{bmatrix} \\ + &= K_* - \begin{bmatrix} \mathbf{Q}_{* \cancel{B}} & \mathbf{K}_{* B} \end{bmatrix} \left(\mathbf{K}_{fu} \mathbf{K}_{uu}^{-1} \mathbf{K}_{uf} + \Lam \right)^{-1} \begin{bmatrix} \mathbf{Q}_{\cancel{B} *} \\ \mathbf{K}_{B *} \end{bmatrix} + +The problem with doing this for PIC covariance (vs. PIC mean and PITC) is that we can't left-multiply the whole thing by :math:`\mathbf{K}_{uu}^{-1} \mathbf{K}_{uf}` (which in those instances leads to applying Woodbury's lemma to reduce the inverse to the size of the number of inducing points :math:`M`) because :math:`\mathbf{K}_{*B}` is not a low-rank approximation using the inducing points. We can instead break up the inverse term into blocks: + +.. math:: + (\sigma_*^2)^{PIC} &= K_* - + \begin{bmatrix} \mathbf{Q}_{* \cancel{B}} & \mathbf{K}_{* B}\end{bmatrix} + \begin{bmatrix} \mathbf{Q}_{\cancel{B} \cancel{B}} & \mathbf{Q}_{\cancel{B} B} \\ \mathbf{Q}_{B \cancel{B}} & \mathbf{Q}_{B B} \end{bmatrix}^{-1} + \begin{bmatrix} \mathbf{Q}_{\cancel{B} *} \\ \mathbf{K}_{B *}\end{bmatrix} + +If we substitute :math:`\mathbf{K}_{* B} = \mathbf{Q}_{* B} + \VV_{* B}` as with the mean, it doesn't work out nicely: + +.. 
math:: + (\sigma_*^2)^{PIC} &= K_* - + \begin{bmatrix} \mathbf{Q}_{* \cancel{B}} & \mathbf{Q}_{* B} + \VV_{* B} \end{bmatrix} + \underbrace{\begin{bmatrix} \mathbf{Q}_{\cancel{B} \cancel{B}} & \mathbf{Q}_{\cancel{B} B} \\ \mathbf{Q}_{B \cancel{B}} & \mathbf{Q}_{B B} \end{bmatrix}^{-1}}_{\mathbf{S}^{-1}} + \begin{bmatrix} \mathbf{Q}_{\cancel{B} *} \\ \mathbf{Q}_{B *} + \VV_{B *}\end{bmatrix} \\ + &= K_* - \mathbf{Q}_{**}^{PITC} - \underbrace{\mathbf{Q}_{* f} \mathbf{S}^{-1}_{f B} \VV_{B *}}_{\mathbf{U}} - \mathbf{U}^T - \mathbf{V}_{* B} \mathbf{S}^{-1}_{B B} \mathbf{V}_{B *} + +Now we have 3 correction terms to apply to the posterior PITC covariance. The best thing I can think of is to apply Woodbury's lemma, but in the opposite direction to usual: + +.. math:: + \mathbf{S}^{-1} &= \left(\Kfu \Kuu^{-1} \Kuf + \Lam\right)^{-1} \\ + &= \Lam^{-1} - \Lam^{-1} \Kfu \left( \Kuu + \Kuf \Lam^{-1} \Kfu \right)^{-1} \Kuf \Lam^{-1} + +which involves decomposing the block-diagonal matrix :math:`\Lam` with blocks of size :math:`|B|` and a matrix the size :math:`M` of the inducing point set. In practice after we precompute terms, we have a sequence of triangular factors that we can subset as needed to pick out :math:`B` and :math:`\cancel{B}`. (Confusingly, one of these useful decompositions is the QR decomposition of the unrelated matrix :math:`B` in the PITC derivation above.) + +.. math:: + \mathbf{U} &= \mathbf{Q}_{* f} \mathbf{S}^{-1}_{f B} \VV_{B *} \\ + &= \mathbf{Q}_{* f} \left( \Lam^{-1} - \Lam^{-1} \Kfu \left( \Kuu + \Kuf \Lam^{-1} \Kfu \right)^{-1} \Kuf \Lam^{-1} \right)_{f B} \VV_{B *} + +This looks appealingly like we could just keep combining instances of the QR decomposition of :math:`B`, but that would leave out the :math:`\Kuu` part. 
+ +The open question is how to efficiently distill this into various correction terms for each group that don't require operations that scale with :math:`\cancel{B}`, since the paper promises :math:`O((|B| + M)^2)` for predictive covariances after precomputation. In principle, using sparse vectors / matrices for the :math:`*` target components, combined with Eigen's expression templates, should bring the complexity of some of these repeated solves for mostly empty vectors (for :math:`B`) down to :math:`O(|B|)`, and likewise for inducing points. + +At this point, our cross-terms are preceded by :math:`Q_{* f}`, which expands to :math:`K_{*u} \Kuu^{-1} \Kuf`. We can precompute everything except :math:`K_{*u}`, including the :math:`O(M^3)` inducing point decomposition, leaving prediction-time computations to scale with :math:`M`. + +To compute :math:`\mathbf{V}_{* B} \mathbf{S}^{-1}_{B B} \mathbf{V}_{B *}`, we form a (mostly zero) column of :math:`\mathbf{V}` for each feature, break the two terms of :math:`\mathbf{S}^{-1}` into symmetric parts, multiply by :math:`\mathbf{V}` and subtract, here in excruciating notational detail: + +.. 
math:: + \VV_{* B} \mathbf{S}^{-1} \VV_{B *} &= \VV_{* B} \left( \Lam^{-1} - \Lam^{-1} \Kfu \left( \Kuu + \Kuf \Lam^{-1} \Kfu \right)^{-1} \Kuf \Lam^{-1} \right)_{B B} \VV_{B *} \\ + &= \VV_{* B} \left( \left(P_\Lam L_\Lam^{-T} D_\Lam^{-\frac{1}{2}}\right) \underbrace{\left(D_\Lam^{-\frac{1}{2}} L_\Lam^{-1} P_\Lam^T\right)}_{Z_\Lam} - \mathbf{G}^T (B^T B)^{-1} \mathbf{G} \right) \VV_{B *} \\ + &= \VV_{* B} \left( \mathbf{Z}_\Lam^T \mathbf{Z}_\Lam - \mathbf{G}^T (P_u R_u^{-1} R_u^{-T} P_u^T) \mathbf{G} \right) \VV_{B *} \\ + &= \VV_{* B} \left( \mathbf{Z}_\Lam^T \mathbf{Z}_\Lam - \mathbf{G}^T (P_u R_u^{-1}) \underbrace{(R_u^{-T} P_u^T) \mathbf{G}}_{\mathbf{Z}_u} \right) \VV_{B *} \\ + &= \VV_{* B} \left( \mathbf{Z}_\Lam^T \mathbf{Z}_\Lam - \mathbf{Z}_u^T \mathbf{Z}_u \right) \VV_{B *} \\ + &= \VV_{* B} \mathbf{Z}_\Lam^T \underbrace{\mathbf{Z}_\Lam \VV_{B *}}_{\mathbf{\xi}_\Lam} - \VV_{* B} \mathbf{Z}_u^T \underbrace{\mathbf{Z}_u \VV_{B *}}_{\mathbf{\xi}_u} \\ + &= \mathbf{\xi}_\Lam^T \mathbf{\xi}_\Lam - \mathbf{\xi}_u^T \mathbf{\xi}_u \\ + +Note that the left-hand (subscript :math:`\Lam`) term is the decomposition of a block-diagonal matrix, so it will only contain cross-terms between features corresponding to the same local block. This calculation can be done blockwise. The right-hand (subscript :math:`u`) term projects the features through the local block of the training dataset and then through the inducing points, so the cross-terms are not in general sparse, and this calculation involves a lot more careful indexing. + +The same breakdown of :math:`\mathbf{S}^{-1}` can be used to compute :math:`\mathbf{W}` during the fit step. + +The notation :math:`\mathbf{W}_B` indicates that the relevant columns of :math:`\mathbf{W}` are used on the right-hand side. This is mathematically equivalent to making :math:`\VV_{B *}` have dimension :math:`N \times p` and be zero outside block :math:`B`. 
Computationally the right factor must be a sparse object to preserve the desired asymptotics. + +Summing up +^^^^^^^^^^ + +For the covariance, we must precompute: + + - :math:`P^TLDL^TP = \Lam` + - :math:`\mathbf{G} = \Lam^{-1} \Kfu` + - :math:`L_{uu} L_{uu}^T = \Kuu` + - :math:`QRP^T = B = \begin{bmatrix}\Lam^{-\frac{1}{2}}\Kfu \\ L_{uu} \end{bmatrix}` such that :math:`\mathbf{S}^{-1} = \Lam^{-1} - \mathbf{G} \left(B^T B\right)^{-1} \mathbf{G}^T`, and blocks :math:`\mathbf{S}^{-1}_{a b}` can be got by choosing the right rows / columns with which to do permutations and back-substitutions. + - :math:`\mathbf{W} = \Kuu^{-1} \mathbf{K}_{u f} \mathbf{S}^{-1}` + +For the mean, we compute the information vector :math:`v` as in PITC. + +then for each group :math:`B`: + + - :math:`\mathbf{Y}_B = \Kuu^{-1} \mathbf{K}_{u B}` + - :math:`v_b = \Lam_{B B}^{-1} \left( y_B - \mathbf{K}_{B u} v \right)` + +Given that we have already computed :math:`\Kfu`, we can use :math:`\mathbf{K}_{u B}` and :math:`\mathbf{K}_{u \cancel{B}}` efficiently in Eigen using sparse matrices with a single entry per nonzero row or column to be used. + +Then at prediction time, we must compute: + + - :math:`\mathbf{K}_{* B}`, :math:`O(|B|)` + - :math:`\mathbf{K}_{* u}`, :math:`O(M)` + - :math:`\mathbf{Q}_{* B} = \mathbf{K}_{* u} \mathbf{Y}_B`, :math:`O(M^2)` with the existing decomposition + - :math:`\VV_{* B} = \mathbf{K}_{* B} - \mathbf{Q}_{* B}`, :math:`O(|B|^2)` + - :math:`\mathbf{Q}_{**}^{PITC}` as with PITC + - :math:`\mathbf{U} = \mathbf{K}_{* u} \mathbf{W}_B \VV_{B *}`, :math:`O(M + |B|)`? + - :math:`\VV_{* B} \mathbf{S}^{-1}_{B B} \VV_{B *}`, :math:`O(|B|^2)` + +Further notes +^^^^^^^^^^^^^ + +If asked to predict something that is uncorrelated with all training data (conditional on inducing points), we simply fall back to the PITC approximation. 
+ +On some problems, calculation of the predictive covariance using Woodbury's lemma to avoid an :math:`O(N^3)` decomposition seems to lead to numerical problems. \ No newline at end of file diff --git a/include/albatross/SparseGP b/include/albatross/SparseGP index 925bb506..ddcd2ffc 100644 --- a/include/albatross/SparseGP +++ b/include/albatross/SparseGP @@ -25,5 +25,6 @@ #include #include #include +#include #endif diff --git a/include/albatross/src/cereal/gp.hpp b/include/albatross/src/cereal/gp.hpp index b3926397..9b421b83 100644 --- a/include/albatross/src/cereal/gp.hpp +++ b/include/albatross/src/cereal/gp.hpp @@ -17,7 +17,9 @@ using albatross::Fit; using albatross::GaussianProcessBase; using albatross::GPFit; using albatross::LinearCombination; + using albatross::SparseGPFit; +using albatross::PICGPFit; #ifndef GP_SERIALIZATION_VERSION #define GP_SERIALIZATION_VERSION 2 @@ -51,6 +53,28 @@ inline void serialize(Archive &archive, Fit> &fit, } } +template +inline void +serialize(Archive &archive, + Fit> &fit, + const std::uint32_t version) { + archive(cereal::make_nvp("train_features", fit.train_features)); + archive(cereal::make_nvp("inducing_features", fit.inducing_features)); + archive(cereal::make_nvp("train_covariance", fit.train_covariance)); + archive(cereal::make_nvp("sigma_R", fit.sigma_R)); + archive(cereal::make_nvp("P", fit.P)); + archive(cereal::make_nvp("mean_w", fit.mean_w)); + archive(cereal::make_nvp("W", fit.W)); + archive(cereal::make_nvp("covariance_Y", fit.covariance_Y)); + archive(cereal::make_nvp("Z", fit.Z)); + archive(cereal::make_nvp("A_ldlt", fit.A_ldlt)); + archive(cereal::make_nvp("measurement_groups", fit.measurement_groups)); + archive(cereal::make_nvp("information", fit.information)); + archive(cereal::make_nvp("numerical_rank", fit.numerical_rank)); + archive(cereal::make_nvp("cols_Bs", fit.cols_Bs)); +} + template inline void save(Archive &archive, diff --git a/include/albatross/src/models/pic_gp.hpp 
b/include/albatross/src/models/pic_gp.hpp new file mode 100644 index 00000000..cd088341 --- /dev/null +++ b/include/albatross/src/models/pic_gp.hpp @@ -0,0 +1,1054 @@ +/* + * Copyright (C) 2024 Swift Navigation Inc. + * Contact: Swift Navigation + * + * This source is subject to the license found in the file 'LICENSE' which must + * be distributed together with this source. All other rights reserved. + * + * THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, + * EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef INCLUDE_ALBATROSS_MODELS_PIC_GP_H_ +#define INCLUDE_ALBATROSS_MODELS_PIC_GP_H_ + +namespace albatross { + +template +class BruteForcePIC + : public CovarianceFunction< + BruteForcePIC> { +public: + CovarianceType cov_; + std::vector inducing_points_; + GrouperFunction grouper_; + Eigen::LDLT K_uu_ldlt_; + + BruteForcePIC(const std::vector &inducing_points, + CovarianceType &&cov, GrouperFunction &&grouper) + : cov_{cov}, inducing_points_{inducing_points}, grouper_{grouper}, + K_uu_ldlt_{cov_(inducing_points_, inducing_points_).ldlt()} {} + + template + double _call_impl(const X &x, const Y &y) const { + if (grouper_(x) == grouper_(y)) { + return cov_(x, y); + } + + Eigen::VectorXd K_xu(inducing_points_.size()); + Eigen::VectorXd K_uy(inducing_points_.size()); + for (Eigen::Index i = 0; + i < static_cast(inducing_points_.size()); ++i) { + K_xu[i] = cov_(x, inducing_points_[i]); + K_uy[i] = cov_(inducing_points_[i], y); + } + // const Eigen::VectorXd K_uy = cov_(inducing_points_, y); + return K_xu.dot(K_uu_ldlt_.solve(K_uy)); + } +}; + +template +auto make_brute_force_pic_covariance( + const std::vector &inducing_points, + CovarianceType &&cov, GrouperFunction &&grouper) { + return BruteForcePIC( + inducing_points, std::forward(cov), + std::forward(grouper)); +} + +template +class PICGaussianProcessRegression; + +template 
+struct PICGPFit {}; + +template +using PICGroupIndexType = + typename std::result_of::type; + +template struct PICGroup { + // Row indices are into the original set of features into the data + // used to form S. + Eigen::Index initial_row; + Eigen::Index block_size; + Eigen::Index block_index; + albatross::RegressionDataset dataset; +}; + +template +using PICGroupMap = std::map, + PICGroup>; + +template +struct Fit> { + using PermutationIndices = Eigen::Matrix; + using GroupIndexType = PICGroupIndexType; + using GroupMap = PICGroupMap; + + std::vector train_features; + std::vector inducing_features; + Eigen::SerializableLDLT train_covariance; + Eigen::MatrixXd sigma_R; + Eigen::PermutationMatrixX P; + std::vector mean_w; + Eigen::MatrixXd W; + std::vector covariance_Y; + Eigen::MatrixXd Z; + BlockDiagonalLDLT A_ldlt; + GroupMap measurement_groups; + Eigen::VectorXd information; + Eigen::Index numerical_rank; + std::vector> cols_Bs; + GrouperFunction grouper; + + Fit(){}; + + Fit(const std::vector &features_, + const std::vector &inducing_features_, + const Eigen::SerializableLDLT &train_covariance_, + const Eigen::MatrixXd &sigma_R_, const Eigen::PermutationMatrixX &P_, + const std::vector &mean_w_, const Eigen::MatrixXd &W_, + const std::vector &covariance_Y_, + const Eigen::MatrixXd &Z_, const BlockDiagonalLDLT &A_ldlt_, + const GroupMap &measurement_groups_, const Eigen::VectorXd &information_, + Eigen::Index numerical_rank_, + const std::vector> cols_Bs_, + GrouperFunction grouper_) + : train_features(features_), inducing_features(inducing_features_), + train_covariance(train_covariance_), sigma_R(sigma_R_), P(P_), + mean_w(mean_w_), W(W_), covariance_Y(covariance_Y_), Z(Z_), + A_ldlt(A_ldlt_), measurement_groups(measurement_groups_), + information(information_), numerical_rank(numerical_rank_), + cols_Bs(cols_Bs_), grouper(grouper_) {} + + void shift_mean(const Eigen::VectorXd &mean_shift) { + ALBATROSS_ASSERT(mean_shift.size() == information.size()); + 
information += train_covariance.solve(mean_shift);
+  }
+
+  bool operator==(
+      const Fit>
+          &other) const {
+    return (train_features == other.train_features &&
+            inducing_features == other.inducing_features &&
+            train_covariance == other.train_covariance &&
+            sigma_R == other.sigma_R && P.indices() == other.P.indices() &&
+            mean_w == other.mean_w && W == other.W &&
+            covariance_Y == other.covariance_Y && Z == other.Z &&
+            A_ldlt == other.A_ldlt &&
+            measurement_groups == other.measurement_groups &&
+            information == other.information &&
+            numerical_rank == other.numerical_rank && cols_Bs == other.cols_Bs);
+  }
+};
+
+/*
+ * This class implements an approximation technique for Gaussian processes
+ * which relies on an assumption that all observations are independent (or
+ * groups of observations are independent) conditional on a set of inducing
+ * points. The method is based off:
+ *
+ *     [1] Sparse Gaussian Processes using Pseudo-inputs
+ *     Edward Snelson, Zoubin Ghahramani
+ *     http://www.gatsby.ucl.ac.uk/~snelson/SPGP_up.pdf
+ *
+ * Though the code uses notation closer to that used in this (excellent)
+ * overview of these methods:
+ *
+ *     [2] A Unifying View of Sparse Approximate Gaussian Process Regression
+ *     Joaquin Quinonero-Candela, Carl Edward Rasmussen
+ *     http://www.jmlr.org/papers/volume6/quinonero-candela05a/quinonero-candela05a.pdf
+ *
+ * Very broadly speaking this method starts with a prior over the observations,
+ *
+ *     [f] ~ N(0, K_ff)
+ *
+ * where K_ff(i, j) = covariance_function(features[i], features[j]) and f
+ * represents the function value.
+ *
+ * It then uses a set of inducing points, u, and makes some assumptions about
+ * the conditional distribution:
+ *
+ *     [f|u] ~ N(K_fu K_uu^-1 u, K_ff - Q_ff)
+ *
+ * Where Q_ff = K_fu K_uu^-1 K_uf represents the variance in f that is
+ * explained by u.
+ *
+ * For FITC (Fully Independent Training Conditional) the assumption is that
+ * K_ff - Qff is diagonal, for PITC (Partially Independent Training Conditional)
+ * that it is block diagonal. These assumptions lead to an efficient way of
+ * inferring the posterior distribution for some new location f*,
+ *
+ *     [f*|f=y] ~ N(K_*u S K_uf A^-1 y, K_** - Q_** + K_*u S K_u*)
+ *
+ * Where S = (K_uu + K_uf A^-1 K_fu)^-1 and A = diag(K_ff - Q_ff) and "diag"
+ * may mean diagonal or block diagonal. Regardless we end up with O(m^2n)
+ * complexity instead of O(n^3) of direct Gaussian processes. (Note that in [2]
+ * S is called sigma and A is lambda.)
+ *
+ * Of course, the implementation details end up somewhat more complex in order
+ * to improve numerical stability. Here we use an approach based off the QR
+ * decomposition which is described in
+ *
+ *     Stable and Efficient Gaussian Process Calculations
+ *     http://www.jmlr.org/papers/volume10/foster09a/foster09a.pdf
+ *
+ * A more detailed (but more likely to be out of date) description of
+ * the details can be found on the albatross documentation. A short
+ * description follows. It starts by setting up the Sparse Gaussian process
+ * covariances
+ *
+ *     [f|u] ~ N(K_fu K_uu^-1 u, K_ff - Q_ff)
+ *
+ * We then set,
+ *
+ *     A = K_ff - Q_ff
+ *       = K_ff - K_fu K_uu^-1 K_uf
+ *
+ * which can be thought of as the covariance in the training data which
+ * is not explained by the inducing points. The fundamental
+ * assumption in these sparse Gaussian processes is that A is sparse, in
+ * this case block diagonal.
+ * + * We then build a matrix B and use its QR decomposition (with pivoting P) + * + * B = |A^-1/2 K_fu| = |Q_1| R P^T + * |K_uu^{T/2} | |Q_2| + * + * After which we can get the information vector (see _fit_impl) + * + * v = (K_uu + K_uf A^-1 K_fu)^-1 K_uf A^-1 y + * = (B^T B) B^T A^-1/2 y + * = P R^-1 Q_1^T A^-1/2 y + * + * and can make predictions for new locations (see _predict_impl), + * + * [f*|f=y] ~ N(K_*u S K_uf A^-1 y, K_** - Q_** + K_*u S K_u*) + * ~ N(m, C) + * + * where we have + * + * m = K_*u S K_uf A^-1 y + * = K_*u v + * + * and + * + * C = K_** - Q_** + K_*u S K_u* + * + * using + * + * Q_** = K_*u K_uu^-1 K_u* + * = (K_uu^{-1/2} K_u*)^T (K_uu^{-1/2} K_u*) + * = Q_sqrt^T Q_sqrt + * and + * + * K_*u S K_u* = K_*u (K_uu + K_uf A^-1 K_fu)^-1 K_u* + * = K_*u (B^T B)^-1 K_u* + * = K_*u (P R R^T P^T)^-1 K_u* + * = (P R^-T K_u*)^T (P R^-T K_u*) + * = S_sqrt^T S_sqrt + */ +template +class PICGaussianProcessRegression + : public GaussianProcessBase> { +public: + using Base = GaussianProcessBase< + CovFunc, MeanFunc, + PICGaussianProcessRegression>; + + template + using GroupIndexType = PICGroupIndexType; + + template + using GroupMap = PICGroupMap; + + PICGaussianProcessRegression() : Base() { initialize_params(); }; + + PICGaussianProcessRegression(const CovFunc &covariance_function, + const MeanFunc &mean_function) + : Base(covariance_function, mean_function) { + initialize_params(); + }; + PICGaussianProcessRegression(CovFunc &&covariance_function, + MeanFunc &&mean_function) + : Base(std::move(covariance_function), std::move(mean_function)) { + initialize_params(); + }; + + PICGaussianProcessRegression( + const CovFunc &covariance_function, const MeanFunc &mean_function, + const GrouperFunction &independent_group_function, + const InducingPointStrategy &inducing_point_strategy, + const std::string &model_name) + : Base(covariance_function, mean_function, model_name), + inducing_point_strategy_(inducing_point_strategy), + 
independent_group_function_(independent_group_function) { + initialize_params(); + }; + PICGaussianProcessRegression(CovFunc &&covariance_function, + MeanFunc &&mean_function, + GrouperFunction &&independent_group_function, + InducingPointStrategy &&inducing_point_strategy, + const std::string &model_name) + : Base(std::move(covariance_function), std::move(mean_function), + model_name), + inducing_point_strategy_(std::move(inducing_point_strategy)), + independent_group_function_(std::move(independent_group_function)) { + initialize_params(); + }; + + void initialize_params() { + measurement_nugget_ = { + details::DEFAULT_NUGGET, + LogScaleUniformPrior(PARAMETER_EPSILON, PARAMETER_MAX)}; + inducing_nugget_ = {details::DEFAULT_NUGGET, + LogScaleUniformPrior(PARAMETER_EPSILON, PARAMETER_MAX)}; + } + + ParameterStore get_params() const override { + auto params = map_join(this->mean_function_.get_params(), + this->covariance_function_.get_params()); + params[details::measurement_nugget_name()] = measurement_nugget_; + params[details::inducing_nugget_name()] = inducing_nugget_; + return params; + } + + void set_param(const std::string &name, const Parameter ¶m) override { + if (name == details::measurement_nugget_name()) { + measurement_nugget_ = param; + return; + } else if (name == details::inducing_nugget_name()) { + inducing_nugget_ = param; + return; + } + const bool success = set_param_if_exists_in_any( + name, param, &this->covariance_function_, &this->mean_function_); + ALBATROSS_ASSERT(success); + } + + template ::type, + GrouperFunction>::value>::type> + void update_grouper_function(NewGrouper &&f) { + independent_group_function_ = std::forward(f); + } + + template ::type, + InducingPointStrategy>::value>::type> + void update_inducing_point_strategy(NewStrategy &&f) { + inducing_point_strategy_ = std::forward(f); + } + + template + auto + _update_impl(const Fit> &old_fit, + const std::vector &features, + const MarginalDistribution &targets) const { + 
assert(false && "Cannot call update() on a PIC GP yet!"); + + BlockDiagonalLDLT A_ldlt; + Eigen::SerializableLDLT K_uu_ldlt; + Eigen::MatrixXd K_fu; + Eigen::VectorXd y; + compute_internal_components(old_fit.train_features, features, targets, + &A_ldlt, &K_uu_ldlt, &K_fu, &y); + + const Eigen::Index n_old = old_fit.sigma_R.rows(); + const Eigen::Index n_new = A_ldlt.rows(); + const Eigen::Index k = old_fit.sigma_R.cols(); + Eigen::MatrixXd B = Eigen::MatrixXd::Zero(n_old + n_new, k); + + ALBATROSS_ASSERT(n_old == k); + + // Form: + // B = |R_old P_old^T| = |Q_1| R P^T + // |A^{-1/2} K_fu| |Q_2| + for (Eigen::Index i = 0; i < old_fit.permutation_indices.size(); ++i) { + const Eigen::Index &pi = old_fit.permutation_indices.coeff(i); + B.col(pi).topRows(i + 1) = old_fit.sigma_R.col(i).topRows(i + 1); + } + B.bottomRows(n_new) = A_ldlt.sqrt_solve(K_fu); + const auto B_qr = QRImplementation::compute(B, Base::threads_.get()); + + // Form: + // y_aug = |R_old P_old^T v_old| + // |A^{-1/2} y | + ALBATROSS_ASSERT(old_fit.information.size() == n_old); + Eigen::VectorXd y_augmented(n_old + n_new); + for (Eigen::Index i = 0; i < old_fit.permutation_indices.size(); ++i) { + y_augmented[i] = + old_fit.information[old_fit.permutation_indices.coeff(i)]; + } + y_augmented.topRows(n_old) = + old_fit.sigma_R.template triangularView() * + y_augmented.topRows(n_old); + + y_augmented.bottomRows(n_new) = A_ldlt.sqrt_solve(y, Base::threads_.get()); + const Eigen::VectorXd v = B_qr->solve(y_augmented); + + Eigen::MatrixXd R = get_R(*B_qr); + if (B_qr->rank() < B_qr->cols()) { + // Inflate the diagonal of R in an attempt to avoid singularity + R.diagonal() += + Eigen::VectorXd::Constant(B_qr->cols(), details::cSparseRNugget); + } + using FitType = + Fit>; + return FitType(old_fit.train_features, old_fit.train_covariance, R, + get_P(*B_qr), v, B_qr->rank()); + } + + // Here we create the QR decomposition of: + // + // B = |A^-1/2 K_fu| = |Q_1| R P^T + // |K_uu^{T/2} | |Q_2| + // + // which 
corresponds to the inverse square root of Sigma + // + // Sigma = (B^T B)^-1 + std::unique_ptr + compute_sigma_qr(const Eigen::SerializableLDLT &K_uu_ldlt, + const BlockDiagonalLDLT &A_ldlt, + const Eigen::MatrixXd &K_fu) const { + Eigen::MatrixXd B(A_ldlt.rows() + K_uu_ldlt.rows(), K_uu_ldlt.rows()); + B.topRows(A_ldlt.rows()) = A_ldlt.sqrt_solve(K_fu); + B.bottomRows(K_uu_ldlt.rows()) = K_uu_ldlt.sqrt_transpose(); + return QRImplementation::compute(B, Base::threads_.get()); + }; + + template < + typename FeatureType, + std::enable_if_t< + has_call_operator::value, int> = 0> + auto _fit_impl(const std::vector &features, + const MarginalDistribution &targets) const { + // Determine the set of inducing points, u. + const auto u = + inducing_point_strategy_(this->covariance_function_, features); + ALBATROSS_ASSERT(u.size() > 0 && "Empty inducing points!"); + + BlockDiagonalLDLT A_ldlt; + Eigen::SerializableLDLT K_uu_ldlt; + Eigen::MatrixXd K_fu; + Eigen::VectorXd y; + GroupMap measurement_groups; + compute_internal_components(u, features, targets, &A_ldlt, &K_uu_ldlt, + &K_fu, &y, &measurement_groups); + auto B_qr = compute_sigma_qr(K_uu_ldlt, A_ldlt, K_fu); + + // To make a prediction, we will need to compute cross-terms with + // sub-blocks of S^-1, where + // + // S^-1 = A^-1 - A^-1 K_fu (K_uu + K_uf A^-1 K_fu)^-1 K_uf A^-1 + // \____________________________________________/ + // Z^T Z + // + // The inverse part of the right-hand term of this is B_qr above, + // and we already have A_ldlt, so we can form Z. + // + // Now to compute sub-blocks of S^-1, we just index into Z^T Z + // appropriately, and if the term is diagonal, also solve using + // the blocks of A^-1. 
+ const Eigen::MatrixXd Z = sqrt_solve(*B_qr, A_ldlt.solve(K_fu).transpose()); + + Eigen::VectorXd y_augmented = Eigen::VectorXd::Zero(B_qr->rows()); + y_augmented.topRows(y.size()) = A_ldlt.sqrt_solve(y, Base::threads_.get()); + const Eigen::VectorXd v = B_qr->solve(y_augmented); + + using InducingPointFeatureType = typename std::decay::type; + + const Eigen::MatrixXd W = K_uu_ldlt.solve( + (A_ldlt.solve(K_fu) - Z.transpose() * Z * K_fu).transpose()); + + // TODO(@peddie): a lot of this can be batched + std::vector covariance_Y(measurement_groups.size()); + std::vector mean_w(measurement_groups.size()); + std::vector> cols_Bs(measurement_groups.size()); + const auto block_start_indices = A_ldlt.block_to_row_map(); + + const auto precompute_block = [&, this](std::size_t block_number, + Eigen::Index start_row) -> void { + const Eigen::Index block_size = A_ldlt.blocks[block_number].rows(); + // K_fu is already computed, so we can form K_uB and K_uA by + // appropriate use of sparse indexing matrices and avoid an O(N + // M) operation. + // + // This nonsense would be a lot more straightforward with Eigen + // 3.4's slicing and indexing API. 
+ Eigen::SparseMatrix cols_B(features.size(), block_size); + cols_B.reserve(block_size); + + for (Eigen::Index i = 0; i < block_size; ++i) { + cols_B.insert(start_row + i, i) = 1.; + } + cols_B.makeCompressed(); + + cols_Bs[block_number] = cols_B; + // v_b \in R^b = A_BB^-1 (y_b - K_Bu v) + Eigen::MatrixXd ydiff_b = cols_B.transpose() * (y - K_fu * v); + const Eigen::MatrixXd mean_w_full = + A_ldlt.blocks[block_number].solve(ydiff_b); + mean_w[block_number] = mean_w_full; + // Y \in R^(u x b) = K_uu^-1 K_uB + covariance_Y[block_number] = K_uu_ldlt.solve(K_fu.transpose() * cols_B); + }; + + apply_map(block_start_indices, precompute_block, Base::threads_.get()); + + using FitType = + Fit>; + return FitType(features, u, K_uu_ldlt, get_R(*B_qr), get_P(*B_qr), mean_w, + W, covariance_Y, Z, A_ldlt, measurement_groups, v, + B_qr->rank(), cols_Bs, independent_group_function_); + } + + template + auto fit_from_prediction(const std::vector &new_inducing_points, + const JointDistribution &prediction_) const { + FitModel>> + output(*this, + Fit>()); + Fit> &new_fit = + output.get_fit(); + + new_fit.train_features = new_inducing_points; + + const Eigen::MatrixXd K_zz = + this->covariance_function_(new_inducing_points, Base::threads_.get()); + new_fit.train_covariance = Eigen::SerializableLDLT(K_zz); + + // We're going to need to take the sqrt of the new covariance which + // could be extremely small, so here we add a small nugget to avoid + // numerical instability + JointDistribution prediction(prediction_); + prediction.covariance.diagonal() += Eigen::VectorXd::Constant( + cast::to_index(prediction.size()), 1, details::DEFAULT_NUGGET); + new_fit.information = new_fit.train_covariance.solve(prediction.mean); + + // Here P is the posterior covariance at the new inducing points. 
If + // we consider the case where we rebase and then use the resulting fit + // to predict the new inducing points then we see that the predictive + // covariance (see documentation above) would be,: + // + // C = K_zz - Q_zz + K_zz Sigma K_zz + // + // We can use this, knowing that at the inducing points K_zz = Q_zz, to + // derive our updated Sigma, + // + // C = K_zz - K_zz + K_zz Sigma K_zz + // C = K_zz Sigma K_zz + // Sigma = K_zz^-1 C K_zz^-1 + // + // And since we need to store Sigma in sqrt form we get, + // + // Sigma = (B_z^T B_z)^-1 + // = K_zz^-1 C K_zz^-1 + // + // So by setting: + // + // B_z = C^{-1/2} K_z + // + // We can then compute and store the QR decomposition of B + // as we do in a normal fit. + const Eigen::SerializableLDLT C_ldlt(prediction.covariance); + const Eigen::MatrixXd sigma_inv_sqrt = C_ldlt.sqrt_solve(K_zz); + const auto B_qr = QRImplementation::compute(sigma_inv_sqrt, nullptr); + + new_fit.P = get_P(*B_qr); + new_fit.sigma_R = get_R(*B_qr); + new_fit.numerical_rank = B_qr->rank(); + + return output; + } + + // This is included to allow the SparseGP + // to be compatible with fits generated + // using a standard GP. 
+ using Base::_predict_impl; + + template + Eigen::VectorXd _predict_impl( + const std::vector &features, + const Fit> + &sparse_gp_fit, + PredictTypeIdentity &&) const { + const auto find_group = [this, &sparse_gp_fit](const auto &feature) { + return sparse_gp_fit.measurement_groups.find( + independent_group_function_(without_measurement(feature))); + }; + const Eigen::MatrixXd K_up = + this->covariance_function_(sparse_gp_fit.inducing_features, features); + Eigen::VectorXd mean_correction = Eigen::VectorXd::Zero(features.size()); + for (Eigen::Index j = 0; j < features.size(); ++j) { + const auto group = find_group(features[j]); + if (group == sparse_gp_fit.measurement_groups.end()) { + continue; + } + const std::vector fvec = {features[j]}; + + const Eigen::VectorXd features_cov = + this->covariance_function_(group->second.dataset.features, fvec); + const Eigen::VectorXd kpuy = + K_up.transpose().row(j) * + sparse_gp_fit.covariance_Y[group->second.block_index]; + const Eigen::VectorXd Vbp = features_cov - kpuy; + mean_correction[j] = + Vbp.dot(sparse_gp_fit.mean_w[group->second.block_index]); + } + + Eigen::VectorXd mean = + K_up.transpose() * sparse_gp_fit.information + mean_correction; + + this->mean_function_.add_to(features, &mean); + return mean; + } + + template + MarginalDistribution _predict_impl( + const std::vector &features, + const Fit> + &sparse_gp_fit, + PredictTypeIdentity &&) const { + const auto K_up = + this->covariance_function_(sparse_gp_fit.inducing_features, features); + Eigen::VectorXd mean = gp_mean_prediction(K_up, sparse_gp_fit.information); + this->mean_function_.add_to(features, &mean); + + // First pass: compute an O(1) mapping from features to groups + std::vector feature_to_block(features.size()); + std::vector groups( + features.size()); + Eigen::VectorXi col_alloc{Eigen::VectorXi::Zero(features.size())}; + bool all_same_group = true; + bool all_in_groups = true; + for (Eigen::Index j = 0; j < features.size(); ++j) { + groups[j] = 
sparse_gp_fit.measurement_groups.find( + independent_group_function_(without_measurement(features[j]))); + if (groups[j] != sparse_gp_fit.measurement_groups.end()) { + col_alloc(j) = groups[j]->second.block_size; + all_same_group = all_same_group && groups[j] == groups[0]; + } else { + all_in_groups = false; + } + feature_to_block[j] = + std::distance(sparse_gp_fit.measurement_groups.begin(), groups[j]); + } + + // Second pass: compute mean vector and fill sparse Vp matrix + Eigen::VectorXd mean_correction = Eigen::VectorXd::Zero(features.size()); + Eigen::SparseMatrix Vp(sparse_gp_fit.train_features.size(), + features.size()); + Vp.reserve(col_alloc); + for (Eigen::Index j = 0; j < features.size(); ++j) { + if (groups[j] == sparse_gp_fit.measurement_groups.end()) { + continue; + } + + const auto &B = groups[j]->second; + Eigen::VectorXd Vbp = + this->covariance_function_(B.dataset.features, + std::vector{features[j]}) - + K_up.col(j).transpose() * sparse_gp_fit.covariance_Y[B.block_index]; + for (Eigen::Index i = 0; i < Vbp.size(); ++i) { + Vp.insert(B.initial_row + i, j) = Vbp(i); + } + mean_correction[j] = Vbp.dot(sparse_gp_fit.mean_w[B.block_index]); + } + Vp.makeCompressed(); + + Eigen::MatrixXd xi_lambda = sparse_gp_fit.A_ldlt.sqrt_solve(Vp); + Eigen::MatrixXd xi_u = sparse_gp_fit.Z * Vp; + Eigen::VectorXd VSV_diag{ + (xi_lambda.transpose() * xi_lambda - xi_u.transpose() * xi_u) + .diagonal()}; + + const Eigen::VectorXd U_diag = (K_up.transpose() * sparse_gp_fit.W * Vp).diagonal(); + + Eigen::VectorXd marginal_variance(cast::to_index(features.size())); + for (Eigen::Index i = 0; i < marginal_variance.size(); ++i) { + marginal_variance[i] = this->covariance_function_( + features[cast::to_size(i)], features[cast::to_size(i)]); + } + + const Eigen::MatrixXd Q_sqrt = + sparse_gp_fit.train_covariance.sqrt_solve(K_up); + const Eigen::VectorXd Q_diag = + Q_sqrt.cwiseProduct(Q_sqrt).array().colwise().sum(); + marginal_variance -= Q_diag; + + const Eigen::MatrixXd 
S_sqrt = + sqrt_solve(sparse_gp_fit.sigma_R, sparse_gp_fit.P, K_up); + const Eigen::VectorXd S_diag = + S_sqrt.cwiseProduct(S_sqrt).array().colwise().sum(); + marginal_variance += S_diag; + + mean += mean_correction; + + return MarginalDistribution(mean, + marginal_variance - (2 * U_diag + VSV_diag)); + } + + template + JointDistribution _predict_impl( + const std::vector &features, + const Fit> + &sparse_gp_fit, + PredictTypeIdentity &&) const { + // K_up + const Eigen::MatrixXd K_up = + this->covariance_function_(sparse_gp_fit.inducing_features, features); + const Eigen::MatrixXd prior_cov = this->covariance_function_(features); + + // First pass: compute an O(1) mapping from features to groups + std::vector feature_to_block(features.size()); + std::vector groups( + features.size()); + Eigen::VectorXi col_alloc{Eigen::VectorXi::Zero(features.size())}; + bool all_same_group = true; + bool all_in_groups = true; + for (Eigen::Index j = 0; j < features.size(); ++j) { + groups[j] = sparse_gp_fit.measurement_groups.find( + independent_group_function_(without_measurement(features[j]))); + if (groups[j] != sparse_gp_fit.measurement_groups.end()) { + col_alloc(j) = groups[j]->second.block_size; + all_same_group = all_same_group && groups[j] == groups[0]; + } else { + all_in_groups = false; + } + feature_to_block[j] = + std::distance(sparse_gp_fit.measurement_groups.begin(), groups[j]); + } + + // Second pass: compute mean vector and fill sparse Vp matrix + Eigen::VectorXd mean_correction = Eigen::VectorXd::Zero(features.size()); + Eigen::SparseMatrix Vp(sparse_gp_fit.train_features.size(), + features.size()); + Vp.reserve(col_alloc); + for (Eigen::Index j = 0; j < features.size(); ++j) { + if (groups[j] == sparse_gp_fit.measurement_groups.end()) { + // Fall back to PITC + continue; + } + + const auto &B = groups[j]->second; + // TODO(@peddie): there must be some way to assign this + // expression directly to a map of the column memory + Eigen::VectorXd Vbp = + 
this->covariance_function_(B.dataset.features, + std::vector{features[j]}) - + K_up.col(j).transpose() * sparse_gp_fit.covariance_Y[B.block_index]; + for (Eigen::Index i = 0; i < Vbp.size(); ++i) { + Vp.insert(B.initial_row + i, j) = Vbp(i); + } + mean_correction[j] = Vbp.dot(sparse_gp_fit.mean_w[B.block_index]); + } + Vp.makeCompressed(); + + + Eigen::MatrixXd xi_lambda = sparse_gp_fit.A_ldlt.sqrt_solve(Vp); + Eigen::MatrixXd xi_u = sparse_gp_fit.Z * Vp; + Eigen::MatrixXd VSV{xi_lambda.transpose() * xi_lambda - + xi_u.transpose() * xi_u}; + + const Eigen::MatrixXd U = K_up.transpose() * sparse_gp_fit.W * Vp; + + const Eigen::MatrixXd S_sqrt = + sqrt_solve(sparse_gp_fit.sigma_R, sparse_gp_fit.P, K_up); + + const Eigen::MatrixXd Q_sqrt = + sparse_gp_fit.train_covariance.sqrt_solve(K_up); + + const Eigen::MatrixXd max_explained = Q_sqrt.transpose() * Q_sqrt; + const Eigen::MatrixXd unexplained = S_sqrt.transpose() * S_sqrt; + + const Eigen::MatrixXd pitc_covariance = + prior_cov - max_explained + unexplained; + + const Eigen::MatrixXd pic_correction = U + U.transpose() + VSV; + JointDistribution pred(K_up.transpose() * sparse_gp_fit.information + + mean_correction, + pitc_covariance - pic_correction); + this->mean_function_.add_to(features, &pred.mean); + + return pred; + } + + template + double log_likelihood(const RegressionDataset &dataset) const { + const auto u = + inducing_point_strategy_(this->covariance_function_, dataset.features); + + BlockDiagonalLDLT A_ldlt; + Eigen::SerializableLDLT K_uu_ldlt; + Eigen::MatrixXd K_fu; + Eigen::VectorXd y; + compute_internal_components(u, dataset.features, dataset.targets, &A_ldlt, + &K_uu_ldlt, &K_fu, &y); + const auto B_qr = compute_sigma_qr(K_uu_ldlt, A_ldlt, K_fu); + // The log likelihood for y ~ N(0, K) is: + // + // L = 1/2 (n log(2 pi) + log(|K|) + y^T K^-1 y) + // + // where in our case we have + // K = A + Q_ff + // and + // Q_ff = K_fu K_uu^-1 K_uf + // + // First we get the determinant, |K|: + // + // |K| = |A + 
Q_ff| + // = |K_uu + K_uf A^-1 K_fu| |K_uu^-1| |A| + // = |P R^T Q^T Q R P^T| |K_uu^-1| |A| + // = |R^T R| |K_uu^-1| |A| + // = |R|^2 |A| / |K_uu| + // + // Where the first equality comes from the matrix determinant lemma. + // https://en.wikipedia.org/wiki/Matrix_determinant_lemma#Generalization + // + // After which we can take the log: + // + // log(|K|) = 2 log(|R|) + log(|A|) - log(|K_uu|) + // + const double log_det_a = A_ldlt.log_determinant(); + + const double log_det_r = + B_qr->matrixR().diagonal().array().cwiseAbs().log().sum(); + const double log_det_K_uu = K_uu_ldlt.log_determinant(); + const double log_det = log_det_a + 2 * log_det_r - log_det_K_uu; + + // q = y^T K^-1 y + // = y^T (A + Q_ff)^-1 y + // = y^T (A^-1 - A^-1 K_fu (K_uu + K_uf A^-1 K_fu)^-1 K_uf A^-1) y + // = y^T A^-1 y - y^T A^-1 K_fu (K_uu + K_uf A^-1 K_fu)^-1 K_uf A^-1) y + // = y^T A^-1 y - y^T A^-1 K_fu (R^T R)^-1 K_uf A^-1) y + // = y^T A^-1 y - (R^-T K_uf A^-1 y)^T (R^-T K_uf A^-1 y) + // = y^T y_a - y_b^T y_b + // + // with y_b = R^-T K_uf y_a + const Eigen::VectorXd y_a = A_ldlt.solve(y); + + Eigen::VectorXd y_b = K_fu.transpose() * y_a; + y_b = sqrt_solve(*B_qr, y_b); + + double log_quadratic = y.transpose() * y_a; + log_quadratic -= y_b.transpose() * y_b; + + const double rank = cast::to_double(y.size()); + const double log_dimension = rank * log(2 * M_PI); + + return -0.5 * (log_det + log_quadratic + log_dimension) + + this->prior_log_likelihood(); + } + + InducingPointStrategy get_inducing_point_strategy() const { + return inducing_point_strategy_; + } + +private: + // This method takes care of a lot of the common book keeping required to + // setup the Sparse Gaussian Process problem. Namely, we want to get from + // possibly unordered features to a structured representation + // in the form K_ff = A_ff + Q_ff where Q_ff = K_fu K_uu^-1 K_uf and + // A_ff is block diagonal and is formed by subtracting Q_ff from K_ff. 
+ // + template + void compute_internal_components( + const std::vector &inducing_features, + const std::vector &out_of_order_features, + const MarginalDistribution &out_of_order_targets, + BlockDiagonalLDLT *A_ldlt, Eigen::SerializableLDLT *K_uu_ldlt, + Eigen::MatrixXd *K_fu, Eigen::VectorXd *y, + GroupMap *measurement_groups) const { + + ALBATROSS_ASSERT(A_ldlt != nullptr); + ALBATROSS_ASSERT(K_uu_ldlt != nullptr); + ALBATROSS_ASSERT(K_fu != nullptr); + ALBATROSS_ASSERT(y != nullptr); + + const auto indexer = + group_by(out_of_order_features, independent_group_function_).indexers(); + + const auto out_of_order_measurement_features = + as_measurements(out_of_order_features); + + std::vector reordered_inds; + BlockDiagonal K_ff; + Eigen::Index begin_row_index = 0; + Eigen::Index block_index = 0; + // TODO(@peddie): compute these blocks asynchronously? + for (const auto &pair : indexer) { + reordered_inds.insert(reordered_inds.end(), pair.second.begin(), + pair.second.end()); + measurement_groups->operator[](pair.first) = PICGroup{ + begin_row_index, static_cast(pair.second.size()), + block_index, + albatross::RegressionDataset( + subset(out_of_order_features, pair.second), + out_of_order_targets.subset(pair.second))}; + auto subset_features = + subset(out_of_order_measurement_features, pair.second); + K_ff.blocks.emplace_back(this->covariance_function_(subset_features)); + K_ff.blocks.back().diagonal() += + subset(out_of_order_targets.covariance.diagonal(), pair.second); + begin_row_index += pair.second.size(); + ++block_index; + } + + const auto features = + subset(out_of_order_measurement_features, reordered_inds); + auto targets = subset(out_of_order_targets, reordered_inds); + *y = targets.mean; + + this->mean_function_.remove_from( + subset(out_of_order_features, reordered_inds), &targets.mean); + + *K_fu = this->covariance_function_(features, inducing_features, + Base::threads_.get()); + + auto K_uu = + this->covariance_function_(inducing_features, 
Base::threads_.get()); + K_uu.diagonal() += + inducing_nugget_.value * Eigen::VectorXd::Ones(K_uu.rows()); + + *K_uu_ldlt = K_uu.ldlt(); + // P is such that: + // Q_ff = K_fu K_uu^-1 K_uf + // = K_fu K_uu^-T/2 K_uu^-1/2 K_uf + // = P^T P + const Eigen::MatrixXd P = K_uu_ldlt->sqrt_solve(K_fu->transpose()); + + // We only need the diagonal blocks of Q_ff to get A + BlockDiagonal Q_ff_diag; + Eigen::Index i = 0; + for (const auto &pair : indexer) { + Eigen::Index cols = cast::to_index(pair.second.size()); + auto P_cols = P.block(0, i, P.rows(), cols); + Q_ff_diag.blocks.emplace_back(P_cols.transpose() * P_cols); + i += cols; + } + auto A = K_ff - Q_ff_diag; + + // It's possible that the inducing points will perfectly describe + // some of the data, in which case we need to add a bit of extra + // noise to make sure lambda is invertible. + for (auto &b : A.blocks) { + b.diagonal() += + measurement_nugget_.value * Eigen::VectorXd::Ones(b.rows()); + } + + *A_ldlt = A.ldlt(); + } + + Parameter measurement_nugget_; + Parameter inducing_nugget_; + InducingPointStrategy inducing_point_strategy_; + GrouperFunction independent_group_function_; +}; + +// rebase_inducing_points takes a Sparse GP which was fit using some set of +// inducing points and creates a new fit relative to new inducing points. +// Note that this will NOT be the equivalent to having fit the model with +// the new inducing points since some information may have been lost in +// the process. 
+template +auto rebase_inducing_points( + const FitModel>> + &fit_model, + const std::vector &new_inducing_points) { + return fit_model.get_model().fit_from_prediction( + new_inducing_points, fit_model.predict(new_inducing_points).joint()); +} + +template +using SparseQRPicGaussianProcessRegression = + PICGaussianProcessRegression; + +template +auto pic_gp_from_covariance_and_mean( + CovFunc &&covariance_function, MeanFunc &&mean_function, + GrouperFunction &&grouper_function, InducingPointStrategy &&strategy, + const std::string &model_name, + QRImplementation qr __attribute__((unused)) = DenseQRImplementation{}) { + return PICGaussianProcessRegression< + typename std::decay::type, typename std::decay::type, + typename std::decay::type, + typename std::decay::type, + typename std::decay::type>( + std::forward(covariance_function), + std::forward(mean_function), + std::forward(grouper_function), + std::forward(strategy), model_name); +}; + +template +auto pic_gp_from_covariance_and_mean( + CovFunc &&covariance_function, MeanFunc &&mean_function, + GrouperFunction &&grouper_function, const std::string &model_name, + QRImplementation qr = DenseQRImplementation{}) { + return pic_gp_from_covariance_and_mean( + std::forward(covariance_function), + std::forward(mean_function), + std::forward(grouper_function), + StateSpaceInducingPointStrategy(), model_name, qr); +}; + +template +auto pic_gp_from_covariance(CovFunc &&covariance_function, + GrouperFunction &&grouper_function, + InducingPointStrategy &&strategy, + const std::string &model_name, + QRImplementation qr = DenseQRImplementation{}) { + return pic_gp_from_covariance_and_mean( + std::forward(covariance_function), ZeroMean(), + std::forward(grouper_function), + std::forward(strategy), model_name, qr); +}; + +template +auto pic_gp_from_covariance(CovFunc covariance_function, + GrouperFunction grouper_function, + const std::string &model_name, + QRImplementation qr = DenseQRImplementation{}) { + return 
pic_gp_from_covariance_and_mean< + CovFunc, decltype(ZeroMean()), GrouperFunction, + decltype(StateSpaceInducingPointStrategy()), QRImplementation>( + std::forward(covariance_function), ZeroMean(), + std::forward(grouper_function), + StateSpaceInducingPointStrategy(), model_name, qr); +}; + +} // namespace albatross + +#endif /* INCLUDE_ALBATROSS_MODELS_PIC_GP_H_ */ diff --git a/tests/test_pic_gp.cc b/tests/test_pic_gp.cc new file mode 100644 index 00000000..96b8c1f9 --- /dev/null +++ b/tests/test_pic_gp.cc @@ -0,0 +1,404 @@ +/* + * Copyright (C) 2024 Swift Navigation Inc. + * Contact: Swift Navigation + * + * This source is subject to the license found in the file 'LICENSE' which must + * be distributed together with this source. All other rights reserved. + * + * THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, + * EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE. + */ + +#include +#include + +// #include "albatross/src/eigen/serializable_ldlt.hpp" +// #include "albatross/src/linalg/block_diagonal.hpp" +#include "test_models.h" +#include +#include + +#include +#include + +namespace albatross { + +struct LeaveOneIntervalOut { + + LeaveOneIntervalOut(){}; + explicit LeaveOneIntervalOut(double group_domain_size_) + : group_domain_size(group_domain_size_){}; + + int operator()(const double &f) const { + return static_cast(floor(f / group_domain_size)); + } + + int operator()(const Measurement &f) const { + return static_cast(floor(f.value / group_domain_size)); + } + + double group_domain_size = 5.; +}; + +TEST(TestPicGP, TestPredictionExists) { + const auto dataset = make_toy_linear_data(); + UniformlySpacedInducingPoints strategy(8); + LeaveOneIntervalOut grouper; + const auto pic = + pic_gp_from_covariance(make_simple_covariance_function(), grouper, + strategy, "pic", DenseQRImplementation{}); + + const auto pic_fit = pic.fit(dataset); + + 
const auto test_features = linspace(0.1, 9.9, 11); + + const auto pic_pred = + pic_fit.predict_with_measurement_noise(test_features).joint(); + + EXPECT_GT(pic_pred.mean.size(), 0); +} + + +TEST(TestPicGP, ScalarEquivalence) { + static constexpr std::size_t kNumTrainPoints = 3; + static constexpr std::size_t kNumTestPoints = 1; + static constexpr std::size_t kNumInducingPoints = 3; + + UniformlySpacedInducingPoints strategy(kNumInducingPoints); + // Set the grouper so that all the simple test data falls within one + // group. + LeaveOneIntervalOut grouper(10); + auto direct = gp_from_covariance(make_simple_covariance_function(), "direct"); + auto pic = pic_gp_from_covariance(make_simple_covariance_function(), grouper, + strategy, "pic", DenseQRImplementation{}); + + auto dataset = make_toy_linear_data(5, 1, 0.1, kNumTrainPoints); + + auto direct_fit = direct.fit(dataset); + auto pic_fit = pic.fit(dataset); + + auto test_features = linspace(0.1, 9.9, kNumTestPoints); + auto direct_pred = + direct_fit.predict_with_measurement_noise(test_features).joint(); + auto pic_pred = pic_fit.predict_with_measurement_noise(test_features).joint(); + EXPECT_LT(distance::wasserstein_2(direct_pred, pic_pred), 1e-12); +} + +TEST(TestPicGP, SingleBlockEquivalence) { + static constexpr std::size_t kNumTrainPoints = 10; + static constexpr std::size_t kNumTestPoints = 10; + static constexpr std::size_t kNumInducingPoints = 4; + + UniformlySpacedInducingPoints strategy(kNumInducingPoints); + // Set the grouper so that all the simple test data falls within one + // group. 
+ LeaveOneIntervalOut grouper(10); + auto direct = gp_from_covariance(make_simple_covariance_function(), "direct"); + auto pic = pic_gp_from_covariance(make_simple_covariance_function(), grouper, + strategy, "pic", DenseQRImplementation{}); + + auto dataset = make_toy_linear_data(5, 1, 0.1, kNumTrainPoints); + + auto direct_fit = direct.fit(dataset); + auto pic_fit = pic.fit(dataset); + + auto test_features = linspace(0.1, 9.9, kNumTestPoints); + auto direct_pred = + direct_fit.predict_with_measurement_noise(test_features).joint(); + auto pic_pred = pic_fit.predict_with_measurement_noise(test_features).joint(); + EXPECT_LT(distance::wasserstein_2(direct_pred, pic_pred), 1e-10); +} + +template +JointDistribution test_pic(const RegressionDataset &data, + const std::vector &predict, + CovarianceType &&cov, InducingStrategy &&inducing, + GrouperFunction &&grouper) { + const auto u = inducing(cov, data.features); + + auto bfp_cov = make_brute_force_pic_covariance(inducing(cov, data.features), + cov, grouper); + + BlockDiagonal K_ff; + const auto indexers = group_by(data.features, grouper).indexers(); + std::vector reordered_inds; + for (const auto &pair : indexers) { + reordered_inds.insert(reordered_inds.end(), pair.second.begin(), + pair.second.end()); + K_ff.blocks.emplace_back(cov(subset(data.features, pair.second))); + K_ff.blocks.back().diagonal() += + subset(data.targets.covariance.diagonal(), pair.second); + } + const std::vector ordered_features = + subset(data.features, reordered_inds); + + Eigen::MatrixXd K_uu = cov(u, u); + K_uu.diagonal() += Eigen::VectorXd::Constant(K_uu.rows(), 1e-8); + const Eigen::MatrixXd K_fu = cov(ordered_features, u); + + const Eigen::SerializableLDLT K_uu_ldlt = K_uu.ldlt(); + const Eigen::MatrixXd P = K_uu_ldlt.sqrt_solve(K_fu.transpose()); + const Eigen::MatrixXd Q_ff = P.transpose() * P; + BlockDiagonal Q_ff_diag; + Eigen::Index i = 0; + for (const auto &pair : indexers) { + const Eigen::Index cols = 
cast::to_index(pair.second.size()); + auto P_cols = P.block(0, i, P.rows(), cols); + Q_ff_diag.blocks.emplace_back(P_cols.transpose() * P_cols); + i += cols; + } + // auto Lambda = K_ff - Q_ff_diag; + + Eigen::MatrixXd Q_ff_lambda = Q_ff; + Eigen::Index k = 0; + for (const auto &block : K_ff.blocks) { + Q_ff_lambda.block(k, k, block.rows(), block.rows()) = block; + Q_ff_lambda.block(k, k, block.rows(), block.rows()).diagonal() += + Eigen::VectorXd::Constant(block.rows(), 1e-2); + k += block.rows(); + } + + const auto print_matrix = [](const Eigen::MatrixXd &m, std::string &&label) { + std::cout << label << " (" << m.rows() << "x" << m.cols() << "):\n" + << m.format(Eigen::FullPrecision) << std::endl; + }; + + const auto print_vector = [](const Eigen::VectorXd &v, std::string &&label) { + std::cout << label << " (" << v.size() + << "): " << v.transpose().format(Eigen::FullPrecision) + << std::endl; + }; + + print_matrix(Q_ff_lambda, "Q_ff + Lambda"); + // const BlockDiagonalLDLT Lambda_ldlt = Lambda.ldlt(); + + print_matrix(bfp_cov(ordered_features), "BFP: Q_ff + Lambda"); + + print_matrix(Q_ff_lambda - bfp_cov(ordered_features), + "Q_ff_lambda difference"); + + const Eigen::SerializableLDLT S = Q_ff_lambda.ldlt(); + + print_matrix(S.matrixL(), "PITC L"); + print_vector(S.vectorD(), "PITC D"); + + Eigen::MatrixXd K_PIC(ordered_features.size(), predict.size()); + Eigen::MatrixXd K_PITC(ordered_features.size(), predict.size()); + + Eigen::MatrixXd V_PIC = + Eigen::MatrixXd::Zero(ordered_features.size(), predict.size()); + + Eigen::MatrixXd Y = K_uu_ldlt.solve(K_fu.transpose()); + print_matrix(Y, "Y"); + + Eigen::MatrixXd W = K_uu_ldlt.solve(S.solve(K_fu).transpose()); + print_matrix(W, "W"); + + for (Eigen::Index f = 0; f < cast::to_index(ordered_features.size()); ++f) { + for (Eigen::Index p = 0; p < cast::to_index(predict.size()); ++p) { + const std::vector pv{predict[p]}; + K_PITC(f, p) = K_fu.row(f).dot(K_uu_ldlt.solve(cov(u, pv)).col(0)); + if 
(grouper(predict[p]) == grouper(ordered_features[f])) { + K_PIC(f, p) = cov(ordered_features[f], predict[p]); + V_PIC(f, p) = + K_PIC(f, p) - K_fu.row(f).dot(K_uu_ldlt.solve(cov(u, pv)).col(0)); + } else { + K_PIC(f, p) = K_fu.row(f).dot(K_uu_ldlt.solve(cov(u, pv)).col(0)); + } + } + } + + const Eigen::MatrixXd K_pu = cov(predict, u); + // Eigen::Index j = 0; + // Eigen::Index blk = 0; + // std::vector WW; + + const Eigen::MatrixXd WBW = K_uu_ldlt.solve(S.solve(K_fu).transpose()); + print_matrix(K_PIC, "K_PIC"); + print_matrix(bfp_cov(ordered_features, predict), "BFP: K_PIC"); + + print_matrix(K_PIC - bfp_cov(ordered_features, predict), "K_PIC error"); + print_matrix(K_PITC, "K_PITC"); + + print_matrix(V_PIC, "V_PIC"); + + print_matrix(WBW, "W"); + const Eigen::MatrixXd U = K_pu * WBW * V_PIC; + print_matrix(U, "U"); + + auto SV = S.sqrt_solve(V_PIC); + const Eigen::MatrixXd VSV = + V_PIC.transpose() * S.solve(V_PIC); // SV.transpose() * SV; + print_matrix(VSV, "VSV"); + + const Eigen::MatrixXd predict_prior = cov(predict); + + print_matrix(predict_prior, "prior"); + + print_matrix(bfp_cov(predict), "BFP: prior"); + + print_matrix(predict_prior - bfp_cov(predict), "prior error"); + + // auto KK = K_PITC_ldlt.sqrt_solve(K_PIC); + // const Eigen::MatrixXd explained_cov = KK.transpose() * KK; + const Eigen::MatrixXd explained_cov = K_PIC.transpose() * S.solve(K_PIC); + print_matrix(explained_cov, "explained"); + + const Eigen::VectorXd PIC_mean = + K_PIC.transpose() * S.solve(data.targets.mean); + print_vector(PIC_mean, "PIC mean"); + const Eigen::MatrixXd predict_cov = predict_prior - explained_cov; + + print_matrix(predict_cov, "K_**"); + + const Eigen::MatrixXd PITC_cov = + predict_prior - K_PITC.transpose() * S.solve(K_PITC); + + const Eigen::MatrixXd PIC_cov = PITC_cov - U - U.transpose() - VSV; + print_matrix(PITC_cov, "PTIC cov"); + print_matrix(PIC_cov, "PIC cov"); + + print_matrix(predict_cov - PITC_cov, "PIC - PITC"); + print_matrix(predict_cov - PIC_cov, 
"PIC - factored"); + + return JointDistribution{PIC_mean, PIC_cov}; +} + +TEST(TestPicGP, BruteForceEquivalenceOneBlock) { + static constexpr std::size_t kNumTrainPoints = 10; + static constexpr std::size_t kNumTestPoints = 10; + static constexpr std::size_t kNumInducingPoints = 5; + + UniformlySpacedInducingPoints strategy(kNumInducingPoints); + LeaveOneIntervalOut grouper(10); + auto pic = pic_gp_from_covariance(make_simple_covariance_function(), grouper, + strategy, "pic", DenseQRImplementation{}); + + auto dataset = make_toy_linear_data(5, 1, 0.1, kNumTrainPoints); + auto bfp_cov = make_brute_force_pic_covariance( + strategy(make_simple_covariance_function(), dataset.features), + make_simple_covariance_function(), grouper); + auto bfp = gp_from_covariance(bfp_cov); + + auto bfp_fit = bfp.fit(dataset); + auto pic_fit = pic.fit(dataset); + + auto test_features = linspace(0.1, 9.9, kNumTestPoints); + auto bfp_pred = bfp_fit.predict_with_measurement_noise(test_features).joint(); + auto pic_pred = pic_fit.predict_with_measurement_noise(test_features).joint(); + EXPECT_LT(distance::wasserstein_2(bfp_pred, pic_pred), 1e-11); +} + +TEST(TestPicGP, BruteForceEquivalenceMultipleBlocks) { + static constexpr std::size_t kNumTrainPoints = 10; + static constexpr std::size_t kNumTestPoints = 10; + static constexpr std::size_t kNumInducingPoints = 5; + + UniformlySpacedInducingPoints strategy(kNumInducingPoints); + LeaveOneIntervalOut grouper(2); + auto pic = pic_gp_from_covariance(make_simple_covariance_function(), grouper, + strategy, "pic", DenseQRImplementation{}); + + auto dataset = make_toy_linear_data(5, 1, 0.1, kNumTrainPoints); + auto bfp_cov = make_brute_force_pic_covariance( + strategy(make_simple_covariance_function(), dataset.features), + make_simple_covariance_function(), grouper); + auto bfp = gp_from_covariance(bfp_cov); + + auto bfp_fit = bfp.fit(dataset); + auto pic_fit = pic.fit(dataset); + + auto test_features = linspace(0.1, 9.9, kNumTestPoints); + auto 
bfp_pred = bfp_fit.predict_with_measurement_noise(test_features).joint(); + auto pic_pred = pic_fit.predict_with_measurement_noise(test_features).joint(); + + EXPECT_LT(distance::wasserstein_2(bfp_pred, pic_pred), 1e-12); +} + +TEST(TestPicGP, PITCEquivalenceOutOfTraining) { + static constexpr std::size_t kNumTrainPoints = 10; + static constexpr std::size_t kNumTestPoints = 10; + static constexpr std::size_t kNumInducingPoints = 5; + + UniformlySpacedInducingPoints strategy(kNumInducingPoints); + LeaveOneIntervalOut grouper(2); + auto pic = pic_gp_from_covariance(make_simple_covariance_function(), grouper, + strategy, "pic", DenseQRImplementation{}); + + auto dataset = make_toy_linear_data(5, 1, 0.1, kNumTrainPoints); + auto pitc = + sparse_gp_from_covariance(make_simple_covariance_function(), grouper, + strategy, "pitc", DenseQRImplementation{}); + + auto pic_fit = pic.fit(dataset); + auto pitc_fit = pitc.fit(dataset); + + auto test_features = linspace(10.1, 19.9, kNumTestPoints); + auto pic_pred = pic_fit.predict_with_measurement_noise(test_features).joint(); + auto pitc_pred = pitc_fit.predict_with_measurement_noise(test_features).joint(); + + EXPECT_LT(distance::wasserstein_2(pic_pred, pitc_pred), 1e-12); +} + +TEST(TestPicGP, PredictMeanEquivalent) { + static constexpr std::size_t kNumTrainPoints = 10; + static constexpr std::size_t kNumTestPoints = 10; + static constexpr std::size_t kNumInducingPoints = 5; + + UniformlySpacedInducingPoints strategy(kNumInducingPoints); + LeaveOneIntervalOut grouper(2); + auto pic = pic_gp_from_covariance(make_simple_covariance_function(), grouper, + strategy, "pic", DenseQRImplementation{}); + + auto dataset = make_toy_linear_data(5, 1, 0.1, kNumTrainPoints); + + auto pic_fit = pic.fit(dataset); + + auto test_features = linspace(0.1, 9.9, kNumTestPoints); + auto pic_pred = pic_fit.predict_with_measurement_noise(test_features).mean(); + auto pic_joint_pred = pic_fit.predict_with_measurement_noise(test_features).joint(); + + 
const double pic_mean_error = (pic_pred - pic_joint_pred.mean).norm(); + EXPECT_LT(pic_mean_error, 1e-12); +} + +TEST(TestPicGP, PredictMarginalEquivalent) { + static constexpr std::size_t kNumTrainPoints = 10; + static constexpr std::size_t kNumTestPoints = 10; + static constexpr std::size_t kNumInducingPoints = 5; + + UniformlySpacedInducingPoints strategy(kNumInducingPoints); + LeaveOneIntervalOut grouper(2); + auto pic = pic_gp_from_covariance(make_simple_covariance_function(), grouper, + strategy, "pic", DenseQRImplementation{}); + + auto dataset = make_toy_linear_data(5, 1, 0.1, kNumTrainPoints); + + auto pic_fit = pic.fit(dataset); + + auto test_features = linspace(0.1, 9.9, kNumTestPoints); + auto pic_pred = pic_fit.predict_with_measurement_noise(test_features).marginal(); + auto pic_joint_pred = pic_fit.predict_with_measurement_noise(test_features).joint(); + + const double pic_marginal_error = + (pic_pred.mean - pic_joint_pred.mean).norm(); + EXPECT_LT(pic_marginal_error, 1e-12); + const double pic_marginal_cov_error = + (Eigen::VectorXd(pic_pred.covariance.diagonal()) - + Eigen::VectorXd(pic_joint_pred.covariance.diagonal())) + .norm(); + EXPECT_LT(pic_marginal_cov_error, 1e-12) + << "\nmarginal: " + << Eigen::VectorXd(pic_pred.covariance) + .transpose() + .format(Eigen::FullPrecision) + << "\njoint diag: " + << Eigen::VectorXd(pic_joint_pred.covariance.diagonal()) + .transpose() + .format(Eigen::FullPrecision); +} + + +} // namespace albatross From f2d36a10d5365c45ff7e2fa05a426ddd8efaea76 Mon Sep 17 00:00:00 2001 From: Matt Peddie Date: Tue, 6 Aug 2024 08:42:49 +1000 Subject: [PATCH 04/12] format fixups --- include/albatross/src/cereal/gp.hpp | 2 +- include/albatross/src/models/pic_gp.hpp | 4 ++-- include/albatross/src/models/sparse_common.hpp | 3 +-- tests/test_pic_gp.cc | 14 ++++++++------ 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/include/albatross/src/cereal/gp.hpp b/include/albatross/src/cereal/gp.hpp index 9b421b83..216d487a 
100644 --- a/include/albatross/src/cereal/gp.hpp +++ b/include/albatross/src/cereal/gp.hpp @@ -18,8 +18,8 @@ using albatross::GaussianProcessBase; using albatross::GPFit; using albatross::LinearCombination; -using albatross::SparseGPFit; using albatross::PICGPFit; +using albatross::SparseGPFit; #ifndef GP_SERIALIZATION_VERSION #define GP_SERIALIZATION_VERSION 2 diff --git a/include/albatross/src/models/pic_gp.hpp b/include/albatross/src/models/pic_gp.hpp index cd088341..8badc0dd 100644 --- a/include/albatross/src/models/pic_gp.hpp +++ b/include/albatross/src/models/pic_gp.hpp @@ -681,7 +681,8 @@ class PICGaussianProcessRegression (xi_lambda.transpose() * xi_lambda - xi_u.transpose() * xi_u) .diagonal()}; - const Eigen::VectorXd U_diag = (K_up.transpose() * sparse_gp_fit.W * Vp).diagonal(); + const Eigen::VectorXd U_diag = + (K_up.transpose() * sparse_gp_fit.W * Vp).diagonal(); Eigen::VectorXd marginal_variance(cast::to_index(features.size())); for (Eigen::Index i = 0; i < marginal_variance.size(); ++i) { @@ -764,7 +765,6 @@ class PICGaussianProcessRegression } Vp.makeCompressed(); - Eigen::MatrixXd xi_lambda = sparse_gp_fit.A_ldlt.sqrt_solve(Vp); Eigen::MatrixXd xi_u = sparse_gp_fit.Z * Vp; Eigen::MatrixXd VSV{xi_lambda.transpose() * xi_lambda - diff --git a/include/albatross/src/models/sparse_common.hpp b/include/albatross/src/models/sparse_common.hpp index b5267b99..525ecee4 100644 --- a/include/albatross/src/models/sparse_common.hpp +++ b/include/albatross/src/models/sparse_common.hpp @@ -86,7 +86,6 @@ struct DenseQRImplementation { } }; - } // namespace albatross -#endif // INCLUDE_ALBATROSS_MODELS_SPARSE_COMMON_H_ \ No newline at end of file +#endif // INCLUDE_ALBATROSS_MODELS_SPARSE_COMMON_H_ diff --git a/tests/test_pic_gp.cc b/tests/test_pic_gp.cc index 96b8c1f9..dc110334 100644 --- a/tests/test_pic_gp.cc +++ b/tests/test_pic_gp.cc @@ -59,7 +59,6 @@ TEST(TestPicGP, TestPredictionExists) { EXPECT_GT(pic_pred.mean.size(), 0); } - TEST(TestPicGP, 
ScalarEquivalence) { static constexpr std::size_t kNumTrainPoints = 3; static constexpr std::size_t kNumTestPoints = 1; @@ -337,7 +336,8 @@ TEST(TestPicGP, PITCEquivalenceOutOfTraining) { auto test_features = linspace(10.1, 19.9, kNumTestPoints); auto pic_pred = pic_fit.predict_with_measurement_noise(test_features).joint(); - auto pitc_pred = pitc_fit.predict_with_measurement_noise(test_features).joint(); + auto pitc_pred = + pitc_fit.predict_with_measurement_noise(test_features).joint(); EXPECT_LT(distance::wasserstein_2(pic_pred, pitc_pred), 1e-12); } @@ -358,7 +358,8 @@ TEST(TestPicGP, PredictMeanEquivalent) { auto test_features = linspace(0.1, 9.9, kNumTestPoints); auto pic_pred = pic_fit.predict_with_measurement_noise(test_features).mean(); - auto pic_joint_pred = pic_fit.predict_with_measurement_noise(test_features).joint(); + auto pic_joint_pred = + pic_fit.predict_with_measurement_noise(test_features).joint(); const double pic_mean_error = (pic_pred - pic_joint_pred.mean).norm(); EXPECT_LT(pic_mean_error, 1e-12); @@ -379,8 +380,10 @@ TEST(TestPicGP, PredictMarginalEquivalent) { auto pic_fit = pic.fit(dataset); auto test_features = linspace(0.1, 9.9, kNumTestPoints); - auto pic_pred = pic_fit.predict_with_measurement_noise(test_features).marginal(); - auto pic_joint_pred = pic_fit.predict_with_measurement_noise(test_features).joint(); + auto pic_pred = + pic_fit.predict_with_measurement_noise(test_features).marginal(); + auto pic_joint_pred = + pic_fit.predict_with_measurement_noise(test_features).joint(); const double pic_marginal_error = (pic_pred.mean - pic_joint_pred.mean).norm(); @@ -400,5 +403,4 @@ TEST(TestPicGP, PredictMarginalEquivalent) { .format(Eigen::FullPrecision); } - } // namespace albatross From 78348f5c52f786623d4d3fe1be42b5b0275087ab Mon Sep 17 00:00:00 2001 From: Matt Peddie Date: Wed, 7 Aug 2024 02:25:03 +1000 Subject: [PATCH 05/12] Remove debug spam --- include/albatross/src/cereal/suitesparse.hpp | 2 -- 1 file changed, 2 deletions(-) 
diff --git a/include/albatross/src/cereal/suitesparse.hpp b/include/albatross/src/cereal/suitesparse.hpp index 32a89867..ca44c9c6 100644 --- a/include/albatross/src/cereal/suitesparse.hpp +++ b/include/albatross/src/cereal/suitesparse.hpp @@ -439,8 +439,6 @@ inline void load(Archive &ar, cholmod_sparse &m, } detail::suitesparse_cereal_realloc(&m.x, m.nzmax, element_size_bytes); - std::cout << "m.p size: " << p_size_bytes << " bytes" << std::endl; - std::cout << "m.ncol + 1: " << m.ncol + 1 << std::endl; detail::decode_array(ar, "m.p", m.p, (m.ncol + 1) * p_size_bytes); detail::decode_array(ar, "m.i", m.i, m.nzmax * integer_size_bytes); if (!m.packed) { From 9ba34bf905b386111cf322ca6d2e678cef6ff2c3 Mon Sep 17 00:00:00 2001 From: Matt Peddie Date: Wed, 7 Aug 2024 02:25:08 +1000 Subject: [PATCH 06/12] Remove unused group tracker --- include/albatross/src/models/pic_gp.hpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/include/albatross/src/models/pic_gp.hpp b/include/albatross/src/models/pic_gp.hpp index 8badc0dd..b1c30d0e 100644 --- a/include/albatross/src/models/pic_gp.hpp +++ b/include/albatross/src/models/pic_gp.hpp @@ -639,15 +639,12 @@ class PICGaussianProcessRegression features.size()); Eigen::VectorXi col_alloc{Eigen::VectorXi::Zero(features.size())}; bool all_same_group = true; - bool all_in_groups = true; for (Eigen::Index j = 0; j < features.size(); ++j) { groups[j] = sparse_gp_fit.measurement_groups.find( independent_group_function_(without_measurement(features[j]))); if (groups[j] != sparse_gp_fit.measurement_groups.end()) { col_alloc(j) = groups[j]->second.block_size; all_same_group = all_same_group && groups[j] == groups[0]; - } else { - all_in_groups = false; } feature_to_block[j] = std::distance(sparse_gp_fit.measurement_groups.begin(), groups[j]); @@ -726,15 +723,12 @@ class PICGaussianProcessRegression features.size()); Eigen::VectorXi col_alloc{Eigen::VectorXi::Zero(features.size())}; bool all_same_group = true; - bool all_in_groups = true; 
for (Eigen::Index j = 0; j < features.size(); ++j) { groups[j] = sparse_gp_fit.measurement_groups.find( independent_group_function_(without_measurement(features[j]))); if (groups[j] != sparse_gp_fit.measurement_groups.end()) { col_alloc(j) = groups[j]->second.block_size; all_same_group = all_same_group && groups[j] == groups[0]; - } else { - all_in_groups = false; } feature_to_block[j] = std::distance(sparse_gp_fit.measurement_groups.begin(), groups[j]); From 7612fcfaa82113d9957dcff9bd8741aea3e943d8 Mon Sep 17 00:00:00 2001 From: Matt Peddie Date: Wed, 7 Aug 2024 04:13:32 +1000 Subject: [PATCH 07/12] serialization fixups --- include/albatross/src/cereal/block_diagonal.hpp | 2 +- include/albatross/src/cereal/gp.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/albatross/src/cereal/block_diagonal.hpp b/include/albatross/src/cereal/block_diagonal.hpp index 964602bd..b9253363 100644 --- a/include/albatross/src/cereal/block_diagonal.hpp +++ b/include/albatross/src/cereal/block_diagonal.hpp @@ -18,7 +18,7 @@ namespace cereal { template inline void serialize(Archive &archive, const albatross::BlockDiagonalLDLT &ldlt, - const std::uint32_t version) { + const std::uint32_t version __attribute__((unused))) { archive(cereal::make_nvp("blocks", ldlt.blocks)); } diff --git a/include/albatross/src/cereal/gp.hpp b/include/albatross/src/cereal/gp.hpp index 216d487a..cd799908 100644 --- a/include/albatross/src/cereal/gp.hpp +++ b/include/albatross/src/cereal/gp.hpp @@ -58,7 +58,7 @@ template > &fit, - const std::uint32_t version) { + const std::uint32_t version __attribute__((unused))) { archive(cereal::make_nvp("train_features", fit.train_features)); archive(cereal::make_nvp("inducing_features", fit.inducing_features)); archive(cereal::make_nvp("train_covariance", fit.train_covariance)); From 179dd64543087c5ec2eb36ed458afe30597251f4 Mon Sep 17 00:00:00 2001 From: Matt Peddie Date: Wed, 7 Aug 2024 04:13:40 +1000 Subject: [PATCH 08/12] missing 
operator== for block diagonal LDLT --- include/albatross/src/linalg/block_diagonal.hpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/albatross/src/linalg/block_diagonal.hpp b/include/albatross/src/linalg/block_diagonal.hpp index 29321879..c57fd1c5 100644 --- a/include/albatross/src/linalg/block_diagonal.hpp +++ b/include/albatross/src/linalg/block_diagonal.hpp @@ -68,6 +68,8 @@ struct BlockDiagonalLDLT { Eigen::Index rows() const; Eigen::Index cols() const; + + bool operator==(const BlockDiagonalLDLT &other) const; }; struct BlockDiagonal { @@ -248,6 +250,11 @@ inline const BlockDiagonalLDLT &BlockDiagonalLDLT::adjoint() const { return *this; } +inline bool +BlockDiagonalLDLT::operator==(const BlockDiagonalLDLT &other) const { + return blocks == other.blocks; +} + /* * Block Diagonal */ From cdf43376443858063024215ea449b309709504d7 Mon Sep 17 00:00:00 2001 From: Matt Peddie Date: Thu, 8 Aug 2024 01:58:08 +1000 Subject: [PATCH 09/12] Avoid linker collision --- tests/test_pic_gp.cc | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/test_pic_gp.cc b/tests/test_pic_gp.cc index dc110334..0a8c247f 100644 --- a/tests/test_pic_gp.cc +++ b/tests/test_pic_gp.cc @@ -24,10 +24,10 @@ namespace albatross { -struct LeaveOneIntervalOut { +struct GroupedLeaveOneIntervalOut { - LeaveOneIntervalOut(){}; - explicit LeaveOneIntervalOut(double group_domain_size_) + GroupedLeaveOneIntervalOut(){}; + explicit GroupedLeaveOneIntervalOut(double group_domain_size_) : group_domain_size(group_domain_size_){}; int operator()(const double &f) const { @@ -44,7 +44,7 @@ struct LeaveOneIntervalOut { TEST(TestPicGP, TestPredictionExists) { const auto dataset = make_toy_linear_data(); UniformlySpacedInducingPoints strategy(8); - LeaveOneIntervalOut grouper; + GroupedLeaveOneIntervalOut grouper; const auto pic = pic_gp_from_covariance(make_simple_covariance_function(), grouper, strategy, "pic", DenseQRImplementation{}); @@ -67,7 +67,7 @@ 
TEST(TestPicGP, ScalarEquivalence) { UniformlySpacedInducingPoints strategy(kNumInducingPoints); // Set the grouper so that all the simple test data falls within one // group. - LeaveOneIntervalOut grouper(10); + GroupedLeaveOneIntervalOut grouper(10); auto direct = gp_from_covariance(make_simple_covariance_function(), "direct"); auto pic = pic_gp_from_covariance(make_simple_covariance_function(), grouper, strategy, "pic", DenseQRImplementation{}); @@ -92,7 +92,7 @@ TEST(TestPicGP, SingleBlockEquivalence) { UniformlySpacedInducingPoints strategy(kNumInducingPoints); // Set the grouper so that all the simple test data falls within one // group. - LeaveOneIntervalOut grouper(10); + GroupedLeaveOneIntervalOut grouper(10); auto direct = gp_from_covariance(make_simple_covariance_function(), "direct"); auto pic = pic_gp_from_covariance(make_simple_covariance_function(), grouper, strategy, "pic", DenseQRImplementation{}); @@ -271,7 +271,7 @@ TEST(TestPicGP, BruteForceEquivalenceOneBlock) { static constexpr std::size_t kNumInducingPoints = 5; UniformlySpacedInducingPoints strategy(kNumInducingPoints); - LeaveOneIntervalOut grouper(10); + GroupedLeaveOneIntervalOut grouper(10); auto pic = pic_gp_from_covariance(make_simple_covariance_function(), grouper, strategy, "pic", DenseQRImplementation{}); @@ -296,7 +296,7 @@ TEST(TestPicGP, BruteForceEquivalenceMultipleBlocks) { static constexpr std::size_t kNumInducingPoints = 5; UniformlySpacedInducingPoints strategy(kNumInducingPoints); - LeaveOneIntervalOut grouper(2); + GroupedLeaveOneIntervalOut grouper(2); auto pic = pic_gp_from_covariance(make_simple_covariance_function(), grouper, strategy, "pic", DenseQRImplementation{}); @@ -322,7 +322,7 @@ TEST(TestPicGP, PITCEquivalenceOutOfTraining) { static constexpr std::size_t kNumInducingPoints = 5; UniformlySpacedInducingPoints strategy(kNumInducingPoints); - LeaveOneIntervalOut grouper(2); + GroupedLeaveOneIntervalOut grouper(2); auto pic = 
pic_gp_from_covariance(make_simple_covariance_function(), grouper, strategy, "pic", DenseQRImplementation{}); @@ -348,7 +348,7 @@ TEST(TestPicGP, PredictMeanEquivalent) { static constexpr std::size_t kNumInducingPoints = 5; UniformlySpacedInducingPoints strategy(kNumInducingPoints); - LeaveOneIntervalOut grouper(2); + GroupedLeaveOneIntervalOut grouper(2); auto pic = pic_gp_from_covariance(make_simple_covariance_function(), grouper, strategy, "pic", DenseQRImplementation{}); @@ -371,7 +371,7 @@ TEST(TestPicGP, PredictMarginalEquivalent) { static constexpr std::size_t kNumInducingPoints = 5; UniformlySpacedInducingPoints strategy(kNumInducingPoints); - LeaveOneIntervalOut grouper(2); + GroupedLeaveOneIntervalOut grouper(2); auto pic = pic_gp_from_covariance(make_simple_covariance_function(), grouper, strategy, "pic", DenseQRImplementation{}); From ae2f56116521914f53652232d8f3f2bbcd238c15 Mon Sep 17 00:00:00 2001 From: Matt Peddie Date: Thu, 8 Aug 2024 01:59:34 +1000 Subject: [PATCH 10/12] Remove unimplemented PIC functions --- include/albatross/src/models/pic_gp.hpp | 203 ------------------------ 1 file changed, 203 deletions(-) diff --git a/include/albatross/src/models/pic_gp.hpp b/include/albatross/src/models/pic_gp.hpp index b1c30d0e..97f80e6d 100644 --- a/include/albatross/src/models/pic_gp.hpp +++ b/include/albatross/src/models/pic_gp.hpp @@ -354,66 +354,6 @@ class PICGaussianProcessRegression inducing_point_strategy_ = std::forward(f); } - template - auto - _update_impl(const Fit> &old_fit, - const std::vector &features, - const MarginalDistribution &targets) const { - assert(false && "Cannot call update() on a PIC GP yet!"); - - BlockDiagonalLDLT A_ldlt; - Eigen::SerializableLDLT K_uu_ldlt; - Eigen::MatrixXd K_fu; - Eigen::VectorXd y; - compute_internal_components(old_fit.train_features, features, targets, - &A_ldlt, &K_uu_ldlt, &K_fu, &y); - - const Eigen::Index n_old = old_fit.sigma_R.rows(); - const Eigen::Index n_new = A_ldlt.rows(); - const Eigen::Index 
k = old_fit.sigma_R.cols(); - Eigen::MatrixXd B = Eigen::MatrixXd::Zero(n_old + n_new, k); - - ALBATROSS_ASSERT(n_old == k); - - // Form: - // B = |R_old P_old^T| = |Q_1| R P^T - // |A^{-1/2} K_fu| |Q_2| - for (Eigen::Index i = 0; i < old_fit.permutation_indices.size(); ++i) { - const Eigen::Index &pi = old_fit.permutation_indices.coeff(i); - B.col(pi).topRows(i + 1) = old_fit.sigma_R.col(i).topRows(i + 1); - } - B.bottomRows(n_new) = A_ldlt.sqrt_solve(K_fu); - const auto B_qr = QRImplementation::compute(B, Base::threads_.get()); - - // Form: - // y_aug = |R_old P_old^T v_old| - // |A^{-1/2} y | - ALBATROSS_ASSERT(old_fit.information.size() == n_old); - Eigen::VectorXd y_augmented(n_old + n_new); - for (Eigen::Index i = 0; i < old_fit.permutation_indices.size(); ++i) { - y_augmented[i] = - old_fit.information[old_fit.permutation_indices.coeff(i)]; - } - y_augmented.topRows(n_old) = - old_fit.sigma_R.template triangularView() * - y_augmented.topRows(n_old); - - y_augmented.bottomRows(n_new) = A_ldlt.sqrt_solve(y, Base::threads_.get()); - const Eigen::VectorXd v = B_qr->solve(y_augmented); - - Eigen::MatrixXd R = get_R(*B_qr); - if (B_qr->rank() < B_qr->cols()) { - // Inflate the diagonal of R in an attempt to avoid singularity - R.diagonal() += - Eigen::VectorXd::Constant(B_qr->cols(), details::cSparseRNugget); - } - using FitType = - Fit>; - return FitType(old_fit.train_features, old_fit.train_covariance, R, - get_P(*B_qr), v, B_qr->rank()); - } - // Here we create the QR decomposition of: // // B = |A^-1/2 K_fu| = |Q_1| R P^T @@ -518,66 +458,6 @@ class PICGaussianProcessRegression B_qr->rank(), cols_Bs, independent_group_function_); } - template - auto fit_from_prediction(const std::vector &new_inducing_points, - const JointDistribution &prediction_) const { - FitModel>> - output(*this, - Fit>()); - Fit> &new_fit = - output.get_fit(); - - new_fit.train_features = new_inducing_points; - - const Eigen::MatrixXd K_zz = - 
this->covariance_function_(new_inducing_points, Base::threads_.get()); - new_fit.train_covariance = Eigen::SerializableLDLT(K_zz); - - // We're going to need to take the sqrt of the new covariance which - // could be extremely small, so here we add a small nugget to avoid - // numerical instability - JointDistribution prediction(prediction_); - prediction.covariance.diagonal() += Eigen::VectorXd::Constant( - cast::to_index(prediction.size()), 1, details::DEFAULT_NUGGET); - new_fit.information = new_fit.train_covariance.solve(prediction.mean); - - // Here P is the posterior covariance at the new inducing points. If - // we consider the case where we rebase and then use the resulting fit - // to predict the new inducing points then we see that the predictive - // covariance (see documentation above) would be,: - // - // C = K_zz - Q_zz + K_zz Sigma K_zz - // - // We can use this, knowing that at the inducing points K_zz = Q_zz, to - // derive our updated Sigma, - // - // C = K_zz - K_zz + K_zz Sigma K_zz - // C = K_zz Sigma K_zz - // Sigma = K_zz^-1 C K_zz^-1 - // - // And since we need to store Sigma in sqrt form we get, - // - // Sigma = (B_z^T B_z)^-1 - // = K_zz^-1 C K_zz^-1 - // - // So by setting: - // - // B_z = C^{-1/2} K_z - // - // We can then compute and store the QR decomposition of B - // as we do in a normal fit. - const Eigen::SerializableLDLT C_ldlt(prediction.covariance); - const Eigen::MatrixXd sigma_inv_sqrt = C_ldlt.sqrt_solve(K_zz); - const auto B_qr = QRImplementation::compute(sigma_inv_sqrt, nullptr); - - new_fit.P = get_P(*B_qr); - new_fit.sigma_R = get_R(*B_qr); - new_fit.numerical_rank = B_qr->rank(); - - return output; - } - // This is included to allow the SparseGP // to be compatible with fits generated // using a standard GP. 
@@ -787,73 +667,6 @@ class PICGaussianProcessRegression return pred; } - template - double log_likelihood(const RegressionDataset &dataset) const { - const auto u = - inducing_point_strategy_(this->covariance_function_, dataset.features); - - BlockDiagonalLDLT A_ldlt; - Eigen::SerializableLDLT K_uu_ldlt; - Eigen::MatrixXd K_fu; - Eigen::VectorXd y; - compute_internal_components(u, dataset.features, dataset.targets, &A_ldlt, - &K_uu_ldlt, &K_fu, &y); - const auto B_qr = compute_sigma_qr(K_uu_ldlt, A_ldlt, K_fu); - // The log likelihood for y ~ N(0, K) is: - // - // L = 1/2 (n log(2 pi) + log(|K|) + y^T K^-1 y) - // - // where in our case we have - // K = A + Q_ff - // and - // Q_ff = K_fu K_uu^-1 K_uf - // - // First we get the determinant, |K|: - // - // |K| = |A + Q_ff| - // = |K_uu + K_uf A^-1 K_fu| |K_uu^-1| |A| - // = |P R^T Q^T Q R P^T| |K_uu^-1| |A| - // = |R^T R| |K_uu^-1| |A| - // = |R|^2 |A| / |K_uu| - // - // Where the first equality comes from the matrix determinant lemma. - // https://en.wikipedia.org/wiki/Matrix_determinant_lemma#Generalization - // - // After which we can take the log: - // - // log(|K|) = 2 log(|R|) + log(|A|) - log(|K_uu|) - // - const double log_det_a = A_ldlt.log_determinant(); - - const double log_det_r = - B_qr->matrixR().diagonal().array().cwiseAbs().log().sum(); - const double log_det_K_uu = K_uu_ldlt.log_determinant(); - const double log_det = log_det_a + 2 * log_det_r - log_det_K_uu; - - // q = y^T K^-1 y - // = y^T (A + Q_ff)^-1 y - // = y^T (A^-1 - A^-1 K_fu (K_uu + K_uf A^-1 K_fu)^-1 K_uf A^-1) y - // = y^T A^-1 y - y^T A^-1 K_fu (K_uu + K_uf A^-1 K_fu)^-1 K_uf A^-1) y - // = y^T A^-1 y - y^T A^-1 K_fu (R^T R)^-1 K_uf A^-1) y - // = y^T A^-1 y - (R^-T K_uf A^-1 y)^T (R^-T K_uf A^-1 y) - // = y^T y_a - y_b^T y_b - // - // with y_b = R^-T K_uf y_a - const Eigen::VectorXd y_a = A_ldlt.solve(y); - - Eigen::VectorXd y_b = K_fu.transpose() * y_a; - y_b = sqrt_solve(*B_qr, y_b); - - double log_quadratic = y.transpose() * y_a; - 
log_quadratic -= y_b.transpose() * y_b; - - const double rank = cast::to_double(y.size()); - const double log_dimension = rank * log(2 * M_PI); - - return -0.5 * (log_det + log_quadratic + log_dimension) + - this->prior_log_likelihood(); - } - InducingPointStrategy get_inducing_point_strategy() const { return inducing_point_strategy_; } @@ -959,22 +772,6 @@ class PICGaussianProcessRegression GrouperFunction independent_group_function_; }; -// rebase_inducing_points takes a Sparse GP which was fit using some set of -// inducing points and creates a new fit relative to new inducing points. -// Note that this will NOT be the equivalent to having fit the model with -// the new inducing points since some information may have been lost in -// the process. -template -auto rebase_inducing_points( - const FitModel>> - &fit_model, - const std::vector &new_inducing_points) { - return fit_model.get_model().fit_from_prediction( - new_inducing_points, fit_model.predict(new_inducing_points).joint()); -} - template using SparseQRPicGaussianProcessRegression = From 86b38ea5f2be6b29fd9630e7a56ba0c7b8a87c37 Mon Sep 17 00:00:00 2001 From: Matt Peddie Date: Mon, 2 Sep 2024 15:47:15 +1000 Subject: [PATCH 11/12] Cleanups and comments --- include/albatross/src/models/pic_gp.hpp | 143 +++++++----------------- tests/test_pic_gp.cc | 2 - 2 files changed, 43 insertions(+), 102 deletions(-) diff --git a/include/albatross/src/models/pic_gp.hpp b/include/albatross/src/models/pic_gp.hpp index 97f80e6d..8863cbec 100644 --- a/include/albatross/src/models/pic_gp.hpp +++ b/include/albatross/src/models/pic_gp.hpp @@ -15,6 +15,20 @@ namespace albatross { +/* + * This class directly implements the PIC covariance structure: + * conditional on the inducing points, each group of observations is + * independent. 
You can use it as a covariance function for a dense + * GP model, and it should give you the same results as the PIC + * approximation, but since it's implemented directly as a covariance + * function, it's much less efficient than modelling using the + * PICGaussianProcessRegression class defined below. It is provided + * for debugging purposes, since it can be tricky to follow what the + * efficient implementation is doing. + * + * See the PIC GP model test suite for an example of how to use this + * class. + */ template class BruteForcePIC @@ -44,7 +58,6 @@ class BruteForcePIC K_xu[i] = cov_(x, inducing_points_[i]); K_uy[i] = cov_(inducing_points_[i], y); } - // const Eigen::VectorXd K_uy = cov_(inducing_points_, y); return K_xu.dot(K_uu_ldlt_.solve(K_uy)); } }; @@ -149,110 +162,40 @@ struct Fit> { }; /* - * This class implements an approximation technique for Gaussian processes - * which relies on an assumption that all observations are independent (or - * groups of observations are independent) conditional on a set of inducing - * points. The method is based off: - * - * [1] Sparse Gaussian Processes using Pseudo-inputs - * Edward Snelson, Zoubin Ghahramani - * http://www.gatsby.ucl.ac.uk/~snelson/SPGP_up.pdf - * - * Though the code uses notation closer to that used in this (excellent) - * overview of these methods: - * - * [2] A Unifying View of Sparse Approximate Gaussian Process Regression - * Joaquin Quinonero-Candela, Carl Edward Rasmussen - * http://www.jmlr.org/papers/volume6/quinonero-candela05a/quinonero-candela05a.pdf - * - * Very broadly speaking this method starts with a prior over the observations, - * - * [f] ~ N(0, K_ff) - * - * where K_ff(i, j) = covariance_function(features[i], features[j]) and f - * represents the function value. 
- * - * It then uses a set of inducing points, u, and makes some assumptions about - * the conditional distribution: - * - * [f|u] ~ N(K_fu K_uu^-1 u, K_ff - Q_ff) - * - * Where Q_ff = K_fu K_uu^-1 K_uf represents the variance in f that is - * explained by u. - * - * For FITC (Fully Independent Training Contitional) the assumption is that - * K_ff - Qff is diagonal, for PITC (Partially Independent Training Conditional) - * that it is block diagonal. These assumptions lead to an efficient way of - * inferring the posterior distribution for some new location f*, - * - * [f*|f=y] ~ N(K_*u S K_uf A^-1 y, K_** - Q_** + K_*u S K_u*) - * - * Where S = (K_uu + K_uf A^-1 K_fu)^-1 and A = diag(K_ff - Q_ff) and "diag" - * may mean diagonal or block diagonal. Regardless we end up with O(m^2n) - * complexity instead of O(n^3) of direct Gaussian processes. (Note that in [2] - * S is called sigma and A is lambda.) - * - * Of course, the implementation details end up somewhat more complex in order - * to improve numerical stability. Here we use an approach based off the QR - * decomposition which is described in + * This class implements an approximation technique for Gaussian + * processes which relies on the following: * - * Stable and Efficient Gaussian Process Calculations - * http://www.jmlr.org/papers/volume10/foster09a/foster09a.pdf + * 1. Observations occur in correlated groups * - * A more detailed (but more likely to be out of date) description of - * the details can be found on the albatross documentation. A short - * description follows. It starts by setting up the Sparse Gaussian process - * covariances + * 2. Conditional on the values of observations within the same group + * and on the values of the inducing points, an observation is + * independent of observations in other groups. 
* - * [f|u] ~ N(K_fu K_uu^-1 u, K_ff - Q_ff) + * This is known as the PIC (Partially Independent Conditional) + * approximation and was introduced in * - * We then set, - * - * A = K_ff - Q_ff - * = K_ff - K_fu K_uu^-1 K_uf - * - * which can be thought of as the covariance in the training data which - * is not be explained by the inducing points. The fundamental - * assumption in these sparse Gaussian processes is that A is sparse, in - * this case block diagonal. - * - * We then build a matrix B and use its QR decomposition (with pivoting P) - * - * B = |A^-1/2 K_fu| = |Q_1| R P^T - * |K_uu^{T/2} | |Q_2| - * - * After which we can get the information vector (see _fit_impl) - * - * v = (K_uu + K_uf A^-1 K_fu)^-1 K_uf A^-1 y - * = (B^T B) B^T A^-1/2 y - * = P R^-1 Q_1^T A^-1/2 y - * - * and can make predictions for new locations (see _predict_impl), - * - * [f*|f=y] ~ N(K_*u S K_uf A^-1 y, K_** - Q_** + K_*u S K_u*) - * ~ N(m, C) - * - * where we have - * - * m = K_*u S K_uf A^-1 y - * = K_*u v - * - * and - * - * C = K_** - Q_** + K_*u S K_u* - * - * using - * - * Q_** = K_*u K_uu^-1 K_u* - * = (K_uu^{-1/2} K_u*)^T (K_uu^{-1/2} K_u*) - * = Q_sqrt^T Q_sqrt - * and + * [1] Local and global sparse Gaussian process approximations + * Edward Snelson, Zoubin Ghahramani + * https://proceedings.mlr.press/v2/snelson07a/snelson07a.pdf + * + * The upshot of this method is that for enough small enough groups, + * the fit performance scales as O(N M^2) for N observations and M + * inducing points (just like FITC/PITC), but fine-grained local + * information from correlation groups can provide better predictive + * performance. + * + * Inducing point and group definitions are handled as in the + * FITC/PITC approximation class (see sparse_gp.hpp). If a prediction + * point is not correlated with any of the training observations (as + * defined by the grouper function), the resulting predicted + * distribution will be computed using only the fitted inducing + * points. 
+ * + * Several operations supported by other Albatross GP model classes, + * like "rebasing" (computing new inducing points from old ones) and + * reduced-complexity updating of a fit with new information, are not + * yet supported by the PIC approximation class. * - * K_*u S K_u* = K_*u (K_uu + K_uf A^-1 K_fu)^-1 K_u* - * = K_*u (B^T B)^-1 K_u* - * = K_*u (P R R^T P^T)^-1 K_u* - * = (P R^-T K_u*)^T (P R^-T K_u*) - * = S_sqrt^T S_sqrt */ template #include -// #include "albatross/src/eigen/serializable_ldlt.hpp" -// #include "albatross/src/linalg/block_diagonal.hpp" #include "test_models.h" #include #include From 9bb009edb226cf66cac804175abd8af605329e90 Mon Sep 17 00:00:00 2001 From: Matt Peddie Date: Mon, 2 Sep 2024 15:47:33 +1000 Subject: [PATCH 12/12] Update Albatross to use vendored MKL distribution --- WORKSPACE.bazel | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/WORKSPACE.bazel b/WORKSPACE.bazel index ab62240f..54325525 100644 --- a/WORKSPACE.bazel +++ b/WORKSPACE.bazel @@ -31,7 +31,7 @@ http_archive( build_file = "@rules_swiftnav//third_party:mkl_headers.BUILD", sha256 = "b24d12a8e18ba23de5c659a33fb184a7ac6019d4b159e78f628d7c8de225f77a", urls = [ - "https://anaconda.org/intel/mkl-include/2023.1.0/download/linux-64/mkl-include-2023.1.0-intel_46342.tar.bz2", + "https://swiftnav-public-mkl-sharedlibs.s3.us-west-2.amazonaws.com/mkl-include-2023.1.0-intel_46342.tar.bz2", ], ) @@ -41,7 +41,7 @@ http_archive( sha256 = "c63adbfdbdc7c4992384a2d89cd62211e4a9f8061e3e841af1a269699531cb02", strip_prefix = "lib", urls = [ - "https://anaconda.org/intel/mkl-static/2023.1.0/download/linux-64/mkl-static-2023.1.0-intel_46342.tar.bz2", + "https://swiftnav-public-mkl-sharedlibs.s3.us-west-2.amazonaws.com/mkl-static-2023.1.0-intel_46342.tar.bz2", ], )