diff --git a/.gitmodules b/.gitmodules
index b3e311c1..fec9af67 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -25,3 +25,6 @@
 [submodule "third_party/nlopt"]
 	path = third_party/nlopt
 	url = https://github.com/swift-nav/nlopt.git
+[submodule "third_party/autodiff"]
+	path = third_party/autodiff
+	url = https://github.com/autodiff/autodiff.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 20f803ef..c5fd9607 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -11,6 +11,8 @@ swift_create_project_options(
     TEST_PACKAGES "Googletest" "GFlags"
   )
 
+include(LanguageStandards)
+
 include(ClangFormat)
 swift_setup_clang_format()
@@ -27,6 +29,7 @@ find_package(FastCSV REQUIRED)
 find_package(Gzip-Hpp REQUIRED)
 find_package(Variant REQUIRED)
 find_package(Nlopt REQUIRED)
+find_package(Autodiff REQUIRED)
 
 add_library(albatross INTERFACE)
 target_include_directories(albatross INTERFACE
@@ -40,9 +43,11 @@ target_link_libraries(albatross
   fast-csv
   gzip-hpp
   variant
+  autodiff
   )
 
 set(albatross_COMPILE_OPTIONS
+  -std=c++17
   -Werror
   -Wall
   -Wno-unused-value
diff --git a/include/albatross/Common b/include/albatross/Common
index 10451207..3cee18b4 100644
--- a/include/albatross/Common
+++ b/include/albatross/Common
@@ -20,6 +20,7 @@
 #include
 #include
+#include
 #include
 #include
diff --git a/include/albatross/src/core/declarations.hpp b/include/albatross/src/core/declarations.hpp
index 7cc8bf14..4ee74243 100644
--- a/include/albatross/src/core/declarations.hpp
+++ b/include/albatross/src/core/declarations.hpp
@@ -85,9 +85,10 @@ using ParameterKey = std::string;
 // a corresponding cereal type included or you'll get some
 // really impressive compilation errors.
 using ParameterPrior = PriorContainer;
-using ParameterValue = double;
+using ParameterValue = autodiff::var;
 
 using ParameterStore = std::map<ParameterKey, Parameter>;
+using ParameterConstReferences = std::map;
 
 /*
  * Distributions
diff --git a/include/albatross/src/covariance_functions/callers.hpp b/include/albatross/src/covariance_functions/callers.hpp
index 0fd7de9b..b07c1ccd 100644
--- a/include/albatross/src/covariance_functions/callers.hpp
+++ b/include/albatross/src/covariance_functions/callers.hpp
@@ -120,7 +120,7 @@ struct DirectCaller {
   template <typename CovFunc, typename X, typename Y,
             typename std::enable_if<
                 has_valid_call_impl<CovFunc, X, Y>::value, int>::type = 0>
-  static double call(const CovFunc &cov_func, const X &x, const Y &y) {
+  static auto call(const CovFunc &cov_func, const X &x, const Y &y) {
    return cov_func._call_impl(x, y);
  }
@@ -128,7 +128,7 @@ struct DirectCaller {
   template <typename MeanFunc, typename X,
             typename std::enable_if<
                 has_valid_call_impl<MeanFunc, X>::value, int>::type = 0>
-  static double call(const MeanFunc &mean_func, const X &x) {
+  static auto call(const MeanFunc &mean_func, const X &x) {
     return mean_func._call_impl(x);
   }
 };
@@ -146,7 +146,7 @@ template <typename SubCaller> struct SymmetricCaller {
       typename CovFunc, typename X, typename Y,
       typename std::enable_if<
           has_valid_cov_caller<CovFunc, SubCaller, X, Y>::value, int>::type = 0>
-  static double call(const CovFunc &cov_func, const X &x, const Y &y) {
+  static auto call(const CovFunc &cov_func, const X &x, const Y &y) {
     return SubCaller::call(cov_func, x, y);
   }
@@ -156,7 +156,7 @@ template <typename SubCaller> struct SymmetricCaller {
       typename CovFunc, typename X, typename Y,
       typename std::enable_if<
           (has_valid_cov_caller<CovFunc, SubCaller, Y, X>::value &&
            !has_valid_cov_caller<CovFunc, SubCaller, X, Y>::value),
           int>::type = 0>
-  static double call(const CovFunc &cov_func, const X &x, const Y &y) {
+  static auto call(const CovFunc &cov_func, const X &x, const Y &y) {
     return SubCaller::call(cov_func, y, x);
   }
@@ -165,7 +165,7 @@ template <typename SubCaller> struct SymmetricCaller {
       typename MeanFunc, typename X,
       typename std::enable_if<
           has_valid_mean_caller<MeanFunc, SubCaller, X>::value, int>::type = 0>
-  static double call(const MeanFunc &mean_func, const X &x) {
+  static auto call(const MeanFunc &mean_func, const X &x) {
     return SubCaller::call(mean_func, x);
   }
 };
@@ -185,7 +185,7 @@ template <typename SubCaller> struct MeasurementForwarder {
       typename CovFunc, typename X, typename Y,
       typename std::enable_if<
           has_valid_cov_caller<CovFunc, SubCaller, X, Y>::value, int>::type = 0>
-  static double call(const CovFunc &cov_func, const X &x, const Y &y) {
+  static auto call(const CovFunc &cov_func, const X &x, const Y &y) {
     return SubCaller::call(cov_func, x, y);
   }
@@ -195,8 +195,8 @@ template <typename SubCaller> struct MeasurementForwarder {
            !has_valid_cov_caller<CovFunc, SubCaller, Measurement<X>,
                                  Measurement<Y>>::value),
           int>::type = 0>
-  static double call(const CovFunc &cov_func, const Measurement<X> &x,
-                     const Measurement<Y> &y) {
+  static auto call(const CovFunc &cov_func, const Measurement<X> &x,
+                   const Measurement<Y> &y) {
     return SubCaller::call(cov_func, x.value, y.value);
   }
@@ -206,8 +206,8 @@ template <typename SubCaller> struct MeasurementForwarder {
           (has_valid_cov_caller<CovFunc, SubCaller, X, Y>::value &&
            !has_valid_cov_caller<CovFunc, SubCaller, Measurement<X>, Y>::value),
           int>::type = 0>
-  static double call(const CovFunc &cov_func, const Measurement<X> &x,
-                     const Y &y) {
+  static auto call(const CovFunc &cov_func, const Measurement<X> &x,
+                   const Y &y) {
     return SubCaller::call(cov_func, x.value, y);
   }
@@ -217,8 +217,8 @@ template <typename SubCaller> struct MeasurementForwarder {
           (has_valid_cov_caller<CovFunc, SubCaller, X, Y>::value &&
            !has_valid_cov_caller<CovFunc, SubCaller, X,
                                  Measurement<Y>>::value),
           int>::type = 0>
-  static double call(const CovFunc &cov_func, const X &x,
-                     const Measurement<Y> &y) {
+  static auto call(const CovFunc &cov_func, const X &x,
+                   const Measurement<Y> &y) {
     return SubCaller::call(cov_func, x, y.value);
   }
@@ -227,7 +227,7 @@ template <typename SubCaller> struct MeasurementForwarder {
       typename MeanFunc, typename X,
       typename std::enable_if<
           has_valid_mean_caller<MeanFunc, SubCaller, X>::value, int>::type = 0>
-  static double call(const MeanFunc &mean_func, const X &x) {
+  static auto call(const MeanFunc &mean_func, const X &x) {
     return SubCaller::call(mean_func, x);
   }
@@ -237,7 +237,7 @@ template <typename SubCaller> struct MeasurementForwarder {
               !has_valid_mean_caller<MeanFunc, SubCaller,
                                      Measurement<X>>::value,
           int>::type = 0>
-  static double call(const MeanFunc &mean_func, const Measurement<X> &x) {
+  static auto call(const MeanFunc &mean_func, const Measurement<X> &x) {
     return SubCaller::call(mean_func, x.value);
   }
 };
@@ -249,7 +249,7 @@ template <typename SubCaller> struct LinearCombinationCaller {
       typename CovFunc, typename X, typename Y,
       typename std::enable_if<
           has_valid_cov_caller<CovFunc, SubCaller, X, Y>::value, int>::type = 0>
-  static double call(const CovFunc &cov_func, const X &x, const Y &y) {
+  static auto call(const CovFunc &cov_func, const X &x, const Y &y) {
     return SubCaller::call(cov_func, x, y);
   }
@@ -257,8 +257,8 @@ template <typename SubCaller> struct LinearCombinationCaller {
       typename CovFunc, typename X, typename Y,
       typename std::enable_if<
           has_valid_cov_caller<CovFunc, SubCaller, X, Y>::value, int>::type = 0>
-  static double call(const CovFunc &cov_func, const LinearCombination<X> &xs,
-                     const LinearCombination<Y> &ys) {
+  static auto call(const CovFunc &cov_func, const LinearCombination<X> &xs,
+                   const LinearCombination<Y> &ys) {
 
     auto sub_caller = [&](const auto &x, const auto &y) {
       return SubCaller::call(cov_func, x, y);
@@ -273,8 +273,8 @@ template <typename SubCaller> struct LinearCombinationCaller {
       typename CovFunc, typename X, typename Y,
       typename std::enable_if<
           has_valid_cov_caller<CovFunc, SubCaller, X, Y>::value, int>::type = 0>
-  static double call(const CovFunc &cov_func, const X &x,
-                     const LinearCombination<Y> &ys) {
+  static auto call(const CovFunc &cov_func, const X &x,
+                   const LinearCombination<Y> &ys) {
     double sum = 0.;
     for (std::size_t i = 0; i < ys.values.size(); ++i) {
       sum += ys.coefficients[i] * SubCaller::call(cov_func, x, ys.values[i]);
@@ -286,8 +286,8 @@ template <typename SubCaller> struct LinearCombinationCaller {
       typename CovFunc, typename X, typename Y,
       typename std::enable_if<
           has_valid_cov_caller<CovFunc, SubCaller, X, Y>::value, int>::type = 0>
-  static double call(const CovFunc &cov_func, const LinearCombination<X> &xs,
-                     const Y &y) {
+  static auto call(const CovFunc &cov_func, const LinearCombination<X> &xs,
+                   const Y &y) {
     double sum = 0.;
     for (std::size_t i = 0; i < xs.values.size(); ++i) {
       sum += xs.coefficients[i] * SubCaller::call(cov_func, xs.values[i], y);
@@ -300,7 +300,7 @@ template <typename SubCaller> struct LinearCombinationCaller {
       typename MeanFunc, typename X,
       typename std::enable_if<
           has_valid_mean_caller<MeanFunc, SubCaller, X>::value, int>::type = 0>
-  static double call(const MeanFunc &mean_func, const X &x) {
+  static auto call(const MeanFunc &mean_func, const X &x) {
     return SubCaller::call(mean_func, x);
   }
@@ -308,8 +308,7 @@ template <typename SubCaller> struct LinearCombinationCaller {
       typename MeanFunc, typename X,
       typename std::enable_if<
           has_valid_mean_caller<MeanFunc, SubCaller, X>::value, int>::type = 0>
-  static double call(const MeanFunc &mean_func,
-                     const LinearCombination<X> &xs) {
+  static auto call(const MeanFunc &mean_func, const LinearCombination<X> &xs) {
     double sum = 0.;
     for (std::size_t i = 0; i < xs.values.size(); ++i) {
       sum += xs.coefficients[i] * SubCaller::call(mean_func, xs.values[i]);
@@ -349,7 +348,7 @@ template <typename SubCaller> struct VariantForwarder {
           !is_variant<X>::value && !is_variant<Y>::value &&
               has_valid_cov_caller<CovFunc, SubCaller, X, Y>::value,
           int>::type = 0>
-  static double call(const CovFunc &cov_func, const X &x, const Y &y) {
+  static auto call(const CovFunc &cov_func, const X &x, const Y &y) {
     return SubCaller::call(cov_func, x, y);
   }
@@ -366,7 +365,7 @@ template <typename SubCaller> struct VariantForwarder {
             typename std::enable_if<
                 has_valid_cov_caller<CovFunc, SubCaller, A, Y>::value,
                 int>::type = 0>
-    double operator()(const Y &y) const {
+    auto operator()(const Y &y) const {
       return SubCaller::call(cov_func_, x_, y);
     };
@@ -374,7 +373,7 @@ template <typename SubCaller> struct VariantForwarder {
             typename std::enable_if<
                 !has_valid_cov_caller<CovFunc, SubCaller, A, Y>::value,
                 int>::type = 0>
-    double operator()(const Y &y) const {
+    auto operator()(const Y &y) const {
       return 0.;
     };
@@ -389,8 +388,8 @@ template <typename SubCaller> struct VariantForwarder {
           has_valid_variant_cov_caller<CovFunc, SubCaller, A,
                                        variant<Ts...>>::value,
           int>::type = 0>
-  static double call(const CovFunc &cov_func, const A &x,
-                     const variant<Ts...> &y) {
+  static auto call(const CovFunc &cov_func, const A &x,
+                   const variant<Ts...> &y) {
     CallVisitor<CovFunc, A> visitor(cov_func, x);
     return apply_visitor(visitor, y);
   }
@@ -402,8 +401,8 @@ template <typename SubCaller> struct VariantForwarder {
           has_valid_variant_cov_caller<CovFunc, SubCaller, A,
                                        variant<Ts...>>::value,
           int>::type = 0>
-  static double call(const CovFunc &cov_func, const variant<Ts...> &x,
-                     const A &y) {
+  static auto call(const CovFunc &cov_func, const variant<Ts...> &x,
+                   const A &y) {
     CallVisitor<CovFunc, A> visitor(cov_func, y);
     return apply_visitor(visitor, x);
   }
@@ -414,8 +413,8 @@ template <typename SubCaller> struct VariantForwarder {
           has_valid_cov_caller<CovFunc, SubCaller, variant<Xs...>,
                                variant<Ys...>>::value,
           int>::type = 0>
-  static double call(const CovFunc &cov_func, const variant<Xs...> &x,
-                     const variant<Ys...> &y) {
+  static auto call(const CovFunc &cov_func, const variant<Xs...> &x,
+                   const variant<Ys...> &y) {
     return x.match(
         [&y, &cov_func](const auto &xx) { return call(cov_func, xx, y); });
   }
@@ -434,7 +433,7 @@ template <typename SubCaller> struct VariantForwarder {
             typename std::enable_if<
                 has_valid_mean_caller<MeanFunc, SubCaller, X>::value,
                 int>::type = 0>
-    double operator()(const X &x) const {
+    auto operator()(const X &x) const {
       return SubCaller::call(mean_func_, x);
     };
@@ -442,7 +441,7 @@ template <typename SubCaller> struct VariantForwarder {
             typename std::enable_if<
                 !has_valid_mean_caller<MeanFunc, SubCaller, X>::value,
                 int>::type = 0>
-    double operator()(const X &x) const {
+    auto operator()(const X &x) const {
       return 0.;
     };
@@ -454,7 +453,7 @@ template <typename SubCaller> struct VariantForwarder {
           !is_variant<X>::value &&
               has_valid_mean_caller<MeanFunc, SubCaller, X>::value,
           int>::type = 0>
-  static double call(const MeanFunc &mean_func, const X &x) {
+  static auto call(const MeanFunc &mean_func, const X &x) {
     return SubCaller::call(mean_func, x);
   }
@@ -463,7 +462,7 @@ template <typename SubCaller> struct VariantForwarder {
             typename std::enable_if<has_valid_variant_mean_caller<
                                         MeanFunc, SubCaller, X>::value,
                                     int>::type = 0>
-  static double call(const MeanFunc &mean_func, const X &x) {
+  static auto call(const MeanFunc &mean_func, const X &x) {
     MeanCallVisitor<MeanFunc> visitor(mean_func);
     return apply_visitor(visitor, x);
   }
@@ -480,9 +479,8 @@ using DefaultCaller = internal::VariantForwarder
 
 template <typename CallerType, typename CovFuncType, typename... Args>
 class caller_has_valid_call
-    : public has_call_with_return_type<CallerType, double,
-                                       typename const_ref<CovFuncType>::type,
-                                       typename const_ref<Args>::type...> {};
+    : public has_call<CallerType, typename const_ref<CovFuncType>::type,
+                      typename const_ref<Args>::type...> {};
 
 template <typename CovFunc, typename... Args>
 class has_valid_caller
diff --git a/include/albatross/src/covariance_functions/covariance_function.hpp b/include/albatross/src/covariance_functions/covariance_function.hpp
index 81781ad8..10205522 100644
--- a/include/albatross/src/covariance_functions/covariance_function.hpp
+++ b/include/albatross/src/covariance_functions/covariance_function.hpp
@@ -25,7 +25,7 @@ namespace albatross {
  *
  *   std::string get_name() const {return "my_cov_func";}
  *
- *   double _call_impl(const X &x, const X &y) const {
+ *   auto _call_impl(const X &x, const X &y) const {
  *     return covariance_between_x_and_y(x, y);
  *   }
  *
@@ -39,7 +39,7 @@
  *   virtual std::string get_name() const = 0;
  *
  *   template <typename X, typename Y>
- *   double _call_impl(const X &x, const Y &y) const = 0;
+ *   auto _call_impl(const X &x, const Y &y) const = 0;
  *
  *   template <typename X, typename Y>
  *   double operator()(const X &x, const Y &y) const {
@@ -73,7 +73,7 @@ class CovarianceFunction : public ParameterHandlingMixin {
   CovarianceFunction() : ParameterHandlingMixin(){};
   friend Derived;
 
-  template <typename X, typename Y> double call(const X &x, const Y &y) const {
+  template <typename X, typename Y> auto call(const X &x, const Y &y) const {
     return DefaultCaller::call(derived(), x, y);
   }
@@ -83,7 +83,7 @@
                 "implies you aren't using CRTP.  Implementations "
                 "of a CovarianceFunction should look something like:\n"
                 "\n\tclass Foo : public CovarianceFunction<Foo> {"
-                "\n\t\tdouble _call_impl(const X &x, const Y &y) const;"
+                "\n\t\tauto _call_impl(const X &x, const Y &y) const;"
                 "\n\t\t..."
                 "\n\t}\n");
@@ -269,7 +269,7 @@ class SumOfCovarianceFunctions
             typename std::enable_if<(has_equivalent_caller<LHS, X, Y>::value &&
                                      has_equivalent_caller<RHS, X, Y>::value),
                                     int>::type = 0>
-  double _call_impl(const X &x, const Y &y) const {
+  auto _call_impl(const X &x, const Y &y) const {
     return this->lhs_(x, y) + this->rhs_(x, y);
   }
@@ -280,7 +280,7 @@ class SumOfCovarianceFunctions
             typename std::enable_if<(has_equivalent_caller<LHS, X, Y>::value &&
                                      !has_equivalent_caller<RHS, X, Y>::value),
                                     int>::type = 0>
-  double _call_impl(const X &x, const Y &y) const {
+  auto _call_impl(const X &x, const Y &y) const {
     return this->lhs_(x, y);
   }
@@ -291,7 +291,7 @@ class SumOfCovarianceFunctions
             typename std::enable_if<(!has_equivalent_caller<LHS, X, Y>::value &&
                                      has_equivalent_caller<RHS, X, Y>::value),
                                     int>::type = 0>
-  double _call_impl(const X &x, const Y &y) const {
+  auto _call_impl(const X &x, const Y &y) const {
     return this->rhs_(x, y);
   }
@@ -360,7 +360,7 @@ class ProductOfCovarianceFunctions
             typename std::enable_if<(has_equivalent_caller<LHS, X, Y>::value &&
                                      has_equivalent_caller<RHS, X, Y>::value),
                                     int>::type = 0>
-  double _call_impl(const X &x, const Y &y) const {
+  auto _call_impl(const X &x, const Y &y) const {
     double output = this->lhs_(x, y);
     if (output != 0.) {
       output *= this->rhs_(x, y);
@@ -375,7 +375,7 @@ class ProductOfCovarianceFunctions
             typename std::enable_if<(has_equivalent_caller<LHS, X, Y>::value &&
                                      !has_equivalent_caller<RHS, X, Y>::value),
                                     int>::type = 0>
-  double _call_impl(const X &x, const Y &y) const {
+  auto _call_impl(const X &x, const Y &y) const {
     return this->lhs_(x, y);
   }
@@ -386,7 +386,7 @@ class ProductOfCovarianceFunctions
             typename std::enable_if<(!has_equivalent_caller<LHS, X, Y>::value &&
                                      has_equivalent_caller<RHS, X, Y>::value),
                                     int>::type = 0>
-  double _call_impl(const X &x, const Y &y) const {
+  auto _call_impl(const X &x, const Y &y) const {
     return this->rhs_(x, y);
   }
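Note for reviewers (illustrative only, not part of the patch): under the new calling convention a covariance function can return an autodiff type from _call_impl, so derivatives with respect to its parameters propagate through evaluations. A minimal sketch, mirroring the Constant change in polynomials.hpp below; the class name here is hypothetical:

// Sketch only: assumes albatross/Common (and therefore the autodiff headers)
// are already included.
class VarianceOnly : public albatross::CovarianceFunction<VarianceOnly> {
public:
  ALBATROSS_DECLARE_PARAMS(sigma);

  std::string get_name() const { return "variance_only"; }

  // Returning autodiff::var (the new ParameterValue) keeps sigma's
  // contribution differentiable.
  template <typename X, typename Y>
  autodiff::var _call_impl(const X &, const Y &) const {
    return sigma.value * sigma.value;
  }
};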
diff --git a/include/albatross/src/covariance_functions/polynomials.hpp b/include/albatross/src/covariance_functions/polynomials.hpp
index 05f5909c..4c2209a4 100644
--- a/include/albatross/src/covariance_functions/polynomials.hpp
+++ b/include/albatross/src/covariance_functions/polynomials.hpp
@@ -54,8 +54,8 @@ class Constant : public CovarianceFunction<Constant> {
    * so you can move one if you move the rest the same amount.
    */
   template <typename X, typename Y>
-  double _call_impl(const X &x __attribute__((unused)),
-                    const Y &y __attribute__((unused))) const {
+  autodiff::var _call_impl(const X &x __attribute__((unused)),
+                           const Y &y __attribute__((unused))) const {
     return sigma_constant.value * sigma_constant.value;
   }
 };
@@ -101,7 +101,7 @@ class LinearMean : public MeanFunction<LinearMean> {
     offset = {0., GaussianPrior(0., 1000.)};
   }
 
-  double _call_impl(const double &x) const {
+  autodiff::var _call_impl(const double &x) const {
     return slope.value * x + offset.value;
   }
 };
diff --git a/include/albatross/src/covariance_functions/traits.hpp b/include/albatross/src/covariance_functions/traits.hpp
index f62fa9cc..9a963721 100644
--- a/include/albatross/src/covariance_functions/traits.hpp
+++ b/include/albatross/src/covariance_functions/traits.hpp
@@ -24,8 +24,7 @@ DEFINE_CLASS_METHOD_TRAITS(_call_impl);
 
 template <typename U, typename... Args>
 class has_valid_call_impl
-    : public has__call_impl_with_return_type<
-          const U, double, typename const_ref<Args>::type...> {};
+    : public has__call_impl<const U, typename const_ref<Args>::type...> {};
 
 template <typename U, typename... Args>
 class has_possible_call_impl : public has__call_impl<U, Args...> {};
@@ -38,9 +37,8 @@ DEFINE_CLASS_METHOD_TRAITS(call);
 
 template <typename CallerType, typename CovFuncType, typename... Args>
 class has_valid_cov_caller
-    : public has_call_with_return_type<CallerType, double,
-                                       typename const_ref<CovFuncType>::type,
-                                       typename const_ref<Args>::type...> {};
+    : public has_call<CallerType, typename const_ref<CovFuncType>::type,
+                      typename const_ref<Args>::type...> {};
 
 /*
  * has_valid_cross_cov_caller
@@ -55,10 +53,8 @@ class has_valid_cross_cov_caller {
 
 template <typename MeanFunc, typename CallerType, typename X>
 class has_valid_mean_caller
-    : public has_call_with_return_type<CallerType, double,
-                                       typename const_ref<MeanFunc>::type,
-                                       typename const_ref<X>::type> {
-};
+    : public has_call<CallerType, typename const_ref<MeanFunc>::type,
+                      typename const_ref<X>::type> {};
 
 /*
  * This determines whether or not a class has a method defined for,
diff --git a/include/albatross/src/models/gp.hpp b/include/albatross/src/models/gp.hpp
index bcd88b6f..f8620568 100644
--- a/include/albatross/src/models/gp.hpp
+++ b/include/albatross/src/models/gp.hpp
@@ -438,13 +438,57 @@ class GaussianProcessBase : public ModelBase<Derived> {
   }
 
   template <typename FeatureType>
-  double log_likelihood(const RegressionDataset<FeatureType> &dataset) const {
+  double log_likelihood(const RegressionDataset<FeatureType> &dataset,
+                        Eigen::VectorXd *gradient = nullptr) const {
     Eigen::VectorXd zero_mean(dataset.targets.mean);
     const auto measurement_features = as_measurements(dataset.features);
     mean_function_.remove_from(measurement_features, &zero_mean);
     const Eigen::MatrixXd cov = covariance_function_(measurement_features);
-    double ll = -negative_log_likelihood(zero_mean, cov);
-    ll += this->prior_log_likelihood();
+
+    const auto ldlt = cov.ldlt();
+    double ll = -negative_log_likelihood(zero_mean, ldlt);
+    // ll += this->prior_log_likelihood();
+
+    if (gradient != nullptr) {
+      const auto tunable_params = get_tunable_parameters(get_params());
+      *gradient = Eigen::VectorXd::Zero(tunable_params.values.size(), 1);
+
+      const Eigen::VectorXd alpha = ldlt.solve(zero_mean);
+
+      const double epsilon = 1e-6;
+      for (std::size_t i = 0; i < tunable_params.values.size(); ++i) {
+
+        auto perturbed_params = [&](double dx) {
+          auto params = this->get_params();
+          auto tune_params = get_tunable_parameters(params);
+          tune_params.values[i] += dx;
+          return set_tunable_params_values(params, tune_params.values);
+        };
+
+        const auto params_minus = perturbed_params(-epsilon);
+        const auto params_plus = perturbed_params(epsilon);
+        const double delta = params_plus.at(tunable_params.names[i]).value -
+                             params_minus.at(tunable_params.names[i]).value;
+        if (delta <= 0) {
+          (*gradient)[i] = 0.;
+          continue;
+        }
+
+        auto eval_cov = [&](const auto &ps) {
+          auto cov_copy = this->covariance_function_;
+          cov_copy.set_params(ps);
+          return cov_copy(measurement_features);
+        };
+
+        const Eigen::MatrixXd dcov =
+            (eval_cov(params_plus) - eval_cov(params_minus)) / delta;
+        const double dlog_det = -0.5 * ldlt.solve(dcov).trace();
+        const double ddata = 0.5 * alpha.transpose() * dcov * alpha;
+
+        (*gradient)[i] = ddata + dlog_det;
+      }
+    }
+
     return ll;
   }
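Note for reviewers (not part of the patch): the gradient block above implements the standard identity for the derivative of a Gaussian process log likelihood with respect to a covariance parameter, with the covariance derivative approximated by a central difference of the covariance matrix:

\[
\frac{\partial \ell}{\partial \theta_i}
  = \tfrac{1}{2}\,\alpha^\top \frac{\partial K}{\partial \theta_i}\,\alpha
  \;-\; \tfrac{1}{2}\,\operatorname{tr}\!\Big(K^{-1}\,\frac{\partial K}{\partial \theta_i}\Big),
\qquad \alpha = K^{-1} y,
\]

where \(y\) is the de-meaned target vector. In the code, `ddata` and `dlog_det` are the two terms, and `eval_cov` supplies \(\partial K / \partial \theta_i \approx (K(\theta_i + \epsilon) - K(\theta_i - \epsilon)) / \delta\) with \(\delta\) the realized parameter difference.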
diff --git a/include/albatross/src/tune/finite_difference.hpp b/include/albatross/src/tune/finite_difference.hpp
index 1befe462..0d4b536d 100644
--- a/include/albatross/src/tune/finite_difference.hpp
+++ b/include/albatross/src/tune/finite_difference.hpp
@@ -90,6 +90,78 @@ compute_gradient(Function f, const ParameterStore &params, double f_val,
   }
 }
 
+template <typename Function>
+inline std::pair<double, std::vector<double>>
+compute_value_and_gradient(Function f, const ParameterStore &params,
+                           bool use_async = false) {
+
+  TunableParameters tunable_params = get_tunable_parameters(params);
+
+  // One perturbed evaluation per tunable parameter, plus one unperturbed
+  // evaluation at the final index.
+  std::vector<std::size_t> inds(tunable_params.values.size() + 1);
+  std::iota(std::begin(inds), std::end(inds), 0);
+
+  auto get_perturbed = [&](std::size_t i, double epsilon) {
+    std::vector<double> perturbed(tunable_params.values);
+    perturbed[i] = tunable_params.values[i] + epsilon;
+    return set_tunable_params_values(params, perturbed);
+  };
+
+  auto compute_single_evaluation = [&](std::size_t i) {
+    if (i == tunable_params.values.size()) {
+      // The extra final index holds the unperturbed objective value.
+      return std::make_pair(f(params), 0.);
+    }
+
+    double epsilon = 1e-6;
+    const double range =
+        tunable_params.upper_bounds[i] - tunable_params.lower_bounds[i];
+    if (std::isfinite(range)) {
+      epsilon = 1e-8 * range;
+    }
+    auto perturbed_params = get_perturbed(i, epsilon);
+
+    if (!params_are_valid(perturbed_params)) {
+      epsilon *= -1;
+      perturbed_params = get_perturbed(i, epsilon);
+    }
+
+    double f_perturbed = f(perturbed_params);
+
+    return std::make_pair(f_perturbed, epsilon);
+  };
+
+  auto evaluate_function_values = [&]() {
+    if (use_async) {
+      return albatross::async_apply(inds, compute_single_evaluation);
+    } else {
+      return albatross::apply(inds, compute_single_evaluation);
+    }
+  };
+
+  const std::vector<std::pair<double, double>> fs = evaluate_function_values();
+
+  std::vector<double> grad;
+  const double f_val = fs[tunable_params.values.size()].first;
+
+  for (std::size_t i = 0; i < tunable_params.values.size(); ++i) {
+    double grad_i = (fs[i].first - f_val) / fs[i].second;
+
+    if (tunable_params.values[i] >= tunable_params.upper_bounds[i] &&
+        grad_i < 0) {
+      grad_i = 0;
+    }
+
+    if (tunable_params.values[i] <= tunable_params.lower_bounds[i] &&
+        grad_i > 0) {
+      grad_i = 0;
+    }
+
+    grad.push_back(grad_i);
+  }
+  return std::make_pair(f_val, grad);
+}
+
 } // namespace albatross
 
 #endif /* INCLUDE_ALBATROSS_SRC_TUNE_FINITE_DIFFERENCE_HPP_ */
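A minimal usage sketch (hypothetical, not part of the patch; assumes `model` is a copyable albatross model and `dataset` a matching RegressionDataset already in scope):

// Wire compute_value_and_gradient into an optimizer-style objective.  It
// evaluates the objective once per tunable parameter plus once unperturbed,
// optionally dispatching the evaluations asynchronously.
auto objective = [&](const albatross::ParameterStore &ps) {
  auto m = model;
  m.set_params(ps);
  return -m.log_likelihood(dataset);
};

const auto [value, gradient] = albatross::compute_value_and_gradient(
    objective, model.get_params(), /* use_async = */ true);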
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 1e233226..b4d926d4 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -1,51 +1,5 @@
 add_executable(albatross_unit_tests
-  test_apply.cc
-  test_async_utils.cc
-  test_block_utils.cc
-  test_call_trace.cc
-  test_callers.cc
-  test_conditional_gaussian.cc
-  test_concatenate.cc
-  test_core_dataset.cc
-  test_core_distribution.cc
-  test_core_model.cc
-  test_covariance_function.cc
-  test_covariance_functions.cc
-  test_cross_validation.cc
-  test_csv_utils.cc
-  test_distance_metrics.cc
-  test_eigen_utils.cc
-  test_error_handling.cc
-  test_evaluate.cc
   test_gp.cc
-  test_group_by.cc
-  test_indexing.cc
-  test_linalg_utils.cc
-  test_map_utils.cc
-  test_minimum_spanning_tree.cc
-  test_model_adapter.cc
-  test_model_metrics.cc
-  test_models.cc
-  test_parameter_handling_mixin.cc
-  test_patchwork_gp.cc
-  test_prediction.cc
-  test_radial.cc
-  test_random_utils.cc
-  test_ransac.cc
-  test_samplers.cc
-  test_scaling_function.cc
-  test_serializable_ldlt.cc
-  test_serialize.cc
-  test_sparse_gp.cc
-  test_stats.cc
-  test_traits_cereal.cc
-  test_traits_core.cc
-  test_traits_details.cc
-  test_traits_covariance_functions.cc
-  test_traits_evaluation.cc
-  test_traits_indexing.cc
-  test_tune.cc
-  test_variant_utils.cc
   )
 target_include_directories(albatross_unit_tests SYSTEM PRIVATE
   "${gtest_SOURCE_DIR}"
@@ -66,7 +20,8 @@ target_link_libraries(albatross_unit_tests
   nlopt
   z
   )
-set_target_properties(albatross_unit_tests PROPERTIES CXX_STANDARD 14 CXX_EXTENSIONS OFF)
+
+set_target_properties(albatross_unit_tests PROPERTIES CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON CXX_EXTENSIONS OFF)
 
 add_custom_target(run_albatross_unit_tests ALL
   COMMAND albatross_unit_tests
diff --git a/tests/test_gp.cc b/tests/test_gp.cc
index 9d20115e..ebd39d01 100644
--- a/tests/test_gp.cc
+++ b/tests/test_gp.cc
@@ -12,474 +12,543 @@
 
 #include
 
+#include "test_covariance_utils.h"
 #include "test_models.h"
 
-namespace albatross {
-
-/*
- * In what follows we create a small problem which contains unobservable
- * components. The model consists of a constant term for each nearest
- * integer, and another constant term for all values. Ie, measurements
- * from the same integer wide bin will share a bias and all measurements
- * will share another bias. The result is that the model can't
- * differentiate between the global bias and the average of all integer
- * biases (if you add 1 to the global bias and subtract 1 from all interval
- * biases you end up with the same measurements). This is handled
- * properly by the direct Gaussian process, but if you first make a
- * prediction of each of the biases, then try to use that prediction to
- * make a new model you end up dealing with a low rank system of
- * equations which if not handled properly can lead to very large
- * errors. This simply makes sure those errors are properly dealt with.
- */ - -enum InducingFeatureType { ConstantEverywhereType, ConstantPerIntervalType }; - -struct ConstantEverywhereFeature {}; - -struct ConstantPerIntervalFeature { - ConstantPerIntervalFeature() : location(){}; - explicit ConstantPerIntervalFeature(const long &location_) - : location(location_){}; - long location; -}; +#include -using InducingFeature = - variant; +#include -std::vector -create_inducing_points(const std::vector &features) { - - std::vector inducing_points; - double min = *std::min_element(features.begin(), features.end()); - double max = *std::max_element(features.begin(), features.end()); - - ConstantEverywhereFeature everywhere; - inducing_points.emplace_back(everywhere); - - long interval = lround(min); - while (interval <= lround(max)) { - inducing_points.emplace_back(ConstantPerIntervalFeature(interval)); - interval += 1; - } - - return inducing_points; -} +#include +using namespace autodiff; +// +namespace albatross { -class ConstantEverywhere : public CovarianceFunction { +class DoubleXX : public CovarianceFunction { public: - ConstantEverywhere(){}; - ~ConstantEverywhere(){}; - - double variance = 10.; - - /* - * This will create a covariance matrix that looks like, - * sigma_mean^2 * ones(m, n) - * which is saying all observations are perfectly correlated, - * so you can move one if you move the rest the same amount. - */ - double _call_impl(const double &x, const double &y) const { return variance; } - - double _call_impl(const ConstantEverywhereFeature &x, const double &y) const { - return variance; - } - - double _call_impl(const ConstantEverywhereFeature &x, - const ConstantEverywhereFeature &y) const { - return variance; - } + double _call_impl(const X &, const X &) const { return 1.; }; }; -class ConstantPerInterval : public CovarianceFunction { +class AutoXX : public CovarianceFunction { public: - ConstantPerInterval(){}; - ~ConstantPerInterval(){}; - - double variance = 5.; + AutoXX() : sigma(0.1){}; - /* - * This will create a covariance matrix that looks like, - * sigma_mean^2 * ones(m, n) - * which is saying all observations are perfectly correlated, - * so you can move one if you move the rest the same amount. 
- */ - double _call_impl(const double &x, const double &y) const { - if (lround(x) == lround(y)) { - return variance; - } else { - return 0.; - } - } + ALBATROSS_DECLARE_PARAMS(sigma); - double _call_impl(const ConstantPerIntervalFeature &x, - const double &y) const { - if (x.location == lround(y)) { - return variance; - } else { - return 0.; - } - } + var _call_impl(const X &, const X &) const { + return sigma.value * sigma.value; + }; - double _call_impl(const ConstantPerIntervalFeature &x, - const ConstantPerIntervalFeature &y) const { - if (x.location == y.location) { - return variance; - } else { - return 0.; - } + ParameterConstReferences get_param_references() const override { + const ParameterConstReferences output = { + {"sigma", sigma}, + }; + return output; } }; -RegressionDataset test_unobservable_dataset() { - Eigen::Index k = 10; - Eigen::VectorXd mean = 3.14159 * Eigen::VectorXd::Ones(k); - Eigen::VectorXd variance = 0.1 * Eigen::VectorXd::Ones(k); - MarginalDistribution targets(mean, variance.asDiagonal()); - - std::vector train_features; - for (Eigen::Index i = 0; i < k; ++i) { - train_features.push_back(static_cast(i) * 0.3); - } - - RegressionDataset dataset(train_features, targets); - return dataset; -} - -auto test_unobservable_model() { - ConstantEverywhere constant; - ConstantPerInterval per_interval; - // First we fit a model directly to the training data and use - // that to get a prediction of the inducing points. - auto model = gp_from_covariance(constant + per_interval, "unobservable"); - return model; -} - -TEST(test_gp, test_update_model_trait) { - const auto dataset = test_unobservable_dataset(); - - auto model = test_unobservable_model(); - auto fit_model = model.fit(dataset); - - std::vector int_features; - for (int i = 0; i < static_cast(dataset.features.size()); ++i) { - int_features.push_back(i); - } - RegressionDataset int_dataset(int_features, dataset.targets); - - auto updated_fit = fit_model.update(int_dataset); - - using UpdatedFitType = decltype(updated_fit); - using ExpectedType = - typename updated_fit_model_type::type; - - EXPECT_TRUE(bool(std::is_same::value)); -} - -TEST(test_gp, test_update_model_same_types) { - const auto dataset = test_unobservable_dataset(); - - std::vector train_inds = {0, 1, 3, 4, 6, 7, 8, 9}; - std::vector test_inds = {2, 5}; - - const auto train = albatross::subset(dataset, train_inds); - const auto test = albatross::subset(dataset, test_inds); - - std::vector first_inds = {0, 1, 2, 3, 5, 7}; - std::vector second_inds = {4, 6}; - const auto first = albatross::subset(train, first_inds); - const auto second = albatross::subset(train, second_inds); - - const auto model = test_unobservable_model(); - - const auto full_model = model.fit(train); - const auto full_pred = full_model.predict(test.features).joint(); - - const auto first_model = model.fit(first); - const auto split_model = update(first_model, second); - const auto split_pred = split_model.predict(test.features).joint(); - - // Make sure the fit feature type is a double - const auto split_fit = split_model.get_fit(); - bool is_double = - std::is_same::value; - EXPECT_TRUE(is_double); - - // Make sure a partial fit, followed by update is the same as a full fit - EXPECT_TRUE(split_pred.mean.isApprox(full_pred.mean)); - EXPECT_LE((split_pred.covariance - full_pred.covariance).norm(), 1e-6); - - // Make sure a partial fit is not the same as a full fit - const auto first_pred = first_model.predict(test.features).joint(); - 
EXPECT_FALSE(split_pred.mean.isApprox(first_pred.mean)); - EXPECT_GE((split_pred.covariance - first_pred.covariance).norm(), 1e-6); -} - -TEST(test_gp, test_update_model_different_types) { - const auto dataset = test_unobservable_dataset(); - - const auto model = test_unobservable_model(); - const auto fit_model = model.fit(dataset); - - const auto inducing_points = create_inducing_points(dataset.features); - MarginalDistribution inducing_prediction = - fit_model.predict(inducing_points).marginal(); - - inducing_prediction.covariance = - (1e-4 * Eigen::VectorXd::Ones(inducing_prediction.mean.size())) - .asDiagonal(); - - RegressionDataset inducing_dataset(inducing_points, - inducing_prediction); - const auto new_fit_model = update(fit_model, inducing_dataset); - - ConstantPerInterval cov; - - // Make sure the new fit with constrained inducing points reproduces - // the prediction of the constraint - const auto new_pred = new_fit_model.predict(inducing_points).joint(); - EXPECT_LE((new_pred.mean - inducing_prediction.mean).norm(), 0.01); - // Without changing the prediction of the training features much - const auto train_pred = new_fit_model.predict(dataset.features).marginal(); - EXPECT_LE((train_pred.mean - dataset.targets.mean).norm(), 0.1); - - MarginalDistribution perturbed_inducing_targets(inducing_prediction); - perturbed_inducing_targets.mean += - Eigen::VectorXd::Ones(perturbed_inducing_targets.mean.size()); - - RegressionDataset perturbed_dataset( - inducing_points, perturbed_inducing_targets); - const auto new_perturbed_model = update(fit_model, perturbed_dataset); - const auto perturbed_inducing_pred = - new_perturbed_model.predict(inducing_points).marginal(); - const auto perturbed_train_pred = - new_perturbed_model.predict(dataset.features).marginal(); - - // Make sure constraining to a different value changes the results. - EXPECT_GE((perturbed_inducing_pred.mean - new_pred.mean).norm(), 0.5); - EXPECT_GE((perturbed_train_pred.mean - train_pred.mean).norm(), 0.5); -} - -TEST(test_gp, test_model_from_different_datasets) { - const auto unobservable_dataset = test_unobservable_dataset(); +var f(var x) { return 1 + x + x * x + 1 / x + log(x); } - const auto model = test_unobservable_model(); +TEST(test_gp, test_autodiff_cov) { + DoubleXX cov; + X x; - const auto fit_model = model.fit(unobservable_dataset); - const auto inducing_points = - create_inducing_points(unobservable_dataset.features); - MarginalDistribution inducing_prediction = - fit_model.predict(inducing_points).marginal(); + const auto cxx = cov(x, x); + std::cout << cxx << std::endl; - // Then we create a new model in which the inducing points are - // constrained to be the same as the previous prediction. 
- inducing_prediction.covariance = - 1e-12 * Eigen::VectorXd::Ones(inducing_prediction.size()).asDiagonal(); - RegressionDataset dataset(unobservable_dataset); - RegressionDataset inducing_dataset(inducing_points, - inducing_prediction); - const auto fit_again = model.fit(dataset, inducing_dataset); + AutoXX auto_cov; - // Then we can make sure that the subsequent constrained predictions are - // consistent - const auto pred = fit_again.predict(inducing_points).joint(); - EXPECT_TRUE(inducing_prediction.mean.isApprox(pred.mean)); + const auto auto_cxx = auto_cov(x, x); - const auto train_pred = - fit_model.predict(unobservable_dataset.features).joint(); - const auto train_pred_again = - fit_again.predict(unobservable_dataset.features).joint(); - EXPECT_TRUE(train_pred.mean.isApprox(train_pred_again.mean)); - - // Now constrain the inducing points to be zero and make sure that - // messes things up. - inducing_dataset.targets.mean.fill(0.); - const auto fit_zero = model.fit(dataset, inducing_dataset); - const auto pred_zero = fit_zero.predict(inducing_points).joint(); - - EXPECT_FALSE(inducing_dataset.targets.mean.isApprox(pred.mean)); - EXPECT_LT((inducing_dataset.targets.mean - pred_zero.mean).norm(), 1e-6); -} - -TEST(test_gp, test_model_from_prediction_low_rank) { - Eigen::Index k = 10; - Eigen::VectorXd mean = 3.14159 * Eigen::VectorXd::Ones(k); - Eigen::VectorXd variance = 0.1 * Eigen::VectorXd::Ones(k); - MarginalDistribution targets(mean, variance.asDiagonal()); - - std::vector train_features; - for (Eigen::Index i = 0; i < k; ++i) { - train_features.push_back(static_cast(i) * 0.3); + for (const auto &pair : auto_cov.get_param_references()) { + const auto [dc_ds] = derivatives(auto_cxx, wrt(pair.second.value)); + std::cout << "dc_d" << pair.first << " : " << dc_ds << std::endl; } - ConstantEverywhere constant; - ConstantPerInterval per_interval; - - auto model = gp_from_covariance(constant + per_interval, "unobservable"); - const auto fit_model = model.fit(train_features, targets); - - const auto inducing_points = create_inducing_points(train_features); - - auto joint_prediction = fit_model.predict(inducing_points).joint(); - - std::vector perturbed_features = {50.01, 51.01, 52.01}; - - const auto model_pred = fit_model.predict(perturbed_features).joint(); - - auto joint_prediction_from_prediction = - model.fit_from_prediction(inducing_points, joint_prediction) - .predict(perturbed_features) - .joint(); - - EXPECT_TRUE( - joint_prediction_from_prediction.mean.isApprox(model_pred.mean, 1e-12)); - EXPECT_TRUE(joint_prediction_from_prediction.covariance.isApprox( - model_pred.covariance, 1e-8)); + const double foo = static_cast(auto_cxx); + std::cout << foo << std::endl; + std::cout << auto_cxx << std::endl; } -TEST(test_gp, test_unobservablemodel_with_sum_constraint) { - - const auto dataset = test_unobservable_dataset(); - const auto model = test_unobservable_model(); - - const auto inducing_points = create_inducing_points(dataset.features); - - std::vector interval_features; - for (const auto &f : inducing_points) { - if (f.is()) { - interval_features.emplace_back(f.get()); - } - } +TEST(test_gp, test_autodiff) { + var x = 2.0; // the input variable x + var u = f(x); // the output variable u - LinearCombination sums(interval_features); + auto [ux] = + derivatives(u, wrt(x)); // evaluate the derivative of u with respect to x - Eigen::VectorXd mean = Eigen::VectorXd::Zero(1); - Eigen::VectorXd variance = 1e-5 * Eigen::VectorXd::Ones(1); - MarginalDistribution targets(mean, 
variance.asDiagonal()); - RegressionDataset> sum_dataset( - {sums}, targets); - - const auto both = concatenate_datasets(dataset, sum_dataset); - - const auto fit_model = model.fit(both); - - const auto pred = fit_model.predict(interval_features).joint(); - - const auto ones = Eigen::VectorXd::Ones(pred.mean.size()); - EXPECT_NEAR(ones.dot(pred.mean), 0., 1e-6); - EXPECT_NEAR(ones.dot(pred.covariance * ones), 0., 1e-5); + std::cout << "u = " << u + << std::endl; // print the evaluated output variable u + std::cout << "ux = " << ux << std::endl; // print the evaluated derivative ux } -TEST(test_gp, test_unobservablemodel_with_diff_constraint) { - - const auto dataset = test_unobservable_dataset(); - const auto model = test_unobservable_model(); - - const auto inducing_points = create_inducing_points(dataset.features); - - std::vector interval_features; - for (const auto &f : inducing_points) { - if (f.is()) { - interval_features.emplace_back(f.get()); - } - } - - std::vector diff_features = { - interval_features[0], interval_features[1]}; - - Eigen::Vector2d diff_coefs; - diff_coefs << 1, -1; - - LinearCombination difference(diff_features, - diff_coefs); - - Eigen::VectorXd mean = Eigen::VectorXd::Zero(1); - Eigen::VectorXd variance = 1e-5 * Eigen::VectorXd::Ones(1); - MarginalDistribution targets(mean, variance.asDiagonal()); - RegressionDataset> diff_dataset( - {difference}, targets); - - const auto both = concatenate_datasets(dataset, diff_dataset); - - const auto fit_model = model.fit(both); - - const auto pred = fit_model.predict(diff_features).joint(); - - EXPECT_NEAR(diff_coefs.dot(pred.mean), 0., 1e-6); - EXPECT_NEAR(diff_coefs.dot(pred.covariance * diff_coefs), 0., 1e-5); -} - -TEST(test_gp, test_nonzero_mean) { - - MakeGaussianProcessWithMean gp_with_mean_case; - - const auto dataset = gp_with_mean_case.get_dataset(); - const auto model = gp_with_mean_case.get_model(); - - const auto fit_model = model.fit(dataset); - - const auto covariance_function = model.get_covariance(); - const auto measurement_features = as_measurements(dataset.features); - const auto train_cov = covariance_function(measurement_features); - const auto train_ldlt = train_cov.ldlt(); - const Eigen::VectorXd information = train_ldlt.solve(dataset.targets.mean); - - const std::vector prediction_features = {-20., 0.01}; - - const auto cross = covariance_function(dataset.features, prediction_features); - const auto prior = covariance_function(prediction_features); - const auto pred_without_mean = - gp_joint_prediction(cross, prior, information, train_ldlt); - - const auto actual = fit_model.predict(prediction_features).joint(); - - // The prediction without using the mean should be very different. 
- EXPECT_GT((pred_without_mean.mean - actual.mean).norm(), 1.); -} - -TEST(test_gp, test_get_prior) { - - MakeGaussianProcessWithMean gp_with_mean_case; - - const auto dataset = gp_with_mean_case.get_dataset(); - const auto model = gp_with_mean_case.get_model(); - - const auto prior = model.prior(dataset.features); - - const auto cov = model.get_covariance(); - EXPECT_EQ(prior.covariance, cov(as_measurements(dataset.features))); - - const auto mean = model.get_mean()(as_measurements(dataset.features)); - EXPECT_EQ(prior.mean, mean); -} - -TEST(test_gp, test_predict_with_complicated_feature) { - - MakeGaussianProcessWithMean gp_with_mean_case; - - const auto dataset = gp_with_mean_case.get_dataset(); - const auto model = gp_with_mean_case.get_model(); - - using ComplexType = - variant>>; - - const auto prior = model.prior(dataset.features); - - std::vector> vec = {1.0, 2.0}; - LinearCombination> sum(vec); - std::vector features = {1.0, 2.0, sum}; - - const auto pred = model.fit(dataset).predict(features).joint(); - - Eigen::Vector3d for_sum; - for_sum << 1., 1., 0; - - const double expected_mean = for_sum.transpose() * pred.mean; - const double expected_variance = - for_sum.transpose() * pred.covariance * for_sum; - - EXPECT_NEAR(expected_mean, pred.mean[2], 1e-6); - EXPECT_NEAR(expected_variance, pred.covariance(2, 2), 1e-6); -} +///* +// * In what follows we create a small problem which contains unobservable +// * components. The model consists of a constant term for each nearest +// * integer, and another constant term for all values. Ie, measurements +// * from the same integer wide bin will share a bias and all measurements +// * will share another bias. The result is that the model can't +// * differentiate between the global bias and the average of all integer +// * biases (if you add 1 to the global bias and subtract 1 from all interval +// * biases you end up with the same measurements). This is handled +// * properly by the direct Gaussian process, but if you first make a +// * prediction of each of the biases, then try to use that prediction to +// * make a new model you end up dealing with a low rank system of +// * equations which if not handled properly can lead to very large +// * errors. This simply makes sure those errors are properly dealt with. 
+// */ +// +// enum InducingFeatureType { ConstantEverywhereType, ConstantPerIntervalType }; +// +// struct ConstantEverywhereFeature {}; +// +// struct ConstantPerIntervalFeature { +// ConstantPerIntervalFeature() : location(){}; +// explicit ConstantPerIntervalFeature(const long &location_) +// : location(location_){}; +// long location; +//}; +// +// using InducingFeature = +// variant; +// +// std::vector +// create_inducing_points(const std::vector &features) { +// +// std::vector inducing_points; +// double min = *std::min_element(features.begin(), features.end()); +// double max = *std::max_element(features.begin(), features.end()); +// +// ConstantEverywhereFeature everywhere; +// inducing_points.emplace_back(everywhere); +// +// long interval = lround(min); +// while (interval <= lround(max)) { +// inducing_points.emplace_back(ConstantPerIntervalFeature(interval)); +// interval += 1; +// } +// +// return inducing_points; +//} +// +// class ConstantEverywhere : public CovarianceFunction { +// public: +// ConstantEverywhere(){}; +// ~ConstantEverywhere(){}; +// +// double variance = 10.; +// +// /* +// * This will create a covariance matrix that looks like, +// * sigma_mean^2 * ones(m, n) +// * which is saying all observations are perfectly correlated, +// * so you can move one if you move the rest the same amount. +// */ +// double _call_impl(const double &x, const double &y) const { return variance; +// } +// +// double _call_impl(const ConstantEverywhereFeature &x, const double &y) const +// { +// return variance; +// } +// +// double _call_impl(const ConstantEverywhereFeature &x, +// const ConstantEverywhereFeature &y) const { +// return variance; +// } +//}; +// +// class ConstantPerInterval : public CovarianceFunction { +// public: +// ConstantPerInterval(){}; +// ~ConstantPerInterval(){}; +// +// double variance = 5.; +// +// /* +// * This will create a covariance matrix that looks like, +// * sigma_mean^2 * ones(m, n) +// * which is saying all observations are perfectly correlated, +// * so you can move one if you move the rest the same amount. +// */ +// double _call_impl(const double &x, const double &y) const { +// if (lround(x) == lround(y)) { +// return variance; +// } else { +// return 0.; +// } +// } +// +// double _call_impl(const ConstantPerIntervalFeature &x, +// const double &y) const { +// if (x.location == lround(y)) { +// return variance; +// } else { +// return 0.; +// } +// } +// +// double _call_impl(const ConstantPerIntervalFeature &x, +// const ConstantPerIntervalFeature &y) const { +// if (x.location == y.location) { +// return variance; +// } else { +// return 0.; +// } +// } +//}; +// +// RegressionDataset test_unobservable_dataset() { +// Eigen::Index k = 10; +// Eigen::VectorXd mean = 3.14159 * Eigen::VectorXd::Ones(k); +// Eigen::VectorXd variance = 0.1 * Eigen::VectorXd::Ones(k); +// MarginalDistribution targets(mean, variance.asDiagonal()); +// +// std::vector train_features; +// for (Eigen::Index i = 0; i < k; ++i) { +// train_features.push_back(static_cast(i) * 0.3); +// } +// +// RegressionDataset dataset(train_features, targets); +// return dataset; +//} +// +// auto test_unobservable_model() { +// ConstantEverywhere constant; +// ConstantPerInterval per_interval; +// // First we fit a model directly to the training data and use +// // that to get a prediction of the inducing points. 
+// auto model = gp_from_covariance(constant + per_interval, "unobservable"); +// return model; +//} +// +// TEST(test_gp, test_update_model_trait) { +// const auto dataset = test_unobservable_dataset(); +// +// auto model = test_unobservable_model(); +// auto fit_model = model.fit(dataset); +// +// std::vector int_features; +// for (int i = 0; i < static_cast(dataset.features.size()); ++i) { +// int_features.push_back(i); +// } +// RegressionDataset int_dataset(int_features, dataset.targets); +// +// auto updated_fit = fit_model.update(int_dataset); +// +// using UpdatedFitType = decltype(updated_fit); +// using ExpectedType = +// typename updated_fit_model_type::type; +// +// EXPECT_TRUE(bool(std::is_same::value)); +//} +// +// TEST(test_gp, test_update_model_same_types) { +// const auto dataset = test_unobservable_dataset(); +// +// std::vector train_inds = {0, 1, 3, 4, 6, 7, 8, 9}; +// std::vector test_inds = {2, 5}; +// +// const auto train = albatross::subset(dataset, train_inds); +// const auto test = albatross::subset(dataset, test_inds); +// +// std::vector first_inds = {0, 1, 2, 3, 5, 7}; +// std::vector second_inds = {4, 6}; +// const auto first = albatross::subset(train, first_inds); +// const auto second = albatross::subset(train, second_inds); +// +// const auto model = test_unobservable_model(); +// +// const auto full_model = model.fit(train); +// const auto full_pred = full_model.predict(test.features).joint(); +// +// const auto first_model = model.fit(first); +// const auto split_model = update(first_model, second); +// const auto split_pred = split_model.predict(test.features).joint(); +// +// // Make sure the fit feature type is a double +// const auto split_fit = split_model.get_fit(); +// bool is_double = +// std::is_same::value; +// EXPECT_TRUE(is_double); +// +// // Make sure a partial fit, followed by update is the same as a full fit +// EXPECT_TRUE(split_pred.mean.isApprox(full_pred.mean)); +// EXPECT_LE((split_pred.covariance - full_pred.covariance).norm(), 1e-6); +// +// // Make sure a partial fit is not the same as a full fit +// const auto first_pred = first_model.predict(test.features).joint(); +// EXPECT_FALSE(split_pred.mean.isApprox(first_pred.mean)); +// EXPECT_GE((split_pred.covariance - first_pred.covariance).norm(), 1e-6); +//} +// +// TEST(test_gp, test_update_model_different_types) { +// const auto dataset = test_unobservable_dataset(); +// +// const auto model = test_unobservable_model(); +// const auto fit_model = model.fit(dataset); +// +// const auto inducing_points = create_inducing_points(dataset.features); +// MarginalDistribution inducing_prediction = +// fit_model.predict(inducing_points).marginal(); +// +// inducing_prediction.covariance = +// (1e-4 * Eigen::VectorXd::Ones(inducing_prediction.mean.size())) +// .asDiagonal(); +// +// RegressionDataset inducing_dataset(inducing_points, +// inducing_prediction); +// const auto new_fit_model = update(fit_model, inducing_dataset); +// +// ConstantPerInterval cov; +// +// // Make sure the new fit with constrained inducing points reproduces +// // the prediction of the constraint +// const auto new_pred = new_fit_model.predict(inducing_points).joint(); +// EXPECT_LE((new_pred.mean - inducing_prediction.mean).norm(), 0.01); +// // Without changing the prediction of the training features much +// const auto train_pred = new_fit_model.predict(dataset.features).marginal(); +// EXPECT_LE((train_pred.mean - dataset.targets.mean).norm(), 0.1); +// +// MarginalDistribution 
perturbed_inducing_targets(inducing_prediction); +// perturbed_inducing_targets.mean += +// Eigen::VectorXd::Random(perturbed_inducing_targets.mean.size()); +// +// RegressionDataset perturbed_dataset( +// inducing_points, perturbed_inducing_targets); +// const auto new_perturbed_model = update(fit_model, perturbed_dataset); +// const auto perturbed_inducing_pred = +// new_perturbed_model.predict(inducing_points).marginal(); +// const auto perturbed_train_pred = +// new_perturbed_model.predict(dataset.features).marginal(); +// +// // Make sure constraining to a different value changes the results. +// EXPECT_GE((perturbed_inducing_pred.mean - new_pred.mean).norm(), 0.5); +// EXPECT_GE((perturbed_train_pred.mean - train_pred.mean).norm(), 0.5); +//} +// +// TEST(test_gp, test_model_from_different_datasets) { +// const auto unobservable_dataset = test_unobservable_dataset(); +// +// const auto model = test_unobservable_model(); +// +// const auto fit_model = model.fit(unobservable_dataset); +// const auto inducing_points = +// create_inducing_points(unobservable_dataset.features); +// MarginalDistribution inducing_prediction = +// fit_model.predict(inducing_points).marginal(); +// +// // Then we create a new model in which the inducing points are +// // constrained to be the same as the previous prediction. +// inducing_prediction.covariance = +// 1e-12 * Eigen::VectorXd::Ones(inducing_prediction.size()).asDiagonal(); +// RegressionDataset dataset(unobservable_dataset); +// RegressionDataset inducing_dataset(inducing_points, +// inducing_prediction); +// const auto fit_again = model.fit(dataset, inducing_dataset); +// +// // Then we can make sure that the subsequent constrained predictions are +// // consistent +// const auto pred = fit_again.predict(inducing_points).joint(); +// EXPECT_TRUE(inducing_prediction.mean.isApprox(pred.mean)); +// +// const auto train_pred = +// fit_model.predict(unobservable_dataset.features).joint(); +// const auto train_pred_again = +// fit_again.predict(unobservable_dataset.features).joint(); +// EXPECT_TRUE(train_pred.mean.isApprox(train_pred_again.mean)); +// +// // Now constrain the inducing points to be zero and make sure that +// // messes things up. 
+// inducing_dataset.targets.mean.fill(0.); +// const auto fit_zero = model.fit(dataset, inducing_dataset); +// const auto pred_zero = fit_zero.predict(inducing_points).joint(); +// +// EXPECT_FALSE(inducing_dataset.targets.mean.isApprox(pred.mean)); +// EXPECT_LT((inducing_dataset.targets.mean - pred_zero.mean).norm(), 1e-6); +//} +// +// TEST(test_gp, test_model_from_prediction_low_rank) { +// Eigen::Index k = 10; +// Eigen::VectorXd mean = 3.14159 * Eigen::VectorXd::Ones(k); +// Eigen::VectorXd variance = 0.1 * Eigen::VectorXd::Ones(k); +// MarginalDistribution targets(mean, variance.asDiagonal()); +// +// std::vector train_features; +// for (Eigen::Index i = 0; i < k; ++i) { +// train_features.push_back(static_cast(i) * 0.3); +// } +// +// ConstantEverywhere constant; +// ConstantPerInterval per_interval; +// +// auto model = gp_from_covariance(constant + per_interval, "unobservable"); +// const auto fit_model = model.fit(train_features, targets); +// +// const auto inducing_points = create_inducing_points(train_features); +// +// auto joint_prediction = fit_model.predict(inducing_points).joint(); +// +// std::vector perturbed_features = {50.01, 51.01, 52.01}; +// +// const auto model_pred = fit_model.predict(perturbed_features).joint(); +// +// auto joint_prediction_from_prediction = +// model.fit_from_prediction(inducing_points, joint_prediction) +// .predict(perturbed_features) +// .joint(); +// +// EXPECT_TRUE( +// joint_prediction_from_prediction.mean.isApprox(model_pred.mean, 1e-12)); +// EXPECT_TRUE(joint_prediction_from_prediction.covariance.isApprox( +// model_pred.covariance, 1e-8)); +//} +// +// TEST(test_gp, test_unobservablemodel_with_sum_constraint) { +// +// const auto dataset = test_unobservable_dataset(); +// const auto model = test_unobservable_model(); +// +// const auto inducing_points = create_inducing_points(dataset.features); +// +// std::vector interval_features; +// for (const auto &f : inducing_points) { +// if (f.is()) { +// interval_features.emplace_back(f.get()); +// } +// } +// +// LinearCombination sums(interval_features); +// +// Eigen::VectorXd mean = Eigen::VectorXd::Zero(1); +// Eigen::VectorXd variance = 1e-5 * Eigen::VectorXd::Ones(1); +// MarginalDistribution targets(mean, variance.asDiagonal()); +// RegressionDataset> +// sum_dataset( +// {sums}, targets); +// +// const auto both = concatenate_datasets(dataset, sum_dataset); +// +// const auto fit_model = model.fit(both); +// +// const auto pred = fit_model.predict(interval_features).joint(); +// +// const auto ones = Eigen::VectorXd::Ones(pred.mean.size()); +// EXPECT_NEAR(ones.dot(pred.mean), 0., 1e-6); +// EXPECT_NEAR(ones.dot(pred.covariance * ones), 0., 1e-5); +//} +// +// TEST(test_gp, test_unobservablemodel_with_diff_constraint) { +// +// const auto dataset = test_unobservable_dataset(); +// const auto model = test_unobservable_model(); +// +// const auto inducing_points = create_inducing_points(dataset.features); +// +// std::vector interval_features; +// for (const auto &f : inducing_points) { +// if (f.is()) { +// interval_features.emplace_back(f.get()); +// } +// } +// +// std::vector diff_features = { +// interval_features[0], interval_features[1]}; +// +// Eigen::Vector2d diff_coefs; +// diff_coefs << 1, -1; +// +// LinearCombination difference(diff_features, +// diff_coefs); +// +// Eigen::VectorXd mean = Eigen::VectorXd::Zero(1); +// Eigen::VectorXd variance = 1e-5 * Eigen::VectorXd::Ones(1); +// MarginalDistribution targets(mean, variance.asDiagonal()); +// RegressionDataset> +// 
diff_dataset( +// {difference}, targets); +// +// const auto both = concatenate_datasets(dataset, diff_dataset); +// +// const auto fit_model = model.fit(both); +// +// const auto pred = fit_model.predict(diff_features).joint(); +// +// EXPECT_NEAR(diff_coefs.dot(pred.mean), 0., 1e-6); +// EXPECT_NEAR(diff_coefs.dot(pred.covariance * diff_coefs), 0., 1e-5); +//} +// +// TEST(test_gp, test_nonzero_mean) { +// +// MakeGaussianProcessWithMean gp_with_mean_case; +// +// const auto dataset = gp_with_mean_case.get_dataset(); +// const auto model = gp_with_mean_case.get_model(); +// +// const auto fit_model = model.fit(dataset); +// +// const auto covariance_function = model.get_covariance(); +// const auto measurement_features = as_measurements(dataset.features); +// const auto train_cov = covariance_function(measurement_features); +// const auto train_ldlt = train_cov.ldlt(); +// const Eigen::VectorXd information = train_ldlt.solve(dataset.targets.mean); +// +// const std::vector prediction_features = {-20., 0.01}; +// +// const auto cross = covariance_function(dataset.features, +// prediction_features); const auto prior = +// covariance_function(prediction_features); const auto pred_without_mean = +// gp_joint_prediction(cross, prior, information, train_ldlt); +// +// const auto actual = fit_model.predict(prediction_features).joint(); +// +// // The prediction without using the mean should be very different. +// EXPECT_GT((pred_without_mean.mean - actual.mean).norm(), 1.); +//} +// +// TEST(test_gp, test_get_prior) { +// +// MakeGaussianProcessWithMean gp_with_mean_case; +// +// const auto dataset = gp_with_mean_case.get_dataset(); +// const auto model = gp_with_mean_case.get_model(); +// +// const auto prior = model.prior(dataset.features); +// +// const auto cov = model.get_covariance(); +// EXPECT_EQ(prior.covariance, cov(as_measurements(dataset.features))); +// +// const auto mean = model.get_mean()(as_measurements(dataset.features)); +// EXPECT_EQ(prior.mean, mean); +//} +// +// TEST(test_gp, test_gradient) { +// +// MakeGaussianProcess gp_test_case; +// +// const auto dataset = gp_test_case.get_dataset(); +// const auto model = gp_test_case.get_model(); +// +// auto eval_likelihood = [&](const auto &ps) { +// auto perturbed_model = gp_test_case.get_model(); +// perturbed_model.set_params(ps); +// return perturbed_model.log_likelihood(dataset); +// }; +// +// const double f_val = eval_likelihood(model.get_params()); +// const auto grad = +// compute_gradient(eval_likelihood, model.get_params(), f_val); +// +// std::cout << "grad" << std::endl; +// for (const double &x : grad) { +// std::cout << " " << x; +// } +// std::cout << std::endl; +// +// Eigen::VectorXd gradient; +// model.log_likelihood(dataset, &gradient); +// +// std::cout << gradient.transpose() << std::endl; +//} } // namespace albatross diff --git a/tests/test_utils.h b/tests/test_utils.h index 59c460cd..b4820228 100644 --- a/tests/test_utils.h +++ b/tests/test_utils.h @@ -76,7 +76,7 @@ static inline void expect_parameter_vector_equal(const std::vector &x, const std::vector &y) { for (std::size_t i = 0; i < x.size(); i++) { - EXPECT_DOUBLE_EQ(x[i], y[i]); + EXPECT_DOUBLE_EQ(static_cast(x[i]), static_cast(y[i])); } EXPECT_EQ(x.size(), y.size()); } diff --git a/third_party/autodiff b/third_party/autodiff new file mode 160000 index 00000000..a43a79fa --- /dev/null +++ b/third_party/autodiff @@ -0,0 +1 @@ +Subproject commit a43a79fa5af53532a9a75067643c74ed726b3b8c