From 31477b9e71874570a460dd305f9687a72ec9904e Mon Sep 17 00:00:00 2001 From: kleeman Date: Mon, 4 Mar 2019 16:09:17 -0800 Subject: [PATCH 1/3] Adds the core functionality of albatross using CRTP. Includes: - ModelBase (formerly RegressionModel) - Prediction - FitModel - An example implementation of LeastSquares. --- albatross/core/declarations.h | 77 +++++--- albatross/core/fit_model.h | 47 +++++ albatross/core/model.h | 318 ++++++++----------------------- albatross/core/prediction.h | 144 ++++++++++++++ albatross/models/least_squares.h | 133 ++++++++----- 5 files changed, 406 insertions(+), 313 deletions(-) create mode 100644 albatross/core/fit_model.h create mode 100644 albatross/core/prediction.h diff --git a/albatross/core/declarations.h b/albatross/core/declarations.h index a7e55c11..ba220be0 100644 --- a/albatross/core/declarations.h +++ b/albatross/core/declarations.h @@ -13,13 +13,6 @@ #ifndef ALBATROSS_CORE_DECLARATIONS_H #define ALBATROSS_CORE_DECLARATIONS_H -#include -#include -#include -#include - -#include - namespace Eigen { template @@ -31,15 +24,32 @@ namespace albatross { /* * Model */ -template class RegressionModel; +template class ModelBase; + template struct RegressionDataset; -template struct RegressionFold; -template -class SerializableRegressionModel; -template -using RegressionModelCreator = - std::function>()>; +template struct PredictTypeIdentity; + +template class Prediction; + +template class FitModel; + +template class Fit {}; + +/* + * Parameter Handling + */ +class Prior; +struct Parameter; + +using ParameterKey = std::string; +// If you change the way these are stored, be sure there's +// a corresponding cereal type included or you'll get some +// really impressive compilation errors. +using ParameterPrior = std::shared_ptr; +using ParameterValue = double; + +using ParameterStore = std::map; /* * Distributions @@ -51,27 +61,40 @@ using DiagonalMatrixXd = Eigen::SerializableDiagonalMatrix; using MarginalDistribution = Distribution; +/* + * Models + */ +template +class GaussianProcessBase; + +template +class GaussianProcessRegression; + +struct NullLeastSquaresImpl {}; + +template +class LeastSquares; + + + /* * Cross Validation */ -using FoldIndices = std::vector; -using FoldName = std::string; -using FoldIndexer = std::map; -template -using IndexerFunction = + using FoldIndices = std::vector; + using FoldName = std::string; + using FoldIndexer = std::map; + + template + using IndexerFunction = std::function &)>; + template + class CrossValidation; + /* * RANSAC */ -template class GenericRansac; -template -std::unique_ptr> -make_generic_ransac_model(ModelType *model, double inlier_threshold, - std::size_t min_inliers, - std::size_t random_sample_size, - std::size_t max_iterations, - const IndexerFunction &indexer_function); + template class Ransac; } #endif diff --git a/albatross/core/fit_model.h b/albatross/core/fit_model.h new file mode 100644 index 00000000..51bebbe9 --- /dev/null +++ b/albatross/core/fit_model.h @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2018 Swift Navigation Inc. + * Contact: Swift Navigation + * + * This source is subject to the license found in the file 'LICENSE' which must + * be distributed together with this source. All other rights reserved. + * + * THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, + * EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE. 
+ */ + +#ifndef ALBATROSS_CORE_FIT_H +#define ALBATROSS_CORE_FIT_H + +namespace albatross { + +template +class FitModel { + + template + friend class Prediction; + + public: + + static_assert(std::is_move_constructible::value, + "Fit type must be move constructible to avoid unexpected copying."); + + FitModel(const ModelType &model, + const Fit &&fit) + : model_(model), fit_(std::move(fit)) {} + + public: + + template + Prediction + get_prediction(const std::vector &features) const { + return Prediction(*this, features); + } + + const ModelType &model_; + const Fit fit_; + +}; + +} +#endif diff --git a/albatross/core/model.h b/albatross/core/model.h index 42b98054..08b7a460 100644 --- a/albatross/core/model.h +++ b/albatross/core/model.h @@ -13,266 +13,110 @@ #ifndef ALBATROSS_CORE_MODEL_H #define ALBATROSS_CORE_MODEL_H -#include "core/dataset.h" -#include "core/indexing.h" -#include "core/parameter_handling_mixin.h" -#include "map_utils.h" -#include "traits.h" -#include -#include -#include -#include - namespace albatross { -namespace detail { -// This is effectively just a container that allows us to develop methods -// which behave different conditional on the type of predictions desired. -template struct PredictTypeIdentity { typedef T type; }; -} - -// This can be used to make intentions more obvious when calling -// predict variants for which you only want the mean. -using PredictMeanOnly = Eigen::VectorXd; using Insights = std::map; -/* - * A model that uses a single Feature to estimate the value of a double typed - * target. - */ -template -class RegressionModel : public ParameterHandlingMixin { -public: - using Feature = FeatureType; - RegressionModel() : ParameterHandlingMixin(), has_been_fit_(){}; - virtual ~RegressionModel(){}; - - virtual bool operator==(const RegressionModel &other) const { - // If the fit method has been called it's possible that some unknown - // class members may have been modified. As such, if a model has been - // fit we fail hard to avoid possibly unexpected behavior. Any - // implementation that wants a functional equality operator after - // having been fit will need to override this one. - assert(!has_been_fit()); - return (get_name() == other.get_name() && - get_params() == other.get_params() && - has_been_fit() == other.has_been_fit()); - } - - /* - * Provides a wrapper around the implementation `fit_` which performs - * simple size checks and makes sure the fit method is called before - * predict. - */ - void fit(const std::vector &features, - const MarginalDistribution &targets) { - assert(features.size() > 0); - assert(features.size() == static_cast(targets.size())); - has_been_fit_ = true; - insights_["input_feature_count"] = std::to_string(features.size()); - fit_(features, targets); - } - - /* - * Convenience function which assumes zero target covariance. - */ - void fit(const std::vector &features, - const Eigen::VectorXd &targets) { - return fit(features, MarginalDistribution(targets)); - } - - /* - * Convenience function which unpacks a dataset into features and targets. - */ - void fit(const RegressionDataset &dataset) { - return fit(dataset.features, dataset.targets); - } - - /* - * Similar to fit, this predict methods wrap the implementation `predict_*_` - * and makes simple checks to confirm the implementation is returning - * properly sized Distribution. 
- */ - template - PredictType predict(const std::vector &features) const { - return predict(features, detail::PredictTypeIdentity()); - } - - template - PredictType predict(const FeatureType &feature) const { - std::vector features = {feature}; - return predict(features); - } - - template - std::vector - cross_validated_predictions(const RegressionDataset &dataset, - const FoldIndexer &fold_indexer) { - return cross_validated_predictions_( - dataset, fold_indexer, detail::PredictTypeIdentity()); - } - - // Because cross validation can never properly produce a full - // joint distribution it is common to only use the marginal - // predictions, hence the different default from predict. - template - std::vector cross_validated_predictions( - const std::vector> &folds) { - // Iteratively make predictions and assemble the output vector - std::vector predictions; - for (std::size_t i = 0; i < folds.size(); i++) { - fit(folds[i].train_dataset); - predictions.push_back( - predict(folds[i].test_dataset.features)); - } - return predictions; - } +template class ModelBase : public ParameterHandlingMixin { - std::string pretty_string() const { - std::ostringstream ss; - ss << get_name() << std::endl; - ss << ParameterHandlingMixin::pretty_string(); - return ss.str(); - } + template + friend class Prediction; - virtual bool has_been_fit() const { return has_been_fit_; } + template + friend class fit_model_type; - virtual std::string get_name() const = 0; + template + friend struct fit_type_from_fit_model_type; - virtual Insights get_insights() const { return insights_; } + template + friend struct fit_type; - virtual void add_insights(const Insights &insights) { - for (const auto &insight : insights) { - insights_[insight.first] = insight.second; - } - }; - - virtual std::unique_ptr> - ransac_model(double inlier_threshold, std::size_t min_inliers, - std::size_t random_sample_size, std::size_t max_iterations) { - static_assert( - is_complete>::value, - "ransac methods aren't complete yet, be sure you've included ransac.h"); - return make_generic_ransac_model( - this, inlier_threshold, min_inliers, random_sample_size, max_iterations, - leave_one_out_indexer); - } + private: + // Declaring these private makes it impossible to accidentally do things like: + // class A : public ModelBase {} + // or + // using A = ModelBase; + // + // which if unchecked can lead to some very strange behavior. + ModelBase() : insights_(){}; + friend ModelType; + Insights insights_; /* - * Here we define the serialization routines. Note that while in most - * cases we could use the cereal method `serialize`, in this case we don't - * know for sure where the parameters are stored. The - * GaussianProcessRegression - * model, for example, derives its parameters from its covariance function, - * so it's `params_` are actually empty. As a result we need to use the - * save/load cereal variant and deal with parameters through the get/set - * interface. 
+ * Fit */ - template void save(Archive &archive) const { - auto params = get_params(); - archive(cereal::make_nvp("parameters", params)); - archive(cereal::make_nvp("has_been_fit", has_been_fit_)); - } - - template void load(Archive &archive) { - auto params = get_params(); - archive(cereal::make_nvp("parameters", params)); - archive(cereal::make_nvp("has_been_fit", has_been_fit_)); - set_params(params); - } + template < + typename FeatureType, + typename std::enable_if::value, + int>::type = 0> + auto + fit_(const std::vector &features, + const MarginalDistribution &targets) const { + const auto fit = derived().fit(features, targets); + return FitModel(derived(), std::move(fit)); + } + + template ::value && + !has_valid_fit::value, + int>::type = 0> + FitModel + fit_(const std::vector &features, + const MarginalDistribution &targets) const = delete; // Invalid fit_impl_ + + template ::value && + !has_valid_fit::value, + int>::type = 0> + FitModel + fit_(const std::vector &features, + const MarginalDistribution &targets) const = delete; // No fit_impl_ found. + + template ::value, + int>::type = 0> + PredictType predict_(const std::vector &features, + const FitType &fit, + PredictTypeIdentity &&) const { + return derived().predict(features, fit, PredictTypeIdentity()); + } + + template ::value, + int>::type = 0> + PredictType predict_(const std::vector &features, + const FitType &fit, + PredictTypeIdentity &&) const = delete; // No valid predict. -protected: - virtual void fit_(const std::vector &features, - const MarginalDistribution &targets) = 0; /* - * Predict specializations + * CRTP Helpers */ - - JointDistribution - predict(const std::vector &features, - detail::PredictTypeIdentity &&) const { - assert(has_been_fit()); - JointDistribution preds = predict_(features); - assert(static_cast(preds.mean.size()) == features.size()); - return preds; + ModelType &derived() { return *static_cast(this); } + const ModelType &derived() const { + return *static_cast(this); } - MarginalDistribution - predict(const std::vector &features, - detail::PredictTypeIdentity &&) const { - assert(has_been_fit()); - MarginalDistribution preds = predict_marginal_(features); - assert(static_cast(preds.mean.size()) == features.size()); - return preds; - } - - Eigen::VectorXd - predict(const std::vector &features, - detail::PredictTypeIdentity &&) const { - assert(has_been_fit()); - Eigen::VectorXd preds = predict_mean_(features); - assert(static_cast(preds.size()) == features.size()); - return preds; - } - - /* - * Cross validation specializations - * - * Note the naming here uses a trailing underscore. 
This is to avoid - * name hiding when implementing one of these methods in a derived - * class: - * - * https://stackoverflow.com/questions/1628768/why-does-an-overridden-function-in-the-derived-class-hide-other-overloads-of-the - */ - virtual std::vector cross_validated_predictions_( - const RegressionDataset &dataset, - const FoldIndexer &fold_indexer, - const detail::PredictTypeIdentity &) { - const auto folds = folds_from_fold_indexer(dataset, fold_indexer); - return cross_validated_predictions(folds); - } - - virtual std::vector cross_validated_predictions_( - const RegressionDataset &dataset, - const FoldIndexer &fold_indexer, - const detail::PredictTypeIdentity &) { - const auto folds = folds_from_fold_indexer(dataset, fold_indexer); - return cross_validated_predictions(folds); - } - - virtual std::vector cross_validated_predictions_( - const RegressionDataset &dataset, - const FoldIndexer &fold_indexer, - const detail::PredictTypeIdentity &) { - const auto folds = folds_from_fold_indexer(dataset, fold_indexer); - return cross_validated_predictions(folds); - } - - virtual JointDistribution - predict_(const std::vector &features) const = 0; +public: - virtual MarginalDistribution - predict_marginal_(const std::vector &features) const { - const auto full_distribution = predict_(features); - return MarginalDistribution( - full_distribution.mean, - full_distribution.covariance.diagonal().asDiagonal()); + template + auto + get_fit_model(const std::vector &features, + const MarginalDistribution &targets) const { + return fit_(features, targets); } - virtual Eigen::VectorXd - predict_mean_(const std::vector &features) const { - const auto marginal_distribution = predict_marginal_(features); - return marginal_distribution.mean; + template + auto + get_fit_model(const RegressionDataset &dataset) const { + return fit_(dataset.features, dataset.targets); } - bool has_been_fit_; - Insights insights_; }; -template -using RegressionModelCreator = - std::function>()>; -} // namespace albatross - +} #endif diff --git a/albatross/core/prediction.h b/albatross/core/prediction.h new file mode 100644 index 00000000..93386671 --- /dev/null +++ b/albatross/core/prediction.h @@ -0,0 +1,144 @@ +/* + * Copyright (C) 2018 Swift Navigation Inc. + * Contact: Swift Navigation + * + * This source is subject to the license found in the file 'LICENSE' which must + * be distributed together with this source. All other rights reserved. + * + * THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, + * EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef ALBATROSS_CORE_PREDICTION_H +#define ALBATROSS_CORE_PREDICTION_H + +namespace albatross { + +// This is effectively just a container that allows us to develop methods +// which behave different conditional on the type of predictions desired. 
+template struct PredictTypeIdentity { typedef T type; }; + +template +class Prediction { + +public: + Prediction(const FitModel &fit_model, + const std::vector &features) + : fit_model_(fit_model), features_(features) {} + + /* + * MEAN + */ + template < + typename DummyType = FeatureType, + typename std::enable_if< + has_valid_predict_mean::value, int>::type = 0> + Eigen::VectorXd mean() const { + static_assert(std::is_same::value, + "never do prediction.mean()"); + return fit_model_.model_.predict_(features_, fit_model_.fit_, PredictTypeIdentity()); + } + + template ::value && + has_valid_predict_marginal::value, + int>::type = 0> + Eigen::VectorXd mean() const { + static_assert(std::is_same::value, + "never do prediction.mean()"); + return fit_model_.model_.predict_(features_, fit_model_.fit_, PredictTypeIdentity()) + .mean; + } + + template ::value && + !has_valid_predict_marginal::value && + has_valid_predict_joint::value, + int>::type = 0> + Eigen::VectorXd mean() const { + static_assert(std::is_same::value, + "never do prediction.mean()"); + return fit_model_.model_.predict_(features_, fit_model_.fit_, PredictTypeIdentity()) + .mean; + } + + /* + * MARGINAL + */ + template ::value, + int>::type = 0> + MarginalDistribution marginal() const { + static_assert(std::is_same::value, + "never do prediction.marginal()"); + return fit_model_.model_.predict_(features_, + fit_model_.fit_, + PredictTypeIdentity()); + } + + template ::value && + has_valid_predict_joint::value, + int>::type = 0> + MarginalDistribution marginal() const { + static_assert(std::is_same::value, + "never do prediction.marginal()"); + const auto joint_pred = + fit_model_.model_.predict_(features_, fit_model_.fit_, PredictTypeIdentity()); + if (joint_pred.has_covariance()) { + Eigen::VectorXd diag = joint_pred.covariance.diagonal(); + return MarginalDistribution(joint_pred.mean, diag.asDiagonal()); + } else { + return MarginalDistribution(joint_pred.mean); + } + } + + /* + * JOINT + */ + template < + typename DummyType = FeatureType, + typename std::enable_if< + has_valid_predict_joint::value, int>::type = 0> + JointDistribution joint() const { + static_assert(std::is_same::value, + "never do prediction.joint()"); + return fit_model_.model_.predict_(features_, fit_model_.fit_, PredictTypeIdentity()); + } + + /* + * CATCH FAILURE MODES + */ + template ::value && + !has_valid_predict_marginal::value && + !has_valid_predict_joint::value, + int>::type = 0> + Eigen::VectorXd mean() const = delete; // No valid predict method found. + + template ::value && + !has_valid_predict_joint::value, + int>::type = 0> + Eigen::VectorXd marginal() const = delete; // No valid predict marginal method found. + + template ::value, + int>::type = 0> + Eigen::VectorXd joint() const = delete; // No valid predict joint method found. 
+ +private: + const FitModel &fit_model_; + const std::vector &features_; +}; + +} +#endif diff --git a/albatross/models/least_squares.h b/albatross/models/least_squares.h index ef285a6e..bfc8a8f5 100644 --- a/albatross/models/least_squares.h +++ b/albatross/models/least_squares.h @@ -13,21 +13,15 @@ #ifndef ALBATROSS_MODELS_LEAST_SQUARES_H #define ALBATROSS_MODELS_LEAST_SQUARES_H -#include "core/model_adapter.h" -#include "core/serialize.h" -#include -#include -#include -#include -#include - namespace albatross { -struct LeastSquaresFit { +template class LeastSquares; + +template struct Fit> { Eigen::VectorXd coefs; - bool operator==(const LeastSquaresFit &other) const { - return coefs == other.coefs; + bool operator==(const Fit &other) const { + return (coefs == other.coefs); } template void serialize(Archive &archive) { @@ -36,22 +30,22 @@ struct LeastSquaresFit { }; /* - * This model supports a family of RegressionModels which consist of + * This model supports a family of models which consist of * first creating a design matrix, A, then solving least squares. Ie, * * min_x |y - Ax|_2^2 * * The FeatureType in this case is a single row from the design matrix. */ -class LeastSquaresRegression - : public SerializableRegressionModel { +template +class LeastSquares : public ModelBase> { public: - LeastSquaresRegression(){}; - std::string get_name() const override { return "least_squares"; }; + using FitType = Fit>; + + // std::string get_name() const override { return "least_squares"; }; - LeastSquaresFit - serializable_fit_(const std::vector &features, - const MarginalDistribution &targets) const override { + FitType fit(const std::vector &features, + const MarginalDistribution &targets) const { // The way this is currently implemented we assume all targets have the same // variance (or zero variance). assert(!targets.has_covariance()); @@ -62,36 +56,56 @@ class LeastSquaresRegression for (int i = 0; i < m; i++) { A.row(i) = features[static_cast(i)]; } - // Solve for the coefficients using the QR decomposition. - LeastSquaresFit model_fit = {least_squares_solver(A, targets.mean)}; + + FitType model_fit = {least_squares_solver(A, targets.mean)}; return model_fit; } -protected: - Eigen::VectorXd - predict_mean_(const std::vector &features) const override { + template ::value, + int>::type = 0> + FitType fit(const std::vector &features, + const MarginalDistribution &targets) const { + return impl().fit(features, targets); + } + + JointDistribution predict(const std::vector &features, + const FitType &least_squares_fit, + PredictTypeIdentity &&) const { std::size_t n = features.size(); Eigen::VectorXd mean(n); for (std::size_t i = 0; i < n; i++) { mean(static_cast(i)) = - features[i].dot(this->model_fit_.coefs); + features[i].dot(least_squares_fit.coefs); } - return mean; + return JointDistribution(mean); } - JointDistribution - predict_(const std::vector &features) const override { - return JointDistribution(predict_mean_(features)); + template ::value, + int>::type = 0> + PredictType predict(const std::vector &features, + const FitType &least_squares_fit, + PredictTypeIdentity &&) const { + return impl().predict(features, least_squares_fit, PredictTypeIdentity()); } /* * This lets you customize the least squares approach if need be, * default uses the QR decomposition. 
*/ - virtual Eigen::VectorXd least_squares_solver(const Eigen::MatrixXd &A, - const Eigen::VectorXd &b) const { + Eigen::VectorXd least_squares_solver(const Eigen::MatrixXd &A, + const Eigen::VectorXd &b) const { return A.colPivHouseholderQr().solve(b); } + + /* + * CRTP Helpers + */ + ImplType &impl() { return *static_cast(this); } + const ImplType &impl() const { return *static_cast(this); } }; /* @@ -103,39 +117,60 @@ class LeastSquaresRegression * Setup like this the resulting least squares solve will represent * an offset and slope. */ -using LinearRegressionBase = - AdaptedRegressionModel; - -class LinearRegression : public LinearRegressionBase { +class LinearRegression : public LeastSquares { public: - LinearRegression(){}; - std::string get_name() const override { return "linear_regression"; }; + // std::string get_name() const { return "linear_regression"; }; + + using Base = LeastSquares; - Eigen::VectorXd convert_feature(const double &x) const override { + Eigen::VectorXd convert_feature(const double &f) const { Eigen::VectorXd converted(2); - converted << 1., x; + converted << 1., f; return converted; } + std::vector + convert_features(const std::vector &features) const { + std::vector output; + for (const auto &f : features) { + output.emplace_back(convert_feature(f)); + } + return output; + } + + Base::FitType fit(const std::vector &features, + const MarginalDistribution &targets) const { + return Base::fit(convert_features(features), + targets); + } + + JointDistribution predict(const std::vector &features, + const Base::FitType &least_squares_fit, + PredictTypeIdentity &&) const { + return Base::predict(convert_features(features), + least_squares_fit, + PredictTypeIdentity()); + } + /* * save/load methods are inherited from the SerializableRegressionModel, * but by defining them here and explicitly showing the inheritence * through the use of `base_class` we can make use of cereal's * polymorphic serialization. */ - template void save(Archive &archive) const { - archive(cereal::make_nvp("linear_regression", - cereal::base_class(this))); - } - - template void load(Archive &archive) { - archive(cereal::make_nvp("linear_regression", - cereal::base_class(this))); - } + // template void save(Archive &archive) const { + // archive(cereal::make_nvp("linear_regression", + // cereal::base_class(this))); + // } + // + // template void load(Archive &archive) { + // archive(cereal::make_nvp("linear_regression", + // cereal::base_class(this))); + // } }; } // namespace albatross -CEREAL_REGISTER_TYPE(albatross::LinearRegression); +// CEREAL_REGISTER_TYPE(albatross::LinearRegression); #endif From fea8ab55d3cdc0533e18b804e4ece95e7c1eee18 Mon Sep 17 00:00:00 2001 From: kleeman Date: Wed, 6 Mar 2019 16:27:28 -0800 Subject: [PATCH 2/3] Add some simple tests to illustrate usage --- tests/mock_model.h | 133 +++++++++++++++++++++++++++++++++++++++ tests/test_core_model.cc | 54 ++++++++++++---- 2 files changed, 176 insertions(+), 11 deletions(-) create mode 100644 tests/mock_model.h diff --git a/tests/mock_model.h b/tests/mock_model.h new file mode 100644 index 00000000..7396cf73 --- /dev/null +++ b/tests/mock_model.h @@ -0,0 +1,133 @@ +/* + * Copyright (C) 2019 Swift Navigation Inc. + * Contact: Swift Navigation + * + * This source is subject to the license found in the file 'LICENSE' which must + * be distributed together with this source. All other rights reserved. 
+ * + * THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, + * EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef ALBATROSS_TESTS_MOCK_MODEL_H +#define ALBATROSS_TESTS_MOCK_MODEL_H + +namespace albatross { + +class MockModel; + +// A simple predictor which is effectively just an integer. +struct MockFeature { + int value; + + MockFeature() : value(){}; + MockFeature(int v) : value(v){}; + + bool operator==(const MockFeature &other) const { + return value == other.value; + }; + + template void serialize(Archive &archive) { + archive(cereal::make_nvp("value", value)); + } +}; + +struct ContainsMockFeature { + MockFeature mock; +}; + +template <> +struct Fit { + std::map train_data; + + template void serialize(Archive &ar) { + ar(cereal::make_nvp("train_data", train_data)); + }; + + bool operator==(const Fit &other) const { + return train_data == other.train_data; + }; +}; + +/* + * A simple model which builds a map from MockPredict (aka, int) + * to a double value. + */ +class MockModel : public ModelBase { +public: + ALBATROSS_DECLARE_PARAMS(foo, bar); + + MockModel(double foo_ = 3.14159, double bar_ = sqrt(2.)) { + this->foo = {foo_, std::make_shared(3., 2.)}; + this->bar = {bar_, std::make_shared()}; + }; + + // std::string get_name() const override { return "mock_model"; }; + + // template void save(Archive &archive) const { + // archive( + // cereal::base_class>( + // this)); + // } + // + // template void load(Archive &archive) { + // archive( + // cereal::base_class>( + // this)); + // } + + Fit fit(const std::vector &features, + const MarginalDistribution &targets) const { + int n = static_cast(features.size()); + Eigen::VectorXd predictions(n); + Fit model_fit; + for (int i = 0; i < n; i++) { + model_fit.train_data[features[static_cast(i)].value] = + targets.mean[i]; + } + return model_fit; + } + + // looks up the prediction in the map + Eigen::VectorXd predict(const std::vector &features, + const Fit &fit, + PredictTypeIdentity &&) const { + int n = static_cast(features.size()); + Eigen::VectorXd predictions(n); + + for (int i = 0; i < n; i++) { + int index = features[static_cast(i)].value; + predictions[i] = fit.train_data.find(index)->second; + } + + return predictions; + } + + // convert before predicting + Eigen::VectorXd predict(const std::vector &features, + const Fit &fit, + PredictTypeIdentity &&) const { + std::vector mock_features; + for (const auto &f : features) { + mock_features.push_back(f.mock); + } + return predict(mock_features, fit, PredictTypeIdentity()); + } +}; + +static inline RegressionDataset +mock_training_data(const int n = 10) { + std::vector features; + Eigen::VectorXd targets(n); + for (int i = 0; i < n; i++) { + features.push_back(MockFeature(i)); + targets[i] = static_cast(i + n); + } + return RegressionDataset(features, targets); +} +} + +#endif diff --git a/tests/test_core_model.cc b/tests/test_core_model.cc index e66f4dad..44ca2c0a 100644 --- a/tests/test_core_model.cc +++ b/tests/test_core_model.cc @@ -10,10 +10,10 @@ * WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE. 
*/ -#include "core/model.h" -#include "test_utils.h" #include +#include "test_utils.h" + namespace albatross { /* @@ -22,17 +22,49 @@ namespace albatross { */ TEST(test_core_model, test_fit_predict) { auto dataset = mock_training_data(); + + MockModel m; + const auto fit_model = m.get_fit_model(dataset.features, dataset.targets); + Eigen::VectorXd predictions = fit_model.get_prediction(dataset.features).mean(); + + EXPECT_LT((predictions - dataset.targets.mean).norm(), 1e-10); +} + +TEST(test_core_model, test_fit_predict_different_types) { + auto dataset = mock_training_data(); MockModel m; - m.fit(dataset); - // We should be able to perfectly predict in this case. - JointDistribution predictions = m.predict(dataset.features); - EXPECT_LT((predictions.mean - dataset.targets.mean).norm(), 1e-10); + + const auto fit_model = m.get_fit_model(dataset.features, dataset.targets); + + std::vector derived_features; + for (const auto &f : dataset.features) { + derived_features.push_back({f}); + } + + Eigen::VectorXd predictions = fit_model.get_prediction(derived_features).mean(); + + EXPECT_LT((predictions - dataset.targets.mean).norm(), 1e-10); } -TEST(test_core_model, test_regression_model_abstraction) { - // This just tests to make sure that an implementation of a RegressionModel - // can be passed around as a pointer to the abstract class. - std::unique_ptr> m_ptr = - std::make_unique(); + +template +void test_get_set(ModelBase &model, const std::string &key) { + // Make sure a key exists, then modify it and make sure it + // takes on the new value. + const auto orig = model.get_param_value(key); + model.set_param(key, orig + 1.); + EXPECT_EQ(model.get_params().at(key), orig + 1.); } + +TEST(test_core_model, test_get_set_params) { + auto model = MockModel(); + auto params = model.get_params(); + std::size_t count = 0; + for (const auto &pair : params) { + test_get_set(model, pair.first); + ++count; + } + EXPECT_GT(count, 0); +}; + } // namespace albatross From cf74a7e86b1c6a0757411b5c2ff3ed4eb39043ee Mon Sep 17 00:00:00 2001 From: kleeman Date: Thu, 7 Mar 2019 11:00:34 -0800 Subject: [PATCH 3/3] Joseph's comments. Now copying ModelType into FitModel --- albatross/core/fit_model.h | 17 +++++++---------- albatross/core/model.h | 2 +- albatross/core/prediction.h | 2 +- albatross/models/least_squares.h | 19 ------------------- 4 files changed, 9 insertions(+), 31 deletions(-) diff --git a/albatross/core/fit_model.h b/albatross/core/fit_model.h index 51bebbe9..07ee8159 100644 --- a/albatross/core/fit_model.h +++ b/albatross/core/fit_model.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018 Swift Navigation Inc. + * Copyright (C) 2019 Swift Navigation Inc. * Contact: Swift Navigation * * This source is subject to the license found in the file 'LICENSE' which must @@ -10,35 +10,32 @@ * WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE. 
*/ -#ifndef ALBATROSS_CORE_FIT_H -#define ALBATROSS_CORE_FIT_H +#ifndef ALBATROSS_CORE_FIT_MODEL_H +#define ALBATROSS_CORE_FIT_MODEL_H namespace albatross { template class FitModel { - + public: template friend class Prediction; - public: - static_assert(std::is_move_constructible::value, "Fit type must be move constructible to avoid unexpected copying."); FitModel(const ModelType &model, - const Fit &&fit) + Fit &&fit) : model_(model), fit_(std::move(fit)) {} - public: - template Prediction get_prediction(const std::vector &features) const { return Prediction(*this, features); } - const ModelType &model_; + private: + const ModelType model_; const Fit fit_; }; diff --git a/albatross/core/model.h b/albatross/core/model.h index 08b7a460..2c476bc3 100644 --- a/albatross/core/model.h +++ b/albatross/core/model.h @@ -53,7 +53,7 @@ template class ModelBase : public ParameterHandlingMixin { auto fit_(const std::vector &features, const MarginalDistribution &targets) const { - const auto fit = derived().fit(features, targets); + auto fit = derived().fit(features, targets); return FitModel(derived(), std::move(fit)); } diff --git a/albatross/core/prediction.h b/albatross/core/prediction.h index 93386671..15cac69c 100644 --- a/albatross/core/prediction.h +++ b/albatross/core/prediction.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018 Swift Navigation Inc. + * Copyright (C) 2019 Swift Navigation Inc. * Contact: Swift Navigation * * This source is subject to the license found in the file 'LICENSE' which must diff --git a/albatross/models/least_squares.h b/albatross/models/least_squares.h index bfc8a8f5..23398f8a 100644 --- a/albatross/models/least_squares.h +++ b/albatross/models/least_squares.h @@ -42,8 +42,6 @@ class LeastSquares : public ModelBase> { public: using FitType = Fit>; - // std::string get_name() const override { return "least_squares"; }; - FitType fit(const std::vector &features, const MarginalDistribution &targets) const { // The way this is currently implemented we assume all targets have the same @@ -120,8 +118,6 @@ class LeastSquares : public ModelBase> { class LinearRegression : public LeastSquares { public: - // std::string get_name() const { return "linear_regression"; }; - using Base = LeastSquares; Eigen::VectorXd convert_feature(const double &f) const { @@ -153,21 +149,6 @@ class LinearRegression : public LeastSquares { PredictTypeIdentity()); } - /* - * save/load methods are inherited from the SerializableRegressionModel, - * but by defining them here and explicitly showing the inheritence - * through the use of `base_class` we can make use of cereal's - * polymorphic serialization. - */ - // template void save(Archive &archive) const { - // archive(cereal::make_nvp("linear_regression", - // cereal::base_class(this))); - // } - // - // template void load(Archive &archive) { - // archive(cereal::make_nvp("linear_regression", - // cereal::base_class(this))); - // } }; } // namespace albatross
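
For reference, a minimal sketch of how a model plugs into the new CRTP interface, mirroring MockModel and the updated tests above. ConstantModel is a hypothetical example (it is not part of albatross); ModelBase, Fit, MarginalDistribution, PredictTypeIdentity, get_fit_model and get_prediction are the names introduced or used in these patches.

// Minimal sketch -- assumes the albatross core headers from these patches
// are already included. ConstantModel simply predicts the mean of the
// training targets for every feature.
#include <vector>
#include <Eigen/Dense>

namespace albatross {

class ConstantModel;

// Each model declares its fit type by specializing Fit<>.
template <> struct Fit<ConstantModel> {
  double mean = 0.;
};

class ConstantModel : public ModelBase<ConstantModel> {
public:
  // Called through ModelBase::get_fit_model().
  Fit<ConstantModel> fit(const std::vector<double> &features,
                         const MarginalDistribution &targets) const {
    Fit<ConstantModel> model_fit;
    model_fit.mean = targets.mean.mean();
    return model_fit;
  }

  // Called through Prediction::mean().
  Eigen::VectorXd predict(const std::vector<double> &features,
                          const Fit<ConstantModel> &constant_fit,
                          PredictTypeIdentity<Eigen::VectorXd> &&) const {
    return Eigen::VectorXd::Constant(
        static_cast<Eigen::Index>(features.size()), constant_fit.mean);
  }
};

} // namespace albatross

// Usage, following tests/test_core_model.cc:
//   ConstantModel m;
//   const auto fit_model = m.get_fit_model(dataset.features, dataset.targets);
//   Eigen::VectorXd mean = fit_model.get_prediction(dataset.features).mean();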