Skip to content

Commit

Permalink
Engine bindings for fit and predict. Engine serialization
Browse files Browse the repository at this point in the history
  • Loading branch information
gAldeia committed Apr 25, 2024
1 parent ddeb6fd commit ee5379d
Show file tree
Hide file tree
Showing 9 changed files with 145 additions and 32 deletions.
7 changes: 5 additions & 2 deletions pybrush/BrushEstimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,10 @@ def fit(self, X, y):
self.parameters_.mig_prob = self.mig_prob
self.parameters_.functions = self.functions
self.parameters_.mutation_probs = self.mutation_probs

self.parameters_.validation_size = self.validation_size
self.parameters_.batch_size = self.batch_size
self.parameters_.feature_names = self.feature_names_

self.parameters_.scorer_ = "mse"
if self.mode == "classification":
self.parameters_.scorer_ = "log" if self.n_classes_ == 2 else "multi_log"
Expand All @@ -254,7 +257,7 @@ def fit(self, X, y):
else:
self.engine_ = RegressorEngine(self.parameters_)

self.engine_.run(self.data_)
self.engine_.fit(self.data_)
self.best_estimator_ = self.engine_.best_ind

return self
Expand Down
36 changes: 34 additions & 2 deletions src/bindings/bind_engines.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,44 @@ void bind_engine(py::module& m, string name)
.def_property("params", &T::get_params, &T::set_params)
.def_property_readonly("is_fitted", &T::get_is_fitted)
.def_property_readonly("best_ind", &T::get_best_ind)
.def("run", &T::run, py::call_guard<py::gil_scoped_release>(), "run from brush dataset")
// .def("run", &T::run, py::call_guard<py::gil_scoped_release>(), "run from brush dataset")
.def("fit",
static_cast<T &(T::*)(Dataset &d)>(&T::fit),
py::call_guard<py::gil_scoped_release>(),
"fit from Dataset object")
.def("fit",
static_cast<T &(T::*)(const Ref<const ArrayXXf> &X, const Ref<const ArrayXf> &y)>(&T::fit),
py::call_guard<py::gil_scoped_release>(),
"fit from X,y data")
.def("predict",
static_cast<RetType (T::*)(const Dataset &d)>(&T::predict),
"predict from Dataset object")
.def("predict",
static_cast<RetType (T::*)(const Ref<const ArrayXXf> &X)>(&T::predict),
"predict from X data")
.def(py::pickle(
[](const T &p) { // __getstate__
/* Return a json object that encodes the state of the object */
// return py::make_tuple(p.value(), p.extra());
nl::json j = p;
return j;
},
[](nl::json j) { // __setstate__
T p = j;
return p;
})
)
;

// specialization for subclasses
if constexpr (std::is_same_v<T,Cls>)
{

engine.def("predict_proba",
static_cast<ArrayXf (T::*)(const Dataset &d)>(&T::predict_proba),
"predict from Dataset object")
.def("predict_proba",
static_cast<ArrayXf (T::*)(const Ref<const ArrayXXf> &X)>(&T::predict_proba),
"predict from X data")
;
}
}
8 changes: 4 additions & 4 deletions src/bindings/bind_individuals.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
namespace nl = nlohmann;
namespace br = Brush;

using Reg = br::Pop::Individual<br::ProgramType::Regressor>;
using Cls = br::Pop::Individual<br::ProgramType::BinaryClassifier>;
using MCls = br::Pop::Individual<br::ProgramType::MulticlassClassifier>;
using Rep = br::Pop::Individual<br::ProgramType::Representer>;
using Reg = Brush::RegressorIndividual;
using Cls = Brush::ClassifierIndividual;
using MCls = Brush::MulticlassClassifierIndividual;
using Rep = Brush::RepresenterIndividual;

using stream_redirect = py::call_guard<py::scoped_ostream_redirect, py::scoped_estream_redirect>;

Expand Down
16 changes: 15 additions & 1 deletion src/bindings/bind_params.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ void bind_params(py::module& m)
.def_property("n_classes", &Brush::Parameters::get_n_classes, &Brush::Parameters::set_n_classes)
// n_jobs must pair its getter with its own setter. It was previously wired to
// set_n_classes, so assigning n_jobs from Python overwrote n_classes and left
// n_jobs untouched.
// NOTE(review): assumes Parameters::set_n_jobs exists next to get_n_jobs — confirm.
.def_property("n_jobs", &Brush::Parameters::get_n_jobs, &Brush::Parameters::set_n_jobs)
.def_property("classification", &Brush::Parameters::get_classification, &Brush::Parameters::set_classification)
.def_property("validation_size", &Brush::Parameters::get_validation_size, &Brush::Parameters::set_validation_size)
.def_property("feature_names", &Brush::Parameters::get_feature_names, &Brush::Parameters::set_feature_names)
.def_property("batch_size", &Brush::Parameters::get_batch_size, &Brush::Parameters::set_batch_size)
.def_property("max_depth", &Brush::Parameters::get_max_depth, &Brush::Parameters::set_max_depth)
.def_property("max_size", &Brush::Parameters::get_max_size, &Brush::Parameters::set_max_size)
.def_property("objectives", &Brush::Parameters::get_objectives, &Brush::Parameters::set_objectives)
Expand All @@ -37,6 +40,17 @@ void bind_params(py::module& m)
.def_property("mig_prob", &Brush::Parameters::get_mig_prob, &Brush::Parameters::set_mig_prob)
.def_property("functions", &Brush::Parameters::get_functions, &Brush::Parameters::set_functions)
.def_property("mutation_probs", &Brush::Parameters::get_mutation_probs, &Brush::Parameters::set_mutation_probs)

.def(py::pickle(
[](const Brush::Parameters &p) { // __getstate__
/* Return a json object that encodes the state of the object */
// return py::make_tuple(p.value(), p.extra());
nl::json j = p;
return j;
},
[](nl::json j) { // __setstate__
Brush::Parameters p = j;
return p;
})
)
;
}
41 changes: 38 additions & 3 deletions src/engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,36 @@ class Engine{
int best_complexity;
/// Accessor for the best individual; hands back a mutable reference to the
/// best_ind member.
Individual<T>& get_best_ind()
{
    return best_ind;
}

/// Fit the engine on an already-built Dataset.
/// Forwards to run(data) and returns this engine so calls can be chained.
Engine<T>& fit(Dataset& data)
{
    run(data);

    Engine<T>& self = *this;
    return self;
}
/// Fit the engine from raw X and y.
/// A temporary Dataset is built from X, y and the dataset-related fields of
/// `params`, then handed to fit(Dataset&). Returns this engine.
Engine<T>& fit(const Ref<const ArrayXXf>& X, const Ref<const ArrayXf>& y)
{
    // Original author's note: this uses "constructor 2" of Dataset.
    // The empty braces fill the fourth constructor argument with an empty value.
    Dataset training_data(X,
                          y,
                          params.feature_names,
                          {},
                          params.classification,
                          params.validation_size,
                          params.batch_size);

    return fit(training_data);
}

/// Predict on a Dataset by delegating to the best individual's predict().
auto predict(const Dataset& data)
{
    auto& model = this->best_ind;
    return model.predict(data);
}
auto predict(const Ref<const ArrayXXf>& X)
{
Dataset d(X);
return predict(d);
};

/// Delegates to the best individual's predict_proba() on a Dataset.
/// The requires-clause makes this overload available only when P is
/// BinaryClassifier or MulticlassClassifier.
template <ProgramType P = T>
    requires((P == PT::BinaryClassifier) || (P == PT::MulticlassClassifier))
auto predict_proba(const Dataset &d)
{
    auto& model = this->best_ind;
    return model.predict_proba(d);
}
/// predict_proba from a raw feature array: wraps X in a Dataset and reuses
/// the Dataset overload. Available only when P is BinaryClassifier or
/// MulticlassClassifier (see the requires-clause).
template <ProgramType P = T>
    requires((P == PT::BinaryClassifier) || (P == PT::MulticlassClassifier))
auto predict_proba(const Ref<const ArrayXXf>& X)
{
    Dataset wrapped(X);

    return predict_proba(wrapped);
}

// TODO: starting pop (just like feat)

// TODO: make these work
Expand All @@ -74,9 +104,11 @@ class Engine{
// ArrayXXf predict_proba(MatrixXf& X);

// archive stuff

// TODO: make these work
/// Number of individuals currently held in the archive, converted to int.
int get_archive_size()
{
    return static_cast<int>(this->archive.individuals.size());
}

/// return the archive as a vector of json objects
/// NOTE(review): `front` presumably restricts the output to the pareto front —
/// confirm against the definition, which is not visible here.
vector<json> get_archive(bool front);

Expand All @@ -86,11 +118,11 @@ class Engine{
// ArrayXXf predict_proba_archive(int id, MatrixXf& X, LongData& Z);
// ArrayXXf predict_proba_archive(int id, MatrixXf& X);


/// train the model on dataset `d`; fit(Dataset&) forwards to this method
void run(Dataset &d);

Parameters params; ///< hyperparameters of brush, which the user can interact with
Individual<T> best_ind; ///< individual used by predict(); returned by get_best_ind()
private:
SearchSpace ss;

Expand All @@ -105,7 +137,6 @@ class Engine{
Timer timer; ///< start time of training
Archive<T> archive; ///< pareto front archive

Individual<T> best_ind;
bool is_fitted; ///< keeps track of whether fit was called.

void init();
Expand All @@ -114,7 +145,11 @@ class Engine{
/// Record whether the engine has been fitted.
inline void set_is_fitted(bool f)
{
    is_fitted = f;
}
};

// TODO: serialization for engine with NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE
// JSON (de)serialization for every Engine specialization.
// Only `params` and `best_ind` are listed: just the state needed to make new
// predictions or call fit again. No other Engine member is part of the
// serialized form.
NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(Engine<PT::Regressor>, params, best_ind);
NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(Engine<PT::BinaryClassifier>,params, best_ind);
NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(Engine<PT::MulticlassClassifier>,params, best_ind);
NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(Engine<PT::Representer>,params, best_ind);

} // Brush

Expand Down
7 changes: 1 addition & 6 deletions src/ind/individual.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,7 @@ class Individual{

template <ProgramType P = T>
requires((P == PT::BinaryClassifier) || (P == PT::MulticlassClassifier))
auto predict_proba(const Dataset &d)
{
return program.predict_proba(d);
};

auto predict_proba(const Dataset &d) { return program.predict_proba(d); };
template <ProgramType P = T>
requires((P == PT::BinaryClassifier) || (P == PT::MulticlassClassifier))
auto predict_proba(const Ref<const ArrayXXf>& X)
Expand All @@ -79,7 +75,6 @@ class Individual{
return predict_proba(d);
};


// just getters
bool get_is_fitted() const { return this->is_fitted_; };
string get_model() const { return program.get_model(); };
Expand Down
60 changes: 48 additions & 12 deletions src/params.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ license: GNU/GPL v3
#include "util/logger.h"

namespace ns = nlohmann;

namespace Brush
{


struct Parameters
{
public:
Expand Down Expand Up @@ -58,22 +58,18 @@ struct Parameters

string scorer_="mse"; ///< actual loss function used, determined by error

// for classification (TODO: should I have these, or they could be just dataset arguments (except the ones needed to use in dataset constructor))

bool classification;
unsigned int n_classes; ///< number of classes for classification

// TODO: set these values when creating the parameters in python side
vector<int> classes; ///< class labels
vector<int> classes; ///< class labels
vector<float> class_weights; ///< weights for each class
vector<float> sample_weights; ///< weights for each sample

// for dataset. TODO: make it work
bool shuffle = true; ///< option to shuffle the data
float split = 0.75; ///< fraction of data to use for training
vector<string> feature_names; ///< names of features
// for creating a dataset from X and y in Engine<T>::fit. Ignored if
// the user passes a Dataset directly
bool classification;
unsigned int n_classes;
float validation_size = 0.75;
vector<string> feature_names = {};
float batch_size = 0.0;
bool use_batch = false; ///< whether to use mini batch for training

string load_population = "";
string save_population = "";
Expand Down Expand Up @@ -153,13 +149,53 @@ struct Parameters
/// Set the number of classes used for classification.
void set_n_classes(unsigned int new_n_classes)
{
    n_classes = new_n_classes;
}
/// Get the number of classes used for classification.
unsigned int get_n_classes()
{
    return n_classes;
}

/// Set validation_size, which Engine<T>::fit(X, y) passes to the Dataset
/// constructor.
void set_validation_size(float s)
{
    validation_size = s;
}
/// Get the stored validation_size.
float get_validation_size()
{
    return validation_size;
}

/// Replace the stored feature names with a copy of `vn`.
void set_feature_names(vector<string> vn)
{
    feature_names = vn;
}
/// Get a copy of the stored feature names.
vector<string> get_feature_names()
{
    return feature_names;
}

/// Set batch_size, which Engine<T>::fit(X, y) passes to the Dataset
/// constructor.
void set_batch_size(float c)
{
    batch_size = c;
}
/// Get the stored batch_size.
float get_batch_size()
{
    return batch_size;
}

//TODO: unify unordered or ordered
/// Replace the mutation probability table (string key -> float, ordered map).
void set_mutation_probs(std::map<std::string, float> new_mutation_probs)
{
    mutation_probs = new_mutation_probs;
}
/// Get a copy of the mutation probability table.
std::map<std::string, float> get_mutation_probs()
{
    return mutation_probs;
}

/// Replace the functions table (string key -> float, unordered map).
void set_functions(std::unordered_map<std::string, float> new_functions)
{
    functions = new_functions;
}
/// Get a copy of the functions table.
std::unordered_map<std::string, float> get_functions()
{
    return functions;
}
};

// JSON (de)serialization of Parameters: generates to_json/from_json covering
// exactly the members listed below.
// NOTE(review): any Parameters member that is not in this list is skipped by
// the serialization — confirm the list is complete when adding new fields.
NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(Parameters,
verbosity,
random_state,
pop_size,
gens,
max_stall,
max_time,
scorer_,
load_population,
save_population,
logfile,
current_gen,
num_islands,
max_depth,
n_jobs,
max_size,
objectives,
sel,
surv,
cx_prob,
mig_prob,
classification,
n_classes,
validation_size,
feature_names,
batch_size,
mutation_probs,
functions
);

} // Brush

#endif
1 change: 0 additions & 1 deletion src/pop/population.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,6 @@ NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(
Population<PT::MulticlassClassifier>, individuals, island_indexes, pop_size, num_islands);
NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(
Population<PT::Representer>, individuals, island_indexes, pop_size, num_islands);

}// Pop
}// Brush

Expand Down
1 change: 0 additions & 1 deletion src/util/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,6 @@ struct Log_Stats

typedef struct Log_Stats Log_stats;

// TODO: change this to something more modern
NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(Log_Stats,
generation,
time,
Expand Down

0 comments on commit ee5379d

Please sign in to comment.