Skip to content

Commit

Permalink
Evaluation implemented
Browse files Browse the repository at this point in the history
  • Loading branch information
gAldeia committed Nov 9, 2023
1 parent 5e862b3 commit 9f7f11c
Show file tree
Hide file tree
Showing 8 changed files with 266 additions and 257 deletions.
367 changes: 128 additions & 239 deletions src/eval/evaluation.cpp
Original file line number Diff line number Diff line change
@@ -1,241 +1,130 @@
// /* FEAT
// copyright 2017 William La Cava
// license: GNU/GPL v3
// */

// #include "evaluation.h"

// // code to evaluate GP programs.
// namespace FT{

// using namespace Opt;

// namespace Eval{

// Evaluation::Evaluation(string scorer): S(scorer)
// {
// this->S.set_scorer(scorer);
// }

// Evaluation::~Evaluation(){}

// void Evaluation::validation(vector<Individual>& individuals,
// const Data& d,
// const Parameters& params,
// bool offspring
// )
// {
// unsigned start =0;
// if (offspring)
// start = individuals.size()/2;

// // loop through individuals
// /* #pragma omp parallel for */
// for (unsigned i = start; i<individuals.size(); ++i)
// {
// Individual& ind = individuals.at(i);

// // if there is no validation data,
// // set fitness_v to fitness and return
// if (d.X.cols() == 0)
// {
// ind.fitness_v = ind.fitness;
// continue;
// }

// bool pass = true;

// logger.log("Validating ind " + to_string(i)
// + ", id: " + to_string(ind.id), 3);

// shared_ptr<CLabels> yhat = ind.predict(d);
// // assign aggregate fitness
// logger.log("Assigning fitness to ind " + to_string(i)
// + ", eqn: " + ind.get_eqn(), 3);

// if (!pass)
// {

// ind.fitness_v = MAX_FLT;
// }
// else
// {
// // assign fitness to individual
// VectorXf loss;
// ind.fitness_v = this->S.score(d.y, yhat, loss,
// params.class_weights);
// }
// }
// }
// // fitness of population
// void Evaluation::fitness(vector<Individual>& individuals,
// const Data& d,
// const Parameters& params,
// bool offspring)
// {
// /*!
// * @param individuals: population
// * @param d: Data structure
// * @param params: algorithm parameters
// * @param offspring: if true, only evaluate last half of population

// * Output

// * individuals.fitness, yhat, error is modified
// */

// unsigned start =0;
// if (offspring) start = individuals.size()/2;

// /* for (unsigned i = start; i<individuals.size(); ++i) */
// /* { */
// /* cout << "ind " << i << " size: " */
// /* << individuals.at(i).size() << endl; */
// /* /1* cout << "ind " << i << " eqn: " *1/ */
// /* /1* << individuals.at(i).get_eqn() << endl; *1/ */
// /* /1* cout << "ind " << i << " program str: " *1/ */
// /* /1* << individuals.at(i).program_str() << endl; *1/ */
// /* } */

// // loop through individuals
// #pragma omp parallel for
// for (unsigned i = start; i<individuals.size(); ++i)
// {
// Individual& ind = individuals.at(i);

// if (params.backprop)
// {
// #pragma omp critical
// {
// AutoBackProp backprop(params.scorer_, params.bp.iters,
// params.bp.learning_rate);
// logger.log("Running backprop on " + ind.get_eqn(), 3);
// backprop.run(ind, d, params);
// }
// }
// bool pass = true;

// logger.log("Running ind " + to_string(i)
// + ", id: " + to_string(ind.id), 3);

// shared_ptr<CLabels> yhat = ind.fit(d,params,pass);
// // assign F and aggregate fitness
// logger.log("Assigning fitness to ind " + to_string(i)
// + ", eqn: " + ind.get_eqn(), 3);

// if (!pass)
// {

// ind.fitness = MAX_FLT;
// ind.error = MAX_FLT*VectorXf::Ones(d.y.size());
// }
// else
// {
// // assign weights to individual
// assign_fit(ind,yhat,d,params,false);


// if (params.hillclimb)
// {
// HillClimb hc(params.scorer_, params.hc.iters,
// params.hc.step);
// bool updated = false;
// shared_ptr<CLabels> yhat2 = hc.run(ind, d, params,
// updated);
// // update the fitness of this individual
// if (updated)
// {
// assign_fit(ind, yhat2, d, params);
// }

// }
// }
// }
// }
#include "evaluation.h"

namespace Brush{
namespace Eval{


template<ProgramType T>
void Evaluation<T>::validation(Population<T>& pop,
                 tuple<size_t, size_t> island_range,
                 const Dataset& data,
                 const Parameters& params,
                 bool offspring
                 )
{
    /// @brief Assigns validation fitness (fitness_v) to individuals of one island.
    ///
    /// @param pop          population that owns the individuals
    /// @param island_range [start, end) index range of this island inside pop
    /// @param data         dataset; its validation split is used when available
    /// @param params       algorithm parameters (scorer, class weights, ...)
    /// @param offspring    if true, evaluate only the second half of the island
    ///                     (requires pop.offspring_ready); if false, evaluate
    ///                     the first half when offspring exist, otherwise the
    ///                     whole island

    auto [idx_start, idx_end] = island_range;
    size_t delta = idx_end - idx_start;

    if (offspring)
    {
        // offspring live in the second half of the island
        assert(pop.offspring_ready
            && ("Population does not have offspring to calculate validation fitness"));

        idx_start = idx_start + (delta/2);
    }
    else if (pop.offspring_ready) // offspring is false: stop before the offspring half
    {
        idx_end = idx_end - (delta/2);
    }

    for (unsigned i = idx_start; i<idx_end; ++i)
    {
        Individual<T>& ind = pop[i];

        // if there is no validation data, set fitness_v to fitness and
        // continue (assumes training fitness was calculated previously)
        if (!data.use_validation)
        {
            ind.fitness_v = ind.fitness;
            continue;
        }

        // TODO: have predict/fit report failure instead of hardcoding pass
        bool pass = true;

        if (!pass)
        {
            // TODO: stop doing this hardcoded?
            ind.fitness_v = MAX_FLT;
        }
        else
        {
            // TODO: implement the class weights and use it here (and on fitness)
            // NOTE(review): get_validation_data looks like a member function;
            // the original passed it without parentheses — confirm against
            // the Dataset declaration.
            auto y_pred = ind.predict(data.get_validation_data());
            assign_fit(ind, y_pred, data, params, true);
        }
    }
}

// fitness of population
template<ProgramType T>
void Evaluation<T>::fitness(Population<T>& pop,
                 tuple<size_t, size_t> island_range,
                 const Dataset& data,
                 const Parameters& params,
                 bool offspring
                 )
{
    /// @brief Fits and assigns training fitness to individuals of one island.
    ///
    /// @param pop          population that owns the individuals
    /// @param island_range [start, end) index range of this island inside pop
    /// @param data         dataset; the training split is used
    /// @param params       algorithm parameters (scorer, class weights, ...)
    /// @param offspring    if true, evaluate only the second half of the island
    ///                     (requires pop.offspring_ready); if false, evaluate
    ///                     the first half when offspring exist, otherwise the
    ///                     whole island

    auto [idx_start, idx_end] = island_range;
    size_t delta = idx_end - idx_start;

    if (offspring)
    {
        // offspring live in the second half of the island
        assert(pop.offspring_ready
            && ("Population does not have offspring to calculate fitness"));

        idx_start = idx_start + (delta/2);
    }
    else if (pop.offspring_ready) // offspring is false: stop before the offspring half
    {
        idx_end = idx_end - (delta/2);
    }

    for (unsigned i = idx_start; i<idx_end; ++i)
    {
        Individual<T>& ind = pop[i];

        // TODO: have fit/predict report failure instead of hardcoding pass
        bool pass = true;

        if (!pass)
        {
            ind.fitness = MAX_FLT;
            ind.error = MAX_FLT*VectorXf::Ones(data.y.size());
        }
        else
        {
            // fit the program's weights on the training data, then score it
            ind.fit(data);

            // NOTE(review): get_training_data looks like a member function;
            // the original passed it without parentheses — confirm against
            // the Dataset declaration.
            auto y_pred = ind.predict(data.get_training_data());
            assign_fit(ind, y_pred, data, params, false);
        }
    }
}

// assign fitness to program
template<ProgramType T>
void Evaluation<T>::assign_fit(Individual<T>& ind,
        VectorXf& y_pred, const Dataset& data,
        const Parameters& params, bool val)
{
    /// @brief Scores a prediction and stores the result on the individual.
    ///
    /// @param ind    individual receiving the fitness (and error, when training)
    /// @param y_pred predicted outputs to score against data.y
    /// @param data   dataset providing the ground-truth labels
    /// @param params algorithm parameters (class weights passed to the scorer)
    /// @param val    true: store into fitness_v; false: store into fitness and
    ///               keep the per-sample loss in ind.error

    VectorXf loss;
    const float score = S.score(data.y, y_pred, loss, params.class_weights);

    if (val) {
        ind.fitness_v = score;
    }
    else {
        ind.fitness = score;
        ind.error = loss;
    }
}

} // Eval
} // Brush
Loading

0 comments on commit 9f7f11c

Please sign in to comment.