From 6f2b13c07b993f900df00fa11cd924dc3b043747 Mon Sep 17 00:00:00 2001 From: Guilherme Aldeia Date: Fri, 17 Nov 2023 13:58:03 -0500 Subject: [PATCH] Update to work with island indexes --- src/eval/evaluation.cpp | 46 +++++----------- src/eval/evaluation.h | 4 +- src/population.cpp | 31 +++++------ src/population.h | 6 +-- src/selection/nsga2.cpp | 10 +++- src/variation.cpp | 33 +++--------- src/variation.h | 3 +- tests/cpp/test_population.cpp | 98 +++++++++++++++++++---------------- 8 files changed, 102 insertions(+), 129 deletions(-) diff --git a/src/eval/evaluation.cpp b/src/eval/evaluation.cpp index 36d94ed4..e43e9ee0 100644 --- a/src/eval/evaluation.cpp +++ b/src/eval/evaluation.cpp @@ -6,32 +6,21 @@ namespace Eval{ template void Evaluation::validation(Population& pop, - tuple island_range, + int island, const Dataset& data, const Parameters& params, bool offspring ) { - // if offspring false --> if has offspring, do it on first half. else, do on entire island - // offspring true --> assert that has offspring, do it on the second half of the island + auto idxs = pop.get_island_indexes(island); - auto [idx_start, idx_end] = island_range; - size_t delta = idx_end - idx_start; + int start = 0; if (offspring) - { - assert(pop.offspring_ready - && ("Population does not have offspring to calculate validation fitness")); - - idx_start = idx_start + (delta/2); - } - else if (pop.offspring_ready) // offspring is false. We need to see where we sould stop - { - idx_end = idx_end - (delta/2); - } + start = idxs.size()/2; - for (unsigned i = idx_start; i& ind = pop[i]; + Individual& ind = *pop.individuals.at(idxs.at(i)).get(); // we are modifying it, so operator[] wont work // if there is no validation data, // set fitness_v to fitness and return ( this assumes that fitness on train was calculated previously.) @@ -60,33 +49,22 @@ void Evaluation::validation(Population& pop, // fitness of population template void Evaluation::fitness(Population& pop, - tuple island_range, + int island, const Dataset& data, const Parameters& params, bool fit, bool offspring ) { - // if offspring false --> if has offspring, do it on first half. else, do on entire island - // offspring true --> assert that has offspring, do it on the second half of the island + auto idxs = pop.get_island_indexes(island); - auto [idx_start, idx_end] = island_range; - size_t delta = idx_end - idx_start; + int start = 0; if (offspring) - { - assert(pop.offspring_ready - && ("Population does not have offspring to calculate validation fitness")); - - idx_start = idx_start + (delta/2); - } - else if (pop.offspring_ready) // offspring is false. We need to see where we sould stop - { - idx_end = idx_end - (delta/2); - } + start = idxs.size()/2; - for (unsigned i = idx_start; i& ind = pop.individuals.at(i); + Individual& ind = *pop.individuals.at(idxs.at(i)).get(); // we are modifying it, so operator[] wont work bool pass = true; diff --git a/src/eval/evaluation.h b/src/eval/evaluation.h index 5ddb141f..99b6d729 100644 --- a/src/eval/evaluation.h +++ b/src/eval/evaluation.h @@ -30,7 +30,7 @@ class Evaluation { // TODO: IMPLEMENT THIS /// validation of population. void validation(Population& pop, - tuple island_range, + int island, const Dataset& data, const Parameters& params, bool offspring = false @@ -40,7 +40,7 @@ class Evaluation { // TODO: MAKE it work for classification (do I need to have a way to set accuracy as a minimization problem?) /// fitness of population. void fitness(Population& pop, - tuple island_range, + int island, const Dataset& data, const Parameters& params, bool fit=true, diff --git a/src/population.cpp b/src/population.cpp index 6cb160ff..d0f3446d 100644 --- a/src/population.cpp +++ b/src/population.cpp @@ -41,7 +41,6 @@ void Population::init(SearchSpace& ss, const Parameters& params) // this calls the default constructor for the container template class individuals.resize(2*p); // we will never increase or decrease the size during execution (because is not thread safe). this way, theres no need to sync between selecting and varying the population - #pragma omp parallel for for (int i = 0; i< p; ++i) { individuals.at(i) = std::make_shared>(); @@ -50,7 +49,7 @@ void Population::init(SearchSpace& ss, const Parameters& params) } /// update individual vector size and island indexes -template +template // TODO: rename to include_offspring_indexes or something like this void Population::prep_offspring_slots(int island) { // reading and writing is thread-safe, as long as there's no overlap on island ranges. @@ -64,12 +63,12 @@ void Population::prep_offspring_slots(int island) size_t idx_start = std::floor(island*p/n_islands); size_t idx_end = std::floor((island+1)*p/n_islands); - auto delta = idx_end - idx_start; + auto delta = idx_end - idx_start; // island size // inserting indexes of the offspring island_indexes.at(island).resize(delta*2); iota( - island_indexes.at(island).begin() + p, island_indexes.at(island).end(), + island_indexes.at(island).begin() + delta, island_indexes.at(island).end(), p+idx_start); // Im keeping the offspring and parents in the same population object, because we @@ -92,12 +91,12 @@ void Population::update(vector> survivors) // update will set the complexities (for migration step. we do it here because update handles non-thread safe operations) new_pop.at(i)->set_complexity(); - ++i; + ++i; // this will fill just half of the pop } // need to make island point to original range - size_t idx_start = std::floor(j*size/n_islands); - size_t idx_end = std::floor((j+1)*size/n_islands); + size_t idx_start = std::floor(j*pop_size/n_islands); + size_t idx_end = std::floor((j+1)*pop_size/n_islands); auto delta = idx_end - idx_start; @@ -119,12 +118,14 @@ string Population::print_models(bool just_offspring, string sep) output += "island " + to_string(j) + ":\n"; int start = 0; - if (just_offspring) start = island_indexes.at(j).size()/2; - for (int k=start; kget_model() + sep; + for (int k=start; k& ind = *individuals.at(island_indexes.at(j).at(k)).get(); + output += ind.get_model() + sep; + } } return output; } @@ -138,12 +139,12 @@ vector> Population::sorted_front(unsigned rank) vector> pf_islands; pf_islands.resize(n_islands); - for (int i=0; i pf; - for (unsigned int& i : idxs) + for (unsigned int i : idxs) { // this assumes that rank was previously calculated. It is set in selection (ie nsga2) if the information is useful to select/survive if (individuals.at(i)->rank == rank) @@ -153,7 +154,7 @@ vector> Population::sorted_front(unsigned rank) auto it = std::unique(pf.begin(),pf.end(),SameFitComplexity(*this)); pf.resize(std::distance(pf.begin(),it)); - pf_islands.at(i) = pf; + pf_islands.at(j) = pf; } return pf_islands; @@ -226,7 +227,7 @@ void Population::migrate() island_fronts.at(other_island).end()); } - island_indexes.at(i) = migrating_idx; + island_indexes.at(island).at(i) = migrating_idx; } } } diff --git a/src/population.h b/src/population.h index 612f2256..545a284a 100644 --- a/src/population.h +++ b/src/population.h @@ -71,7 +71,7 @@ class Population{ SortComplexity(Population& p): pop(p){} bool operator()(size_t i, size_t j) { - return pop.individuals[i].get_complexity() < pop.individuals[j].get_complexity(); + return pop[i].get_complexity() < pop[j].get_complexity(); } }; @@ -82,8 +82,8 @@ class Population{ SameFitComplexity(Population& p): pop(p){} bool operator()(size_t i, size_t j) { - return (pop.individuals[i].fitness == pop.individuals[j].fitness - && pop.individuals[i].get_complexity() == pop.individuals[j].get_complexity()); + return (pop[i].fitness == pop[j].fitness + && pop[i].get_complexity() == pop[j].get_complexity()); } }; }; diff --git a/src/selection/nsga2.cpp b/src/selection/nsga2.cpp index 6de51a4f..1c59cf8e 100644 --- a/src/selection/nsga2.cpp +++ b/src/selection/nsga2.cpp @@ -67,6 +67,12 @@ template vector NSGA2::survive(Population& pop, int island, const Parameters& params, const Dataset& d) { + + size_t idx_start = std::floor(island*pop.size()/pop.n_islands); + size_t idx_end = std::floor((island+1)*pop.size()/pop.n_islands); + + auto original_size = idx_end - idx_start; // island size + auto island_pool = pop.get_island_indexes(island); // set objectives (this is when the obj vector is updated.) @@ -83,7 +89,7 @@ vector NSGA2::survive(Population& pop, int island, // Push back selected individuals until full vector selected(0); int i = 0; - while ( selected.size() + front.at(i).size() < island_pool.size()/2 ) // (size/2) because we want to get to the original size (prepare_offspring_slots doubled it before survival operation) + while ( selected.size() + front.at(i).size() < original_size ) // (size/2) because we want to get to the original size (prepare_offspring_slots doubled it before survival operation) { fmt::print("-- crawd dist\n"); std::vector& Fi = front.at(i); // indices in front i @@ -100,7 +106,7 @@ vector NSGA2::survive(Population& pop, int island, std::sort(front.at(i).begin(),front.at(i).end(),sort_n(pop)); fmt::print("adding last front)\n"); - const int extra = island_pool.size()/2 - selected.size(); + const int extra = original_size - selected.size(); for (int j = 0; j < extra; ++j) // Pt+1 = Pt+1 U Fi[1:N-|Pt+1|] selected.push_back(front.at(i).at(j)); diff --git a/src/variation.cpp b/src/variation.cpp index 50128194..8b0a7c99 100644 --- a/src/variation.cpp +++ b/src/variation.cpp @@ -568,34 +568,17 @@ std::optional> Variation::mutate(const Program& parent) } template -void Variation::vary(Population& pop, tuple island_range, +void Variation::vary(Population& pop, int island, const vector& parents) -{ - /*! - * performs variation on the current population. - * - * @param pop: current population - * @param parents: indices of population to use for variation - * @param params: feat parameters - * - * @return appends params.pop_size offspring derived from parent variation - */ - - assert(pop.offspring_ready - && ("Population does not have slots for generating the offspring. " - +"You should `prep_offspring_slots`. `vary` will add new xmen individuals " - +"starting from the middle of the island")); - - // parents should be within island range. TODO: assert that they are - - auto [idx_start, idx_end] = island_range; - size_t delta = idx_end - idx_start; - - idx_start = idx_start + (delta/2); +{ + auto idxs = pop.get_island_indexes(island); + + // assumes it should save new individuals in second half of the island + int start = idxs.size()/2; // TODO: fix pragma omp usage //#pragma omp parallel for - for (unsigned i = idx_start; i> opt=std::nullopt; // new individual @@ -625,7 +608,7 @@ void Variation::vary(Population& pop, tuple island_range, Program child = opt.value(); assert(child.size()>0); - pop.individuals.at(i) = Individual(child); + pop.individuals.at(idxs.at(i)) = std::make_shared>(child); } } } diff --git a/src/variation.h b/src/variation.h index 8fc34c00..564066fc 100644 --- a/src/variation.h +++ b/src/variation.h @@ -124,8 +124,7 @@ class Variation std::optional> mutate(const Program& parent); /// method to handle variation of population - void vary(Population& pop, tuple island_range, - const vector& parents); + void vary(Population& pop, int island, const vector& parents); }; } //namespace Var diff --git a/tests/cpp/test_population.cpp b/tests/cpp/test_population.cpp index 0da60113..819bac09 100644 --- a/tests/cpp/test_population.cpp +++ b/tests/cpp/test_population.cpp @@ -30,6 +30,7 @@ TEST(Population, PopulationTests) SS.init(data); Parameters params; + params.pop_size = 20; // small pop just for tests Population pop = Population(); // aux classes (they are not tested in-depth in this file) @@ -64,61 +65,66 @@ TEST(Population, PopulationTests) // print models fmt::print("Printing from population method:\n"); - fmt::print("{}\n",pop.print_models()); + fmt::print("{}\n",pop.print_models()); // may yeld seg fault if string is too large for buffer // island sizes increases and comes back to the same values after update - fmt::print("Performing all steps of an evolution\n"); + fmt::print("Performing all steps of an evolution (sequential, not parallel)\n"); for (int i=0; i<10; ++i) // update and prep offspring slots works properly - { // wax on wax off - - // fmt::print("Evaluating population\n"); - // vector> survivors(pop.n_islands); - // vector> island_parents(pop.n_islands); - - // for (int j=0; j(pop.get_island_range(j)), - // std::get<1>(pop.get_island_range(j)) ); - - // fmt::print("Fitness\n"); - // // we can calculate the fitness for each island - // evaluator.fitness(pop, pop.get_island_range(j), data, params, true, false); - - // fmt::print("Selection\n"); - // // just so we can call the update method - // vector parents = selector.select(pop, pop.get_island_range(j), params, data); - - // ASSERT_TRUE(parents.size() > 0); - // fmt::print("Updating parents\n"); - // island_parents.at(j) = parents; + { + vector> survivors(pop.n_islands); - // fmt::print("Preparing offspring\n"); - // pop.prep_offspring_slots(j); + fmt::print("Evolution step {}\n", i); + for (int j=0; j parents = selector.select(pop, j, params, data); + ASSERT_TRUE(parents.size() > 0); - // survivors.at(j) = island_survivors; - // } - - // fmt::print("Updating and migrating\n"); - - // // TODO: UPDATE SHOULD SORT SURVIVOR LIST AND REMOVE REPEATED VALUES - // pop.update(survivors); - // ASSERT_TRUE(pop.size() == params.pop_size); + fmt::print("Preparing offspring\n"); + pop.prep_offspring_slots(j); - // pop.migrate(); - // ASSERT_TRUE(pop.size() == params.pop_size); + // variation applied to population + fmt::print("Variations for island {}\n", j); + variator.vary(pop, j, parents); - // fmt::print("Printing generation {} population:\n{}\n", i, pop.print_models()); + fmt::print("fitting {}\n", j); + evaluator.fitness(pop, j, data, params, true, true); + + fmt::print("survivors\n", j); + auto island_survivors = survivor.survive(pop, j, params, data); + survivors.at(j) = island_survivors; + } + + fmt::print("Updating and migrating\n"); + pop.update(survivors); + pop.migrate(); + + // TODO: print islands + fmt::print("Printing generation {} population:\n", i); + for (int i=0; i