Skip to content

Commit

Permalink
Merge pull request #53 from cavalab/mutation_trace
Browse files Browse the repository at this point in the history
Getting newest features into islands
  • Loading branch information
gAldeia authored Nov 3, 2023
2 parents 7383776 + 69cc07e commit a984c20
Show file tree
Hide file tree
Showing 15 changed files with 407 additions and 156 deletions.
1 change: 1 addition & 0 deletions src/bindings/bind_programs.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ void bind_program(py::module& m, string name)
.def("get_dot_model", &T::get_dot_model, py::arg("extras")="")
.def("get_weights", &T::get_weights)
.def("size", &T::size, py::arg("include_weight")=true)
.def("complexity", &T::complexity)
.def("depth", &T::depth)
.def("cross", &T::cross, py::return_value_policy::automatic,
"Performs one attempt to stochastically swap subtrees between two programs and generate a child")
Expand Down
18 changes: 13 additions & 5 deletions src/bindings/bind_search_space.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,20 @@ void bind_search_space(py::module &m)
// constructing it with a Dataset object, rather than initializing it as an
// empty struct and then calling init() with the Dataset object.
py::class_<br::SearchSpace>(m, "SearchSpace")
.def(py::init([](br::Data::Dataset data)
{
.def(py::init([](br::Data::Dataset data, bool weights_init=true){
SearchSpace SS;
SS.init(data);
return SS; }))
.def(py::init<const Dataset&, const unordered_map<string,float>&>())
SS.init(data, {}, weights_init);
return SS;
}),
py::arg("data"),
py::arg("weights_init") = true
)
.def(py::init<const Dataset&, const unordered_map<string,float>&,
bool>(),
py::arg("data"),
py::arg("user_ops"),
py::arg("weights_init") = true
)
.def("make_regressor", &br::SearchSpace::make_regressor)
.def("make_classifier", &br::SearchSpace::make_classifier)
.def("make_multiclass_classifier", &br::SearchSpace::make_multiclass_classifier)
Expand Down
46 changes: 23 additions & 23 deletions src/brush/deap_api/nsga2.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from deap import tools
from deap.benchmarks.tools import diversity, convergence, hypervolume
from deap.benchmarks.tools import hypervolume
import numpy as np
import functools

Expand All @@ -18,18 +18,18 @@ def calculate_statistics(ind):

stats = tools.Statistics(calculate_statistics)

stats.register("avg", np.mean, axis=0)
stats.register("med", np.median, axis=0)
stats.register("std", np.std, axis=0)
stats.register("min", np.min, axis=0)
stats.register("max", np.max, axis=0)
stats.register("avg", np.nanmean, axis=0)
stats.register("med", np.nanmedian, axis=0)
stats.register("std", np.nanstd, axis=0)
stats.register("min", np.nanmin, axis=0)
stats.register("max", np.nanmax, axis=0)

logbook = tools.Logbook()
logbook.header = "gen", "evals", "avg (O1 train, O2 train, O1 val, O2 val)", \
"med (O1 train, O2 train, O1 val, O2 val)", \
"std (O1 train, O2 train, O1 val, O2 val)", \
"min (O1 train, O2 train, O1 val, O2 val)", \
"max (O1 train, O2 train, O1 val, O2 val)"
logbook.header = ['gen', 'evals'] + \
[f"{stat} {partition} O{objective}"
for stat in ['avg', 'med', 'std', 'min', 'max']
for partition in ['train', 'val']
for objective in toolbox.get_objectives()]

pop = toolbox.population(n=MU)

Expand Down Expand Up @@ -68,7 +68,6 @@ def calculate_statistics(ind):
parents = toolbox.select(pop, len(pop))
# offspring = [toolbox.clone(ind) for ind in offspring]
offspring = []

for ind1, ind2 in zip(parents[::2], parents[1::2]):
off1, off2 = None, None
if rnd_flt() < CXPB: # either mutation or crossover
Expand All @@ -77,17 +76,18 @@ def calculate_statistics(ind):
off1 = toolbox.mutate(ind1)
off2 = toolbox.mutate(ind2)

# Inserting parent if mutation failed
offspring.extend([off1 if off1 is not None else toolbox.Clone(ind1)])
offspring.extend([off2 if off2 is not None else toolbox.Clone(ind2)])

# Evaluate (instead of evaluateValidation) to fit the weights of the offspring
fitnesses = toolbox.map(functools.partial(toolbox.evaluate), offspring)
if (use_batch): #calculating objectives based on batch
fitnesses = toolbox.map(functools.partial(toolbox.evaluateValidation, data=batch), offspring)

for ind, fit in zip(offspring, fitnesses):
ind.fitness.values = fit
if off1 is not None: # Mutation worked. first we fit, then add to offspring
# Evaluate (instead of evaluateValidation) to fit the weights of the offspring
off1.fitness.values = toolbox.evaluate(off1)
if use_batch: # Adjust fitness to the same data as parents
off1.fitness.values = toolbox.evaluateValidation(off1, data=batch)
offspring.extend([off1])

if off2 is not None:
off2.fitness.values = toolbox.evaluate(off2)
if use_batch:
off2.fitness.values = toolbox.evaluateValidation(off2, data=batch)
offspring.extend([off2])

# Select the next generation population (no sorting before this step, as
# survive==offspring will cut it in half)
Expand Down
46 changes: 33 additions & 13 deletions src/brush/deap_api/nsga2island.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@ def calculate_statistics(ind):
stats.register("max", np.max, axis=0)

logbook = tools.Logbook()
logbook.header = "gen", "evals", "avg (O1 train, O2 train, O1 val, O2 val)", \
"med (O1 train, O2 train, O1 val, O2 val)", \
"std (O1 train, O2 train, O1 val, O2 val)", \
"min (O1 train, O2 train, O1 val, O2 val)", \
"max (O1 train, O2 train, O1 val, O2 val)"
logbook.header = ['gen', 'evals'] + \
[f"{stat} {partition} O{objective}"
for stat in ['avg', 'med', 'std', 'min', 'max']
for partition in ['train', 'val']
for objective in toolbox.get_objectives()]

# Tuples with start and end indexes for each island. Number of individuals
# in each island can slightly differ if N_ISLANDS is not a divisor of MU
Expand Down Expand Up @@ -81,7 +81,9 @@ def calculate_statistics(ind):
parents.extend(island_parents)

offspring = [] # Will have the same size as pop
island_failed_variations = []
for (idx_start, idx_end) in island_indexes:
failed_variations = 0
for ind1, ind2 in zip(parents[idx_start:idx_end:2],
parents[idx_start+1:idx_end:2]
):
Expand All @@ -92,9 +94,22 @@ def calculate_statistics(ind):
off1 = toolbox.mutate(ind1)
off2 = toolbox.mutate(ind2)

# Inserting parent if mutation failed
offspring.extend([off1 if off1 is not None else toolbox.Clone(ind1)])
offspring.extend([off2 if off2 is not None else toolbox.Clone(ind2)])
if off1 is not None:
off1.fitness.values = toolbox.evaluate(off1)
if use_batch:
off1.fitness.values = toolbox.evaluateValidation(off1, data=batch)
offspring.extend([off1])
else:
failed_variations += 1

if off2 is not None:
off2.fitness.values = toolbox.evaluate(off2)
if use_batch:
off2.fitness.values = toolbox.evaluateValidation(off2, data=batch)
offspring.extend([off2])
else:
failed_variations += 1
island_failed_variations.append(failed_variations)

# Evaluate (instead of evaluateValidation) to fit the weights of the offspring
fitnesses = toolbox.map(functools.partial(toolbox.evaluate), offspring)
Expand All @@ -107,10 +122,15 @@ def calculate_statistics(ind):

# Select the next generation population
new_pop = []
for (idx_start, idx_end) in island_indexes:
island_new_pop = toolbox.survive(pop[idx_start:idx_end] \
+offspring[idx_start:idx_end],
idx_end-idx_start)
for i, (idx_start, idx_end) in enumerate(island_indexes):
# original population combined with offspring, taking into account that variations can fail
island_new_pop = toolbox.survive(
pop[idx_start:idx_end] \
+ offspring[
idx_start-sum(island_failed_variations[:i]):idx_end+island_failed_variations[i]
],
idx_end-idx_start # number of selected individuals should still be the same
)
new_pop.extend(island_new_pop)

# Migration to fill up the islands for the next generation
Expand All @@ -137,4 +157,4 @@ def calculate_statistics(ind):
archive = tools.ParetoFront()
archive.update(pop)

return archive, logbook
return archive, logbook
Loading

0 comments on commit a984c20

Please sign in to comment.