
Commit 73ea9dc

using warning that can be filtered, adding linear correction option to agraphs, adding negative nmll explicit regression option
gbomarito committed Sep 18, 2024
1 parent bb61b56 commit 73ea9dc
Showing 4 changed files with 137 additions and 62 deletions.
24 changes: 20 additions & 4 deletions bingo/evaluation/fitness_function.py
@@ -3,27 +3,39 @@
This module defines the basis of fitness evaluation in bingo evolutionary
analyses.
"""
+
from abc import ABCMeta, abstractmethod

import numpy as np


# Fitness metric functions, outside of FitnessFunction for use in GradientMixin
-def mean_absolute_error(vector):
+def mean_absolute_error(vector, individual=None):
    """Calculate the mean absolute error of an error vector"""
    return np.mean(np.abs(vector))


-def root_mean_squared_error(vector):
+def root_mean_squared_error(vector, individual=None):
    """Calculate the root mean squared error of an error vector"""
    return np.sqrt(np.mean(np.square(vector)))


-def mean_squared_error(vector):
+def mean_squared_error(vector, individual=None):
    """Calculate the mean squared error of an error vector"""
    return np.mean(np.square(vector))


+def negative_nmll_laplace(vector, individual):
+    """Calculate the negative nmll (Laplace approximation) of an error vector"""
+    n = len(vector)
+    k = individual.get_number_local_optimization_params() + 1
+    b = 1 / np.sqrt(n)
+    mse = np.mean(np.square(vector))
+    log_like = -n / 2 * np.log(mse) - n / 2 - n / 2 * np.log(2 * np.pi)
+    nmll_laplace = (1 - b) * log_like + np.log(b) / 2 * k
+    return -nmll_laplace


class FitnessFunction(metaclass=ABCMeta):
    """Fitness evaluation metric for individuals.
@@ -42,6 +54,7 @@ class FitnessFunction(metaclass=ABCMeta):
    training_data : TrainingData
        (Optional) data that can be used in fitness evaluation
    """
+
    def __init__(self, training_data=None):
        self.eval_count = 0
        self.training_data = training_data
@@ -80,6 +93,7 @@ class VectorBasedFunction(FitnessFunction, metaclass=ABCMeta):
        'mean absolute error'/'mae', 'mean squared error'/'mse', and
        'root mean squared error'/'rmse'
    """
+
    def __init__(self, training_data=None, metric="mae"):
        super().__init__(training_data)

@@ -89,6 +103,8 @@ def __init__(self, training_data=None, metric="mae"):
            self._metric = mean_squared_error
        elif metric in ["root mean squared error", "rmse"]:
            self._metric = root_mean_squared_error
+        elif metric in ["negative nmll laplace"]:
+            self._metric = negative_nmll_laplace
        else:
            raise ValueError("Invalid metric for Fitness Function")

@@ -110,7 +126,7 @@ def __call__(self, individual):
            fitness of the individual
        """
        fitness_vector = self.evaluate_fitness_vector(individual)
-        return self._metric(fitness_vector)
+        return self._metric(fitness_vector, individual)

    @abstractmethod
    def evaluate_fitness_vector(self, individual):
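
Because __call__ now forwards the individual to whichever metric was selected, the new option is available to any VectorBasedFunction subclass purely by name (note that the class docstring's list of accepted metrics has not been extended to mention it). A minimal usage sketch, assuming a hypothetical subclass whose fitness vector is the model residuals on some training data:

from types import SimpleNamespace

import numpy as np

from bingo.evaluation.fitness_function import VectorBasedFunction


class ResidualFunction(VectorBasedFunction):
    """Hypothetical subclass: the fitness vector is the model residuals."""

    def evaluate_fitness_vector(self, individual):
        y_pred = individual.evaluate_equation_at(self.training_data.x)
        return (y_pred - self.training_data.y).flatten()


data = SimpleNamespace(x=np.linspace(0, 1, 20).reshape(-1, 1),
                       y=np.linspace(0, 1, 20).reshape(-1, 1))
fitness = ResidualFunction(training_data=data, metric="negative nmll laplace")
# fitness(equation) now returns -nmll_laplace instead of a plain error norm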
49 changes: 37 additions & 12 deletions bingo/evaluation/gradient_mixin.py
@@ -4,11 +4,16 @@
This module defines the basis of gradient and jacobian partial derivatives
of fitness functions used in bingo evolutionary analyses.
"""
+
from abc import ABCMeta, abstractmethod
import numpy as np

-from .fitness_function \
-    import mean_absolute_error, mean_squared_error, root_mean_squared_error
+from .fitness_function import (
+    mean_absolute_error,
+    mean_squared_error,
+    root_mean_squared_error,
+    negative_nmll_laplace,
+)


class GradientMixin(metaclass=ABCMeta):
@@ -17,6 +22,7 @@ class GradientMixin(metaclass=ABCMeta):
    An abstract base class/mixin used to implement the gradients
    of fitness functions.
    """
+
    @abstractmethod
    def get_fitness_and_gradient(self, individual):
        """Fitness function evaluation and gradient
@@ -53,21 +59,28 @@ class VectorGradientMixin(GradientMixin):
        'mean absolute error', 'mean squared error', and
        'root mean squared error'
    """
+
    def __init__(self, training_data=None, metric="mae"):
        super().__init__(training_data, metric)

        if metric in ["mean absolute error", "mae"]:
            self._metric = mean_absolute_error
-            self._metric_derivative = \
-                VectorGradientMixin._mean_absolute_error_derivative
+            self._metric_derivative = (
+                VectorGradientMixin._mean_absolute_error_derivative
+            )
        elif metric in ["mean squared error", "mse"]:
            self._metric = mean_squared_error
-            self._metric_derivative = \
-                VectorGradientMixin._mean_squared_error_derivative
+            self._metric_derivative = VectorGradientMixin._mean_squared_error_derivative
        elif metric in ["root mean squared error", "rmse"]:
            self._metric = root_mean_squared_error
-            self._metric_derivative = \
-                VectorGradientMixin._root_mean_squared_error_derivative
+            self._metric_derivative = (
+                VectorGradientMixin._root_mean_squared_error_derivative
+            )
+        elif metric in ["negative nmll laplace"]:
+            self._metric = negative_nmll_laplace
+            self._metric_derivative = (
+                VectorGradientMixin._negative_nmll_laplace_derivative
+            )
        else:
            raise ValueError("Invalid metric for vector gradient mixin")

@@ -90,10 +103,10 @@ def get_fitness_and_gradient(self, individual):
            fitness of the individual and the gradient of this function
            with respect to the individual's constants
        """
-        fitness_vector, jacobian = \
-            self.get_fitness_vector_and_jacobian(individual)
-        return self._metric(fitness_vector), \
-            self._metric_derivative(fitness_vector, jacobian.transpose())
+        fitness_vector, jacobian = self.get_fitness_vector_and_jacobian(individual)
+        return self._metric(fitness_vector, individual), self._metric_derivative(
+            fitness_vector, jacobian.transpose()
+        )

    @abstractmethod
    def get_fitness_vector_and_jacobian(self, individual):
@@ -133,5 +146,19 @@ def _mean_squared_error_derivative(fitness_vector, fitness_partials):

    @staticmethod
    def _root_mean_squared_error_derivative(fitness_vector, fitness_partials):
-        return 1/np.sqrt(np.mean(np.square(fitness_vector))) \
-            * np.mean(fitness_vector * fitness_partials, axis=1)
+        return (
+            1
+            / np.sqrt(np.mean(np.square(fitness_vector)))
+            * np.mean(fitness_vector * fitness_partials, axis=1)
+        )
+
+    @staticmethod
+    def _negative_nmll_laplace_derivative(fitness_vector, fitness_partials):
+        n = len(fitness_vector)
+        b = 1 / np.sqrt(n)
+        mse = np.mean(np.square(fitness_vector))
+        # chain rule: d(log_like)/dc = -n/(2*mse) * d(mse)/dc
+        dmse = 2 * np.mean(fitness_vector * fitness_partials, axis=1)
+        dll = -0.5 * n * dmse / mse
+        dnmll = (1 - b) * dll
+        return -dnmll
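
The derivative keeps only the part of the nmll that depends on the optimized constants (the log(b)/2 * k term is constant in c) and applies the chain rule through the mse. A finite-difference sanity check of that chain rule, as a sketch (the _Stub class is hypothetical, and a single constant is assumed):

import numpy as np

from bingo.evaluation.fitness_function import negative_nmll_laplace
from bingo.evaluation.gradient_mixin import VectorGradientMixin


class _Stub:
    """Hypothetical individual exposing only the parameter count."""

    def get_number_local_optimization_params(self):
        return 1


rng = np.random.default_rng(0)
residuals = rng.normal(size=50)        # error vector e(c)
partials = rng.normal(size=(1, 50))    # de/dc for one constant, shape (1, n)

analytic = VectorGradientMixin._negative_nmll_laplace_derivative(residuals, partials)

eps = 1e-7
f0 = negative_nmll_laplace(residuals, _Stub())
f1 = negative_nmll_laplace(residuals + eps * partials[0], _Stub())
print(analytic[0], (f1 - f0) / eps)    # the two values should agree closely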
77 changes: 41 additions & 36 deletions bingo/symbolic_regression/agraph/agraph.py
@@ -47,9 +47,11 @@
15       hyperbolic cosine                       :math:`cosh(p1)`
======== ======================================= =================
"""
+
import logging
import numpy as np
from sympy.core import Expr
+import warnings

from .string_parsing import eq_string_to_command_array_and_constants
from .string_generation import get_formatted_string
@@ -63,6 +65,7 @@
from .evaluation_backend import evaluation_backend
from .simplification_backend import simplification_backend

+
LOGGER = logging.getLogger(__name__)

USING_PYTHON_SIMPLIFICATION = False
@@ -113,6 +116,7 @@ class AGraph(Equation):
    constants : tuple of numeric
        numeric constants that are used in the equation
    """
+
    def __init__(self, use_simplification=False, equation=None):
        super().__init__()

@@ -139,8 +143,9 @@ def _init_command_array_and_const(self, equation):
            self._needs_opt = False
            self._modified = False
        elif isinstance(equation, (Expr, str)):
-            command_array, constants = \
-                eq_string_to_command_array_and_constants(str(equation))
+            command_array, constants = eq_string_to_command_array_and_constants(
+                str(equation)
+            )

            self.set_local_optimization_params(constants)
            if len(constants) > 0:
@@ -195,23 +200,27 @@ def _notify_modification(self):

    def _update(self):
        if self._use_simplification:
-            self._simplified_command_array = \
-                simplification_backend.simplify_stack(self._command_array)
+            self._simplified_command_array = simplification_backend.simplify_stack(
+                self._command_array
+            )
        else:
-            self._simplified_command_array = \
-                simplification_backend.reduce_stack(self._command_array)
+            self._simplified_command_array = simplification_backend.reduce_stack(
+                self._command_array
+            )

        const_commands = self._simplified_command_array[:, 0] == CONSTANT
        num_const = np.count_nonzero(const_commands)
        self._simplified_command_array[const_commands, 1] = np.arange(num_const)
        self._simplified_command_array[const_commands, 2] = np.arange(num_const)

        optimization_aggression = 0
-        if optimization_aggression == 0 \
-                and num_const <= len(self._simplified_constants):
+        if optimization_aggression == 0 and num_const <= len(
+            self._simplified_constants
+        ):
            self._simplified_constants = self._simplified_constants[:num_const]
-        elif optimization_aggression == 1 \
-                and num_const == len(self._simplified_constants):
+        elif optimization_aggression == 1 and num_const == len(
+            self._simplified_constants
+        ):
            self._simplified_constants = self._simplified_constants[:num_const]
        else:
            self._simplified_constants = (1.0,) * num_const
@@ -282,8 +291,7 @@ def get_utilized_commands(self):
        list of bool of length N
            Boolean values for whether each command is utilized.
        """
-        return simplification_backend.get_utilized_commands(
-            self._command_array)
+        return simplification_backend.get_utilized_commands(self._command_array)

    def evaluate_equation_at(self, x):
        """Evaluate the `AGraph` equation.
@@ -304,13 +312,12 @@ def evaluate_equation_at(self, x):
        if self._modified:
            self._update()
        try:
-            f_of_x = \
-                evaluation_backend.evaluate(self._simplified_command_array, x,
-                                            self._simplified_constants)
+            f_of_x = evaluation_backend.evaluate(
+                self._simplified_command_array, x, self._simplified_constants
+            )
            return f_of_x
-        except (ArithmeticError, OverflowError, ValueError,
-                FloatingPointError) as err:
-            LOGGER.warning("%s in stack evaluation", err)
+        except (ArithmeticError, OverflowError, ValueError, FloatingPointError) as err:
+            warnings.warn(f"{err} in stack evaluation")
            return np.full(x.shape, np.nan)

    def evaluate_equation_with_x_gradient_at(self, x):
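
This is the change the commit title refers to: evaluation failures now raise a standard UserWarning instead of writing to the module logger, so callers can tune them with the stdlib warnings machinery. A small usage sketch:

import warnings

# silence the NaN-producing evaluation warnings, e.g. during a long run
warnings.filterwarnings("ignore", message=".*in stack evaluation")

# or escalate them to exceptions while debugging a problematic equation
# warnings.filterwarnings("error", message=".*in stack evaluation")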
@@ -333,12 +340,11 @@ def evaluate_equation_with_x_gradient_at(self, x):
            self._update()
        try:
            f_of_x, df_dx = evaluation_backend.evaluate_with_derivative(
-                self._simplified_command_array, x,
-                self._simplified_constants, True)
+                self._simplified_command_array, x, self._simplified_constants, True
+            )
            return f_of_x, df_dx
-        except (ArithmeticError, OverflowError, ValueError,
-                FloatingPointError) as err:
-            LOGGER.warning("%s in stack evaluation/deriv", err)
+        except (ArithmeticError, OverflowError, ValueError, FloatingPointError) as err:
+            warnings.warn(f"{err} in stack evaluation/deriv")
            nan_array = np.full(x.shape, np.nan)
            return nan_array, np.array(nan_array)

@@ -363,14 +369,12 @@ def evaluate_equation_with_local_opt_gradient_at(self, x):
            self._update()
        try:
            f_of_x, df_dc = evaluation_backend.evaluate_with_derivative(
-                self._simplified_command_array, x,
-                self._simplified_constants, False)
+                self._simplified_command_array, x, self._simplified_constants, False
+            )
            return f_of_x, df_dc
-        except (ArithmeticError, OverflowError, ValueError,
-                FloatingPointError) as err:
-            LOGGER.warning("%s in stack evaluation/const-deriv", err)
-            nan_array = np.full((x.shape[0], len(self._simplified_constants)),
-                                np.nan)
+        except (ArithmeticError, OverflowError, ValueError, FloatingPointError) as err:
+            warnings.warn(f"{err} in stack evaluation/const-deriv")
+            nan_array = np.full((x.shape[0], len(self._simplified_constants)), np.nan)
            return nan_array, np.array(nan_array)

    def __str__(self):
@@ -404,8 +408,9 @@ def get_formatted_string(self, format_, raw=False):
            return get_formatted_string(format_, self._command_array, tuple())
        if self._modified:
            self._update()
-        return get_formatted_string(format_, self._simplified_command_array,
-                                    self._simplified_constants)
+        return get_formatted_string(
+            format_, self._simplified_command_array, self._simplified_constants
+        )

    def get_complexity(self):
        """Calculate complexity of agraph equation.
@@ -448,10 +453,10 @@ def _copy_agraph_values_to_new_graph(self, agraph_duplicate):
        agraph_duplicate._fitness = self._fitness
        agraph_duplicate._fit_set = self._fit_set
        agraph_duplicate._command_array = np.copy(self.command_array)
-        agraph_duplicate._simplified_command_array = \
-            np.copy(self._simplified_command_array)
-        agraph_duplicate._simplified_constants = \
-            tuple(self._simplified_constants)
+        agraph_duplicate._simplified_command_array = np.copy(
+            self._simplified_command_array
+        )
+        agraph_duplicate._simplified_constants = tuple(self._simplified_constants)
        agraph_duplicate._needs_opt = self._needs_opt
        agraph_duplicate._modified = self._modified
        agraph_duplicate._use_simplification = self._use_simplification
