# experiments.py
import numpy as np
from joblib import Parallel, delayed
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, Matern

from utilities import sample_rkhs_func_from_kernels, check_bounds_on_grid
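

# For readability, the following function spells out the semantics that
# 'run_learning_instance_experiment' below assumes for
# utilities.check_bounds_on_grid: one 0/1 flag per candidate bound, with 1 if
# the bound is violated anywhere on the evaluation grid. This is an
# illustrative sketch reconstructed from that function's docstring, not the
# actual utilities implementation, which may differ in detail.
def _check_bounds_on_grid_sketch(ys, upper_bounds, lower_bounds):
    # Assumed shapes: ys is (n_grid,); upper_bounds and lower_bounds are
    # (n_bounds, n_grid), one row per candidate scaling.
    violated = (ys.reshape([1, -1]) > upper_bounds) | (ys.reshape([1, -1]) < lower_bounds)
    return violated.any(axis=1).astype(int)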


def run_learning_instance_experiment(xs, ys, config):
    """Run one learning instance on a given target function.

    A learning instance consists of generating a training dataset (details
    specified by the 'dataset_generation' entry of config), training with
    Gaussian process regression (settings specified by the 'training' entry
    of config), and testing the uniform validity of various bounds. The bounds
    are assumed to be of tube form, with widths given by the posterior standard
    deviation scaled by the values produced by the function in the
    'scalings_generator' entry of config.

    Arguments
        xs      2d numpy array of shape (n, 1) containing the inputs for evaluation
        ys      1d numpy array containing the target function evaluated on xs
        config  Dictionary containing the settings for this experimental instance

    Returns
        A tuple (scalings, failures). scalings is the 1d numpy array of widths
        produced by the scalings generator; failures is a 1d numpy array of the
        same length, with entry 0 if the corresponding bound holds on the whole
        evaluation grid and 1 if it is violated at least once.
    """
    # Generate training data
    n_samples = config['dataset_generation']['n_samples']
    dataset_generator = config['dataset_generation']['dataset_generator']
    xs_train, ys_train = dataset_generator(xs, ys, n_samples)
    # Learn with GPR; hyperparameters are kept fixed (optimizer=None)
    kernel = config['training']['kernel']
    gpr = GaussianProcessRegressor(
        kernel=kernel,
        alpha=config['training']['noise_level_train'],
        optimizer=None).fit(xs_train, ys_train)
    post_mean, post_sd = gpr.predict(xs, return_std=True)
    # Kernel matrix of the training inputs, from which the scalings are generated
    K = kernel(xs_train)
    # Generate the scalings to be tested
    scalings = config['scalings_generator'](K)
    # Check the tube bounds: row i is post_mean +/- scalings[i] * post_sd
    upper_bounds = post_mean.reshape([1, -1]) + scalings.reshape([-1, 1]) * post_sd.reshape([1, -1])
    lower_bounds = post_mean.reshape([1, -1]) - scalings.reshape([-1, 1]) * post_sd.reshape([1, -1])
    return (scalings, check_bounds_on_grid(ys, upper_bounds, lower_bounds))
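

# A minimal sketch of how 'run_learning_instance_experiment' might be invoked.
# The dataset generator, kernel settings, and constant candidate scalings
# below are illustrative assumptions; the actual experiments presumably
# configure these via the helpers in utilities.py (e.g. the aposteriori_*
# scalings generators imported there).
def _example_learning_instance(seed=0):
    rng = np.random.default_rng(seed)

    def uniform_dataset_generator(xs, ys, n_samples):
        # Pick training inputs uniformly from the evaluation grid and add
        # i.i.d. Gaussian observation noise.
        idx = rng.choice(len(xs), size=n_samples, replace=False)
        return xs[idx], ys[idx] + 0.01 * rng.standard_normal(n_samples)

    xs = np.linspace(0.0, 1.0, 200).reshape([-1, 1])
    ys = np.sin(10.0 * xs).ravel()  # stand-in target; the experiments use RKHS samples
    config = {
        'dataset_generation': {
            'n_samples': 30,
            'dataset_generator': uniform_dataset_generator,
        },
        'training': {
            'kernel': RBF(length_scale=0.1),
            'noise_level_train': 1e-4,
        },
        # Constant candidate scalings, for illustration only
        'scalings_generator': lambda K: np.array([1.0, 2.0, 3.0]),
    }
    return run_learning_instance_experiment(xs, ys, config)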


def run_function_instance(config, func_id):
    """Sample one target function and run repeated learning instances on it.

    The target function is drawn as an RKHS function according to the
    'target_function' entry of config. The learning instances are run in
    parallel, and the sampled function, the tested scalings, and the observed
    bound failures are saved to an .npz file in the outputs/ directory.
    """
    # Generate the target function
    func_config = config['target_function']
    xs = func_config['xs'].reshape([-1, 1])
    kernel = func_config['kernel']
    rkhs_norm = func_config['rkhs_norm']
    n_kernels = func_config['n_kernels']
    ys = sample_rkhs_func_from_kernels(xs, rkhs_norm, n_kernels, kernel)
    # Run the learning instances in parallel
    instance_config = {
        'dataset_generation': config['dataset_generation'],
        'training': config['training'],
        'scalings_generator': config['scalings_generator']
    }
    results = Parallel(n_jobs=config['n_jobs'])(
        delayed(run_learning_instance_experiment)(xs, ys, instance_config)
        for _ in range(config['n_rep_training']))
    scalings_array = np.vstack([result[0] for result in results])
    failures_array = np.vstack([result[1] for result in results])
    # Save the results
    out_path = f"outputs/{config['experiment_prefix']}_func_{func_id}.npz"
    with open(out_path, 'wb') as f:
        np.savez(f, xs=xs, ys=ys, scalings=scalings_array, failures=failures_array)
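

# A hedged end-to-end sketch of how 'run_function_instance' might be driven.
# All concrete values below (grid, kernels, RKHS norm, sample sizes, number of
# repetitions, prefix) are illustrative assumptions rather than the settings
# of the original experiments, and the dataset generator and constant scalings
# stand in for the utilities helpers.
if __name__ == '__main__':
    import os
    os.makedirs('outputs', exist_ok=True)  # run_function_instance writes here
    rng = np.random.default_rng(0)

    def example_dataset_generator(xs, ys, n_samples):
        # Uniformly sampled training inputs with Gaussian observation noise
        idx = rng.choice(len(xs), size=n_samples, replace=False)
        return xs[idx], np.asarray(ys).ravel()[idx] + 0.01 * rng.standard_normal(n_samples)

    example_config = {
        'target_function': {
            'xs': np.linspace(0.0, 1.0, 200),
            'kernel': Matern(length_scale=0.2, nu=1.5),
            'rkhs_norm': 1.0,
            'n_kernels': 10,
        },
        'dataset_generation': {
            'n_samples': 30,
            'dataset_generator': example_dataset_generator,
        },
        'training': {
            'kernel': Matern(length_scale=0.2, nu=1.5),
            'noise_level_train': 1e-4,
        },
        'scalings_generator': lambda K: np.array([1.0, 2.0, 3.0]),
        'n_jobs': 1,  # sequential here; the experiments can use more workers
        'n_rep_training': 4,
        'experiment_prefix': 'example',
    }
    for func_id in range(2):
        run_function_instance(example_config, func_id)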