# experiments.py
import numpy as np
from joblib import Parallel, delayed
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, Matern

from utilities import sample_rkhs_func_from_kernels, check_bounds_on_grid
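

# For readability, the following function spells out the semantics that
# 'run_learning_instance_experiment' below assumes for
# utilities.check_bounds_on_grid: one 0/1 flag per candidate bound, with 1 if
# the bound is violated anywhere on the evaluation grid. This is an
# illustrative sketch reconstructed from that function's docstring, not the
# actual utilities implementation, which may differ in detail.
def _check_bounds_on_grid_sketch(ys, upper_bounds, lower_bounds):
    # Assumed shapes: ys is (n_grid,); upper_bounds and lower_bounds are
    # (n_bounds, n_grid), one row per candidate scaling.
    violated = (ys.reshape([1, -1]) > upper_bounds) | (ys.reshape([1, -1]) < lower_bounds)
    return violated.any(axis=1).astype(int)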


def run_learning_instance_experiment(xs, ys, config):
    """Run one learning instance on a given target function.

    A learning instance consists of generating a training dataset (details
    specified by the 'dataset_generation' entry of config), training with
    Gaussian process regression (settings specified by the 'training' entry
    of config), and testing the uniform validity of various bounds. The bounds
    are assumed to be of tube form, with widths given by the posterior standard
    deviation scaled by the values produced by the function in the
    'scalings_generator' entry of config.

    Arguments
        xs      2d numpy array of shape (n, 1) containing the inputs for evaluation
        ys      1d numpy array containing the target function evaluated on xs
        config  Dictionary containing the settings for this experimental instance

    Returns
        A tuple (scalings, failures). scalings is the 1d numpy array of widths
        produced by the scalings generator; failures is a 1d numpy array of the
        same length, with entry 0 if the corresponding bound holds on the whole
        evaluation grid and 1 if it is violated at least once.
    """
    # Generate training data
    n_samples = config['dataset_generation']['n_samples']
    dataset_generator = config['dataset_generation']['dataset_generator']
    xs_train, ys_train = dataset_generator(xs, ys, n_samples)
    # Learn with GPR; hyperparameters are kept fixed (optimizer=None)
    kernel = config['training']['kernel']
    gpr = GaussianProcessRegressor(
        kernel=kernel,
        alpha=config['training']['noise_level_train'],
        optimizer=None).fit(xs_train, ys_train)
    post_mean, post_sd = gpr.predict(xs, return_std=True)
    # Kernel matrix of the training inputs, from which the scalings are generated
    K = kernel(xs_train)
    # Generate the scalings to be tested
    scalings = config['scalings_generator'](K)
    # Check the tube bounds: row i is post_mean +/- scalings[i] * post_sd
    upper_bounds = post_mean.reshape([1, -1]) + scalings.reshape([-1, 1]) * post_sd.reshape([1, -1])
    lower_bounds = post_mean.reshape([1, -1]) - scalings.reshape([-1, 1]) * post_sd.reshape([1, -1])
    return (scalings, check_bounds_on_grid(ys, upper_bounds, lower_bounds))
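

# A minimal sketch of how 'run_learning_instance_experiment' might be invoked.
# The dataset generator, kernel settings, and constant candidate scalings
# below are illustrative assumptions; the actual experiments presumably
# configure these via the helpers in utilities.py (e.g. the aposteriori_*
# scalings generators imported there).
def _example_learning_instance(seed=0):
    rng = np.random.default_rng(seed)

    def uniform_dataset_generator(xs, ys, n_samples):
        # Pick training inputs uniformly from the evaluation grid and add
        # i.i.d. Gaussian observation noise.
        idx = rng.choice(len(xs), size=n_samples, replace=False)
        return xs[idx], ys[idx] + 0.01 * rng.standard_normal(n_samples)

    xs = np.linspace(0.0, 1.0, 200).reshape([-1, 1])
    ys = np.sin(10.0 * xs).ravel()  # stand-in target; the experiments use RKHS samples
    config = {
        'dataset_generation': {
            'n_samples': 30,
            'dataset_generator': uniform_dataset_generator,
        },
        'training': {
            'kernel': RBF(length_scale=0.1),
            'noise_level_train': 1e-4,
        },
        # Constant candidate scalings, for illustration only
        'scalings_generator': lambda K: np.array([1.0, 2.0, 3.0]),
    }
    return run_learning_instance_experiment(xs, ys, config)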


def run_function_instance(config, func_id):
    """Sample one target function and run repeated learning instances on it.

    The target function is drawn as an RKHS function according to the
    'target_function' entry of config. The learning instances are run in
    parallel, and the sampled function, the tested scalings, and the observed
    bound failures are saved to an .npz file in the outputs/ directory.
    """
    # Generate the target function
    func_config = config['target_function']
    xs = func_config['xs'].reshape([-1, 1])
    kernel = func_config['kernel']
    rkhs_norm = func_config['rkhs_norm']
    n_kernels = func_config['n_kernels']
    ys = sample_rkhs_func_from_kernels(xs, rkhs_norm, n_kernels, kernel)
    # Run the learning instances in parallel
    instance_config = {
        'dataset_generation': config['dataset_generation'],
        'training': config['training'],
        'scalings_generator': config['scalings_generator']
    }
    results = Parallel(n_jobs=config['n_jobs'])(
        delayed(run_learning_instance_experiment)(xs, ys, instance_config)
        for _ in range(config['n_rep_training']))
    scalings_array = np.vstack([result[0] for result in results])
    failures_array = np.vstack([result[1] for result in results])
    # Save the results
    out_path = f"outputs/{config['experiment_prefix']}_func_{func_id}.npz"
    with open(out_path, 'wb') as f:
        np.savez(f, xs=xs, ys=ys, scalings=scalings_array, failures=failures_array)
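

# A hedged end-to-end sketch of how 'run_function_instance' might be driven.
# All concrete values below (grid, kernels, RKHS norm, sample sizes, number of
# repetitions, prefix) are illustrative assumptions rather than the settings
# of the original experiments, and the dataset generator and constant scalings
# stand in for the utilities helpers.
if __name__ == '__main__':
    import os
    os.makedirs('outputs', exist_ok=True)  # run_function_instance writes here
    rng = np.random.default_rng(0)

    def example_dataset_generator(xs, ys, n_samples):
        # Uniformly sampled training inputs with Gaussian observation noise
        idx = rng.choice(len(xs), size=n_samples, replace=False)
        return xs[idx], np.asarray(ys).ravel()[idx] + 0.01 * rng.standard_normal(n_samples)

    example_config = {
        'target_function': {
            'xs': np.linspace(0.0, 1.0, 200),
            'kernel': Matern(length_scale=0.2, nu=1.5),
            'rkhs_norm': 1.0,
            'n_kernels': 10,
        },
        'dataset_generation': {
            'n_samples': 30,
            'dataset_generator': example_dataset_generator,
        },
        'training': {
            'kernel': Matern(length_scale=0.2, nu=1.5),
            'noise_level_train': 1e-4,
        },
        'scalings_generator': lambda K: np.array([1.0, 2.0, 3.0]),
        'n_jobs': 1,  # sequential here; the experiments can use more workers
        'n_rep_training': 4,
        'experiment_prefix': 'example',
    }
    for func_id in range(2):
        run_function_instance(example_config, func_id)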