-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathhpo_optimize.py
248 lines (209 loc) · 10.3 KB
/
hpo_optimize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
import subprocess
import json
import argparse
import numpy as np
from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import (CategoricalHyperparameter,
UniformFloatHyperparameter,
UniformIntegerHyperparameter,
NormalFloatHyperparameter)
from ConfigSpace.conditions import InCondition
from dehb import DEHB
from smac.facade.smac_hpo_facade import SMAC4HPO
from smac.scenario.scenario import Scenario
RANDOMSTATES = [42, 5, 725]
# To remove naming conflicts between agents, must be 2 characters long
PREFIXES = {"Tree": "t_", "Flood": "f_", "Mobility": "m_"}
def snake_from_config_wrapper(type, num_opponents, num_games_per_eval, timeout):
def snake_from_config(cfg, *args, **kwargs):
""" Run a rust snake with given configuration for NUM_GAMES_PER_EVAL games.
Resulting score is percentage of games won.
Parameters:
----------------
cfg: Configuration (ConfigSpace.ConfigurationSpace.Configuration)
Contains indexable parameters that define a snake.
budget: timeout passed to snakes, affecting their cutoff point
num_opponents: number of opponents the snake plays against during evaluation [1, 3]
Returns:
----------------
Percentage of won games. Crashing configurations automatically evaluate to the worst score.
"""
# Turn Configuration into dict and remove None-values, remove prefixes
cfg = {cfg['agent']: {k[2:]: cfg[k]
for k in cfg if cfg[k] and k != 'agent'}}
# assemble cmd line call for the snake to be evaluated
# if budget is None:
# budget = kwargs["max_budget"]
fitness = 0.0
time_used = 0.0
opponents = ["'{\"Flood\":{}}'"] * num_opponents
for rs in RANDOMSTATES:
call = ["cargo run --release --bin simulate --"
f" '{json.dumps(cfg)}' " + " ".join(opponents) +
f" --game-count {num_games_per_eval}"
f" --timeout {timeout}"
f" --seed {rs}"
]
run = subprocess.run(call, capture_output=True,
shell=True, text=True, check=True)
# extract games won and time taken from output
games_won = run.stdout[9:run.stdout.find(",")]
# inverse to minimize (treat games won as loss)
fitness += 1.0 - int(games_won) / num_games_per_eval
stderr = run.stderr
time_used += float(stderr[stderr.rfind(f" "):stderr.rfind("ms")])
fitness /= len(RANDOMSTATES)
time_used / len(RANDOMSTATES)
if type == "DEHB":
return {"fitness": fitness,
"cost": time_used,
"info": {}}
return fitness
return snake_from_config
def get_cs(agentlist=["Flood"]):
"""Return full ConfigurationSpace for given list of agents."""
cs = ConfigurationSpace()
agent = CategoricalHyperparameter(
"agent",
agentlist,
default_value="Flood" if "Flood" in agentlist else agentlist[0]
)
cs.add_hyperparameter(agent)
if "Tree" in agentlist:
tree_hps = {
"mobility": 0.7,
"mobility_decay": 0.0,
"health": 0.012,
"health_decay": 0.0,
"len_advantage": 1.0,
"len_advantage_decay": 0.0,
"food_ownership": 0.65,
"food_ownership_decay": 0.0,
"centrality": 0.1,
"centrality_decay": 0.0,
}
for hp in tree_hps:
tree_hps[hp] = UniformFloatHyperparameter(
PREFIXES["Tree"] + hp, lower=0, upper=1, default_value=tree_hps[hp])
cs.add_hyperparameter(tree_hps[hp])
cs.add_condition(InCondition(tree_hps[hp], agent, ["Tree"]))
if "Mobility" in agentlist:
mobility_hps = {
"health_threshold": UniformIntegerHyperparameter(PREFIXES["Mobility"] + "health_threshold",
lower=0, upper=100, default_value=35),
"min_len": UniformIntegerHyperparameter(PREFIXES["Mobility"] + "min_len",
lower=0, upper=121, default_value=8),
"first_move_cost": UniformFloatHyperparameter(PREFIXES["Mobility"] + "first_move_cost",
lower=0, upper=3, default_value=1.0),
}
for hp in mobility_hps:
cs.add_hyperparameter(mobility_hps[hp])
cs.add_condition(InCondition(
mobility_hps[hp], agent, ["Mobility"]))
if "Flood" in agentlist:
flood_hps = {
"health": {"min": -1.5, "max": 1.5, "default": 0.045, "log": False},
"food_distance": {"min": -2, "max": 2, "default": 0.415, "log": False},
"space": {"min": -6, "max": 6, "default": 0.9, "log": False},
"space_adv": {"min": -6, "max": 6, "default": 0.9, "log": False},
"size_adv": {"min": -15, "max": 15, "default": 6.4, "log": False},
"size_adv_decay": {"min": 1e-10, "max": 0.1, "default": 1e-10, "log": True},
}
for hp in flood_hps:
flood_hps[hp] = UniformFloatHyperparameter(PREFIXES["Flood"] + hp,
lower=flood_hps[hp]["min"],
upper=flood_hps[hp]["max"],
default_value=flood_hps[hp]["default"],
log=flood_hps[hp]["log"])
cs.add_hyperparameter(flood_hps[hp])
cs.add_condition(InCondition(flood_hps[hp], agent, ["Flood"]))
return cs
def optim_dehb(args):
if args.runcount_limit == 0:
args.runcount_limit = None
f = snake_from_config_wrapper(
"DEHB", args.num_opponents, args.num_games_per_eval, args.timeout)
dehb = DEHB(f=f,
cs=cs,
dimensions=len(cs.get_hyperparameters()),
min_budget=1,
max_budget=2,
n_workers=args.n_jobs,
output_path=args.output_dir,
save_smac=True)
def_value = f(cs.get_default_configuration())
print(
f"Default Configuration evaluates to a win percentage of {(1 - def_value['fitness']) * 100:.2f}%")
print(f"Starting Opimization of {args.runcount_limit} configurations...")
trajectory, runtime, history = dehb.run(
fevals=args.runcount_limit,
total_cost=args.walltime,
verbose=True,
# parameters expected as **kwargs in target_function is passed here (surpassed by wrapper)
)
best_config = dehb.vector_to_configspace(dehb.inc_config)
inc_value = f(best_config)
print(
f"Optimized Configuration {best_config} evaluates to a win percentage of {(1 - inc_value['fitness']) * 100:.2f}%")
def optim_smac(args):
tae = snake_from_config_wrapper(
"SMAC", args.num_opponents, args.num_games_per_eval, args.timeout)
def_value = tae(cs.get_default_configuration(),
0,
num_opponents=args.num_opponents,
num_games_per_eval=args.num_games_per_eval,
timeout=args.timeout)
print(
f"Default Configuration evaluates to a win percentage of {(1 - def_value) * 100:.2f}%")
print(
f"Starting Opimization with walltime of {args.walltime/3600:.2f} hours...")
scenario = Scenario({
"run_obj": "quality",
"runcount-limit": args.runcount_limit,
"cs": cs,
"deterministic": True,
"wallclock_limit": args.walltime,
"output_dir":args.output_dir,
"shared_model": args.n_jobs > 1,
"input_psmac_dirs": args.output_dir,
})
smac = SMAC4HPO(scenario=scenario,
rng=np.random.RandomState(RANDOMSTATES[0]),
tae_runner=tae)
try:
incumbent = smac.optimize()
finally:
incumbent = smac.solver.incumbent
inc_value = tae(incumbent)
print(
f"Optimized Configuration {incumbent} evaluates to a win percentage of {(1 - inc_value) * 100:.2f}%")
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Optimize hyperparameters of the rust snake.")
parser.add_argument("--optimizer", type=str, choices=["DEHB", "SMAC"], default="DEHB",
help="Which optimizer is used for finding optimal hyperparameters.")
parser.add_argument("--agents", choices=["Flood", "Tree", "Mobility"], nargs="+", default=["Flood"],
help="Which agents are included in the search space.")
parser.add_argument("--num_games_per_eval", type=int, default=1000,
help="Number of games that each snake plays for evaluation")
parser.add_argument("--timeout", type=int, default=0,
help="Timeout parameter passed directly to the snake. Larger values impact execution time a lot.")
parser.add_argument("--runcount_limit", type=int, default=10000,
help="How many different configurations are evaluated at maximum. DEHB prioritizes this over runtime.")
parser.add_argument("--walltime", type=int, default=10*60*60,
help="Max time in seconds optimizer runs for. DEHB overwrites this with runtime, pass 0 as runcount_limit to prevent this..")
parser.add_argument("-j", "--n_jobs", type=int, default=8,
help="Number of workers that are started. Uses pSMAC if optimizer == SMAC.")
parser.add_argument("-o", "--output_dir", type=str, default="./optim",
help="Location of log directory. pSMAC for example uses it also for internal communication,"
" so if using SMAC make sure that the directory is empty. '_output' will be appended.")
parser.add_argument("--num_opponents", type=int, default=1,
help="Number of opponents agent is trained against: [1,3]")
args = parser.parse_args()
args.output_dir += "_output"
cs = get_cs(args.agents)
if args.optimizer == "DEHB":
optim_dehb(args)
elif args.optimizer == "SMAC":
optim_smac(args)
# evaluation and hyperparameter analysis -> PIMP