-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsequentialg.py
79 lines (69 loc) · 2.65 KB
/
sequentialg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import numpy as np
from .utils import *
class SequentiAlg:
    """Base sequential algorithm that plays a bandit using per-arm indices.

    The instance keeps, for every arm, the empirical mean of the observed
    rewards, the number of pulls and an index value.  ``compute_indices``
    here only draws random indices; it is the natural hook to override in
    order to implement an actual index policy.

    The ``bandit`` object is used through three members only:
    ``bandit.nbr_arms``, ``bandit.arms`` (indexable, used for display) and
    ``bandit.pull(arm)`` (returns the reward of one pull).

    Recognised ``params`` keys:

    * ``'init'`` (required): initial value of every index.
    * ``'set_constants_func'`` (optional): callable invoked on every
      ``reset`` as ``func(params, bandit, horizon)``.
    """

    def __init__(self, bandit, name="Sequential", params=None):
        """Store the configuration and initialise all statistics.

        Args:
            bandit: the bandit to play (see the class docstring).
            name: label used in the textual representation.
            params: configuration dict; ``None`` selects
                ``{'init': -np.inf}``.  A dict supplied by the caller is
                stored as-is (not copied).
        """
        if params is None:
            # Build the default per instance: a dict literal in the signature
            # would be shared by every instance, and it is handed to the
            # user-provided 'set_constants_func', which is free to mutate it.
            params = {'init': -np.inf}
        self.bandit = bandit
        self.name = name
        self.params = params
        self.reset()

    def reset(self, horizon=None):
        """Forget everything observed so far.

        Args:
            horizon: forwarded to ``params['set_constants_func']`` when that
                hook is present; unused otherwise.

        Raises:
            KeyError: if ``params`` has no ``'init'`` entry.
        """
        nbr_arms = self.bandit.nbr_arms
        self.all_selected = False
        self.means = np.zeros(nbr_arms)
        self.nbr_pulls = np.zeros(nbr_arms, int)
        self.indices = np.zeros(nbr_arms) + self.params['init']
        self.time = 0
        set_constants = self.params.get('set_constants_func')
        if set_constants is not None:
            # The hook receives the instance's own bandit.
            set_constants(self.params, self.bandit, horizon)

    def _describe(self):
        """Return the multi-line summary shared by ``repr`` and ``str``."""
        header = f"{self.name} algorithm - time step = {self.time}\n"
        rows = [
            f" {self.bandit.arms[i]} : "
            f"est. mean = {self.means[i]:.3f} - "
            f"nbr. pulls = {self.nbr_pulls[i]}\n"
            for i in range(self.bandit.nbr_arms)
        ]
        return header + "".join(rows)

    def __repr__(self):
        """Return a header line followed by one line per arm."""
        return self._describe()

    def __str__(self):
        """Return the same text as ``__repr__``."""
        return self._describe()

    def update_statistics(self, arm, r):
        """Record reward ``r`` obtained from ``arm`` and refresh the indices.

        Args:
            arm: index of the arm that was pulled.
            r: observed reward.
        """
        self.time += 1
        pulls = self.nbr_pulls[arm]
        # Running average: fold the new reward into the previous mean.
        self.means[arm] = (self.means[arm] * pulls + r) / (pulls + 1)
        self.nbr_pulls[arm] += 1
        if not self.all_selected:
            # Once every arm has been pulled the flag stays True until reset.
            self.all_selected = bool(np.all(self.nbr_pulls > 0))
        self.compute_indices()

    def pull(self, arm):
        """Pull ``arm`` on the bandit, update the statistics, return the reward."""
        r = self.bandit.pull(arm)
        self.update_statistics(arm, r)
        return r

    def compute_indices(self):
        """Recompute ``self.indices`` (here: random draws, one per arm).

        Uniform draws on [0, 1) are used once every arm has been pulled,
        standard normal draws before that.
        """
        if self.all_selected:
            # probably faster computation is possible
            self.indices = np.random.rand(self.bandit.nbr_arms)
        else:
            # probably slower computation
            self.indices = np.random.randn(self.bandit.nbr_arms)

    def choose_an_arm(self):
        """Return the arm selected by ``randamin`` applied to the indices.

        NOTE(review): ``randamin`` is not defined in this file; it is expected
        to come from the ``from .utils import *`` star import and is
        presumably an argmin with random tie-breaking -- confirm in utils.
        """
        return randamin(self.indices)

    def play(self):
        """Choose an arm, pull it and return the pair ``(arm, reward)``."""
        arm = self.choose_an_arm()
        r = self.pull(arm)
        return arm, r

    def fit(self, horizon, reset=True, experiment=None):
        """Play ``horizon`` rounds.

        Args:
            horizon: number of rounds to play.
            reset: when true, call ``reset(horizon)`` before playing.
            experiment: optional callback invoked after every round as
                ``experiment(arm, reward, t)`` with ``t`` the 0-based round.
        """
        if reset:
            self.reset(horizon)
        for t in range(horizon):
            arm, r = self.play()
            if experiment is not None:
                experiment(arm, r, t)