test_rating_script.py
import numpy as np


class Agent:
    """The Agent class defines a stochastic engine which plays a game where there is an optimal
    or 'confident' strategy. The skill rating of the agent is updated using a Bayesian model
    whenever it makes a move, via the 'update_dens' function.
    """

    def __init__(self, min_comp, max_comp, delta, epsilon=0.001) -> None:
        self.delta = delta
        self.epsilon = epsilon
        self.min_comp = min_comp
        self.max_comp = max_comp
        self.intval_length = self.max_comp - self.min_comp
        # Discretised grid of competence values with a uniform prior density over it.
        self.comp_arr = np.arange(self.min_comp, self.max_comp + self.epsilon, self.epsilon)
        self.comp_dens_arr = (1 / np.shape(self.comp_arr)[0]) * np.ones(np.shape(self.comp_arr)[0])

    def get_skill(self) -> np.float64:
        """Returns the mean of comp_arr w.r.t. comp_dens_arr, i.e. the expected skill of the Agent."""
        return np.average(self.comp_arr, weights=self.comp_dens_arr)

    def play_game(self, evals) -> int:
        """Picks a move according to its evaluation from 'evals' with skill 'apparent_comp'."""
        apparent_comp = np.random.choice(self.comp_arr, size=1, p=self.comp_dens_arr)
        # Move selection from 'evals' at skill 'apparent_comp' is not implemented yet.
        raise NotImplementedError

    def update_dens(self, evals, choice) -> None:
        """Updates self.comp_dens_arr according to an observation evals[choice]."""
        v_max = np.max(evals)
        K = 0.01
        # Likelihood of picking 'choice' as a function of competence:
        # proportional to (v_max - evals[choice] + K)^(-competence).
        cond_prob_arr = (v_max - evals[choice] + K) ** (-self.comp_arr)
        # Bayes update: posterior ~ prior * likelihood, normalised over the competence grid.
        self.comp_dens_arr = (self.comp_dens_arr * cond_prob_arr) / np.sum(cond_prob_arr * self.comp_dens_arr)
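

if __name__ == "__main__":
    # Minimal usage sketch, not part of the original script: the competence range,
    # evaluation values, and number of rounds below are illustrative assumptions.
    # It starts from the uniform prior over competences in [0, 5] and repeatedly
    # feeds update_dens an observation in which the agent picked the best of three
    # candidate moves, so the expected skill from get_skill should drift upward.
    rng = np.random.default_rng(0)
    agent = Agent(min_comp=0.0, max_comp=5.0, delta=0.1)
    for _ in range(20):
        evals = rng.uniform(-1.0, 1.0, size=3)        # hypothetical move evaluations
        agent.update_dens(evals, choice=np.argmax(evals))  # observed: best move chosen
    print(f"Posterior expected skill: {agent.get_skill():.3f}")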