-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathexample_use.py
115 lines (110 loc) · 3.83 KB
/
example_use.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
from empowerment import mazeworld
from empowerment.mazeworld import MazeWorld
from empowerment import compute_empowerment, EmpowermentMaximiser
import numpy as np
import matplotlib.pyplot as plt
import time
def example_1():
""" builds maze world from original empowerment paper(https://uhra.herts.ac.uk/bitstream/handle/2299/1918/901933.pdf?sequence=1) and plots empowerment landscape.
"""
maze = MazeWorld(10,10)
for i in range(6):
maze.add_wall( (1, i), "N" )
for i in range(2):
maze.add_wall( (i+2, 5), "E")
maze.add_wall( (i+2, 6), "E")
maze.add_wall( (3, 6), "N")
for i in range(2):
maze.add_wall( (1, i+7), "N")
for i in range(3):
maze.add_wall( (5, i+2), "N")
for i in range(2):
maze.add_wall( (i+6, 5), "W")
maze.add_wall( (6, 4), "N")
maze.add_wall( (7, 4), "N")
maze.add_wall( (8, 4), "W")
maze.add_wall( (8, 3), "N")
# compute the 5-step empowerment at each cell
E = maze.compute_empowerment(n_step=5)
# plot the maze world
maze.plot(colorMap=E)
plt.title('5-step empowerment')
plt.show()
def example_2():
""" Builds grid world with doors and plots empowerment landscape """
maze = MazeWorld(8,8)
for i in range(maze.width):
if i is not 6 : maze.add_wall([2, i], "N")
for i in range(maze.width):
if i is not 2 : maze.add_wall([5, i], "N")
n_step = 4
E = maze.compute_empowerment(n_step=n_step, n_samples=8000)
maze.plot(colorMap=E)
plt.title('%i-step empowerment' % n_step)
plt.show()
def example_3():
""" Runs empowerment maximising agent running in a chosen grid world """
# maze = klyubin_world()
maze = mazeworld.door_world()
emptymaze = MazeWorld(maze.height, maze.width)
# maze = mazeworld.tunnel_world()
n_step = 3
start = time.time()
initpos = np.random.randint(maze.dims[0], size=2)
initpos = [1,4]
s = maze._cell_to_index(initpos)
T = emptymaze.compute_model()
B = maze.compute_model()
E = maze.compute_empowerment(n_step = n_step).reshape(-1)
n_s, n_a, _ = T.shape
agent = EmpowermentMaximiser(alpha=0.1, gamma=0.9, T = T, n_step=n_step, n_samples=1000, det=1.)
steps = int(10000)
visited = np.zeros(maze.dims)
tau = np.zeros(steps)
D_emp = np.zeros(steps)
D_mod = n_s*n_a*np.ones(steps)
for t in range(steps):
# append data for plotting
tau[t] = agent.tau
D_emp[t] = np.mean((E - agent.E)**2)
D_mod[t] = D_mod[t] - np.sum(np.argmax(agent.T, axis=0) == np.argmax(B, axis=0))
a = agent.act(s)
pos = maze._index_to_cell(s)
visited[pos[0],pos[1]] += 1
s_ = maze.act(s,list(maze.actions.keys())[a])
agent.update(s,a,s_)
s = s_
print("elapsed seconds: %0.3f" % (time.time() - start) )
plt.figure(1)
plt.title("value map")
Vmap = np.max(agent.Q, axis=1).reshape(*maze.dims)
maze.plot(colorMap= Vmap )
plt.figure(2)
plt.title("subjective empowerment")
maze.plot(colorMap= agent.E.reshape(*maze.dims))
plt.figure(3)
plt.title("tau")
plt.plot(tau)
plt.figure(4)
plt.scatter(agent.E, visited.reshape(n_s))
plt.xlabel('true empowerment')
plt.ylabel('visit frequency')
plt.figure(5)
plt.title("visited")
maze.plot(colorMap=visited.reshape(*maze.dims))
fig, ax1 = plt.subplots()
red = 'tab:red'
ax1.set_xlabel('time')
ax1.set_ylabel('MSE of empowerment map', color=red)
ax1.plot(D_emp, color=red)
ax1.tick_params(axis='y', labelcolor=red)
ax2 = ax1.twinx()
ax2.set_ylabel('Model disagreement', color='tab:blue')
ax2.plot(D_mod, color='tab:blue')
ax2.tick_params(axis='y', labelcolor='tab:blue')
plt.show()
if __name__ == "__main__":
## uncomment below to see examples
example_1()
# example_2()
# example_3()