-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplot_data.py
102 lines (75 loc) · 3.05 KB
/
plot_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import argparse
import os
import json
import matplotlib.pyplot as plt
import numpy as np
# Locate the input directory relative to this script file rather than the
# current working directory.
script_dir = os.path.dirname(os.path.realpath(__file__))
plotting_dir = os.path.join(script_dir, "../plotting_data")

# The positional argument is declared optional (nargs='?'), so a missing
# algorithm is handled explicitly below instead of by argparse's error path.
parser = argparse.ArgumentParser("python plot_data.py")
parser.add_argument("algorithm", help="the algorithm (dqn, ppo or a2c)", type=str, nargs='?')
args = parser.parse_args()

if not args.algorithm:
    # Nothing can be plotted without an algorithm name. Show the help text
    # instead of stopping silently, and leave through SystemExit: the bare
    # `exit()` helper is only installed by the `site` module for interactive
    # use. The exit status stays 0, as before.
    parser.print_help()
    raise SystemExit(0)
algorithm = args.algorithm

# Make sure the data directory exists before it is read from further down.
os.makedirs(plotting_dir, exist_ok=True)
# plot everything on the same plot
fig, ax = plt.subplots()
xlimit = 100
model = ['CartPole-v1']

# set y axis depending on the model (negative / positive rewards)
# Limits are given as (lower, upper). Passing them in descending order, e.g.
# (0, -600), would flip the y axis, so the negative-reward environments use
# (-600, 0) / (-250, 0). Entries are checked in this order; first match wins.
y_limits_by_env = (
    ('CartPole-v1', (0, 600)),
    ('Acrobot-v1', (-600, 0)),
    ('MountainCar-v0', (-250, 0)),
)
for env_name, (y_lower, y_upper) in y_limits_by_env:
    if env_name in model:
        ax.axis([1, xlimit, y_lower, y_upper])
        break

# The figure title is the upper-cased algorithm name given on the command line.
fig.suptitle(algorithm.upper())
# The input file is named after the algorithm and is parsed line by line,
# one JSON document per line.
with open(os.path.join(plotting_dir, algorithm), 'r') as f:
    lines = f.readlines()

# Collected entries, grouped by the search-method key they were stored under.
data_to_be_plotted = {
    "pbt": [],
    "sma": [],
    "rs_": [],
}
known_search_methods = ('pbt', 'sma', 'rs_')

# first filter data
for line_number, line in enumerate(lines, start=1):
    if not line.strip():
        # Tolerate blank lines (e.g. stray newlines at the end of the file)
        # instead of failing inside json.loads.
        continue
    record = json.loads(line)

    matching_methods = [sm for sm in known_search_methods if sm in record]
    if not matching_methods:
        # Previously this surfaced as an uninformative KeyError('').
        raise KeyError(
            "line %d of the %s data file contains none of the keys %s"
            % (line_number, algorithm, ", ".join(known_search_methods))
        )

    # If a record carries several known keys, the last one wins (as before).
    search_method = matching_methods[-1]
    iterations_and_rewards = record[search_method]
    data_to_be_plotted[search_method].append(iterations_and_rewards)
# Legend text for each search-method key.
legend_labels = {'pbt': 'PBT', 'sma': 'SMAC', 'rs_': 'RS'}

# calculate mean and standard error of the filtered data across all available
# seeds (the number of seeds is taken from the data, not assumed to be 3)
for search_method, runs in data_to_be_plotted.items():
    if not runs:
        # No data was collected for this search method: nothing to plot, and
        # indexing runs[0] below would fail.
        continue

    # Each run is indexed as run[0] = iterations (x values), run[1] = rewards.
    reward_arrays = [run[1] for run in runs]

    # check that we have the same amount of data points b4 computing mean
    expected_length = len(reward_arrays[0])
    if any(len(rewards) != expected_length for rewards in reward_arrays):
        raise RuntimeError("It looks like for different seeds we have different lengths of iterations")

    # Element-wise statistics over the seed axis.
    mean_reward_across_seeds = np.mean(reward_arrays, axis=0)
    std_deviation_across_seeds = np.std(reward_arrays, axis=0)
    std_err = std_deviation_across_seeds / np.sqrt(len(reward_arrays))

    # TODO What about smac iterations? Are they always the same length as well?
    # The x values are taken from the first run only.
    iterations = runs[0][0]

    plt.plot(iterations, mean_reward_across_seeds, label=legend_labels[search_method])
    # Shaded band of one standard error around the mean.
    plt.fill_between(iterations,
                     mean_reward_across_seeds + std_err,
                     mean_reward_across_seeds - std_err,
                     alpha=0.25)
# Finish the figure: axis captions and the legend for the plotted curves.
plt.xlabel("training iteration")
plt.ylabel("mean reward")
plt.legend(loc='best')

# The image is written to "../plots_final" relative to this script, in a file
# named after the algorithm; the directory is created when it is missing.
this_file = os.path.realpath(__file__)
script_dir = os.path.dirname(this_file)
plotting_files_dir = os.path.join(script_dir, "../plots_final")
os.makedirs(plotting_files_dir, exist_ok=True)
output_path = os.path.join(plotting_files_dir, algorithm + '.jpg')
plt.savefig(output_path)

# Show the figure only after it has been saved.
plt.show()