"""
This file graphs the data in graph_data for each environment
"""
import os
import numpy as np
import matplotlib.pyplot as plt
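
# Note: this script assumes it is run from the directory that contains graph_data/
# (data_path below is built from os.getcwd()). The layout sketched here is illustrative,
# inferred from the parsing code in this file; file names other than seeds.txt may vary:
#
#   graph_data/
#       <env_name>/                             e.g. MountainCarContinuous-v0
#           seeds.txt                           one integer seed per line
#           .../stable_baselines/seed_xxx.txt   Stable Baselines PPO2 logs
#           .../ppo_for_beginners/seed_xxx.txt  PPO for Beginners logs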

def get_file_locations():
    """
    Gets the absolute paths of each data file to graph.

    Parameters:
        None

    Return:
        paths - a dict with the following structure:
            {
                env: {
                    seeds: absolute_seeds_file_path,
                    stable_baselines: [absolute_file_paths_to_data],
                    ppo_for_beginners: [absolute_file_paths_to_data]
                }
            }
    """
    # Get the absolute path of the current working directory,
    # and append graph_data to it
    data_path = os.getcwd() + '/graph_data/'

    # Extract the environment names (one sub-directory of graph_data per environment)
    envs = [env for env in os.listdir(data_path)
            if os.path.isdir(os.path.join(data_path, env))]

    # Stores the file paths.
    # Structure will be:
    # {
    #     env: {
    #         seeds: absolute_seeds_file_path,
    #         stable_baselines: [absolute_file_paths_to_data],
    #         ppo_for_beginners: [absolute_file_paths_to_data]
    #     }
    # }
    paths = {}

    for env in envs:
        # Sub-dict per env as listed above
        env_data = {}

        # Extract the absolute paths of seeds.txt and of the data for both codes
        for directory, _, filenames in os.walk(data_path + env):
            if 'seeds.txt' in filenames:
                env_data['seeds'] = directory + '/seeds.txt'
            elif 'stable_baselines' in directory:
                env_data['stable_baselines'] = [directory + '/' + filename
                                                for filename in filenames]
            elif 'ppo_for_beginners' in directory:
                env_data['ppo_for_beginners'] = [directory + '/' + filename
                                                 for filename in filenames]

        # Save the environment data into our outer paths dict
        paths[env] = env_data

    return paths
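
# Illustrative snippet of a PPO for Beginners log file, as assumed by the ':'-splitting
# logic in extract_ppo_for_beginners_data below (exact spacing in real logs may differ):
#
#   Average Episodic Return: -120.35
#   Timesteps So Far: 40000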

def extract_ppo_for_beginners_data(env, filename):
    """
    Extract the total timesteps and average episodic return from the logging
    data specific to PPO for Beginners.

    Parameters:
        env - The environment we're currently graphing.
        filename - The file containing the data. Should be "seed_xxx.txt", where
                   xxx is an integer seed.

    Return:
        x - the total timesteps at each iteration
        y - average episodic return at each iteration
    """
    # x is timesteps so far, y is average episodic return
    x, y = [], []

    # Extract out the x's and y's
    with open(filename, 'r') as f:
        for line in f:
            # Split "key: value" lines and strip whitespace from each piece
            line = [e.strip() for e in line.split(':')]

            if 'Average Episodic Return' in line:
                y.append(float(line[1]))
            if 'Timesteps So Far' in line:
                x.append(int(line[1]))

    return x, y
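
# Illustrative rows of a Stable Baselines PPO2 log table, as assumed by the '|'-splitting
# logic in extract_stable_baselines_data below (column widths in real logs may differ):
#
#   | ep_reward_mean   | -120     |
#   | total_timesteps  | 40000    |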

def extract_stable_baselines_data(env, filename):
    """
    Extract the total timesteps and average episodic return from the logging
    data specific to Stable Baselines PPO2.

    Parameters:
        env - The environment we're currently graphing.
        filename - The file containing the data. Should be "seed_xxx.txt", where
                   xxx is an integer seed.

    Return:
        x - the total timesteps at each iteration
        y - average episodic return at each iteration
    """
    # x is timesteps so far, y is average episodic return
    x, y = [], []

    # Extract out the x's and y's
    with open(filename, 'r') as f:
        for line in f:
            # Split "| key | value |" rows and strip whitespace from each cell
            line = [e.strip() for e in line.split('|')]

            if 'ep_reward_mean' in line:
                y.append(float(line[2]))
            if 'total_timesteps' in line:
                x.append(int(line[2]))

    return x, y
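
# Worked example for the three per-iteration helpers below, assuming two seeds
# (only the y_s argument is shown; x_s is handled the same way):
#
#   y_s = [[1.0, 5.0, 7.0],
#          [2.0, 3.0, 9.0]]
#
#   calculate_lower_bounds -> [1.0, 3.0, 7.0]   (elementwise min across seeds)
#   calculate_upper_bounds -> [2.0, 5.0, 9.0]   (elementwise max across seeds)
#   calculate_means        -> [1.5, 4.0, 8.0]   (elementwise mean across seeds)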

def calculate_lower_bounds(x_s, y_s):
    """
    Calculate the lower bounds of total timesteps and average episodic
    return per iteration.

    Parameters:
        x_s - A list of lists of total timesteps so far per seed.
        y_s - A list of lists of average episodic return per seed.

    Return:
        Lower bounds of both x_s and y_s
    """
    # x_low is the lower bound of timesteps so far,
    # y_low is the lower bound of average episodic return
    x_low, y_low = x_s[0], y_s[0]

    # Find the lower bound amongst all trials per iteration
    for xs, ys in zip(x_s[1:], y_s[1:]):
        x_low = [x if x < x_low[i] else x_low[i] for i, x in enumerate(xs)]
        y_low = [y if y < y_low[i] else y_low[i] for i, y in enumerate(ys)]

    return x_low, y_low

def calculate_upper_bounds(x_s, y_s):
    """
    Calculate the upper bounds of total timesteps and average episodic
    return per iteration.

    Parameters:
        x_s - A list of lists of total timesteps so far per seed.
        y_s - A list of lists of average episodic return per seed.

    Return:
        Upper bounds of both x_s and y_s
    """
    # x_high is the upper bound of timesteps so far,
    # y_high is the upper bound of average episodic return
    x_high, y_high = x_s[0], y_s[0]

    # Find the upper bound amongst all trials per iteration
    for xs, ys in zip(x_s[1:], y_s[1:]):
        x_high = [x if x > x_high[i] else x_high[i] for i, x in enumerate(xs)]
        y_high = [y if y > y_high[i] else y_high[i] for i, y in enumerate(ys)]

    return x_high, y_high

def calculate_means(x_s, y_s):
    """
    Calculate the mean total timesteps and the mean average episodic return
    over all trials at each iteration.

    Parameters:
        x_s - A list of lists of total timesteps so far per seed.
        y_s - A list of lists of average episodic return per seed.

    Return:
        Means of x_s and y_s
    """
    # With a single trial, the mean is just that trial's data
    if len(x_s) == 1:
        return x_s[0], y_s[0]

    return list(np.mean(x_s, axis=0)), list(np.mean(y_s, axis=0))
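
# Example for clip_data below: with trials of unequal length,
#   clip_data([[1, 2, 3], [1, 2]], [[10, 20, 30], [10, 20]])
#   -> ([[1, 2], [1, 2]], [[10, 20], [10, 20]])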

def clip_data(x_s, y_s):
    """
    In case the learning trials ran for different numbers of iterations,
    clip all trials to the length of the shortest trial.

    Parameters:
        x_s - A list of lists of total timesteps so far per seed.
        y_s - A list of lists of average episodic return per seed.

    Return:
        x_s and y_s after clipping both.
    """
    # Find the shortest trial length
    x_len_min = min([len(x) for x in x_s])
    y_len_min = min([len(y) for y in y_s])
    len_min = min([x_len_min, y_len_min])

    # Clip each trial in x_s to the shortest trial length
    for i in range(len(x_s)):
        x_s[i] = x_s[i][:len_min]

    # Clip each trial in y_s to the shortest trial length
    for i in range(len(y_s)):
        y_s[i] = y_s[i][:len_min]

    return x_s, y_s
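
# extract_data below yields one data packet per environment; an illustrative packet:
#
#   {
#       'env': 'MountainCarContinuous-v0',
#       'seeds': [...],
#       'ppo_for_beginners': {'x_mean': [...], 'x_low': [...], ..., 'y_high': [...]},
#       'stable_baselines':  {'x_mean': [...], 'x_low': [...], ..., 'y_high': [...]}
#   }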

def extract_data(paths):
    """
    Extracts the data from all the files and returns a generator, which yields
    the data for one environment per iteration. The number of iterations
    equals the number of environments in graph_data.

    Parameters:
        paths - Contains the paths to each data file. Check the function description of
                get_file_locations() to see how paths is structured.

    Return:
        A generator object, which yields the data for each environment on each
        iteration.

    Note:
        If you're unfamiliar with Python generators, check this out:
        https://wiki.python.org/moin/Generators

        If you're unfamiliar with Python "yield", check this out:
        https://stackoverflow.com/questions/231767/what-does-the-yield-keyword-do
    """
    for env in paths:
        # Extract out the seeds tested
        seeds_txt = paths[env]['seeds']
        seeds = []
        with open(seeds_txt, 'r') as f:
            for line in f:
                seeds.append(int(line))

        # Prepare the data dict to return
        data = {
            'env': '',
            'seeds': [],
            'ppo_for_beginners': {
                'x_mean': [],
                'x_low': [],
                'x_high': [],
                'y_mean': [],
                'y_low': [],
                'y_high': []
            },
            'stable_baselines': {
                'x_mean': [],
                'x_low': [],
                'x_high': [],
                'y_mean': [],
                'y_low': [],
                'y_high': []
            }
        }

        # Extract out ppo_for_beginners datapoints
        pfb_x_s, pfb_y_s = [], []
        for filename in paths[env]['ppo_for_beginners']:
            curr_data = extract_ppo_for_beginners_data(env, filename)
            pfb_x_s.append(curr_data[0])
            pfb_y_s.append(curr_data[1])

        # Extract out stable_baselines datapoints
        sb_x_s, sb_y_s = [], []
        for filename in paths[env]['stable_baselines']:
            curr_data = extract_stable_baselines_data(env, filename)
            sb_x_s.append(curr_data[0])
            sb_y_s.append(curr_data[1])

        # Preprocess ppo_for_beginners and stable_baselines data
        pfb_x_s, pfb_y_s = clip_data(pfb_x_s, pfb_y_s)
        sb_x_s, sb_y_s = clip_data(sb_x_s, sb_y_s)

        # Process ppo_for_beginners datapoints for mean, lower, and upper bounds
        pfb_x_mean, pfb_y_mean = calculate_means(pfb_x_s, pfb_y_s)
        pfb_x_low, pfb_y_low = calculate_lower_bounds(pfb_x_s, pfb_y_s)
        pfb_x_high, pfb_y_high = calculate_upper_bounds(pfb_x_s, pfb_y_s)

        # Process stable_baselines datapoints for mean, lower, and upper bounds
        sb_x_mean, sb_y_mean = calculate_means(sb_x_s, sb_y_s)
        sb_x_low, sb_y_low = calculate_lower_bounds(sb_x_s, sb_y_s)
        sb_x_high, sb_y_high = calculate_upper_bounds(sb_x_s, sb_y_s)

        # Intermediary variables to help us more easily save our data
        pfbs = [pfb_x_mean, pfb_x_low, pfb_x_high, pfb_y_mean, pfb_y_low, pfb_y_high]
        sbs = [sb_x_mean, sb_x_low, sb_x_high, sb_y_mean, sb_y_low, sb_y_high]

        # Fill up the data packet
        data['env'] = env
        data['seeds'] = seeds
        for i, data_type in enumerate(['x_mean', 'x_low', 'x_high', 'y_mean', 'y_low', 'y_high']):
            data['ppo_for_beginners'][data_type] = pfbs[i]
            data['stable_baselines'][data_type] = sbs[i]

        # Yield the current data packet
        yield data

def graph_data(paths):
    """
    Graphs the data with matplotlib. Will display each graph on screen for the
    user to screenshot.

    Parameters:
        paths - Contains the paths to each data file. Check the function description of
                get_file_locations() to see how paths is structured.

    Return:
        None
    """
    for data in extract_data(paths):
        # Unpack the data packet
        env = data['env']
        seeds = data['seeds']

        data_types = ['x_mean', 'x_low', 'x_high', 'y_mean', 'y_low', 'y_high']
        pfbs = [data['ppo_for_beginners'][data_type] for data_type in data_types]
        sbs = [data['stable_baselines'][data_type] for data_type in data_types]

        pfb_x_mean, _, _, pfb_y_mean, pfb_y_low, pfb_y_high = pfbs
        sb_x_mean, _, _, sb_y_mean, sb_y_low, sb_y_high = sbs

        # Handle the specific case of MountainCarContinuous
        if env == 'MountainCarContinuous-v0':
            plt.ylim([-70, 100])

        # Plot the mean curves
        plt.plot(sb_x_mean, sb_y_mean, 'b', alpha=0.8)
        plt.plot(pfb_x_mean, pfb_y_mean, 'g', alpha=0.8)

        # Plot the error bands (lower to upper bound per iteration)
        plt.fill_between(sb_x_mean, sb_y_low, sb_y_high, color='b', alpha=0.3)
        plt.fill_between(pfb_x_mean, pfb_y_low, pfb_y_high, color='g', alpha=0.3)

        # Set labels
        plt.title(f'{env} on Random Seeds {seeds}')
        plt.xlabel('Average Total Timesteps So Far')
        plt.ylabel('Average Episodic Return')
        plt.legend(['Stable Baselines PPO2', 'PPO for Beginners'])

        # Show the graph so the user can screenshot it
        plt.show()

def main():
    """
    Main function to get the file locations and graph the data.

    Parameters:
        None

    Return:
        None
    """
    # Extract the absolute file paths
    paths = get_file_locations()

    # Graph the data from the file paths extracted
    graph_data(paths)


if __name__ == '__main__':
    main()