-
Notifications
You must be signed in to change notification settings - Fork 121
/
Copy pathrun.py
executable file
·70 lines (60 loc) · 1.95 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/env python
# Copyright (c) 2018 Roma Sokolkov
# MIT License
import os
import gym
import numpy as np
from stable_baselines.ddpg.policies import LnMlpPolicy
from stable_baselines.ddpg.noise import OrnsteinUhlenbeckActionNoise
from ddpg_with_vae import DDPGWithVAE as DDPG
from vae.controller import VAEController
# Registers donkey-vae-v0 gym env.
import donkey_gym_wrapper
# Locations of the persisted models.
# PATH_MODEL_DDPG is a stem: the final filename will be
# PATH_MODEL_DDPG + ".pkl".
PATH_MODEL_VAE = "vae.json"
PATH_MODEL_DDPG = "ddpg"

# Create the gym environment the agent runs in.
env = gym.make('donkey-vae-v0')

# Create the VAE controller and hand it to the unwrapped environment.
# VAE does image post processing to latent vector and buffers the raw
# image for future optimization.
vae = VAEController()
env.unwrapped.set_vae(vae)
# Run in test mode if trained models exist.
if (os.path.exists(PATH_MODEL_DDPG + ".pkl")
        and os.path.exists(PATH_MODEL_VAE)):
    print("Task: test")
    ddpg = DDPG.load(PATH_MODEL_DDPG, env)
    vae.load(PATH_MODEL_VAE)

    obs = env.reset()
    # This loop has no exit condition; interrupt the process to stop it.
    while True:
        action, _states = ddpg.predict(obs)
        print(action)
        obs, reward, done, info = env.step(action)
        if done:
            # Keep the observation returned by reset(): without it the
            # next prediction would be made from the last observation of
            # the episode that just finished.
            obs = env.reset()
        env.render()
# Run in training mode.
else:
    print("Task: train")
    # The noise objects for DDPG: one noise component per entry of the
    # last action-space dimension.
    n_actions = env.action_space.shape[-1]
    action_noise = OrnsteinUhlenbeckActionNoise(
        mean=np.zeros(n_actions),
        theta=0.6 * np.ones(n_actions),
        sigma=0.2 * np.ones(n_actions),
    )

    ddpg = DDPG(
        LnMlpPolicy,
        env,
        verbose=1,
        batch_size=64,
        clip_norm=5e-3,
        gamma=0.9,
        param_noise=None,
        action_noise=action_noise,
        memory_limit=10000,
        nb_train_steps=3000,
    )
    ddpg.learn(total_timesteps=3000, vae=vae, skip_episodes=10)

    # Finally save model files so that the next run finds them and
    # starts in test mode.
    ddpg.save(PATH_MODEL_DDPG)
    vae.save(PATH_MODEL_VAE)