-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathddpg.py
45 lines (32 loc) · 1.71 KB
/
ddpg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# individual network settings for each actor + critic pair
# see networkforall for details
from networkforall import Network
from utilities import hard_update, gumbel_softmax, onehot_from_logits
from torch.optim import Adam
import torch
import numpy as np
# add OU noise for exploration
from OUNoise import OUNoise
# Device on which the networks and observation tensors in this module are
# placed. It is pinned to the CPU; the CUDA auto-detecting alternative is
#   torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = "cpu"
class DDPGAgent:
    """A single DDPG agent: one actor/critic pair plus their target copies.

    Each instance owns:
      * ``actor`` / ``critic``               -- the networks being trained,
      * ``target_actor`` / ``target_critic`` -- copies initialised from them
        via ``hard_update``,
      * ``noise``                            -- an ``OUNoise`` process used for
        exploration,
      * ``actor_optimizer`` / ``critic_optimizer`` -- Adam optimizers over the
        trained networks' parameters.
    """

    def __init__(
        self,
        in_actor,
        hidden_in_actor,
        hidden_out_actor,
        out_actor,
        in_critic,
        hidden_in_critic,
        hidden_out_critic,
        lr_actor=1.0e-2,
        lr_critic=1.0e-2,
    ):
        """Build the four networks, the noise process and the optimizers.

        Args:
            in_actor, hidden_in_actor, hidden_out_actor, out_actor:
                Forwarded, in this order, as the positional arguments of
                ``Network`` for both the actor and the target actor (see
                networkforall for their meaning). ``out_actor`` is also the
                first argument given to ``OUNoise``.
            in_critic, hidden_in_critic, hidden_out_critic:
                Forwarded, in this order, as the first three positional
                arguments of ``Network`` for the critic and target critic;
                the fourth argument is fixed to 1.
            lr_actor: Learning rate of the actor's Adam optimizer.
            lr_critic: Learning rate of the critic's Adam optimizer.
        """
        super().__init__()

        actor_dims = (in_actor, hidden_in_actor, hidden_out_actor, out_actor)
        critic_dims = (in_critic, hidden_in_critic, hidden_out_critic, 1)

        # Keep the construction order actor -> critic -> target actor ->
        # target critic so any random initialisation inside Network is
        # consumed in the same sequence as before.
        self.actor = Network(*actor_dims, actor=True).to(device)
        self.critic = Network(*critic_dims).to(device)
        self.target_actor = Network(*actor_dims, actor=True).to(device)
        self.target_critic = Network(*critic_dims).to(device)

        self.noise = OUNoise(out_actor, scale=1.0)

        # initialize targets same as original networks
        for target_net, source_net in (
            (self.target_actor, self.actor),
            (self.target_critic, self.critic),
        ):
            hard_update(target_net, source_net)

        self.actor_optimizer = Adam(self.actor.parameters(), lr=lr_actor)
        # Only the critic optimizer uses weight decay.
        self.critic_optimizer = Adam(
            self.critic.parameters(),
            lr=lr_critic,
            weight_decay=1.e-5,
        )

    def act(self, obs, noise=0.0):
        """Return the actor's output for ``obs`` plus scaled exploration noise.

        Args:
            obs: Observation tensor; it is moved to ``device`` before use.
            noise: Multiplier applied to the sample drawn from ``self.noise``.
                A sample is drawn on every call, even when this is 0.0.

        Returns:
            ``self.actor(obs) + noise * self.noise.noise()``.
        """
        policy_out = self.actor(obs.to(device))
        perturbation = noise * self.noise.noise()
        return policy_out + perturbation

    def target_act(self, obs, noise=0.0):
        """Return the target actor's output for ``obs`` plus scaled noise.

        Same as ``act`` but evaluated with ``self.target_actor``. The noise
        sample comes from the same ``self.noise`` process that ``act`` uses.

        Args:
            obs: Observation tensor; it is moved to ``device`` before use.
            noise: Multiplier applied to the sample drawn from ``self.noise``.

        Returns:
            ``self.target_actor(obs) + noise * self.noise.noise()``.
        """
        policy_out = self.target_actor(obs.to(device))
        perturbation = noise * self.noise.noise()
        return policy_out + perturbation