# Experiment 1: baseline (double DQN, discount 0.95, target network updated every 100 episodes)
batch_size = 5000 # number of experiences to sample from the replay buffer
discount_factor = 0.95 # discount (gamma) in the Q-learning / Bellman update
target_model = copy.deepcopy(model)
replay_buffer = deque(maxlen=15000) # [(state, action, reward, next_state, done), ...]
learning_rate = 1e-4 # learning rate for the SGD optimizer
loss_fn = nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
no_episodes = 50000
no_episodes_to_reach_epsilon = 10000
min_epsilon = 0.01
no_episodes_before_training = 2000
no_episodes_before_updating_target = 100
use_double_dqn = True
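
The epsilon settings above imply a decay schedule but do not pin one down. A minimal sketch is below, assuming a linear anneal from 1.0 down to min_epsilon over the first no_episodes_to_reach_epsilon episodes (the actual training script may use a different curve); epsilon_for_episode is a hypothetical helper name.

def epsilon_for_episode(episode):
    # Linearly anneal exploration from 1.0 to min_epsilon, then hold it constant.
    fraction = min(episode / no_episodes_to_reach_epsilon, 1.0)
    return 1.0 + fraction * (min_epsilon - 1.0)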

# Experiment 2: same as experiment 1 but with double DQN disabled
batch_size = 5000 # number of experiences to sample from the replay buffer
discount_factor = 0.95 # discount (gamma) in the Q-learning / Bellman update
target_model = copy.deepcopy(model)
replay_buffer = deque(maxlen=15000) # [(state, action, reward, next_state, done), ...]
learning_rate = 1e-4 # learning rate for the SGD optimizer
loss_fn = nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
no_episodes = 50000
no_episodes_to_reach_epsilon = 10000
min_epsilon = 0.01
no_episodes_before_training = 2000
no_episodes_before_updating_target = 100
use_double_dqn = False
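
Experiments 1 and 2 differ only in use_double_dqn. A sketch of what that flag typically switches between is below, using the standard DQN / double-DQN target formulations rather than the exact code of the training script; next_states, rewards, and dones are hypothetical batch tensors sampled from replay_buffer.

with torch.no_grad():
    if use_double_dqn:
        # Double DQN: the online network selects the next action,
        # the target network evaluates it.
        best_actions = model(next_states).argmax(dim=1, keepdim=True)
        next_q = target_model(next_states).gather(1, best_actions).squeeze(1)
    else:
        # Vanilla DQN: the target network both selects and evaluates the action.
        next_q = target_model(next_states).max(dim=1).values
    targets = rewards + discount_factor * next_q * (1.0 - dones)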

# Experiment 3: larger batch (10000) and replay buffer (50000), otherwise as experiment 1
batch_size = 10000 # number of experiences to sample from the replay buffer
discount_factor = 0.95 # discount (gamma) in the Q-learning / Bellman update
target_model = copy.deepcopy(model)
replay_buffer = deque(maxlen=50000) # [(state, action, reward, next_state, done), ...]
learning_rate = 1e-4 # learning rate for the SGD optimizer
loss_fn = nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
no_episodes = 50000
no_episodes_to_reach_epsilon = 10000
min_epsilon = 0.01
no_episodes_before_training = 2000
no_episodes_before_updating_target = 100
use_double_dqn = True

# Experiment 4: as experiment 1 but target network updated every 10 episodes
batch_size = 5000 # number of experiences to sample from the replay buffer
discount_factor = 0.95 # discount (gamma) in the Q-learning / Bellman update
target_model = copy.deepcopy(model)
replay_buffer = deque(maxlen=15000) # [(state, action, reward, next_state, done), ...]
learning_rate = 1e-4 # learning rate for the SGD optimizer
loss_fn = nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
no_episodes = 50000
no_episodes_to_reach_epsilon = 10000
min_epsilon = 0.01
no_episodes_before_training = 2000
no_episodes_before_updating_target = 10
use_double_dqn = True

# Experiment 5: as experiment 1 but lower discount factor (0.80)
batch_size = 5000 # number of experiences to sample from the replay buffer
discount_factor = 0.80 # discount (gamma) in the Q-learning / Bellman update
target_model = copy.deepcopy(model)
replay_buffer = deque(maxlen=15000) # [(state, action, reward, next_state, done), ...]
learning_rate = 1e-4 # learning rate for the SGD optimizer
loss_fn = nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
no_episodes = 50000
no_episodes_to_reach_epsilon = 10000
min_epsilon = 0.01
no_episodes_before_training = 2000
no_episodes_before_updating_target = 100
use_double_dqn = True
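
For reference, a minimal sketch of how one training update could combine these settings, assuming a uniform-sampling replay buffer and the usual episode loop; sample_batch and training_step are hypothetical names, and the target computation would follow the double-DQN sketch after experiment 2 when use_double_dqn is set.

import random

def sample_batch(buffer, size):
    # Uniformly sample transitions and stack each field into a batch tensor
    # (assumes states are stored as equally shaped tensors).
    batch = random.sample(list(buffer), size)
    states, actions, rewards, next_states, dones = zip(*batch)
    return (torch.stack(states),
            torch.tensor(actions, dtype=torch.int64),
            torch.tensor(rewards, dtype=torch.float32),
            torch.stack(next_states),
            torch.tensor(dones, dtype=torch.float32))

def training_step():
    states, actions, rewards, next_states, dones = sample_batch(replay_buffer, batch_size)
    with torch.no_grad():
        next_q = target_model(next_states).max(dim=1).values  # or the double-DQN variant
        targets = rewards + discount_factor * next_q * (1.0 - dones)
    q_values = model(states).gather(1, actions.unsqueeze(1)).squeeze(1)
    loss = loss_fn(q_values, targets)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Inside the episode loop the target network would be re-synced periodically:
#     if episode % no_episodes_before_updating_target == 0:
#         target_model.load_state_dict(model.state_dict())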