# configs.yaml
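# DreamerV3-style training hyperparameters (dreamer3iris, CARLA setup).
# `defaults` holds the base configuration; the named blocks further down
# (dmc_vision, dmc_proprio, atari100k, carla_vision, debug) override
# individual keys for specific benchmarks.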
defaults:
  logdir: /fzi/ids/jk639/no_backup/data/dreamer3iris
  # logdir: s3://tks-zx.fzi.de:9000/clearml/bogdoll/rl_traffic_rule_Jing/dreamer3
  traindir: null
  evaldir: null
  offline_traindir: ''
  offline_evaldir: ''
  seed: 0
  steps: 4e4
  eval_every: 4e2
  eval_episode_num: 1
  log_every: 4e2
  reset_every: 0
  device: 'cuda'
  compile: True
  precision: 32
  debug: False
  expl_gifs: False
  video_pred_log: True

  # Environment
  # task: 'dmc_walker_walk'
  task: 'carla_1'
  size: [128, 128]
  envs: 1
  action_repeat: 1
  time_limit: 40
  grayscale: False
  prefill: 100
  eval_noise: 0.0
  reward_EMA: True

  # Model
  dyn_cell: 'gru_layer_norm'
  dyn_hidden: 512
  dyn_deter: 512
  dyn_stoch: 32
  dyn_discrete: 32
  dyn_input_layers: 1
  dyn_output_layers: 1
  dyn_rec_depth: 1
  dyn_shared: False
  dyn_mean_act: 'none'
  dyn_std_act: 'sigmoid2'
  dyn_min_std: 0.1
  dyn_temp_post: True
  grad_heads: ['decoder', 'reward', 'cont']
  units: 512
  reward_layers: 2
  cont_layers: 2
  value_layers: 2
  actor_layers: 2
  act: 'SiLU'
  norm: 'LayerNorm'
  encoder:
    {mlp_keys: '$^', cnn_keys: 'image', act: 'SiLU', norm: 'LayerNorm', cnn_depth: 32, kernel_size: 4, minres: 4, mlp_layers: 2, mlp_units: 512, symlog_inputs: True}
  decoder:
    {mlp_keys: '$^', cnn_keys: 'image', act: 'SiLU', norm: 'LayerNorm', cnn_depth: 32, kernel_size: 4, minres: 4, mlp_layers: 2, mlp_units: 512, cnn_sigmoid: False, image_dist: mse, vector_dist: symlog_mse}
  value_head: 'symlog_disc'
  reward_head: 'symlog_disc'
  dyn_scale: '0.5'
  rep_scale: '0.1'
  kl_free: '1.0'
  cont_scale: 1.0
  reward_scale: 1.0
  weight_decay: 0.0
  unimix_ratio: 0.01
  action_unimix_ratio: 0.01
  initial: 'learned'

  # Training
  batch_size: 16
  batch_length: 64
  train_ratio: 512
  pretrain: 100
  model_lr: 1e-4
  opt_eps: 1e-8
  grad_clip: 1000
  value_lr: 3e-5
  actor_lr: 3e-5
  ac_opt_eps: 1e-5
  value_grad_clip: 100
  actor_grad_clip: 100
  dataset_size: 0
  slow_value_target: True
  slow_target_update: 1
  slow_target_fraction: 0.02
  opt: 'adam'

  # Behavior
  discount: 0.997
  discount_lambda: 0.95
  imag_horizon: 15
  imag_gradient: 'dynamics'
  imag_gradient_mix: '0.0'
  imag_sample: True
  actor_dist: 'onehot'
  actor_entropy: '3e-4'
  actor_state_entropy: 0.0
  actor_init_std: 1.0
  actor_min_std: 0.1
  actor_max_std: 1.0
  actor_temp: 0.1
  expl_amount: 0.0
  eval_state_mean: False
  collect_dyn_sample: True
  behavior_stop_grad: True
  value_decay: 0.0
  future_entropy: False

  # Exploration
  expl_behavior: 'greedy'
  expl_until: 0
  expl_extr_scale: 0.0
  expl_intr_scale: 1.0
  disag_target: 'stoch'
  disag_log: True
  disag_models: 10
  disag_offset: 1
  disag_layers: 4
  disag_units: 400
  disag_action_cond: False
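
# The named configs below override `defaults` when selected; each block lists
# only the keys it changes. (How a config is chosen at launch, e.g. via a
# --configs flag, depends on the training script and is not defined here.)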
dmc_vision:
  steps: 1e6
  train_ratio: 512
  video_pred_log: true
  encoder: {mlp_keys: '$^', cnn_keys: 'image'}
  decoder: {mlp_keys: '$^', cnn_keys: 'image'}

dmc_proprio:
  steps: 5e5
  train_ratio: 512
  video_pred_log: false
  encoder: {mlp_keys: '.*', cnn_keys: '$^'}
  decoder: {mlp_keys: '.*', cnn_keys: '$^'}

atari100k:
  steps: 4e5
  action_repeat: 4
  eval_episode_num: 100
  stickey: False
  lives: unused
  noops: 30
  resize: opencv
  actions: needed
  actor_dist: 'onehot'
  train_ratio: 1024
  imag_gradient: 'reinforce'
  time_limit: 1000

carla_vision:
  steps: 4e4
  action_repeat: 1
  eval_episode_num: 20
  stickey: False
  lives: unused
  noops: 30
  resize: opencv
  actions: needed
  actor_dist: 'onehot'
  train_ratio: 128
  imag_gradient: 'reinforce'
  time_limit: 1000

debug:
  debug: True
  pretrain: 1
  prefill: 1
  batch_size: 10
  batch_length: 20
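
# Hypothetical example (commented out, names and values illustrative only):
# a new named config only needs the keys it overrides; everything else is
# inherited from `defaults` when the config is selected.
# carla_debug:
#   task: 'carla_1'
#   steps: 1e3
#   prefill: 10
#   batch_size: 4
#   batch_length: 16
#   video_pred_log: False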