Skip to content

Commit

Permalink
training (gliding)
Browse files Browse the repository at this point in the history
  • Loading branch information
AIWintermuteAI committed Aug 26, 2023
1 parent 6a00b2a commit e1f29da
Show file tree
Hide file tree
Showing 7 changed files with 251 additions and 128 deletions.
12 changes: 7 additions & 5 deletions isaacgymenvs/cfg/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,17 @@ checkpoint: ''
# set to True to use multi-gpu horovod training
multi_gpu: False

experiment_dir: ''

wandb_activate: False
wandb_group: ''
wandb_name: ${train.params.config.name}
wandb_entity: ''
wandb_project: 'isaacgymenvs'
capture_video: False
capture_video_freq: 7000
capture_video_len: 100
force_render: True
wandb_project: ${task.wandb_project}
capture_video: ${task.capture_video}
capture_video_freq: ${task.capture_video_freq}
capture_video_len: ${task.capture_video_len}
force_render: ${task.force_render}

# disables rendering
headless: False
Expand Down
25 changes: 17 additions & 8 deletions isaacgymenvs/cfg/task/Atlas.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
# used to create the object
name: Atlas

wandb_group: ''
wandb_name: Atlas
wandb_entity: ''
wandb_project: 'atlas'
capture_video: True
capture_video_freq: 2500
capture_video_len: 100
force_render: True

physics_engine: 'physx'

env:
Expand Down Expand Up @@ -35,7 +44,7 @@ env:

randomCommandVelocityRanges:
# train
linear_x: [-1., 1.] # min max [m/s]
linear_x: [-2., 2.] # min max [m/s]
linear_y: [0., 0.] # min max [m/s]
yaw: [-1.57, 1.57] # min max [rad/s]

Expand All @@ -44,7 +53,7 @@ env:
stiffness: 85.0 # [N*m/rad]
damping: 4.0 # [N*m*s/rad]
# action scale: target angle = actionScale * action + defaultAngle
actionScale: 0.5
actionScale: 0.75
# decimation: number of control action updates (each at the sim dt) per policy step (policy dt)
decimation: 4

Expand Down Expand Up @@ -76,15 +85,15 @@ env:
allowKneeContacts: true
# rewards
terminalReward: 0.0
linearVelocityXYRewardScale: 1.0
linearVelocityXYRewardScale: 5.0
linearVelocityZRewardScale: -4.0
angularVelocityXYRewardScale: -0.05
angularVelocityZRewardScale: 0.5
orientationRewardScale: -0. #-1.
torqueRewardScale: -0.00002 # -0.000025
orientationRewardScale: -0.5 #-1.
torqueRewardScale: -0.000005 # -0.000025
jointAccRewardScale: -0.0005 # -0.0025
baseHeightRewardScale: -0.0 #5
feetAirTimeRewardScale: 1.0
feetAirTimeRewardScale: 2.0
kneeCollisionRewardScale: -0.25
feetStumbleRewardScale: -0. #-2.0
actionRateRewardScale: -0.01
Expand Down Expand Up @@ -127,8 +136,8 @@ env:
enableCameraSensors: False

sim:
dt: 0.02
substeps: 2
dt: 0.005
substeps: 1
up_axis: "z"
use_gpu_pipeline: ${eq:${...pipeline},"gpu"}
gravity: [0.0, 0.0, -9.81]
Expand Down
14 changes: 3 additions & 11 deletions isaacgymenvs/cfg/train/AtlasPPO.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ params:

network:
name: actor_critic
separate: True
separate: False

space:
continuous:
Expand All @@ -23,22 +23,14 @@ params:
fixed_sigma: True

mlp:
units: [512] #, 256, 128]
units: [256, 128, 64]
activation: elu
d2rl: False

initializer:
name: default
regularizer:
name: None
rnn:
name: lstm
units: 256 #128
layers: 1
before_mlp: False #True
concat_input: True
layer_norm: False


load_checkpoint: ${if:${...checkpoint},True,False} # whether to load a checkpoint (True when a checkpoint path is set)
load_path: ${...checkpoint} # path to the checkpoint to load
Expand Down Expand Up @@ -75,7 +67,7 @@ params:
bounds_loss_coef: 0.

max_epochs: ${resolve_default:1000,${....max_iterations}}
save_best_after: 200
save_best_after: 0
score_to_win: 20000
save_frequency: 50
print_stats: True
81 changes: 81 additions & 0 deletions isaacgymenvs/cfg/train/AtlasPPO_LSTM.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
params:
seed: ${...seed}

algo:
name: a2c_continuous

model:
name: continuous_a2c_logstd

network:
name: actor_critic
separate: True

space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: 0. # initial log-std of 0, i.e. std = exp(0) = 1
fixed_sigma: True

mlp:
units: [512] #, 256, 128]
activation: elu
d2rl: False

initializer:
name: default
regularizer:
name: None
rnn:
name: lstm
units: 256 #128
layers: 1
before_mlp: False #True
concat_input: True
layer_norm: False


load_checkpoint: ${if:${...checkpoint},True,False} # whether to load a checkpoint (True when a checkpoint path is set)
load_path: ${...checkpoint} # path to the checkpoint to load

config:
name: ${resolve_default:Atlas,${....experiment}}
full_experiment_name: ${.name}
env_name: rlgpu
ppo: True
mixed_precision: True
normalize_input: True
normalize_value: True
normalize_advantage: True
value_bootstrap: True
clip_actions: False
num_actors: ${....task.env.numEnvs}
reward_shaper:
scale_value: 1.0
gamma: 0.99
tau: 0.95
e_clip: 0.2
entropy_coef: 0.001
learning_rate: 3.e-4 # overwritten by adaptive lr_schedule
lr_schedule: adaptive
kl_threshold: 0.008 # target kl for adaptive lr
truncate_grads: True
grad_norm: 1.
horizon_length: 24
minibatch_size: 512
mini_epochs: 5
critic_coef: 2
clip_value: True
seq_len: 4 # only for rnn
bounds_loss_coef: 0.

max_epochs: ${resolve_default:1000,${....max_iterations}}
save_best_after: 0
score_to_win: 20000
save_frequency: 50
print_stats: True
Loading

0 comments on commit e1f29da

Please sign in to comment.