training (gliding)

AIWintermuteAI · Aug 26, 2023 · e1f29da
1 parent 6a00b2a
commit e1f29da
Show file tree

Hide file tree

Showing 7 changed files with 251 additions and 128 deletions.
diff --git a/isaacgymenvs/cfg/config.yaml b/isaacgymenvs/cfg/config.yaml
@@ -39,15 +39,17 @@ checkpoint: ''
 # set to True to use multi-gpu horovod training
 multi_gpu: False
 
+experiment_dir: ''
+
 wandb_activate: False
 wandb_group: ''
 wandb_name: ${train.params.config.name}
 wandb_entity: ''
-wandb_project: 'isaacgymenvs'
-capture_video: False
-capture_video_freq: 7000
-capture_video_len: 100
-force_render: True
+wandb_project: ${oc.select:task.wandb_project,isaacgymenvs}  # task value if set, else the previous default
+capture_video: ${oc.select:task.capture_video,false}  # same fallback rule for tasks that omit the key
+capture_video_freq: ${oc.select:task.capture_video_freq,7000}
+capture_video_len: ${oc.select:task.capture_video_len,100}
+force_render: ${oc.select:task.force_render,true}
 
 # disables rendering
 headless: False

diff --git a/isaacgymenvs/cfg/task/Atlas.yaml b/isaacgymenvs/cfg/task/Atlas.yaml
@@ -1,6 +1,15 @@
 # used to create the object
 name: Atlas
 
+wandb_group: ''  # NOTE(review): cfg/config.yaml in this diff keeps its own wandb_group; this key looks unused - confirm
+wandb_name: Atlas  # NOTE(review): cfg/config.yaml still takes wandb_name from train.params.config.name - confirm
+wandb_entity: ''  # NOTE(review): cfg/config.yaml in this diff keeps its own wandb_entity - confirm
+wandb_project: 'atlas'  # referenced from cfg/config.yaml as task.wandb_project
+capture_video: True  # referenced from cfg/config.yaml as task.capture_video
+capture_video_freq: 2500  # referenced from cfg/config.yaml as task.capture_video_freq
+capture_video_len: 100  # referenced from cfg/config.yaml as task.capture_video_len
+force_render: True  # referenced from cfg/config.yaml as task.force_render
+
 physics_engine: 'physx'
 
 env:
@@ -35,7 +44,7 @@ env:
 
   randomCommandVelocityRanges:
     # train
-    linear_x: [-1., 1.] # min max [m/s]
+    linear_x: [-2., 2.] # min max [m/s]
     linear_y: [0., 0.]   # min max [m/s]
     yaw: [-1.57, 1.57]          # min max [rad/s]
 
@@ -44,7 +53,7 @@ env:
     stiffness: 85.0  # [N*m/rad]
     damping: 4.0     # [N*m*s/rad]
     # action scale: target angle = actionScale * action + defaultAngle
-    actionScale: 0.5
+    actionScale: 0.75
     # decimation: Number of control action updates @ sim DT per policy DT
     decimation: 4
 
@@ -76,15 +85,15 @@ env:
     allowKneeContacts: true
     # rewards
     terminalReward: 0.0
-    linearVelocityXYRewardScale: 1.0
+    linearVelocityXYRewardScale: 5.0
     linearVelocityZRewardScale: -4.0
     angularVelocityXYRewardScale: -0.05
     angularVelocityZRewardScale: 0.5
-    orientationRewardScale: -0. #-1.
-    torqueRewardScale: -0.00002 # -0.000025
+    orientationRewardScale: -0.5 #-1.
+    torqueRewardScale: -0.000005 # -0.000025
     jointAccRewardScale: -0.0005 # -0.0025
     baseHeightRewardScale: -0.0 #5
-    feetAirTimeRewardScale:  1.0
+    feetAirTimeRewardScale:  2.0
     kneeCollisionRewardScale: -0.25
     feetStumbleRewardScale: -0. #-2.0
     actionRateRewardScale: -0.01
@@ -127,8 +136,8 @@ env:
   enableCameraSensors: False
 
 sim:
-  dt: 0.02
-  substeps: 2
+  dt: 0.005
+  substeps: 1
   up_axis: "z"
   use_gpu_pipeline: ${eq:${...pipeline},"gpu"}
   gravity: [0.0, 0.0, -9.81]

diff --git a/isaacgymenvs/cfg/train/AtlasPPO.yaml b/isaacgymenvs/cfg/train/AtlasPPO.yaml
@@ -9,7 +9,7 @@ params:
 
   network:
     name: actor_critic
-    separate: True
+    separate: False
 
     space:
       continuous:
@@ -23,22 +23,14 @@ params:
         fixed_sigma: True
 
     mlp:
-      units: [512] #, 256, 128]
+      units: [256, 128, 64]
       activation: elu
       d2rl: False
 
       initializer:
         name: default
       regularizer:
         name: None
-    rnn:
-      name: lstm
-      units: 256 #128
-      layers: 1
-      before_mlp: False #True
-      concat_input: True
-      layer_norm: False
-
 
   load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint
   load_path: ${...checkpoint} # path to the checkpoint to load
@@ -75,7 +67,7 @@ params:
     bounds_loss_coef: 0.
 
     max_epochs: ${resolve_default:1000,${....max_iterations}}
-    save_best_after: 200
+    save_best_after: 0
     score_to_win: 20000
     save_frequency: 50
     print_stats: True
diff --git a/isaacgymenvs/cfg/train/AtlasPPO_LSTM.yaml b/isaacgymenvs/cfg/train/AtlasPPO_LSTM.yaml
@@ -0,0 +1,81 @@
+params:
+  seed: ${...seed}
+
+  algo:
+    name: a2c_continuous
+
+  model:
+    name: continuous_a2c_logstd
+
+  network:
+    name: actor_critic
+    separate: True
+
+    space:
+      continuous:
+        mu_activation: None
+        sigma_activation: None
+        mu_init:
+          name: default
+        sigma_init:
+          name: const_initializer
+          val: 0. # std = 1.
+        fixed_sigma: True
+
+    mlp:
+      units: [512] #, 256, 128]
+      activation: elu
+      d2rl: False
+
+      initializer:
+        name: default
+      regularizer:
+        name: None
+    rnn:
+      name: lstm
+      units: 256 #128
+      layers: 1
+      before_mlp: False #True
+      concat_input: True
+      layer_norm: False
+
+
+  load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint
+  load_path: ${...checkpoint} # path to the checkpoint to load
+
+  config:
+    name: ${resolve_default:Atlas,${....experiment}}
+    full_experiment_name: ${.name}
+    env_name: rlgpu
+    ppo: True
+    mixed_precision: True
+    normalize_input: True
+    normalize_value: True
+    normalize_advantage: True
+    value_bootstrap: True
+    clip_actions: False
+    num_actors: ${....task.env.numEnvs}
+    reward_shaper:
+      scale_value: 1.0
+    gamma: 0.99
+    tau: 0.95
+    e_clip: 0.2
+    entropy_coef: 0.001
+    learning_rate: 3.e-4 # overwritten by adaptive lr_schedule
+    lr_schedule: adaptive
+    kl_threshold: 0.008 # target kl for adaptive lr
+    truncate_grads: True
+    grad_norm: 1.
+    horizon_length: 24
+    minibatch_size: 512
+    mini_epochs: 5
+    critic_coef: 2
+    clip_value: True
+    seq_len: 4 # only for rnn
+    bounds_loss_coef: 0.
+
+    max_epochs: ${resolve_default:1000,${....max_iterations}}
+    save_best_after: 0
+    score_to_win: 20000
+    save_frequency: 50
+    print_stats: True