Implement CoMiC for reusable skill learning #9

Open · wants to merge 6 commits into base: main
178 changes: 178 additions & 0 deletions isaacgymenvs/cfg/task/QuadrupedGetup.yaml
@@ -0,0 +1,178 @@
# used to create the object
name: QuadrupedGetup

physics_engine: ${..physics_engine}

env:
  numEnvs: ${resolve_default:4096,${...num_envs}}
  envSpacing: 4.  # [m]
  episodeLength_s: 8
  cameraFollow: True  # whether the camera follows the quadruped
  enableDebugVis: False

  pdControl: True
  powerScale: 1.0
  stateInit: "Random"
  hybridInitProb: 0.5
  numAMPObsSteps: 2

  localRootObs: True
  targetHeight: 0.40
  targetHeightDiffEps: 1e-3
  terminationHeight: 0.15
  enableEarlyTermination: False

  clipObservations: 5.0
  clipActions: 1.0

  plane:
    staticFriction: 1.0  # [-]
    dynamicFriction: 1.0  # [-]
    restitution: 0.  # [-]

  baseInitState:
    pos: [0.0, 0.0, 0.35]  # x,y,z [m]
    rot: [0.0, 0.0, 0.0, 1.0]  # x,y,z,w [quat]
    vLinear: [0.0, 0.0, 0.0]  # x,y,z [m/s]
    vAngular: [0.0, 0.0, 0.0]  # x,y,z [rad/s]

  randomCommandVelocityRanges:
    linear_x: [0., 2.]  # min max [m/s]
    linear_y: [0., 0.]  # min max [m/s]
    yaw: [-0., 0.]  # min max [rad/s]

  control:
    # PD Drive parameters:
    stiffness: 40.0  # [N*m/rad]
    damping: 1.0  # [N*m*s/rad]
    # Replaced manual actionScale with automatic action scale calculated from URDF
    # actionScale: 2.0
    controlFrequencyInv: 1  # 50 Hz with dt = 0.02

  defaultJointAngles:  # = target angles when action = 0.0
    FL_hip_joint: 0.0  # [rad]
    RL_hip_joint: 0.0  # [rad]
    FR_hip_joint: 0.0  # [rad]
    RR_hip_joint: 0.0  # [rad]

    FL_thigh_joint: 0.8  # [rad]
    RL_thigh_joint: 0.8  # [rad]
    FR_thigh_joint: 0.8  # [rad]
    RR_thigh_joint: 0.8  # [rad]

    FL_calf_joint: -1.6  # [rad]
    RL_calf_joint: -1.6  # [rad]
    FR_calf_joint: -1.6  # [rad]
    RR_calf_joint: -1.6  # [rad]

  urdfAsset:
    collapseFixedJoints: True
    fixBaseLink: False
    defaultDofDriveMode: 4  # see GymDofDriveModeFlags (0 is none, 1 is pos tgt, 2 is vel tgt, 4 effort)

  learn:
    # normalization
    linearVelocityScale: 2.0
    angularVelocityScale: 0.25
    dofPositionScale: 1.0
    dofVelocityScale: 0.05

  # viewer cam:
  viewer:
    refEnv: 0
    pos: [0, 0, 4]  # [m]
    lookat: [1., 1, 3.3]  # [m]

  # set to True if you use camera sensors in the environment
  enableCameraSensors: False

sim:
  dt: 0.02
  substeps: 2
  up_axis: "z"
  use_gpu_pipeline: ${eq:${...pipeline},"gpu"}
  gravity: [0.0, 0.0, -9.81]
  physx:
    num_threads: ${....num_threads}
    solver_type: ${....solver_type}
    use_gpu: ${contains:"cuda",${....sim_device}}  # set to False to run on CPU
    num_position_iterations: 4
    num_velocity_iterations: 1
    contact_offset: 0.02
    rest_offset: 0.0
    bounce_threshold_velocity: 0.2
    max_depenetration_velocity: 100.0
    default_buffer_size_multiplier: 5.0
    max_gpu_contact_pairs: 8388608  # 8*1024*1024
    num_subscenes: ${....num_subscenes}
    contact_collection: 1  # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!)

task:
  randomize: False
  randomization_params:
    frequency: 600  # how many environment steps between generating new randomizations
    observations:
      range: [0, .002]  # range for the white noise
      operation: "additive"
      distribution: "gaussian"
    actions:
      range: [0., .02]
      operation: "additive"
      distribution: "gaussian"
    sim_params:
      gravity:
        range: [0, 0.4]
        operation: "additive"
        distribution: "gaussian"
        schedule: "linear"  # "linear" will linearly interpolate between no rand and max rand
        schedule_steps: 3000
    actor_params:
      anymal:
        color: True
        rigid_body_properties:
          mass:
            range: [0.5, 1.5]
            operation: "scaling"
            distribution: "uniform"
            setup_only: True  # property is randomized only once, before simulation starts; see the Domain Randomization documentation for more info
            schedule: "linear"  # "linear" will linearly interpolate between no rand and max rand
            schedule_steps: 3000
        rigid_shape_properties:
          friction:
            num_buckets: 500
            range: [0.7, 1.3]
            operation: "scaling"
            distribution: "uniform"
            schedule: "linear"  # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 3000
          restitution:
            range: [0., 0.7]
            operation: "scaling"
            distribution: "uniform"
            schedule: "linear"  # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 3000
        dof_properties:
          damping:
            range: [0.5, 1.5]
            operation: "scaling"
            distribution: "uniform"
            schedule: "linear"  # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 3000
          stiffness:
            range: [0.5, 1.5]
            operation: "scaling"
            distribution: "uniform"
            schedule: "linear"  # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 3000
          lower:
            range: [0, 0.01]
            operation: "additive"
            distribution: "gaussian"
            schedule: "linear"  # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 3000
          upper:
            range: [0, 0.01]
            operation: "additive"
            distribution: "gaussian"
            schedule: "linear"  # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 3000
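The `control` block above sets up PD actuation, and the commented-out `actionScale` notes that the scale is now derived automatically from the URDF joint limits. A minimal sketch of that pattern, with hypothetical helper names (the PR's actual implementation lives in the task class, which this diff does not show):

```python
import torch

def action_to_pd_target(actions, default_dof_pos, dof_lower, dof_upper):
    # Map normalized actions in [-1, 1] to joint position targets around the
    # default pose, scaling by the distance to each URDF joint limit instead
    # of a hand-tuned actionScale (hypothetical helper, for illustration).
    scale = torch.where(actions > 0.0,
                        dof_upper - default_dof_pos,
                        default_dof_pos - dof_lower)
    return default_dof_pos + actions * scale

def pd_torque(target, dof_pos, dof_vel, kp: float = 40.0, kd: float = 1.0):
    # PD law matching the stiffness/damping above: tau = Kp*(q* - q) - Kd*qdot
    return kp * (target - dof_pos) - kd * dof_vel
```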
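Both config files also lean on OmegaConf interpolation: `${..physics_engine}` walks up the composed Hydra config, while `${resolve_default:...}`, `${eq:...}`, `${contains:...}`, and `${if:...}` are custom resolvers that the IsaacGymEnvs launcher registers at startup. A sketch of their assumed semantics, inferred from how they are used here:

```python
from omegaconf import OmegaConf

# Resolvers of roughly this shape are registered by the IsaacGymEnvs entry
# point before the config is composed (assumed semantics, for illustration):
OmegaConf.register_new_resolver(
    "resolve_default", lambda default, arg: default if arg in ("", None) else arg
)
OmegaConf.register_new_resolver("eq", lambda x, y: x.lower() == y.lower())
OmegaConf.register_new_resolver("contains", lambda x, y: x.lower() in y.lower())
OmegaConf.register_new_resolver("if", lambda pred, a, b: a if pred else b)

# Relative references like ${...num_envs} only resolve inside the full
# composed config, not when this file is loaded on its own:
cfg = OmegaConf.load("isaacgymenvs/cfg/task/QuadrupedGetup.yaml")
```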
73 changes: 73 additions & 0 deletions isaacgymenvs/cfg/train/QuadrupedGetupPPO.yaml
@@ -0,0 +1,73 @@
params:
  seed: ${...seed}

  algo:
    name: a2c_continuous

  model:
    name: continuous_a2c_logstd

  network:
    name: actor_critic
    separate: True

    space:
      continuous:
        mu_activation: None
        sigma_activation: None
        mu_init:
          name: default
        sigma_init:
          name: const_initializer
          val: -2.9  # log-std, so initial std = exp(-2.9) ≈ 0.055
        fixed_sigma: True
        learn_sigma: False

    mlp:
      units: [256, 128, 64]
      activation: elu
      d2rl: False

      initializer:
        name: default
      regularizer:
        name: None

  load_checkpoint: ${if:${...checkpoint},True,False}  # flag which sets whether to load the checkpoint
  load_path: ${...checkpoint}  # path to the checkpoint to load

  config:
    name: ${resolve_default:QuadrupedGetup,${....experiment}}
    full_experiment_name: ${.name}
    env_name: rlgpu
    ppo: True
    mixed_precision: True
    normalize_input: True
    normalize_value: True
    value_bootstrap: True
    num_actors: ${....task.env.numEnvs}
    reward_shaper:
      scale_value: 1.0
    normalize_advantage: True
    gamma: 0.99
    tau: 0.95
    learning_rate: 3.e-4  # fixed, since lr_schedule is constant
    lr_schedule: constant
    kl_threshold: 0.008  # target kl for adaptive lr (unused with a constant schedule)
    score_to_win: 20000
    max_epochs: ${resolve_default:1000,${....max_iterations}}
    save_best_after: 200
    save_frequency: 50
    print_stats: True
    grad_norm: 1.
    entropy_coef: 0.0
    truncate_grads: False
    e_clip: 0.2
    horizon_length: 24
    minibatch_size: 32768
    mini_epochs: 5
    critic_coef: 2
    clip_value: False
    seq_len: 4  # only used for rnn nets
    bounds_loss_coef: 10.0
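Two quick sanity checks on the numbers above, assuming the task's default `numEnvs` of 4096 is kept: rl_games requires the rollout batch to split evenly into minibatches, and `val` under `sigma_init` is a log-std rather than a std:

```python
import math

num_actors = 4096                          # task.env.numEnvs
horizon_length = 24
batch_size = num_actors * horizon_length   # 98304 transitions per update
minibatch_size = 32768
assert batch_size % minibatch_size == 0    # rl_games asserts this; 98304 / 32768 = 3

print(math.exp(-2.9))                      # ≈ 0.055: the initial action std
```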

2 changes: 2 additions & 0 deletions isaacgymenvs/tasks/__init__.py
@@ -48,6 +48,7 @@
from .trifinger import Trifinger
from .quadruped import Quadruped
from .quadruped_amp import QuadrupedAMP
from .quadruped_getup import QuadrupedGetup

# Mappings from strings to environments
isaacgym_task_map = {
@@ -70,6 +71,7 @@
"Quadcopter": Quadcopter,
"Quadruped": Quadruped,
"QuadrupedAMP": QuadrupedAMP,
"QuadrupedGetup": QuadrupedGetup,
"ShadowHand": ShadowHand,
"Trifinger": Trifinger,
}
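With the import and the map entry in place, the standard launcher can resolve the new task by name, e.g. `python train.py task=QuadrupedGetup train=QuadrupedGetupPPO`. A minimal lookup sketch:

```python
from isaacgymenvs.tasks import isaacgym_task_map

# The launcher instantiates tasks through this map; the real call site
# passes the composed Hydra config plus device/headless settings.
task_cls = isaacgym_task_map["QuadrupedGetup"]
print(task_cls.__name__)  # QuadrupedGetup
```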
2 changes: 1 addition & 1 deletion isaacgymenvs/tasks/quadruped_amp_base.py
@@ -257,8 +257,8 @@ def compute_reward(self):

    def compute_reset(self):
        self.reset_buf, self._terminate_buf = compute_humanoid_reset(
            self.reset_buf,
            self.progress_buf,
            self._terminate_buf,
            self.root_states,
            self.contact_forces,
            self.knee_indices,
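The body of `compute_humanoid_reset` is outside this diff. For orientation, here is a hedged sketch of the AMP-style termination such a function typically implements, using the config values above (an assumption, not the PR's actual code). Note that with `enableEarlyTermination: False` only the episode-length reset fires, which fits a get-up task that deliberately starts from fallen poses:

```python
import torch

def sketch_compute_reset(progress_buf, root_states, max_episode_length: int,
                         enable_early_termination: bool, termination_height: float):
    # Hypothetical stand-in for compute_humanoid_reset, for illustration only.
    terminated = torch.zeros_like(progress_buf)
    if enable_early_termination:
        # Early-terminate when the base drops below terminationHeight (0.15 m).
        base_height = root_states[:, 2]
        terminated = (base_height < termination_height).to(progress_buf.dtype)
    # Always reset once the episode length is exhausted.
    reset = torch.where(progress_buf >= max_episode_length - 1,
                        torch.ones_like(terminated), terminated)
    return reset, terminated
```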