Implement CoMiC for reusable skill learning #9

Open · wants to merge 6 commits into base: main
178 changes: 178 additions & 0 deletions isaacgymenvs/cfg/task/QuadrupedGetup.yaml
@@ -0,0 +1,178 @@
# used to create the object
name: QuadrupedGetup

physics_engine: ${..physics_engine}

env:
  numEnvs: ${resolve_default:4096,${...num_envs}}
  envSpacing: 4.  # [m]
  episodeLength_s: 8
  cameraFollow: True  # whether the camera follows the quadruped
  enableDebugVis: False

  pdControl: True
  powerScale: 1.0
  stateInit: "Random"
  hybridInitProb: 0.5
  numAMPObsSteps: 2

  localRootObs: True
  targetHeight: 0.40
  targetHeightDiffEps: 1e-3
  terminationHeight: 0.15
  enableEarlyTermination: False

  clipObservations: 5.0
  clipActions: 1.0

  plane:
    staticFriction: 1.0  # [-]
    dynamicFriction: 1.0  # [-]
    restitution: 0.  # [-]

  baseInitState:
    pos: [0.0, 0.0, 0.35]  # x,y,z [m]
    rot: [0.0, 0.0, 0.0, 1.0]  # x,y,z,w [quat]
    vLinear: [0.0, 0.0, 0.0]  # x,y,z [m/s]
    vAngular: [0.0, 0.0, 0.0]  # x,y,z [rad/s]

  randomCommandVelocityRanges:
    linear_x: [0., 2.]  # min max [m/s]
    linear_y: [0., 0.]  # min max [m/s]
    yaw: [-0., 0.]  # min max [rad/s]

  control:
    # PD Drive parameters:
    stiffness: 40.0  # [N*m/rad]
    damping: 1.0  # [N*m*s/rad]
    # Replaced manual actionScale with automatic action scale calculated from URDF
    # actionScale: 2.0
    controlFrequencyInv: 1  # 50 Hz with dt = 0.02

  defaultJointAngles:  # = target angles when action = 0.0
    FL_hip_joint: 0.0  # [rad]
    RL_hip_joint: 0.0  # [rad]
    FR_hip_joint: 0.0  # [rad]
    RR_hip_joint: 0.0  # [rad]

    FL_thigh_joint: 0.8  # [rad]
    RL_thigh_joint: 0.8  # [rad]
    FR_thigh_joint: 0.8  # [rad]
    RR_thigh_joint: 0.8  # [rad]

    FL_calf_joint: -1.6  # [rad]
    RL_calf_joint: -1.6  # [rad]
    FR_calf_joint: -1.6  # [rad]
    RR_calf_joint: -1.6  # [rad]

  urdfAsset:
    collapseFixedJoints: True
    fixBaseLink: False
    defaultDofDriveMode: 4  # see GymDofDriveModeFlags (0 is none, 1 is pos tgt, 2 is vel tgt, 4 effort)

  learn:
    # normalization
    linearVelocityScale: 2.0
    angularVelocityScale: 0.25
    dofPositionScale: 1.0
    dofVelocityScale: 0.05

  # viewer cam:
  viewer:
    refEnv: 0
    pos: [0, 0, 4]  # [m]
    lookat: [1., 1, 3.3]  # [m]

  # set to True if you use camera sensors in the environment
  enableCameraSensors: False

sim:
  dt: 0.02
  substeps: 2
  up_axis: "z"
  use_gpu_pipeline: ${eq:${...pipeline},"gpu"}
  gravity: [0.0, 0.0, -9.81]
  physx:
    num_threads: ${....num_threads}
    solver_type: ${....solver_type}
    use_gpu: ${contains:"cuda",${....sim_device}}  # set to False to run on CPU
    num_position_iterations: 4
    num_velocity_iterations: 1
    contact_offset: 0.02
    rest_offset: 0.0
    bounce_threshold_velocity: 0.2
    max_depenetration_velocity: 100.0
    default_buffer_size_multiplier: 5.0
    max_gpu_contact_pairs: 8388608  # 8*1024*1024
    num_subscenes: ${....num_subscenes}
    contact_collection: 1  # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!)

task:
  randomize: False
  randomization_params:
    frequency: 600  # how many environment steps between generating new randomizations
    observations:
      range: [0, .002]  # range for the white noise
      operation: "additive"
      distribution: "gaussian"
    actions:
      range: [0., .02]
      operation: "additive"
      distribution: "gaussian"
    sim_params:
      gravity:
        range: [0, 0.4]
        operation: "additive"
        distribution: "gaussian"
        schedule: "linear"  # "linear" will linearly interpolate between no rand and max rand
        schedule_steps: 3000
    actor_params:
      anymal:
        color: True
        rigid_body_properties:
          mass:
            range: [0.5, 1.5]
            operation: "scaling"
            distribution: "uniform"
            setup_only: True  # property is randomized only once, before simulation starts; see the Domain Randomization documentation for more info
            schedule: "linear"  # "linear" will linearly interpolate between no rand and max rand
            schedule_steps: 3000
        rigid_shape_properties:
          friction:
            num_buckets: 500
            range: [0.7, 1.3]
            operation: "scaling"
            distribution: "uniform"
            schedule: "linear"  # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 3000
          restitution:
            range: [0., 0.7]
            operation: "scaling"
            distribution: "uniform"
            schedule: "linear"  # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 3000
        dof_properties:
          damping:
            range: [0.5, 1.5]
            operation: "scaling"
            distribution: "uniform"
            schedule: "linear"  # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 3000
          stiffness:
            range: [0.5, 1.5]
            operation: "scaling"
            distribution: "uniform"
            schedule: "linear"  # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 3000
          lower:
            range: [0, 0.01]
            operation: "additive"
            distribution: "gaussian"
            schedule: "linear"  # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 3000
          upper:
            range: [0, 0.01]
            operation: "additive"
            distribution: "gaussian"
            schedule: "linear"  # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
            schedule_steps: 3000
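The `control` block above sets up PD actuation, and the commented-out `actionScale` notes that the scale is now derived automatically from the URDF joint limits. A minimal sketch of that pattern, with hypothetical helper names (the PR's actual implementation lives in the task class, which this diff does not show):

```python
import torch

def action_to_pd_target(actions, default_dof_pos, dof_lower, dof_upper):
    # Map normalized actions in [-1, 1] to joint position targets around the
    # default pose, scaling by the distance to each URDF joint limit instead
    # of a hand-tuned actionScale (hypothetical helper, for illustration).
    scale = torch.where(actions > 0.0,
                        dof_upper - default_dof_pos,
                        default_dof_pos - dof_lower)
    return default_dof_pos + actions * scale

def pd_torque(target, dof_pos, dof_vel, kp: float = 40.0, kd: float = 1.0):
    # PD law matching the stiffness/damping above: tau = Kp*(q* - q) - Kd*qdot
    return kp * (target - dof_pos) - kd * dof_vel
```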
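Both config files also lean on OmegaConf interpolation: `${..physics_engine}` walks up the composed Hydra config, while `${resolve_default:...}`, `${eq:...}`, `${contains:...}`, and `${if:...}` are custom resolvers that the IsaacGymEnvs launcher registers at startup. A sketch of their assumed semantics, inferred from how they are used here:

```python
from omegaconf import OmegaConf

# Resolvers of roughly this shape are registered by the IsaacGymEnvs entry
# point before the config is composed (assumed semantics, for illustration):
OmegaConf.register_new_resolver(
    "resolve_default", lambda default, arg: default if arg in ("", None) else arg
)
OmegaConf.register_new_resolver("eq", lambda x, y: x.lower() == y.lower())
OmegaConf.register_new_resolver("contains", lambda x, y: x.lower() in y.lower())
OmegaConf.register_new_resolver("if", lambda pred, a, b: a if pred else b)

# Relative references like ${...num_envs} only resolve inside the full
# composed config, not when this file is loaded on its own:
cfg = OmegaConf.load("isaacgymenvs/cfg/task/QuadrupedGetup.yaml")
```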
73 changes: 73 additions & 0 deletions isaacgymenvs/cfg/train/QuadrupedGetupPPO.yaml
@@ -0,0 +1,73 @@
params:
  seed: ${...seed}

  algo:
    name: a2c_continuous

  model:
    name: continuous_a2c_logstd

  network:
    name: actor_critic
    separate: True

    space:
      continuous:
        mu_activation: None
        sigma_activation: None
        mu_init:
          name: default
        sigma_init:
          name: const_initializer
          val: -2.9  # log-std, so initial std = exp(-2.9) ≈ 0.055
        fixed_sigma: True
        learn_sigma: False

    mlp:
      units: [256, 128, 64]
      activation: elu
      d2rl: False

      initializer:
        name: default
      regularizer:
        name: None

  load_checkpoint: ${if:${...checkpoint},True,False}  # flag which sets whether to load the checkpoint
  load_path: ${...checkpoint}  # path to the checkpoint to load

  config:
    name: ${resolve_default:QuadrupedGetup,${....experiment}}
    full_experiment_name: ${.name}
    env_name: rlgpu
    ppo: True
    mixed_precision: True
    normalize_input: True
    normalize_value: True
    value_bootstrap: True
    num_actors: ${....task.env.numEnvs}
    reward_shaper:
      scale_value: 1.0
    normalize_advantage: True
    gamma: 0.99
    tau: 0.95
    learning_rate: 3.e-4  # fixed, since lr_schedule is constant
    lr_schedule: constant
    kl_threshold: 0.008  # target kl for adaptive lr (unused with a constant schedule)
    score_to_win: 20000
    max_epochs: ${resolve_default:1000,${....max_iterations}}
    save_best_after: 200
    save_frequency: 50
    print_stats: True
    grad_norm: 1.
    entropy_coef: 0.0
    truncate_grads: False
    e_clip: 0.2
    horizon_length: 24
    minibatch_size: 32768
    mini_epochs: 5
    critic_coef: 2
    clip_value: False
    seq_len: 4  # only used for rnn nets
    bounds_loss_coef: 10.0
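Two quick sanity checks on the numbers above, assuming the task's default `numEnvs` of 4096 is kept: rl_games requires the rollout batch to split evenly into minibatches, and `val` under `sigma_init` is a log-std rather than a std:

```python
import math

num_actors = 4096                          # task.env.numEnvs
horizon_length = 24
batch_size = num_actors * horizon_length   # 98304 transitions per update
minibatch_size = 32768
assert batch_size % minibatch_size == 0    # rl_games asserts this; 98304 / 32768 = 3

print(math.exp(-2.9))                      # ≈ 0.055: the initial action std
```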

2 changes: 2 additions & 0 deletions isaacgymenvs/tasks/__init__.py
@@ -48,6 +48,7 @@
from .trifinger import Trifinger
from .quadruped import Quadruped
from .quadruped_amp import QuadrupedAMP
from .quadruped_getup import QuadrupedGetup

# Mappings from strings to environments
isaacgym_task_map = {
@@ -70,6 +71,7 @@
"Quadcopter": Quadcopter,
"Quadruped": Quadruped,
"QuadrupedAMP": QuadrupedAMP,
"QuadrupedGetup": QuadrupedGetup,
"ShadowHand": ShadowHand,
"Trifinger": Trifinger,
}
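With the import and the map entry in place, the standard launcher can resolve the new task by name, e.g. `python train.py task=QuadrupedGetup train=QuadrupedGetupPPO`. A minimal lookup sketch:

```python
from isaacgymenvs.tasks import isaacgym_task_map

# The launcher instantiates tasks through this map; the real call site
# passes the composed Hydra config plus device/headless settings.
task_cls = isaacgym_task_map["QuadrupedGetup"]
print(task_cls.__name__)  # QuadrupedGetup
```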
2 changes: 1 addition & 1 deletion isaacgymenvs/tasks/quadruped_amp_base.py
@@ -257,8 +257,8 @@ def compute_reward(self):

    def compute_reset(self):
        self.reset_buf, self._terminate_buf = compute_humanoid_reset(
            self.reset_buf,
            self.progress_buf,
            self._terminate_buf,
            self.root_states,
            self.contact_forces,
            self.knee_indices,
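The body of `compute_humanoid_reset` is outside this diff. For orientation, here is a hedged sketch of the AMP-style termination such a function typically implements, using the config values above (an assumption, not the PR's actual code). Note that with `enableEarlyTermination: False` only the episode-length reset fires, which fits a get-up task that deliberately starts from fallen poses:

```python
import torch

def sketch_compute_reset(progress_buf, root_states, max_episode_length: int,
                         enable_early_termination: bool, termination_height: float):
    # Hypothetical stand-in for compute_humanoid_reset, for illustration only.
    terminated = torch.zeros_like(progress_buf)
    if enable_early_termination:
        # Early-terminate when the base drops below terminationHeight (0.15 m).
        base_height = root_states[:, 2]
        terminated = (base_height < termination_height).to(progress_buf.dtype)
    # Always reset once the episode length is exhausted.
    reset = torch.where(progress_buf >= max_episode_length - 1,
                        torch.ones_like(terminated), terminated)
    return reset, terminated
```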