diff --git a/isaacgymenvs/cfg/config.yaml b/isaacgymenvs/cfg/config.yaml
index 734bba595..f6a3ce79e 100644
--- a/isaacgymenvs/cfg/config.yaml
+++ b/isaacgymenvs/cfg/config.yaml
@@ -39,15 +39,17 @@ checkpoint: ''
 # set to True to use multi-gpu horovod training
 multi_gpu: False
 
+experiment_dir: ''
+
 wandb_activate: False
 wandb_group: ''
 wandb_name: ${train.params.config.name}
 wandb_entity: ''
-wandb_project: 'isaacgymenvs'
-capture_video: False
-capture_video_freq: 7000
-capture_video_len: 100
-force_render: True
+wandb_project: ${task.wandb_project}
+capture_video: ${task.capture_video}
+capture_video_freq: ${task.capture_video_freq}
+capture_video_len: ${task.capture_video_len}
+force_render: ${task.force_render}
 
 # disables rendering
 headless: False
diff --git a/isaacgymenvs/cfg/task/Atlas.yaml b/isaacgymenvs/cfg/task/Atlas.yaml
index ccd677214..66ede6c5a 100644
--- a/isaacgymenvs/cfg/task/Atlas.yaml
+++ b/isaacgymenvs/cfg/task/Atlas.yaml
@@ -1,6 +1,15 @@
 # used to create the object
 name: Atlas
 
+wandb_group: ''
+wandb_name: Atlas
+wandb_entity: ''
+wandb_project: 'atlas'
+capture_video: True
+capture_video_freq: 2500
+capture_video_len: 100
+force_render: True
+
 physics_engine: 'physx'
 
 env:
@@ -35,7 +44,7 @@ env:
 
   randomCommandVelocityRanges:
     # train
-    linear_x: [-1., 1.] # min max [m/s]
+    linear_x: [-2., 2.] # min max [m/s]
     linear_y: [0., 0.]   # min max [m/s]
     yaw: [-1.57, 1.57]    # min max [rad/s]
 
@@ -44,7 +53,7 @@ env:
     stiffness: 85.0  # [N*m/rad]
     damping: 4.0     # [N*m*s/rad]
     # action scale: target angle = actionScale * action + defaultAngle
-    actionScale: 0.5
+    actionScale: 0.75
    # decimation: Number of control action updates @ sim DT per policy DT
     decimation: 4
 
@@ -76,15 +85,15 @@ env:
     allowKneeContacts: true
     # rewards
     terminalReward: 0.0
-    linearVelocityXYRewardScale: 1.0
+    linearVelocityXYRewardScale: 5.0
     linearVelocityZRewardScale: -4.0
     angularVelocityXYRewardScale: -0.05
     angularVelocityZRewardScale: 0.5
-    orientationRewardScale: -0. #-1.
-    torqueRewardScale: -0.00002 # -0.000025
+    orientationRewardScale: -0.5 #-1.
+    torqueRewardScale: -0.000005 # -0.000025
     jointAccRewardScale: -0.0005 # -0.0025
     baseHeightRewardScale: -0.0 #5
-    feetAirTimeRewardScale: 1.0
+    feetAirTimeRewardScale: 2.0
     kneeCollisionRewardScale: -0.25
     feetStumbleRewardScale: -0. #-2.0
     actionRateRewardScale: -0.01
@@ -127,8 +136,8 @@ env:
   enableCameraSensors: False
 
 sim:
-  dt: 0.02
-  substeps: 2
+  dt: 0.005
+  substeps: 1
   up_axis: "z"
   use_gpu_pipeline: ${eq:${...pipeline},"gpu"}
   gravity: [0.0, 0.0, -9.81]
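Note on the two config files above: the capture/wandb keys in config.yaml are now OmegaConf interpolations that resolve against whichever task config is loaded, so per-task defaults such as the ones added to Atlas.yaml take effect without command-line overrides. A minimal sketch of that resolution, using only values visible in this diff rather than the full Hydra config tree:

    # Minimal OmegaConf sketch (not the full Hydra tree) of how the ${task.*}
    # interpolations added to config.yaml resolve against the new keys in Atlas.yaml.
    from omegaconf import OmegaConf

    cfg = OmegaConf.create({
        "task": {"wandb_project": "atlas", "capture_video": True, "capture_video_freq": 2500},
        "wandb_project": "${task.wandb_project}",
        "capture_video": "${task.capture_video}",
        "capture_video_freq": "${task.capture_video_freq}",
    })
    assert cfg.wandb_project == "atlas"
    assert cfg.capture_video is True and cfg.capture_video_freq == 2500

With capture_video: True and capture_video_freq: 2500 in Atlas.yaml, video capture is therefore enabled by default for this task.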
diff --git a/isaacgymenvs/cfg/train/AtlasPPO.yaml b/isaacgymenvs/cfg/train/AtlasPPO.yaml
index 9d9576dde..86e8cc475 100644
--- a/isaacgymenvs/cfg/train/AtlasPPO.yaml
+++ b/isaacgymenvs/cfg/train/AtlasPPO.yaml
@@ -9,7 +9,7 @@ params:
 
   network:
     name: actor_critic
-    separate: True
+    separate: False
 
     space:
       continuous:
@@ -23,7 +23,7 @@ params:
         fixed_sigma: True
 
     mlp:
-      units: [512] #, 256, 128]
+      units: [256, 128, 64]
       activation: elu
       d2rl: False
 
@@ -31,14 +31,6 @@ params:
       name: default
     regularizer:
      name: None
-    rnn:
-      name: lstm
-      units: 256 #128
-      layers: 1
-      before_mlp: False #True
-      concat_input: True
-      layer_norm: False
-
 
   load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint
   load_path: ${...checkpoint} # path to the checkpoint to load
@@ -75,7 +67,7 @@ params:
     bounds_loss_coef: 0.
 
     max_epochs: ${resolve_default:1000,${....max_iterations}}
-    save_best_after: 200
+    save_best_after: 0
     score_to_win: 20000
     save_frequency: 50
     print_stats: True
\ No newline at end of file
diff --git a/isaacgymenvs/cfg/train/AtlasPPO_LSTM.yaml b/isaacgymenvs/cfg/train/AtlasPPO_LSTM.yaml
new file mode 100644
index 000000000..1f5521f20
--- /dev/null
+++ b/isaacgymenvs/cfg/train/AtlasPPO_LSTM.yaml
@@ -0,0 +1,81 @@
+params:
+  seed: ${...seed}
+
+  algo:
+    name: a2c_continuous
+
+  model:
+    name: continuous_a2c_logstd
+
+  network:
+    name: actor_critic
+    separate: True
+
+    space:
+      continuous:
+        mu_activation: None
+        sigma_activation: None
+        mu_init:
+          name: default
+        sigma_init:
+          name: const_initializer
+          val: 0. # std = 1.
+        fixed_sigma: True
+
+    mlp:
+      units: [512] #, 256, 128]
+      activation: elu
+      d2rl: False
+
+    initializer:
+      name: default
+    regularizer:
+      name: None
+    rnn:
+      name: lstm
+      units: 256 #128
+      layers: 1
+      before_mlp: False #True
+      concat_input: True
+      layer_norm: False
+
+
+  load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint
+  load_path: ${...checkpoint} # path to the checkpoint to load
+
+  config:
+    name: ${resolve_default:Atlas,${....experiment}}
+    full_experiment_name: ${.name}
+    env_name: rlgpu
+    ppo: True
+    mixed_precision: True
+    normalize_input: True
+    normalize_value: True
+    normalize_advantage: True
+    value_bootstrap: True
+    clip_actions: False
+    num_actors: ${....task.env.numEnvs}
+    reward_shaper:
+      scale_value: 1.0
+    gamma: 0.99
+    tau: 0.95
+    e_clip: 0.2
+    entropy_coef: 0.001
+    learning_rate: 3.e-4 # overwritten by adaptive lr_schedule
+    lr_schedule: adaptive
+    kl_threshold: 0.008 # target kl for adaptive lr
+    truncate_grads: True
+    grad_norm: 1.
+    horizon_length: 24
+    minibatch_size: 512
+    mini_epochs: 5
+    critic_coef: 2
+    clip_value: True
+    seq_len: 4 # only for rnn
+    bounds_loss_coef: 0.
+
+    max_epochs: ${resolve_default:1000,${....max_iterations}}
+    save_best_after: 0
+    score_to_win: 20000
+    save_frequency: 50
+    print_stats: True
\ No newline at end of file
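Note on the rates implied by Atlas.yaml and the training configs above: sim.dt drops from 0.02 to 0.005 (with substeps 1), while control.decimation stays at 4, so the policy still acts every 0.02 s but physics now integrates at 200 Hz. The recurrent settings removed from AtlasPPO.yaml are kept verbatim in the new AtlasPPO_LSTM.yaml, which should be selectable as an alternative train config (for example with a Hydra override along the lines of train=AtlasPPO_LSTM). A quick sanity check of the timing arithmetic, assuming the decimation semantics described by the config comment ("control action updates @ sim DT per policy DT"):

    # Sanity check of the control rates implied by Atlas.yaml above (values from this diff).
    sim_dt = 0.005      # sim.dt after the change (was 0.02 with substeps: 2)
    decimation = 4      # env.control.decimation, unchanged
    policy_dt = sim_dt * decimation
    assert policy_dt == 0.02        # the policy still acts at 50 Hz
    assert 1.0 / sim_dt == 200.0    # physics now steps at 200 Hz instead of 50 Hz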
diff --git a/isaacgymenvs/tasks/atlas.py b/isaacgymenvs/tasks/atlas.py
index 8a368d426..78681b64e 100644
--- a/isaacgymenvs/tasks/atlas.py
+++ b/isaacgymenvs/tasks/atlas.py
@@ -161,14 +161,14 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir
         self.feet_air_time = torch.zeros(self.num_envs, 2, dtype=torch.float, device=self.device, requires_grad=False)
         self.last_dof_vel = torch.zeros_like(self.dof_vel)
 
-        self.reset_idx(torch.arange(self.num_envs, device=self.device))
-
         # reward episode sums
         torch_zeros = lambda : torch.zeros(self.num_envs, dtype=torch.float, device=self.device, requires_grad=False)
         self.episode_sums = {"lin_vel_xy": torch_zeros(), "lin_vel_z": torch_zeros(), "ang_vel_z": torch_zeros(), "ang_vel_xy": torch_zeros(),
                              "orient": torch_zeros(), "torques": torch_zeros(), "joint_acc": torch_zeros(), "base_height": torch_zeros(),
                              "air_time": torch_zeros(), "collision": torch_zeros(), "stumble": torch_zeros(), "action_rate": torch_zeros(), "hip": torch_zeros()}
 
+        self.reset_idx(torch.arange(self.num_envs, device=self.device))
+
         self.up_vec = to_torch(get_axis_params(1., self.up_axis_idx), device=self.device).repeat((self.num_envs, 1))
         self.heading_vec = to_torch([1, 0, 0], device=self.device).repeat((self.num_envs, 1))
         self.basis_vec0 = self.heading_vec.clone()
@@ -219,6 +219,12 @@ def _create_envs(self, num_envs, spacing, num_per_row):
         self.num_dof = self.gym.get_asset_dof_count(anymal_asset)
         self.num_bodies = self.gym.get_asset_rigid_body_count(anymal_asset)
 
+        # prepare friction randomization
+        rigid_shape_prop = self.gym.get_asset_rigid_shape_properties(anymal_asset)
+        friction_range = self.cfg["env"]["learn"]["frictionRange"]
+        num_buckets = 100
+        friction_buckets = torch_rand_float(friction_range[0], friction_range[1], (num_buckets,1), device=self.device)
+
         start_pose = gymapi.Transform()
         start_pose.r = gymapi.Quat.from_euler_zyx(*self.base_init_state[3:6])
         start_pose.p = gymapi.Vec3(*self.base_init_state[:3])
@@ -249,6 +255,10 @@ def _create_envs(self, num_envs, spacing, num_per_row):
             # create env instance
             env_ptr = self.gym.create_env(self.sim, env_lower, env_upper, num_per_row)
             anymal_handle = self.gym.create_actor(env_ptr, anymal_asset, start_pose, "anymal", i, 1, 0)
+            if self.cfg["env"]["learn"]["randomizeFriction"]:
+                print("Friction randomization")
+                for s in range(len(rigid_shape_prop)):
+                    rigid_shape_prop[s].friction = friction_buckets[i % num_buckets]
             self.gym.set_actor_dof_properties(env_ptr, anymal_handle, dof_props)
             self.gym.enable_actor_dof_force_sensors(env_ptr, anymal_handle)
             self.envs.append(env_ptr)
@@ -259,7 +269,7 @@ def _create_envs(self, num_envs, spacing, num_per_row):
         for i in range(len(knee_names)):
             self.knee_indices[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.anymal_handles[0], knee_names[i])
 
-        self.base_index = self.gym.find_actor_rigid_body_handle(self.envs[0], self.anymal_handles[0], "mtorso")
+        self.base_index = self.gym.find_actor_rigid_body_handle(self.envs[0], self.anymal_handles[0], "utorso")
 
     def pre_physics_step(self, actions):
         self.actions = actions.clone().to(self.device)
@@ -283,22 +293,31 @@ def post_physics_step(self):
             self.reset_idx(env_ids)
 
         self.prev_torques = self.torques.clone()
-        #self.compute_reward()
-        self.compute_observations()
-        self.compute_reward(self.actions)
+        self.compute_observations()
+        self.compute_reward_old(self.actions)
+        #self.compute_reward_new()
 
         self.last_actions[:] = self.actions[:]
         self.last_dof_vel[:] = self.dof_vel[:]
 
-    def compute_reward(self, actions):
-        self.rew_buf[:], self.reset_buf[:] = compute_anymal_reward(
+    def compute_reward_old(self, actions):
+
+        self.rew_buf[:], self.reset_buf[:], self.episode_sums = compute_anymal_reward(
             # tensors
             self.root_states,
             self.commands,
+            self.actions,
+            self.last_actions,
+            self.dof_vel,
+            self.last_dof_vel,
+            self.dof_pos,
+            self.default_dof_pos,
             self.prev_torques,
+            self.projected_gravity,
             self.torques,
             self.contact_forces,
             self.knee_indices,
+            self.feet_indices,
             self.progress_buf,
             # Dict
             self.rew_scales,
@@ -308,7 +327,10 @@ def compute_reward(self, actions):
             self.targets,
             self.heading_vec,
             self.up_vec,
-            self.inv_start_rot
+            self.inv_start_rot,
+            self.dt,
+            self.feet_air_time,
+            self.episode_sums
         )
 
     def compute_observations(self):
@@ -363,76 +385,13 @@ def reset_idx(self, env_ids):
         self.feet_air_time[env_ids] = 0.
         self.progress_buf[env_ids] = 0
         self.reset_buf[env_ids] = 1
-"""
-    def compute_reward(self):
-        # velocity tracking reward
-        lin_vel_error = torch.sum(torch.square(self.commands[:, :2] - self.base_lin_vel[:, :2]), dim=1)
-        ang_vel_error = torch.square(self.commands[:, 2] - self.base_ang_vel[:, 2])
-        rew_lin_vel_xy = torch.exp(-lin_vel_error/0.25) * self.rew_scales["lin_vel_xy"]
-        rew_ang_vel_z = torch.exp(-ang_vel_error/0.25) * self.rew_scales["ang_vel_z"]
-
-        # other base velocity penalties
-        rew_lin_vel_z = torch.square(self.base_lin_vel[:, 2]) * self.rew_scales["lin_vel_z"]
-        rew_ang_vel_xy = torch.sum(torch.square(self.base_ang_vel[:, :2]), dim=1) * self.rew_scales["ang_vel_xy"]
-
-        # orientation penalty
-        rew_orient = torch.sum(torch.square(self.projected_gravity[:, :2]), dim=1) * self.rew_scales["orient"]
-
-        # base height penalty
-        rew_base_height = torch.square(self.root_states[:, 2] - 0.52) * self.rew_scales["base_height"] # TODO add target base height to cfg
-
-        # torque penalty
-        rew_torque = torch.sum(torch.square(self.torques), dim=1) * self.rew_scales["torque"]
-
-        # joint acc penalty
-        rew_joint_acc = torch.sum(torch.square(self.last_dof_vel - self.dof_vel), dim=1) * self.rew_scales["joint_acc"]
-
-        # collision penalty
-        knee_contact = torch.norm(self.contact_forces[:, self.knee_indices, :], dim=2) > 1.
-        rew_collision = torch.sum(knee_contact, dim=1) * self.rew_scales["collision"] # sum vs any ?
-
-        # stumbling penalty
-        stumble = (torch.norm(self.contact_forces[:, self.feet_indices, :2], dim=2) > 5.) * (torch.abs(self.contact_forces[:, self.feet_indices, 2]) < 1.)
-        rew_stumble = torch.sum(stumble, dim=1) * self.rew_scales["stumble"]
-
-        # action rate penalty
-        rew_action_rate = torch.sum(torch.square(self.last_actions - self.actions), dim=1) * self.rew_scales["action_rate"]
-
-        # air time reward
-        # contact = torch.norm(contact_forces[:, feet_indices, :], dim=2) > 1.
-        contact = self.contact_forces[:, self.feet_indices, 2] > 1.
-        first_contact = (self.feet_air_time > 0.) * contact
-        self.feet_air_time += self.dt
-        rew_airTime = torch.sum((self.feet_air_time - 0.5) * first_contact, dim=1) * self.rew_scales["air_time"] # reward only on first contact with the ground
-        rew_airTime *= torch.norm(self.commands[:, :2], dim=1) > 0.1 #no reward for zero command
-        self.feet_air_time *= ~contact
-
-        # cosmetic penalty for hip motion
-        rew_hip = torch.sum(torch.abs(self.dof_pos[:, [0, 3, 6, 9]] - self.default_dof_pos[:, [0, 3, 6, 9]]), dim=1)* self.rew_scales["hip"]
-
-        # total reward
-        self.rew_buf = rew_lin_vel_xy + rew_ang_vel_z + rew_lin_vel_z + rew_ang_vel_xy + rew_orient + rew_base_height +\
-                    rew_torque + rew_joint_acc + rew_collision + rew_action_rate + rew_airTime + rew_hip + rew_stumble
-        self.rew_buf = torch.clip(self.rew_buf, min=0., max=None)
-
-        # add termination reward
-        self.rew_buf += self.rew_scales["termination"] * self.reset_buf * ~self.timeout_buf
-
-        # log episode reward sums
-        self.episode_sums["lin_vel_xy"] += rew_lin_vel_xy
-        self.episode_sums["ang_vel_z"] += rew_ang_vel_z
-        self.episode_sums["lin_vel_z"] += rew_lin_vel_z
-        self.episode_sums["ang_vel_xy"] += rew_ang_vel_xy
-        self.episode_sums["orient"] += rew_orient
-        self.episode_sums["torques"] += rew_torque
-        self.episode_sums["joint_acc"] += rew_joint_acc
-        self.episode_sums["collision"] += rew_collision
-        self.episode_sums["stumble"] += rew_stumble
-        self.episode_sums["action_rate"] += rew_action_rate
-        self.episode_sums["air_time"] += rew_airTime
-        self.episode_sums["base_height"] += rew_base_height
-        self.episode_sums["hip"] += rew_hip
-"""
+
+        # fill extras
+        self.extras["episode"] = {}
+        for key in self.episode_sums.keys():
+            self.extras["episode"]['rew_' + key] = torch.mean(self.episode_sums[key][env_ids]) / self.max_episode_length_s
+            self.episode_sums[key][env_ids] = 0.
+
 
 #####################################################################
 ###=========================jit functions=========================###
@@ -444,10 +403,18 @@ def compute_anymal_reward(
     # tensors
     root_states: Tensor,
     commands: Tensor,
+    actions: Tensor,
+    last_actions: Tensor,
+    dof_vel: Tensor,
+    last_dof_vel: Tensor,
+    dof_pos: Tensor,
+    default_dof_pos: Tensor,
     prev_torques: Tensor,
+    projected_gravity: Tensor,
     torques: Tensor,
     contact_forces: Tensor,
     knee_indices: Tensor,
+    feet_indices: Tensor,
     episode_lengths: Tensor,
     # Dict
     rew_scales: Dict[str, float],
@@ -457,7 +424,10 @@ def compute_anymal_reward(
     targets,
     vec0,
     vec1,
-    inv_start_rot) -> Tuple[Tensor, Tensor]: # (reward, reset, feet_in air, feet_air_time, episode sums)
+    inv_start_rot,
+    dt: float,
+    feet_air_time,
+    episode_sums: Dict[str, Tensor]) -> Tuple[Tensor, Tensor, Dict[str, Tensor]]: # (reward, reset, feet_in air, feet_air_time, episode sums)
 
     # prepare quantities (TODO: return from obs ?)
     base_quat = root_states[:, 3:7]
@@ -477,9 +447,59 @@ def compute_anymal_reward(
     heading_vec = get_basis_vector(torso_quat, vec0).view(num_envs, 3)
     heading_proj = torch.bmm(heading_vec.view(num_envs, 1, 3), target_dirs.view(num_envs, 3, 1)).view(num_envs)
 
-    # reward from direction headed
-    #heading_weight_tensor = torch.ones_like(heading_proj) * rew_scales["heading_scale"]
-    #heading_reward = torch.where(heading_proj > 0.8, heading_weight_tensor, rew_scales["heading_scale"] * heading_proj / 0.8)
+    # velocity tracking reward
+    lin_vel_error = torch.sum(torch.square(commands[:, :2] - base_lin_vel[:, :2]), dim=1)
+    ang_vel_error = torch.square(commands[:, 2] - base_ang_vel[:, 2])
+    rew_lin_vel_xy = torch.exp(-lin_vel_error/0.25) * rew_scales["lin_vel_xy"]
+    rew_ang_vel_z = torch.exp(-ang_vel_error/0.25) * rew_scales["ang_vel_z"]
+
+    # other base velocity penalties
+    rew_lin_vel_z = torch.square(base_lin_vel[:, 2]) * rew_scales["lin_vel_z"]
+    rew_ang_vel_xy = torch.sum(torch.square(base_ang_vel[:, :2]), dim=1) * rew_scales["ang_vel_xy"]
+
+    # orientation penalty
+    rew_orient = torch.sum(torch.square(projected_gravity[:, :2]), dim=1) * rew_scales["orient"]
+
+    # base height penalty
+    rew_base_height = torch.square(root_states[:, 2] - 0.52) * rew_scales["base_height"] # TODO add target base height to cfg
+
+    # torque penalty
+    rew_torque = torch.sum(torch.square(torques), dim=1) * rew_scales["torque"]
+
+    # joint acc penalty
+    rew_joint_acc = torch.sum(torch.square(last_dof_vel - dof_vel), dim=1) * rew_scales["joint_acc"]
+
+    # collision penalty
+    knee_contact = torch.norm(contact_forces[:, knee_indices, :], dim=2) > 1.
+    rew_collision = torch.sum(knee_contact, dim=1) * rew_scales["collision"] # sum vs any ?
+
+    # stumbling penalty
+    stumble = (torch.norm(contact_forces[:, feet_indices, :2], dim=2) > 5.) * (torch.abs(contact_forces[:, feet_indices, 2]) < 1.)
+    rew_stumble = torch.sum(stumble, dim=1) * rew_scales["stumble"]
+
+    # action rate penalty
+    rew_action_rate = torch.sum(torch.square(last_actions - actions), dim=1) * rew_scales["action_rate"]
+
+    # air time reward
+    # contact = torch.norm(contact_forces[:, feet_indices, :], dim=2) > 1.
+    contact = contact_forces[:, feet_indices, 2] > 1.
+    first_contact = (feet_air_time > 0.) * contact
+    feet_air_time += dt
+    rew_airTime = torch.sum((feet_air_time - 0.5) * first_contact, dim=1) * rew_scales["air_time"] # reward only on first contact with the ground
+    rew_airTime *= torch.norm(commands[:, :2], dim=1) > 0.1 #no reward for zero command
+    feet_air_time *= ~contact
+
+    # cosmetic penalty for hip motion
+    rew_hip = torch.sum(torch.abs(dof_pos[:, [0, 3, 6, 9]] - default_dof_pos[:, [0, 3, 6, 9]]), dim=1)* rew_scales["hip"]
+
+    # total reward
+    rew_buf = rew_lin_vel_xy + rew_ang_vel_z + rew_lin_vel_z + rew_ang_vel_xy + rew_orient + rew_base_height +\
+                rew_torque + rew_joint_acc + rew_collision + rew_action_rate + rew_airTime + rew_hip + rew_stumble
+    #print(rew_lin_vel_xy[0], rew_ang_vel_z[0], rew_lin_vel_z[0])
+    #rew_buf = torch.clip(rew_buf, min=0., max=None)
+
+    # add termination reward
+    #rew_buf += rew_scales["termination"] * reset_buf * ~timeout_buf
 
     up_vec = get_basis_vector(torso_quat, vec1).view(num_envs, 3)
     up_proj = up_vec[:, 2]
@@ -489,16 +509,31 @@ def compute_anymal_reward(
     #up_reward = torch.where(up_proj > 0.9, up_reward + rew_scales['up_scale'], up_reward)
 
     # velocity tracking reward
-    lin_vel_error = torch.sum(torch.square(commands[:, :2] - base_lin_vel[:, :2]), dim=1)
-    ang_vel_error = torch.square(commands[:, 2] - base_ang_vel[:, 2])
-    rew_lin_vel_xy = torch.exp(-lin_vel_error/0.25) * rew_scales["lin_vel_xy"]
-    rew_ang_vel_z = torch.exp(-ang_vel_error/0.25) * rew_scales["ang_vel_z"]
+    #lin_vel_error = torch.sum(torch.square(commands[:, :2] - base_lin_vel[:, :2]), dim=1)
+    #ang_vel_error = torch.square(commands[:, 2] - base_ang_vel[:, 2])
+    #rew_lin_vel_xy = torch.exp(-lin_vel_error/0.25) * rew_scales["lin_vel_xy"]
+    #rew_ang_vel_z = torch.exp(-ang_vel_error/0.25) * rew_scales["ang_vel_z"]
 
     # torque penalty(reward)
-    rew_torque = torch.sum(torch.abs(prev_torques - torques), dim=1) * rew_scales["torque"]
-
-    total_reward = rew_lin_vel_xy + rew_ang_vel_z + rew_torque #+ up_reward # + heading_reward
-    total_reward = torch.clip(total_reward, 0., None)
+    # rew_torque = torch.sum(torch.abs(prev_torques - torques), dim=1) * rew_scales["torque"]
+
+    #total_reward = rew_lin_vel_xy + rew_ang_vel_z + rew_torque #+ up_reward # + heading_reward
+    #total_reward = torch.clip(total_reward, 0., None)
+
+    # log episode reward sums
+    episode_sums["lin_vel_xy"] += rew_lin_vel_xy
+    episode_sums["ang_vel_z"] += rew_ang_vel_z
+    episode_sums["lin_vel_z"] += rew_lin_vel_z
+    episode_sums["ang_vel_xy"] += rew_ang_vel_xy
+    episode_sums["orient"] += rew_orient
+    episode_sums["torques"] += rew_torque
+    episode_sums["joint_acc"] += rew_joint_acc
+    episode_sums["collision"] += rew_collision
+    episode_sums["stumble"] += rew_stumble
+    episode_sums["action_rate"] += rew_action_rate
+    episode_sums["air_time"] += rew_airTime
+    episode_sums["base_height"] += rew_base_height
+    episode_sums["hip"] += rew_hip
 
     # reset agents
     reset = torch.norm(contact_forces[:, base_index, :], dim=1) > 1.5
@@ -508,12 +543,12 @@ def compute_anymal_reward(
     # stay upright
     reset = torch.where(up_proj < 0.65, torch.ones_like(reset), reset)
     # no flying
-    reset = torch.where(torso_position[:, 2] > 1.45, torch.ones_like(reset), reset)
+    #reset = torch.where(torso_position[:, 2] > 1.45, torch.ones_like(reset), reset)
 
     time_out = episode_lengths >= max_episode_length - 1  # no terminal reward for time-outs
     reset = reset | time_out
 
-    return total_reward.detach(), reset
+    return rew_buf.detach(), reset, episode_sums
 
 
 @torch.jit.script
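Note on the reward refactor in atlas.py above: compute_anymal_reward is a @torch.jit.script function, so it cannot read or write attributes of the task object. The mutable bookkeeping it needs (feet_air_time and the episode_sums dict) is therefore passed in as arguments, updated inside the function, and the dict is returned so reset_idx can later average the per-term sums into extras["episode"]. A stripped-down sketch of the same pattern; the names here are illustrative, not the actual task code:

    # Illustrative sketch of the pattern used by compute_anymal_reward above: a TorchScript
    # function takes the logging dict as an argument, updates it, and hands it back,
    # because it cannot touch `self` on the task object.
    from typing import Dict, Tuple
    import torch

    @torch.jit.script
    def tracking_reward(lin_vel_error: torch.Tensor, scale: float,
                        episode_sums: Dict[str, torch.Tensor]) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]:
        rew = torch.exp(-lin_vel_error / 0.25) * scale
        episode_sums["lin_vel_xy"] += rew  # same dict object as the caller's, so the running sum persists
        return rew, episode_sums

    sums = {"lin_vel_xy": torch.zeros(4)}
    rew, sums = tracking_reward(torch.rand(4), 5.0, sums)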
diff --git a/isaacgymenvs/train.py b/isaacgymenvs/train.py
index 783a92d71..07d46a706 100644
--- a/isaacgymenvs/train.py
+++ b/isaacgymenvs/train.py
@@ -66,7 +66,6 @@ def launch_rlg_hydra(cfg: DictConfig):
         cfg.checkpoint = to_absolute_path(cfg.checkpoint)
 
     cfg_dict = omegaconf_to_dict(cfg)
-    print_dict(cfg_dict)
 
     # set numpy formatting for printing only
     set_np_formatting()
@@ -98,9 +97,9 @@ def launch_rlg_hydra(cfg: DictConfig):
 
     def create_env_thunk(**kwargs):
         envs = isaacgymenvs.make(
-            cfg.seed, 
-            cfg.task_name, 
-            cfg.task.env.numEnvs, 
+            cfg.seed,
+            cfg.task_name,
+            cfg.task.env.numEnvs,
             cfg.sim_device,
             cfg.rl_device,
             cfg.graphics_device_id,
@@ -148,16 +147,19 @@ def build_runner(algo_observer):
     runner.reset()
 
     # dump config dict
-    experiment_dir = os.path.join('runs', cfg.train.params.config.name)
-    os.makedirs(experiment_dir, exist_ok=True)
-    with open(os.path.join(experiment_dir, 'config.yaml'), 'w') as f:
+    cfg.experiment_dir = os.path.join('runs', cfg.train.params.config.name)
+    os.makedirs(cfg.experiment_dir, exist_ok=True)
+
+    with open(os.path.join(cfg.experiment_dir, 'config.yaml'), 'w') as f:
         f.write(OmegaConf.to_yaml(cfg))
 
     runner.run({
         'train': not cfg.test,
         'play': cfg.test,
         'checkpoint' : cfg.checkpoint,
-        'sigma' : None
+        'sigma' : None,
+        'experiment_dir' : cfg.experiment_dir,
+        'experiment' : cfg.experiment if cfg.experiment else cfg.train.params.config.name
     })
 
     if cfg.wandb_activate and rank == 0:
diff --git a/isaacgymenvs/utils/rlgames_utils.py b/isaacgymenvs/utils/rlgames_utils.py
index b5dbe219a..92b8b8142 100644
--- a/isaacgymenvs/utils/rlgames_utils.py
+++ b/isaacgymenvs/utils/rlgames_utils.py
@@ -64,7 +64,7 @@ def get_rlgames_env_creator(
         multi_gpu: Whether to use multi gpu
         post_create_hook: Hooks to be called after environment creation.
            [Needed to setup WandB only for one of the RL Games instances when doing multiple GPUs]
-        virtual_screen_capture: Set to True to allow the users get captured screen in RGB array via `env.render(mode='rgb_array')`. 
+        virtual_screen_capture: Set to True to allow the users get captured screen in RGB array via `env.render(mode='rgb_array')`.
         force_render: Set to True to always force rendering in the steps (if the `control_freq_inv` is greater than 1 we suggest stting this arg to True)
     Returns:
         A VecTaskPython object.
@@ -134,8 +134,10 @@ def after_print_stats(self, frame, epoch_num, total_time):
                     infotensor = torch.cat((infotensor, ep_info[key].to(self.algo.device)))
                 value = torch.mean(infotensor)
                 self.writer.add_scalar('Episode/' + key, value, epoch_num)
+                print('Episode/' + key, value, epoch_num)
             self.ep_infos.clear()
-        
+            print()
+
         for k, v in self.direct_info.items():
             self.writer.add_scalar(f'{k}/frame', v, frame)
             self.writer.add_scalar(f'{k}/iter', v, epoch_num)
@@ -157,7 +159,7 @@ def step(self, actions):
 
     def reset(self):
         return self.env.reset()
-    
+
     def reset_done(self):
         return self.env.reset_done()
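Note on the friction randomization added to _create_envs above: friction values are sampled once into a fixed pool of 100 buckets and env i reuses bucket i % num_buckets, which bounds the number of distinct friction values the simulator has to manage. In Isaac Gym, modified shape properties are applied by writing them back, e.g. with gym.set_asset_rigid_shape_properties(...) before actor creation or gym.set_actor_rigid_shape_properties(...) afterwards. A standalone sketch of the bucket assignment, with a placeholder range since frictionRange is read from the task config and does not appear in this diff:

    # Standalone sketch of the bucketed friction assignment used in _create_envs above.
    # The range here is a placeholder; the real values come from cfg["env"]["learn"]["frictionRange"].
    import torch

    num_buckets, num_envs = 100, 4096
    friction_range = (0.5, 1.25)  # placeholder values, not taken from this diff
    friction_buckets = torch.empty(num_buckets, 1).uniform_(friction_range[0], friction_range[1])
    env_friction = friction_buckets[torch.arange(num_envs) % num_buckets]  # shape (num_envs, 1)
    # env i would write env_friction[i] into every entry of its rigid_shape_prop list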