Dear team, I modified the Anymal task under IsaacGymEnvs/isaacgymenvs to create a snake robot task: I replaced the URDF file, the task configuration file, the training configuration file, and the task file. I have two questions. (1) The robot moves normally during training, but when I test the trained policy, the robot moves for a while and then stops moving. (2) Although the URDF was verified with the dof_controls.py example, the robot's joint constraints are violated when the trained policy is played back. What could be the reason? The picture and videos are attached.
constraint violation picture: (attached)
training video: (attached)
play video: (attached)
The training command is as follows:
(rlgpu) zhou@zhou-G3-3500:~/IsaacGymEnvs/isaacgymenvs$ python train.py task=SnakeWorm
The test command is as follows:
(rlgpu) zhou@zhou-G3-3500:~/IsaacGymEnvs/isaacgymenvs$ python train.py task=SnakeWorm test=True checkpoint=runs/SnakeWorm_26-00-40-01/nn/last_SnakeWorm_ep_200_rew__8.69_.pth
robot task file:
import numpy as np
import os
import torch
from isaacgym import gymtorch
from isaacgym import gymapi
from isaacgymenvs.utils.torch_jit_utils import to_torch, get_axis_params, torch_rand_float, quat_rotate, quat_rotate_inverse
from isaacgymenvs.tasks.base.vec_task import VecTask
from typing import Tuple, Dict
class SnakeWorm(VecTask):
def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, virtual_screen_capture, force_render):
self.cfg = cfg
# normalization
self.lin_vel_scale = self.cfg["env"]["learn"]["linearVelocityScale"]
self.ang_vel_scale = self.cfg["env"]["learn"]["angularVelocityScale"]
self.dof_pos_scale = self.cfg["env"]["learn"]["dofPositionScale"]
self.dof_vel_scale = self.cfg["env"]["learn"]["dofVelocityScale"]
self.action_scale = self.cfg["env"]["control"]["actionScale"]
# reward scales
self.rew_scales = {}
self.rew_scales["lin_vel_xy"] = self.cfg["env"]["learn"]["linearVelocityXYRewardScale"]
self.rew_scales["ang_vel_z"] = self.cfg["env"]["learn"]["angularVelocityZRewardScale"]
self.rew_scales["torque"] = self.cfg["env"]["learn"]["torqueRewardScale"]
# randomization
self.randomization_params = self.cfg["task"]["randomization_params"]
self.randomize = self.cfg["task"]["randomize"]
# command ranges
self.command_x_range = self.cfg["env"]["randomCommandVelocityRanges"]["linear_x"]
self.command_y_range = self.cfg["env"]["randomCommandVelocityRanges"]["linear_y"]
self.command_yaw_range = self.cfg["env"]["randomCommandVelocityRanges"]["yaw"]
# plane params
self.plane_static_friction = self.cfg["env"]["plane"]["staticFriction"]
self.plane_dynamic_friction = self.cfg["env"]["plane"]["dynamicFriction"]
self.plane_restitution = self.cfg["env"]["plane"]["restitution"]
# base init state
pos = self.cfg["env"]["baseInitState"]["pos"]
rot = self.cfg["env"]["baseInitState"]["rot"]
v_lin = self.cfg["env"]["baseInitState"]["vLinear"]
v_ang = self.cfg["env"]["baseInitState"]["vAngular"]
state = pos + rot + v_lin + v_ang
self.base_init_state = state
# default joint positions
#self.named_default_joint_angles = self.cfg["env"]["defaultJointAngles"]
self.cfg["env"]["numObservations"] = 45
self.cfg["env"]["numActions"] = 12
super().__init__(config=self.cfg, rl_device=rl_device, sim_device=sim_device, graphics_device_id=graphics_device_id, headless=headless, virtual_screen_capture=virtual_screen_capture, force_render=force_render)
# other
self.dt = self.sim_params.dt
self.max_episode_length_s = self.cfg["env"]["learn"]["episodeLength_s"]
self.max_episode_length = int(self.max_episode_length_s / self.dt + 0.5)
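# with episodeLength_s = 50 and dt = 0.02 from the task config below, max_episode_length = 2500 steps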
self.Kp = self.cfg["env"]["control"]["stiffness"]
self.Kd = self.cfg["env"]["control"]["damping"]
for key in self.rew_scales.keys():
self.rew_scales[key] *= self.dt
if self.viewer != None:
p = self.cfg["env"]["viewer"]["pos"]
lookat = self.cfg["env"]["viewer"]["lookat"]
cam_pos = gymapi.Vec3(p[0], p[1], p[2])
cam_target = gymapi.Vec3(lookat[0], lookat[1], lookat[2])
self.gym.viewer_camera_look_at(self.viewer, None, cam_pos, cam_target)
# get gym state tensors
self.gym.refresh_actor_root_state_tensor(self.sim)
actor_root_state = self.gym.acquire_actor_root_state_tensor(self.sim)
dof_state_tensor = self.gym.acquire_dof_state_tensor(self.sim)
net_contact_forces = self.gym.acquire_net_contact_force_tensor(self.sim)
torques = self.gym.acquire_dof_force_tensor(self.sim)
#print("torques: ", torques)
self.gym.refresh_dof_state_tensor(self.sim)
#self.gym.refresh_actor_root_state_tensor(self.sim)
self.gym.refresh_net_contact_force_tensor(self.sim)
self.gym.refresh_dof_force_tensor(self.sim)
# create some wrapper tensors for different slices
self.root_states = gymtorch.wrap_tensor(actor_root_state)
self.dof_state = gymtorch.wrap_tensor(dof_state_tensor)
self.dof_pos = self.dof_state.view(self.num_envs, self.num_dof, 2)[..., 0]
self.dof_vel = self.dof_state.view(self.num_envs, self.num_dof, 2)[..., 1]
self.contact_forces = gymtorch.wrap_tensor(net_contact_forces).view(self.num_envs, -1, 3) # shape: num_envs, num_bodies, xyz axis
self.torques = gymtorch.wrap_tensor(torques).view(self.num_envs, self.num_dof)
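# the DOF force tensor above is only populated because enable_actor_dof_force_sensors is called per actor in _create_envs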
self.commands = torch.zeros(self.num_envs, 3, dtype=torch.float, device=self.device, requires_grad=False)
self.commands_y = self.commands.view(self.num_envs, 3)[..., 1]
self.commands_x = self.commands.view(self.num_envs, 3)[..., 0]
self.commands_yaw = self.commands.view(self.num_envs, 3)[..., 2]
self.default_dof_pos = torch.zeros_like(self.dof_pos, dtype=torch.float, device=self.device, requires_grad=False)
for i in range(self.cfg["env"]["numActions"]):
name = self.dof_names[i]
angle = 0#self.named_default_joint_angles[name]
self.default_dof_pos[:, i] = angle
# initialize some data used later on
self.extras = {}
self.initial_root_states = self.root_states.clone()
self.initial_root_states[:] = to_torch(self.base_init_state, device=self.device, requires_grad=False)
self.gravity_vec = to_torch(get_axis_params(-1., self.up_axis_idx), device=self.device).repeat((self.num_envs, 1))
self.actions = torch.zeros(self.num_envs, self.num_actions, dtype=torch.float, device=self.device, requires_grad=False)
self.reset_idx(torch.arange(self.num_envs, device=self.device))
def create_sim(self):
self.up_axis_idx = 2 # index of up axis: Y=1, Z=2
self.sim = super().create_sim(self.device_id, self.graphics_device_id, self.physics_engine, self.sim_params)
self._create_ground_plane()
self._create_envs(self.num_envs, self.cfg["env"]['envSpacing'], int(np.sqrt(self.num_envs)))
# If randomizing, apply once immediately on startup, before the first sim step
if self.randomize:
self.apply_randomizations(self.randomization_params)
#print("create_sim")
def _create_ground_plane(self):
plane_params = gymapi.PlaneParams()
plane_params.normal = gymapi.Vec3(0.0, 0.0, 1.0)
plane_params.static_friction = self.plane_static_friction
plane_params.dynamic_friction = self.plane_dynamic_friction
self.gym.add_ground(self.sim, plane_params)
def _create_envs(self, num_envs, spacing, num_per_row):
asset_root = os.path.join(os.path.dirname(os.path.abspath(__file__)), '../../assets')
asset_file = "urdf/RLsnake.urdf"
asset_options = gymapi.AssetOptions()
asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE
asset_options.collapse_fixed_joints = True
asset_options.replace_cylinder_with_capsule = True
asset_options.flip_visual_attachments = True
asset_options.fix_base_link = self.cfg["env"]["urdfAsset"]["fixBaseLink"]
asset_options.density = 0.001
asset_options.angular_damping = 0.0
asset_options.linear_damping = 0.0
asset_options.armature = 0.0
asset_options.thickness = 0.01
asset_options.disable_gravity = False
snake_asset = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options)
self.num_dof = self.gym.get_asset_dof_count(snake_asset)
self.num_bodies = self.gym.get_asset_rigid_body_count(snake_asset)
start_pose = gymapi.Transform()
start_pose.p = gymapi.Vec3(*self.base_init_state[:3])
body_names = self.gym.get_asset_rigid_body_names(snake_asset)
self.dof_names = self.gym.get_asset_dof_names(snake_asset)
self.base_index = 0
dof_props = self.gym.get_asset_dof_properties(snake_asset)
for i in range(self.num_dof):
dof_props['driveMode'][i] = gymapi.DOF_MODE_POS
dof_props['stiffness'][i] = self.cfg["env"]["control"]["stiffness"] #self.Kp
dof_props['damping'][i] = self.cfg["env"]["control"]["damping"] #self.Kd
env_lower = gymapi.Vec3(-spacing, -spacing, 0.0)
env_upper = gymapi.Vec3(spacing, spacing, spacing)
self.snake_handles = []
self.envs = []
for i in range(self.num_envs):
# create env instance
env_ptr = self.gym.create_env(self.sim, env_lower, env_upper, num_per_row)
snake_handle = self.gym.create_actor(env_ptr, snake_asset, start_pose, "snake", i, 1, 0)
self.gym.set_actor_dof_properties(env_ptr, snake_handle, dof_props)
self.gym.enable_actor_dof_force_sensors(env_ptr, snake_handle)
self.envs.append(env_ptr)
self.snake_handles.append(snake_handle)
self.base_index = self.gym.find_actor_rigid_body_handle(self.envs[0], self.snake_handles[0], "head_link")
def pre_physics_step(self, actions):
self.actions = actions.clone().to(self.device)
targets = self.action_scale * self.actions + self.default_dof_pos
self.gym.set_dof_position_target_tensor(self.sim, gymtorch.unwrap_tensor(targets))
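# note: with clipActions = 1.0 and actionScale = 0.5 from the task config, the position targets above stay
# within +/-0.5 of the (zero) default joint positions; they are not clamped against the DOF limits in dof_props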
def post_physics_step(self):
self.progress_buf += 1
env_ids = self.reset_buf.nonzero(as_tuple=False).squeeze(-1)
if len(env_ids) > 0:
self.reset_idx(env_ids)
self.compute_observations()
self.compute_reward(self.actions)
def compute_reward(self, actions):
#print("self.torques: ", self.torques)
self.rew_buf[:], self.reset_buf[:] = compute_snake_reward(
# tensors
self.root_states,
self.commands,
self.torques,
self.contact_forces,
self.progress_buf,
# Dict
self.rew_scales,
# other
self.base_index,
self.max_episode_length,
)
def compute_observations(self):
self.gym.refresh_dof_state_tensor(self.sim) # done in step
self.gym.refresh_actor_root_state_tensor(self.sim)
self.gym.refresh_net_contact_force_tensor(self.sim)
self.gym.refresh_dof_force_tensor(self.sim)
self.obs_buf[:] = compute_snake_observations( # tensors
self.root_states,
self.commands,
self.dof_pos,
self.default_dof_pos,
self.dof_vel,
#self.gravity_vec,
self.actions,
# scales
self.lin_vel_scale,
self.ang_vel_scale,
self.dof_pos_scale,
self.dof_vel_scale
)
#print("obs_buf: ", self.obs_buf[:])
def reset_idx(self, env_ids):
# Randomization can happen only at reset time, since it can reset actor positions on GPU
if self.randomize:
self.apply_randomizations(self.randomization_params)
positions_offset = torch_rand_float(0.5, 1.5, (len(env_ids), self.num_dof), device=self.device)
velocities = torch_rand_float(-0.1, 0.1, (len(env_ids), self.num_dof), device=self.device)
self.dof_pos[env_ids] = self.default_dof_pos[env_ids] * positions_offset
self.dof_vel[env_ids] = velocities
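# note: default_dof_pos is all zeros (defaultJointAngles is commented out above), so the multiplicative
# positions_offset has no effect and every joint resets to exactly 0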
env_ids_int32 = env_ids.to(dtype=torch.int32)
self.gym.set_actor_root_state_tensor_indexed(self.sim,
gymtorch.unwrap_tensor(self.initial_root_states),
gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32))
self.gym.set_dof_state_tensor_indexed(self.sim,
gymtorch.unwrap_tensor(self.dof_state),
gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32))
self.commands_x[env_ids] = torch_rand_float(self.command_x_range[0], self.command_x_range[1], (len(env_ids), 1), device=self.device).squeeze()
self.commands_y[env_ids] = torch_rand_float(self.command_y_range[0], self.command_y_range[1], (len(env_ids), 1), device=self.device).squeeze()
self.commands_yaw[env_ids] = torch_rand_float(self.command_yaw_range[0], self.command_yaw_range[1], (len(env_ids), 1), device=self.device).squeeze()
self.progress_buf[env_ids] = 0
self.reset_buf[env_ids] = 1
#print("reset!!!!!!!!!!!!!!!!!!!!")
#####################################################################
###=========================jit functions=========================###
#####################################################################
@torch.jit.script
def compute_snake_reward(
# tensors
root_states,
commands,
torques,
contact_forces,
episode_lengths,
# Dict
rew_scales,
# other
base_index,
max_episode_length
):
# returns: (reward, reset)
# type: (Tensor, Tensor, Tensor, Tensor, Tensor, Dict[str, float], int, int) -> Tuple[Tensor, Tensor]
# prepare quantities (TODO: return from obs ?)
base_quat = root_states[:, 3:7]
base_lin_vel = quat_rotate_inverse(base_quat, root_states[:, 7:10])
base_ang_vel = quat_rotate_inverse(base_quat, root_states[:, 10:13])
# velocity tracking reward
#print("commands: ",commands)
lin_vel_error = torch.sum(torch.square(commands[:, :2] - base_lin_vel[:, :2]), dim=1)
ang_vel_error = torch.square(commands[:, 2] - base_ang_vel[:, 2])
rew_lin_vel_xy = torch.exp(-lin_vel_error/0.25) * rew_scales["lin_vel_xy"]
rew_ang_vel_z = torch.exp(-ang_vel_error/0.25) * rew_scales["ang_vel_z"]
# torque penalty
rew_torque = torch.sum(torch.square(torques), dim=1) * rew_scales["torque"]
total_reward = rew_lin_vel_xy + rew_ang_vel_z + rew_torque
total_reward = torch.clip(total_reward, 0., None)
# reset agents
reset = torch.norm(contact_forces[:, base_index, :], dim=1) > 80
#print("contact_force: ",contact_forces[:, base_index, :] )
#print("reset 1: ", reset)
#reset = reset | torch.any(torch.abs(root_states[:,2]) > 1,dim=1.)
#print("contact_forces base: ", contact_forces[:, base_index, :])
time_out = episode_lengths >= max_episode_length - 1 # no terminal reward for time-outs
reset = reset | time_out
return total_reward.detach(), reset
@torch.jit.script
def compute_snake_observations(root_states,
commands,
dof_pos,
default_dof_pos,
dof_vel,
actions,
lin_vel_scale,
ang_vel_scale,
dof_pos_scale,
dof_vel_scale
):
# type: (Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, float, float, float, float) -> Tensor
base_quat = root_states[:, 3:7]
base_lin_vel = quat_rotate_inverse(base_quat, root_states[:, 7:10]) * lin_vel_scale
base_ang_vel = quat_rotate_inverse(base_quat, root_states[:, 10:13]) * ang_vel_scale
#projected_gravity = quat_rotate(base_quat, gravity_vec)
dof_pos_scaled = (dof_pos - default_dof_pos) * dof_pos_scale
commands_scaled = commands*torch.tensor([lin_vel_scale, lin_vel_scale, ang_vel_scale], requires_grad=False, device=commands.device)
#print("coomands_scaled: ", commands_scaled)
obs = torch.cat((base_lin_vel,
base_ang_vel,
commands_scaled,
dof_pos_scaled,
dof_vel*dof_vel_scale,
actions
), dim=-1)
#print("obs: ", obs)
return obs
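For reference: the targets computed in pre_physics_step above are handed to set_dof_position_target_tensor without being clamped to the joint limits that Isaac Gym reads from the URDF. The sketch below shows what such a clamp could look like; it is only an illustration, not part of the task above. The dof_limits_lower / dof_limits_upper tensors are hypothetical additions built from the dof_props already queried in _create_envs, and tensor_clamp comes from isaacgym.torch_utils.

# Hypothetical additions for illustration only (not in the task above).
# In _create_envs, after get_asset_dof_properties:
#     self.dof_limits_lower = to_torch(dof_props['lower'], device=self.device)
#     self.dof_limits_upper = to_torch(dof_props['upper'], device=self.device)
# A pre_physics_step that keeps the targets inside those limits would then look like:
def pre_physics_step(self, actions):
    self.actions = actions.clone().to(self.device)
    targets = self.action_scale * self.actions + self.default_dof_pos
    # clamp the position targets to the limits Isaac Gym read from the URDF before sending them to the sim
    targets = tensor_clamp(targets, self.dof_limits_lower, self.dof_limits_upper)
    self.gym.set_dof_position_target_tensor(self.sim, gymtorch.unwrap_tensor(targets))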
task config:
# used to create the object
name: SnakeWorm
physics_engine: ${..physics_engine}
env:
numEnvs: ${resolve_default:1024,${...num_envs}}
envSpacing: 4. # [m]
clipObservations: 5.0
clipActions: 1.0
plane:
staticFriction: 0.5 # [-]
dynamicFriction: 0.5 # [-]
restitution: 0. # [-]
baseInitState:
pos: [0.0, 0.0, 0.1] # x,y,z [m]
rot: [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat]
vLinear: [0.0, 0.0, 0.0] # x,y,z [m/s]
vAngular: [0.0, 0.0, 0.0] # x,y,z [rad/s]
randomCommandVelocityRanges:
linear_x: [-2., 2.] # min max [m/s]
linear_y: [-1., 1.] # min max [m/s]
yaw: [-1., 1.] # min max [rad/s]
control:
# PD Drive parameters:
stiffness: 85 # [N*m/rad]
damping: 5 # [N*m*s/rad]
actionScale: 0.5 # 0.5
controlFrequencyInv: 1 # control every sim step (50 Hz with sim dt = 0.02)
urdfAsset:
collapseFixedJoints: True
fixBaseLink: False
defaultDofDriveMode: 4 # see GymDofDriveModeFlags (0 is none, 1 is pos tgt, 2 is vel tgt, 4 effort)
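# note: SnakeWorm._create_envs sets DOF_MODE_POS and the control stiffness/damping directly on the asset's
# dof_props, so defaultDofDriveMode here is not read by the task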
learn:
# rewards
linearVelocityXYRewardScale: 0.5 # 1.0
angularVelocityZRewardScale: 0.2 # 0.5
torqueRewardScale: -0.000025
# normalization
linearVelocityScale: 1.0 # 2.0
angularVelocityScale: 0.1 # 0.25
dofPositionScale: 0.5 #1.0
dofVelocityScale: 0.025 # 0.05
# episode length in seconds
episodeLength_s: 50
# viewer cam:
viewer:
refEnv: 0
pos: [0, 0, 4] # [m]
lookat: [1., 1, 3.3] # [m]
# set to True if you use camera sensors in the environment
enableCameraSensors: False
sim:
dt: 0.02
substeps: 2
up_axis: "z"
use_gpu_pipeline: ${eq:${...pipeline},"gpu"}
gravity: [0.0, 0.0, -9.81]
physx:
num_threads: ${....num_threads}
solver_type: ${....solver_type}
use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU
num_position_iterations: 4
num_velocity_iterations: 1
contact_offset: 0.02
rest_offset: 0.0
bounce_threshold_velocity: 0.2
max_depenetration_velocity: 100.0
default_buffer_size_multiplier: 5.0
max_gpu_contact_pairs: 8388608 # 8*1024*1024
num_subscenes: ${....num_subscenes}
contact_collection: 1 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!)
task:
randomize: False
randomization_params:
frequency: 600 # Define how many environment steps between generating new randomizations
observations:
range: [0, .002] # range for the white noise
operation: "additive"
distribution: "gaussian"
actions:
range: [0., .02]
operation: "additive"
distribution: "gaussian"
sim_params:
gravity:
range: [0, 0.4]
operation: "additive"
distribution: "gaussian"
schedule: "linear" # "linear" will linearly interpolate between no rand and max rand
schedule_steps: 3000
actor_params:
snake:
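# keys under actor_params must match the actor name passed to create_actor ("snake" in _create_envs)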
color: True
rigid_body_properties:
mass:
range: [0.5, 1.5]
operation: "scaling"
distribution: "uniform"
setup_only: True # Property will only be randomized once before simulation is started. See Domain Randomization Documentation for more info.
schedule: "linear" # "linear" will linearly interpolate between no rand and max rand
schedule_steps: 3000
rigid_shape_properties:
friction:
num_buckets: 500
range: [0.7, 1.3]
operation: "scaling"
distribution: "uniform"
schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
schedule_steps: 3000
restitution:
range: [0., 0.7]
operation: "scaling"
distribution: "uniform"
schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
schedule_steps: 3000
dof_properties:
damping:
range: [0.5, 1.5]
operation: "scaling"
distribution: "uniform"
schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
schedule_steps: 3000
stiffness:
range: [0.5, 1.5] #[0.5, 1.5]
operation: "scaling"
distribution: "uniform"
schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
schedule_steps: 3000
lower:
range: [0, 0.01]
operation: "additive"
distribution: "gaussian"
schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
schedule_steps: 3000
upper:
range: [0, 0.01]
operation: "additive"
distribution: "gaussian"
schedule: "linear" # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
schedule_steps: 3000
train config:
params:
seed: ${...seed}
algo:
name: a2c_continuous
model:
name: continuous_a2c_logstd
network:
name: actor_critic
separate: False
space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: 0. # std = 1.
fixed_sigma: True
mlp:
units: [256, 128, 64]
activation: elu
d2rl: False
initializer:
name: default
regularizer:
name: None
load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint
load_path: ${...checkpoint} # path to the checkpoint to load
config:
name: ${resolve_default:SnakeWorm,${....experiment}}
full_experiment_name: ${.name}
env_name: rlgpu
multi_gpu: ${....multi_gpu}
ppo: True
mixed_precision: True
normalize_input: True
normalize_value: True
value_bootstrap: True
num_actors: ${....task.env.numEnvs}
reward_shaper:
scale_value: 1.0
normalize_advantage: True
gamma: 0.99
tau: 0.95
e_clip: 0.2
entropy_coef: 0.0
learning_rate: 3.e-4 # overwritten by adaptive lr_schedule
lr_schedule: adaptive
kl_threshold: 0.008 # target kl for adaptive lr
truncate_grads: True
grad_norm: 1.
horizon_length: 96 #98304
minibatch_size: 32768
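# 96 (horizon_length) * 1024 (numEnvs) = 98304 samples per update, i.e. 3 minibatches of 32768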
mini_epochs: 5
critic_coef: 2
clip_value: True
seq_len: 4 # only for rnn
bounds_loss_coef: 0.001
max_epochs: ${resolve_default:100,${....max_iterations}}
save_best_after: 200
score_to_win: 20000
save_frequency: 50
print_stats: True