Hi all,
I’m working on a custom spider-like quadruped robot and successfully trained a reinforcement learning policy using IsaacLab 2.0. The trained model performs well when tested using play.py. However, when I try to deploy this same policy in IsaacSim 4.5 for testing before running it on the real robot, the behavior is incorrect – the robot only wiggles slightly and doesn’t move as expected.
Here’s what I’ve done/tried so far:
- Physics settings: Ensured `physics_dt` matches training (tested 1/50 and 1/200).
- Actuator settings: Verified stiffness/damping and actuator configs match training.
- Action scaling: Tried varying `action_scale`, but no effect.
- Policy architecture: Since the `.pt` file only contains weights (SKRL), I recreated the policy network manually in PyTorch based on the YAML config used for training:
class Policy(nn.Module):
    """Feed-forward actor MLP mirroring the SKRL training config.

    Two ELU-activated hidden layers of width 64 followed by a linear output
    head. The attribute names (``net_container`` / ``policy_net``) must stay
    as-is: they are the keys under which the checkpoint stores the weights.
    """

    def __init__(self, obs_size=17, act_size=12):
        super().__init__()
        hidden = 64
        trunk = [
            nn.Linear(obs_size, hidden),
            nn.ELU(),
            nn.Linear(hidden, hidden),
            nn.ELU(),
        ]
        self.net_container = nn.Sequential(*trunk)
        self.policy_net = nn.Linear(hidden, act_size)

    def forward(self, x):
        """Map a batch of observations to mean joint actions."""
        return self.policy_net(self.net_container(x))
# Load the SKRL checkpoint on CPU (weights_only avoids executing pickled code).
checkpoint = torch.load(MODEL_PATH, map_location=torch.device('cpu'), weights_only=True)
# SKRL stores the actor under the "policy" key; fall back to a bare state dict.
policy_state_dict = checkpoint["policy"] if "policy" in checkpoint else checkpoint
policy = Policy(17, 12)
# BUG FIX: strict=False silently drops every key that does not match the
# module's attribute names, so a naming mismatch leaves the network at its
# random initialization (symptom: tiny, jittery actions). Keep the lenient
# load, but surface any mismatch loudly so it cannot go unnoticed.
missing, unexpected = policy.load_state_dict(policy_state_dict, strict=False)
if missing or unexpected:
    print(f"WARNING: state-dict mismatch - missing: {missing}, unexpected: {unexpected}")
policy.eval()  # inference mode (no dropout/batch-norm here, but good hygiene)
- Observations: Input observations seem reasonable, actions are generated without errors, but the robot only slightly jitters in IsaacSim.
- Tried: Modifying joint stiffness/damping, checking if units mismatch between IsaacLab and IsaacSim (saw a mention that units may differ in docs for IsaacSim 5.0 but unclear in 4.5).
- USD file: Currently using the same USD robot as during training. Didn’t hardcode joint stiffness or initial poses in the USD, but could try that if needed.
A few extra points:
- My training results were significantly better with SKRL than RSL-RL on the same task. However, even though both policies perform well using `play.py`, I haven’t been able to deploy either of them successfully in IsaacSim using a standalone Python script.
- I’ve seen some mention of using the extension-based method (like the Spot robot demo), but ideally I’d like to keep things in Python for flexibility.
- Question: Is there a way to run these policies in Python only (without IsaacSim or IsaacLab), so I can easily test them on a Raspberry Pi (preferably without ROS)? I’m trying to simplify the deployment stack for running on the real hardware.
The full script that I have been using to test the robot and the SKRL policy with IsaacSim:
from isaacsim import SimulationApp
simulation_app = SimulationApp({"headless": False})
import numpy as np
import torch
from isaacsim.core.api import World
from isaacsim.core.prims import SingleArticulation
from isaacsim.core.utils.stage import add_reference_to_stage
from isaacsim.core.utils.viewports import set_camera_view
from isaacsim.core.utils.types import ArticulationAction
from scipy.spatial.transform import Rotation as Rotation
import torch.nn as nn
import time
# --- Simulation / checkpoint configuration --------------------------------
USD_PATH = "D:/spyderbot/spdr_stage.usd"  # same robot USD used during training
MODEL_PATH = "C:/isaacsim/standalone_examples/tutorials/best_agent.pt"  # SKRL checkpoint
physics_dt = 1.0 / 50.0  # physics step (s); per the author, matches training
rendering_dt = 8.0 / 50.0  # render once per 8 physics steps
# NOTE(review): decimation is defined but never applied in the control loop —
# confirm the policy is queried at physics_dt * decimation as during training.
decimation = 8
my_world = World(physics_dt=physics_dt, rendering_dt=rendering_dt)
set_camera_view(eye=[2.5, 2.5, 1.5], target=[0.0, 0.0, 0.5])  # frame the robot
# Reference the robot USD into the stage and wrap it as a single articulation.
add_reference_to_stage(usd_path=USD_PATH, prim_path="/World/SpiderBot")
spider = SingleArticulation(prim_path="/World/SpiderBot")
spider.set_world_pose(position=np.array([0.0, 0.0, 0.1]))  # spawn slightly above ground
my_world.reset()  # reset the world before initializing the articulation handle
spider.initialize()
# Default joint pose — assumed to be listed in *policy* joint order, matching
# policy_joint_order below (TODO confirm against the training config).
default_joint_pos = np.array([0.0, 0.5, 0.0, 0.0, 0.5, 0.0, 0.0, 0.5, 0.0, 0.0, 0.5, 0.0], dtype=np.float32)
# Joint names in the order the policy expects its per-joint vectors.
policy_joint_order = ['Revolute_110', 'Revolute_111', 'Revolute_112', 'Revolute_113', 'Revolute_114', 'Revolute_115', 'Revolute_116', 'Revolute_117', 'Revolute_118', 'Revolute_119', 'Revolute_120', 'Revolute_121']
sim_joint_order = spider.dof_names  # order in which IsaacSim exposes the DOFs
# perm_indices[i] = position of sim DOF i within the policy ordering, so
# policy_array[perm_indices] gathers policy-order data into sim order, and
# policy_array[perm_indices] = sim_array scatters sim data into policy order.
perm_indices = np.array([policy_joint_order.index(name) for name in sim_joint_order])
default_sim = default_joint_pos[perm_indices]  # default pose re-ordered to sim order
spider.set_joint_positions(default_sim)
spider.set_joint_velocities(np.zeros(12))
class Policy(nn.Module):
    """Feed-forward actor MLP mirroring the SKRL training config.

    Two ELU-activated hidden layers of width 64 followed by a linear output
    head. The attribute names (``net_container`` / ``policy_net``) must stay
    as-is: they are the keys under which the checkpoint stores the weights.
    """

    def __init__(self, obs_size=41, act_size=12):
        super().__init__()
        hidden = 64
        trunk = [
            nn.Linear(obs_size, hidden),
            nn.ELU(),
            nn.Linear(hidden, hidden),
            nn.ELU(),
        ]
        self.net_container = nn.Sequential(*trunk)
        self.policy_net = nn.Linear(hidden, act_size)

    def forward(self, x):
        """Map a batch of observations to mean joint actions."""
        return self.policy_net(self.net_container(x))
# Load the SKRL checkpoint on CPU (weights_only avoids executing pickled code).
checkpoint = torch.load(MODEL_PATH, map_location=torch.device('cpu'), weights_only=True)
# SKRL stores the actor under the "policy" key; fall back to a bare state dict.
policy_state_dict = checkpoint["policy"] if "policy" in checkpoint else checkpoint
policy = Policy(41, 12)
# BUG FIX: strict=False silently drops every key that does not match the
# module's attribute names, so a naming mismatch leaves the network at its
# random initialization (symptom: tiny, jittery actions). Keep the lenient
# load, but surface any mismatch loudly so it cannot go unnoticed.
missing, unexpected = policy.load_state_dict(policy_state_dict, strict=False)
if missing or unexpected:
    print(f"WARNING: state-dict mismatch - missing: {missing}, unexpected: {unexpected}")
policy.eval()  # inference mode (no dropout/batch-norm here, but good hygiene)
action_scale = 1  # multiplier on raw policy actions before adding the default pose
# Random fixed velocity command (2 values), padded with a third zero element.
# NOTE(review): confirm the command layout (e.g. vx, vy, yaw-rate) matches the
# command observation used during training.
commands = np.random.uniform(-1.0, 1.0, size=2).astype(np.float32)
commands_3 = np.append(commands, 0.0)
prev_actions = np.zeros(12, dtype=np.float32)  # last policy output, fed back into the observation
step_count = 0
period = 5.0  # gait-phase period (s) for the sin/cos phase observation
sim_time = 0.0
while simulation_app.is_running():
    start = time.time()
    # --- Build the observation (base quantities rotated into the body frame) ---
    position, orientation = spider.get_world_pose()
    # Reorder the quaternion from (w, x, y, z) to SciPy's expected (x, y, z, w).
    quat_xyzw = np.array([orientation[1], orientation[2], orientation[3], orientation[0]])
    rot = Rotation.from_quat(quat_xyzw)
    projected_gravity_b = rot.inv().apply(np.array([0.0, 0.0, -1.0]))
    lin_vel_w = spider.get_linear_velocity()
    lin_vel_b = rot.inv().apply(lin_vel_w)
    ang_vel_w = spider.get_angular_velocity()
    ang_vel_b = rot.inv().apply(ang_vel_w)
    # Joint state, scattered from sim DOF order into policy order via perm_indices.
    joint_pos_sim = spider.get_joint_positions()
    joint_pos_policy = np.zeros(12, dtype=np.float32)
    joint_pos_policy[perm_indices] = joint_pos_sim
    joint_pos_rel = joint_pos_policy - default_joint_pos  # offsets from the default pose
    joint_vel_sim = spider.get_joint_velocities()
    joint_vel_policy = np.zeros(12, dtype=np.float32)
    joint_vel_policy[perm_indices] = joint_vel_sim
    # Cyclic gait-phase signal in [0, 2*pi).
    phase = 2 * np.pi * ((sim_time % period) / period)
    phase_sin = np.sin(phase)
    phase_cos = np.cos(phase)
    # 41-dim observation: 3 command + 12 joint pos + 12 joint vel
    # + 12 previous actions + 2 phase terms (matches Policy(41, 12)).
    # NOTE(review): base lin/ang velocity and projected gravity are computed
    # above but excluded here — confirm the training observation really omitted
    # them; a mismatched observation layout would explain weak policy output.
    obs = np.concatenate([
        # lin_vel_b,
        # ang_vel_b,
        # projected_gravity_b,
        commands_3,
        joint_pos_rel,
        joint_vel_policy,
        prev_actions,
        [phase_sin],
        [phase_cos]
    ]).astype(np.float32)
    obs_tensor = torch.from_numpy(obs).unsqueeze(0)
    with torch.no_grad():
        actions = policy(obs_tensor).squeeze(0).numpy()
    prev_actions = actions.copy()  # kept in policy order for the next observation
    # Gather policy-order actions into sim order and convert to joint targets.
    actions_reordered = actions[perm_indices]
    joint_targets = actions_reordered * action_scale + default_sim
    spider.apply_action(ArticulationAction(joint_positions=joint_targets))
    my_world.step(render=True)
    # NOTE(review): sim_time advances by rendering_dt (0.16 s) per loop while
    # the real-time sleep below targets physics_dt (0.02 s) — confirm how much
    # simulated time one step(render=True) call actually advances, and that the
    # policy query rate matches the decimated rate used in training.
    sim_time += rendering_dt
    step_count += 1
    end = time.time()
    loop_time = end - start
    print(f"Step {step_count}, Loop time: {loop_time:.4f}s")
    target_time = physics_dt  # per-loop real-time budget (0.02 s, not 0.16 s)
    if loop_time < target_time:
        time.sleep(target_time - loop_time)
simulation_app.close()
Any help or pointers would be greatly appreciated - I’m very close to getting this from simulation to real hardware. Happy to provide more info or test suggestions. Thanks in advance!
Isaac Sim Version
4.5.0
4.2.0
4.1.0
4.0.0
4.5.0
2023.1.1
2023.1.0-hotfix.1
Other (please specify):
Operating System
Ubuntu 22.04
Ubuntu 20.04
Windows 11
Windows 10
Other (please specify):
GPU Information
- Model: RTX 4090 Founders Edition
- Driver Version: 577.00
Screenshots or Videos
That’s how the robots are walking after training when tested with play.py script:
And this is what happens when I try to run the same policy with my python script in IsaacSim: