Spawning Rigid Body in Isaac Lab is incorrect

pedrosilva7320 · October 8, 2024, 1:53pm

Hello there,

I am trying to implement an Direct RL Workflow for a Franka robot to pick up a cube and move it to a goal position. Unfortunately I am not achieving good results with the RL training, so I simplified the goal to only move the robot’s end effector to the cube. position.

The reward is simply based on a Euclidian distance on the Franka’s hand to the cube’s position. I am running with the same parameters of the Franka Direct Cabinet Task with 24000 steps.

Besides the bad results there is also a problem in the spawning the cube in a random position in which it spawns and moves without collision with the robot. Some input would be great.

Thanks in advance

# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

from __future__ import annotations

import torch

from omni.isaac.core.utils.stage import get_current_stage
from omni.isaac.core.utils.torch.transformations import tf_combine, tf_inverse, tf_vector
from pxr import UsdGeom

import omni.isaac.lab.sim as sim_utils
from omni.isaac.lab.actuators.actuator_cfg import ImplicitActuatorCfg
from omni.isaac.lab.assets import Articulation, ArticulationCfg, RigidObject, RigidObjectCfg
from omni.isaac.lab.envs import DirectRLEnv, DirectRLEnvCfg
from omni.isaac.lab.scene import InteractiveSceneCfg
from omni.isaac.lab.sim import SimulationCfg
from omni.isaac.lab.terrains import TerrainImporterCfg
from omni.isaac.lab.utils import configclass
from omni.isaac.lab.utils.assets import ISAAC_NUCLEUS_DIR
from omni.isaac.lab.utils.math import sample_uniform, quat_mul, quat_from_euler_xyz



@configclass
class FrankaCuboidEnvCfg(DirectRLEnvCfg):
    # env
    episode_length_s = 8.3333  # 500 timesteps
    decimation = 2
    num_actions = 9
    num_observations = 24
    num_states = 0

    # simulation
    sim: SimulationCfg = SimulationCfg(
        dt=1 / 120,
        render_interval=decimation,
        disable_contact_processing=True,
        physics_material=sim_utils.RigidBodyMaterialCfg(
            friction_combine_mode="multiply",
            restitution_combine_mode="multiply",
            static_friction=1.0,
            dynamic_friction=1.0,
            restitution=0.0,
        ),
    )

    # scene
    scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=4096, env_spacing=3.0, replicate_physics=True)

    # robot
    robot = ArticulationCfg(
        prim_path="/World/envs/env_.*/Robot",
        spawn=sim_utils.UsdFileCfg(
            usd_path=f"{ISAAC_NUCLEUS_DIR}/Robots/Franka/franka_instanceable.usd",
            activate_contact_sensors=False,
            rigid_props=sim_utils.RigidBodyPropertiesCfg(
                disable_gravity=False,
                max_depenetration_velocity=5.0,
            ),
            articulation_props=sim_utils.ArticulationRootPropertiesCfg(
                enabled_self_collisions=False, solver_position_iteration_count=12, solver_velocity_iteration_count=1
            ),
        ),
        init_state=ArticulationCfg.InitialStateCfg(
            joint_pos={
                "panda_joint1": 1.157,
                "panda_joint2": -1.066,
                "panda_joint3": -0.155,
                "panda_joint4": -2.239,
                "panda_joint5": -1.841,
                "panda_joint6": 1.003,
                "panda_joint7": 0.469,
                "panda_finger_joint.*": 0.035,
            },
            pos=(0.0, 0.0, 0.0),
            rot=(0.0, 0.0, 0.0, 1.0),
        ),
        actuators={
            "panda_shoulder": ImplicitActuatorCfg(
                joint_names_expr=["panda_joint[1-4]"],
                effort_limit=87.0,
                velocity_limit=2.175,
                stiffness=80.0,
                damping=4.0,
            ),
            "panda_forearm": ImplicitActuatorCfg(
                joint_names_expr=["panda_joint[5-7]"],
                effort_limit=12.0,
                velocity_limit=2.61,
                stiffness=80.0,
                damping=4.0,
            ),
            "panda_hand": ImplicitActuatorCfg(
                joint_names_expr=["panda_finger_joint.*"],
                effort_limit=200.0,
                velocity_limit=0.2,
                stiffness=2e3,
                damping=1e2,
            ),
        },
    )

    # cuboid
    cuboid = RigidObjectCfg(
        prim_path="/World/envs/env_.*/cuboid",
        spawn=sim_utils.UsdFileCfg(
            usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Blocks/DexCube/dex_cube_instanceable.usd",
        ),
        init_state=RigidObjectCfg.InitialStateCfg(
            pos=(0.5, 0.0, 0.0),
            rot=(0.0, 0.0, 0.0, 1.0),
            lin_vel=(0.0, 0.0, 0.0),
            ang_vel=(0.0, 0.0, 0.0),
        ),
    )

    # ground plane
    terrain = TerrainImporterCfg(
        prim_path="/World/ground",
        terrain_type="plane",
        collision_group=-1,
        physics_material=sim_utils.RigidBodyMaterialCfg(
            friction_combine_mode="multiply",
            restitution_combine_mode="multiply",
            static_friction=1.0,
            dynamic_friction=1.0,
            restitution=0.0,
        ),
    )

    action_scale = 7.5
    dof_velocity_scale = 0.1

    # reward scales
    dist_reward_scale = 1.0
    move_reward_scale = 16
    lift_reward_scale = 15


class FrankaCuboidEnv(DirectRLEnv):
    # pre-physics step calls
    #   |-- _pre_physics_step(action)
    #   |-- _apply_action()
    # post-physics step calls
    #   |-- _get_dones()
    #   |-- _get_rewards()
    #   |-- _reset_idx(env_ids)
    #   |-- _get_observations()

    cfg: FrankaCuboidEnvCfg

    def __init__(self, cfg: FrankaCuboidEnvCfg, render_mode: str | None = None, **kwargs):
        super().__init__(cfg, render_mode, **kwargs)

        def get_env_local_pose(env_pos: torch.Tensor, xformable: UsdGeom.Xformable, device: torch.device):
            """Compute pose in env-local coordinates"""
            world_transform = xformable.ComputeLocalToWorldTransform(0)
            world_pos = world_transform.ExtractTranslation()
            world_quat = world_transform.ExtractRotationQuat()

            px = world_pos[0] - env_pos[0]
            py = world_pos[1] - env_pos[1]
            pz = world_pos[2] - env_pos[2]
            qx = world_quat.imaginary[0]
            qy = world_quat.imaginary[1]
            qz = world_quat.imaginary[2]
            qw = world_quat.real

            return torch.tensor([px, py, pz, qw, qx, qy, qz], device=device)

        self.dt = self.cfg.sim.dt * self.cfg.decimation

        # create auxiliary variables for computing applied action, observations and rewards
        self.robot_dof_lower_limits = self._robot.data.soft_joint_pos_limits[0, :, 0].to(device=self.device)
        self.robot_dof_upper_limits = self._robot.data.soft_joint_pos_limits[0, :, 1].to(device=self.device)

        self.robot_dof_speed_scales = torch.ones_like(self.robot_dof_lower_limits)
        self.robot_dof_speed_scales[self._robot.find_joints("panda_finger_joint1")[0]] = 0.1
        self.robot_dof_speed_scales[self._robot.find_joints("panda_finger_joint2")[0]] = 0.1

        self.robot_dof_targets = torch.zeros((self.num_envs, self._robot.num_joints), device=self.device)

        stage = get_current_stage()
        hand_pose = get_env_local_pose(
            self.scene.env_origins[0],
            UsdGeom.Xformable(stage.GetPrimAtPath("/World/envs/env_0/Robot/panda_link7")),
            self.device,
        )
        lfinger_pose = get_env_local_pose(
            self.scene.env_origins[0],
            UsdGeom.Xformable(stage.GetPrimAtPath("/World/envs/env_0/Robot/panda_leftfinger")),
            self.device,
        )
        rfinger_pose = get_env_local_pose(
            self.scene.env_origins[0],
            UsdGeom.Xformable(stage.GetPrimAtPath("/World/envs/env_0/Robot/panda_rightfinger")),
            self.device,
        )

        finger_pose = torch.zeros(7, device=self.device)
        finger_pose[0:3] = (lfinger_pose[0:3] + rfinger_pose[0:3]) / 2.0
        finger_pose[3:7] = lfinger_pose[3:7]
        hand_pose_inv_rot, hand_pose_inv_pos = tf_inverse(hand_pose[3:7], hand_pose[0:3])

        robot_local_grasp_pose_rot, robot_local_pose_pos = tf_combine(
            hand_pose_inv_rot, hand_pose_inv_pos, finger_pose[3:7], finger_pose[0:3]
        )
        robot_local_pose_pos += torch.tensor([0, 0.04, 0], device=self.device)
        self.robot_local_grasp_pos = robot_local_pose_pos.repeat((self.num_envs, 1))
        self.robot_local_grasp_rot = robot_local_grasp_pose_rot.repeat((self.num_envs, 1))

        self.gripper_forward_axis = torch.tensor([0, 0, 1], device=self.device, dtype=torch.float32).repeat(
            (self.num_envs, 1)
        )
        self.gripper_up_axis = torch.tensor([0, 1, 0], device=self.device, dtype=torch.float32).repeat(
            (self.num_envs, 1)
        )

        self.hand_link_idx = self._robot.find_bodies("panda_link7")[0][0]
        self.left_finger_link_idx = self._robot.find_bodies("panda_leftfinger")[0][0]
        self.right_finger_link_idx = self._robot.find_bodies("panda_rightfinger")[0][0]

        self.robot_grasp_rot = torch.zeros((self.num_envs, 4), device=self.device)
        self.robot_grasp_pos = torch.zeros((self.num_envs, 3), device=self.device)

        self.object_pos = torch.zeros((self.num_envs, 3), device=self.device)

    def _setup_scene(self):
        self._robot = Articulation(self.cfg.robot)
        self._cuboid = RigidObject(self.cfg.cuboid)
        self.scene.articulations["robot"] = self._robot

        self.cfg.terrain.num_envs = self.scene.cfg.num_envs
        self.cfg.terrain.env_spacing = self.scene.cfg.env_spacing
        self._terrain = self.cfg.terrain.class_type(self.cfg.terrain)

        # clone, filter, and replicate
        self.scene.clone_environments(copy_from_source=False)
        self.scene.filter_collisions(global_prim_paths=[self.cfg.terrain.prim_path])

        # add lights
        light_cfg = sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75))
        light_cfg.func("/World/Light", light_cfg)

    # pre-physics step calls

    def _pre_physics_step(self, actions: torch.Tensor):
        self.actions = actions.clone().clamp(-1.0, 1.0)
        targets = self.robot_dof_targets + self.robot_dof_speed_scales * self.dt * self.actions * self.cfg.action_scale
        self.robot_dof_targets[:] = torch.clamp(targets, self.robot_dof_lower_limits, self.robot_dof_upper_limits)

    def _apply_action(self):
        self._robot.set_joint_position_target(self.robot_dof_targets)

    # post-physics step calls

    def _get_dones(self) -> tuple[torch.Tensor, torch.Tensor]:
        #Achieved goal pos
        object_pos = self._cuboid.data.body_pos_w[:, 0]  # Object position
        robot_pos = self._robot.data.body_pos_w[:, self.hand_link_idx]

        terminated = torch.norm(object_pos - robot_pos, p=2, dim=-1) < 0.2
        d=torch.norm(object_pos - robot_pos, p=2, dim=-1)
        print(d)
        #Time limit
        truncated = self.episode_length_buf >= self.max_episode_length - 1
        
        return terminated, truncated

    def _get_rewards(self) -> torch.Tensor:

        object_pos = self._cuboid.data.body_pos_w[:, 0]  # Object position (x, y, z)
        hand_pos = self._robot.data.body_pos_w[:, self.hand_link_idx]  # Hand position (x, y, z)

        return self._compute_rewards(
            self.actions,
            hand_pos,
            object_pos,
            self.num_envs,
        )
    
    def _reset_idx(self, env_ids: torch.Tensor | None):
        super()._reset_idx(env_ids)

        # 1. Reset robot state: randomize joint positions and set joint velocities to zero
        joint_pos = self._robot.data.default_joint_pos[env_ids] + sample_uniform(
            -0.125, 0.125, (len(env_ids), self._robot.num_joints), self.device
        )
        joint_pos = torch.clamp(joint_pos, self.robot_dof_lower_limits, self.robot_dof_upper_limits)
        joint_vel = torch.zeros_like(joint_pos)
        self._robot.set_joint_position_target(joint_pos, env_ids=env_ids)
        self._robot.write_joint_state_to_sim(joint_pos, joint_vel, env_ids=env_ids)

        root_states = self._cuboid.data.default_root_state[env_ids].clone()

        pose_range = {
            "x": (-0.2, 0.2),   # Range for x position
            "y": (-0.6, 0.6), # Range for y position
            "z": (0.0, 0.0),    # Fixed z position (can be adjusted as needed)
            "roll": (0.0, 0.0),  # Roll range
            "pitch": (0.0, 0.0), # Pitch range
            "yaw": (0.0, 0.0)    # Yaw range
        }
        
        velocity_range = {
            "x": (0.0, 0.0),   # Example range for x velocity
            "y": (0.0, 0.0),   # Example range for y velocity
            "z": (0.0, 0.0),   # Example range for z velocity
            "roll": (0.0, 0.0), # Example roll velocity range
            "pitch": (0.0, 0.0),# Example pitch velocity range
            "yaw": (0.0, 0.0)   # Example yaw velocity range
        }
        
        # poses
        range_list = [pose_range.get(key, (0.0, 0.0)) for key in ["x", "y", "z", "roll", "pitch", "yaw"]]
        ranges = torch.tensor(range_list, device=self.device)
        rand_samples = sample_uniform(ranges[:, 0], ranges[:, 1], (len(env_ids), 6), device=self.device)

        positions = root_states[:, 0:3] + self.scene.env_origins[env_ids] + rand_samples[:, 0:3]
        orientations_delta = quat_from_euler_xyz(rand_samples[:, 3], rand_samples[:, 4], rand_samples[:, 5])
        orientations = quat_mul(root_states[:, 3:7], orientations_delta)
        # velocities
        range_list = [velocity_range.get(key, (0.0, 0.0)) for key in ["x", "y", "z", "roll", "pitch", "yaw"]]
        ranges = torch.tensor(range_list, device=self.device)
        rand_samples = sample_uniform(ranges[:, 0], ranges[:, 1], (len(env_ids), 6), device=self.device)

        velocities = root_states[:, 7:13] + rand_samples

        # set into the physics simulation
        self._cuboid.write_root_pose_to_sim(torch.cat([positions, orientations], dim=-1), env_ids=env_ids)
        self._cuboid.write_root_velocity_to_sim(velocities, env_ids=env_ids)

    def _get_observations(self) -> dict:
        dof_pos_scaled = (
            2.0 * (self._robot.data.joint_pos - self.robot_dof_lower_limits)
            / (self.robot_dof_upper_limits - self.robot_dof_lower_limits)
            - 1.0
        )
        # Distance from the hand to the object
        to_object = self.object_pos - self.robot_grasp_pos

        object_pos = self._cuboid.data.body_pos_w[:, 0]
        
        obs = torch.cat(
            (dof_pos_scaled, self._robot.data.joint_vel * self.cfg.dof_velocity_scale, to_object, object_pos),
            dim=-1,
        )
        return {"policy": torch.clamp(obs, -5.0, 5.0)}
    
    def _compute_rewards(
        self,
        actions,
        franka_grasp_pos,
        cuboid_pos,
        num_envs,
    ):
    
        action_penalty = torch.sum(actions**2, dim=-1)

        # Distance from hand to object (reward for getting close to the object)
        object_distance = torch.norm(franka_grasp_pos - cuboid_pos, p=2, dim=-1)
        distance_reward = 1.0 / (1.0 + object_distance**2)

        rewards = distance_reward #* 1.5 - action_penalty * 0.05

        rewards = torch.where(object_distance < 0.3, rewards + 0.25, rewards)
        rewards = torch.where(object_distance < 0.25, rewards + 0.5, rewards)

        return rewards

pedrosilva7320 · October 9, 2024, 1:32pm

I have fixed the cube’s movement by adding a height to the cube’s spawn position, since it seemed the collision with the ground plane was causing the irregularities.

StrainFlow · October 9, 2024, 7:10pm

Are you getting better results now that you’ve fixed that issue?

pedrosilva7320 · October 10, 2024, 11:22am

Hello @StrainFlow,

I think the main improvement I did, besides adding height to prevent the strange behaviour, was to calculate the middle point between the 2 fingers and use that position instead of the hand position. Now I am getting good results, and will try to increase the complexity of the task by adding a lift reward and distance reward to a goal position to see if it is capable of carrying the cube to that position.

pedrosilva7320 · October 10, 2024, 11:28am

Another question @StrainFlow,
I have additional problem on adapting an ABB Yumi to perform the cabinet opening task. While I am achieving great results, I have a persistent problem that the robot arms are colliding during the task (and quite severely). Do you have any input on this? Do you think. the .usd model is at fault?

mateo.bode.4f · October 14, 2024, 3:50am

Hey, I am trying to create a Manager Based RL environment for JetBot navigation but I have a lot of problems on simulating it. How did you manage to register this environment on Isaac Lab? Can you please tell me how do you run the simulator? I am a beginner on Isaac so I am struggling a bit with this things.
Thanks!

pedrosilva7320 · October 14, 2024, 1:52pm

Hello there,

I have still not worked with Manager Based Environments, so my only advice would be to simplify a task in order to understand how this workflow works. Regarding registering a environment, this is pretty simple, you just have to use the gym function to perform this, I will show you an example from the manager based anymal task:

IsaacLab/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/navigation/config/anymal_c/init.py

# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

import gymnasium as gym

from . import agents, navigation_env_cfg

##
# Register Gym environments.
##

gym.register(
    id="Isaac-Navigation-Flat-Anymal-C-v0",
    entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv",
    disable_env_checker=True,
    kwargs={
        "env_cfg_entry_point": navigation_env_cfg.NavigationEnvCfg,
        "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:NavigationEnvPPORunnerCfg",
        "skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml",
    },
)

gym.register(
    id="Isaac-Navigation-Flat-Anymal-C-Play-v0",
    entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv",
    disable_env_checker=True,
    kwargs={
        "env_cfg_entry_point": navigation_env_cfg.NavigationEnvCfg_PLAY,
        "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:NavigationEnvPPORunnerCfg",
        "skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml",
    },
)

In you case, just import your python script and change the ID so you can train it. Registering an Environment

To then train the model, just perform the steps in Train RL Agent. Just ensure the task you want to perform as the agent you want to use: skrl, sb3… and use the ID you selected in the environment registry.

Topic		Replies	Views
Problems about playing a trained policy of snake robots in Isaac gym Isaac Gym	0	214	June 26, 2024
Isaacgym preview 4 actor root state returns nans with isaacgymenvs-style task Isaac Gym	4	2151	June 30, 2023
RL: Franka not able to reach the goal Isaac Gym	5	1850	February 10, 2023
Customization of IsaacSim Deep RL example Isaac Sim	4	1478	March 30, 2023
Failed to get pose changes of deformable object Isaac Sim soft-body , isaac-sim-v4-2-0	2	28	January 21, 2025
Unable to train multi environment robot Isaac Sim isaacsim , gym	8	2584	December 28, 2022
How to speed up the robot? Isaac Sim python	15	1465	July 13, 2022
Issues with Franka Robot Not Moving in Isaac Sim "Hello_world" Tutorial Isaac Sim urdf-importer	6	1319	February 6, 2024
Problem with some samplentasks for training Isaac Sim	5	944	June 29, 2024
IsaacSim Standalone example (reinforcement learning) Isaac Sim rl	1	1794	April 5, 2024

Spawning Rigid Body in Isaac Lab is incorrect

Related topics