End_effector position cannot converge to the desired position when I control the robot with InverseKinematicsSolver in an RL reach task

Hi All,
I am a new user. I want to use a UR10 robot to run an RL reach task, and I set the robot action to task space, which means the end_effector should move to the desired position at the next step. When I use KinematicsSolver to compute the IK solution for the joints for the next step, I find that the end_effector cannot reach the desired position at the next step, and I don't know why this happens. I suspect the position does not converge because of the fast command frequency, so I tried adjusting the simulation step size, but the effect was not obvious (a rough calculation of the command timing is sketched after the code listing). Here is my code.

import gym
from gym import spaces
import numpy as np

class Ur10Env(gym.Env):
metadata = {"render.modes": ["human"]}

def __init__(
    self,
    skip_frame=1,
    physics_dt=1.0 / 60,
    rendering_dt=1.0 / 60,
    max_episode_length=2000,
    seed=0,
    headless=True,
) -> None:
    from omni.isaac.kit import SimulationApp

    self.headless = headless
    self._simulation_app = SimulationApp({"headless": self.headless, "anti_aliasing": 3})
    self._skip_frame = skip_frame
    self._dt = physics_dt * self._skip_frame
    self._max_episode_length = max_episode_length
    self._steps_after_reset = int(rendering_dt / physics_dt)
    
    from omni.isaac.core import World
    from omni.isaac.core.objects import VisualSphere
    from omni.isaac.core.prims import RigidPrim
    from omni.isaac.core.utils.stage import add_reference_to_stage
    from omni.isaac.core.utils.viewports import set_camera_view
    from omni.isaac.core.utils.prims import create_prim
    from omni.isaac.core.articulations import Articulation
    from omni.isaac.universal_robots import KinematicsSolver

    self._my_world = World(physics_dt=physics_dt, rendering_dt=rendering_dt, stage_units_in_meters=1.0)
    self._my_world.scene.add_default_ground_plane()
    
    # add the robot to stage
    usd_path = "omniverse://localhost/Library/robots/ur10e.usd"
    add_reference_to_stage(usd_path=usd_path, prim_path="/World/ur10")  
    robot = Articulation(prim_path="/World/ur10", name="ur10")
    end_effector_prim_path = "/World/ur10/tool0"
    end_effector = RigidPrim(prim_path=end_effector_prim_path, name="end_effector")
    self.robot: Articulation = self._my_world.scene.add(robot)
    self.end_effector = self._my_world.scene.add(end_effector)
    self.robot_controller = self.robot.get_articulation_controller()
    
    # add the target sphere to stage
    target = VisualSphere(
        prim_path= "/World/Target",
        name="target",
        position=np.array([0.5, 0.5, 0.6]),
        radius=0.02,
        color=np.array([0, 0, 1])
    )
    self.target = self._my_world.scene.add(target)
    
    # IK
    self.ik_controller = KinematicsSolver(self.robot, "tool0")

    # set default camera viewport position and target
    camera_position=[3, 3, 2]
    camera_target=[0, 0, 0]
    set_camera_view(eye=camera_position, target=camera_target, camera_prim_path="/OmniverseKit_Persp")
    
    # add cylinder_light
    create_prim(
        prim_path="/Environment/cylinder_light",
        prim_type="CylinderLight",
        position=np.array([0.0, 0.0, 1.5]),
        attributes={
            "inputs:radius": 0.02,
            "inputs:length": 0.50,
            "inputs:intensity": 5e3,
            "inputs:color": (1.0, 0.0, 1.0)
        }
    )
    
    self.seed(seed)
    self.reward_range = (-float("inf"), float("inf"))
    gym.Env.__init__(self)
                                                                  
    self._default_arm_dof_pos = np.array(
        [-np.pi / 2, -np.pi / 2, -np.pi / 2, -np.pi / 2, np.pi / 2, 0], 
        dtype=np.float32
    ) 
    self._max_dof_vel = np.array(
        [2.175, 2.175, 2.175, 2.175, 2.61, 2.61], 
        dtype=np.float32
    )
    # scaling for the policy action: [azimuth (rad), elevation (rad), max step length (m)]
    self._max_action = np.array([np.pi, np.pi/2, 0.01], dtype=np.float32)
    
    self.reset_counter = 0  
    self._actions_num = 3                                              
    self._observations_num = 12                                     
    self.sim_step = 0
    self.action_space = spaces.Box(                                     
        low=-1.0,
        high=1.0,
        shape=(self._actions_num, ),
        dtype=np.float32
    )                                                                           
    self.observation_space = spaces.Box(                           
        low=-float("inf"), 
        high=float("inf"), 
        shape=(self._observations_num,), 
        dtype=np.float32
    )
    return
    
def get_dt(self):
    return self._dt

def step(self, action):
    from omni.isaac.core.utils.rotations import euler_angles_to_quat
    from omni.isaac.core.utils.types import ArticulationAction 
    
    pre_obs = self.get_observations()
    pre_ee_position = pre_obs[6:9]
    print("pre_ee_position:", pre_ee_position)
    
    action = np.array(action, dtype=np.float32)
    for i in range(len(action)):
        if i==2:
            action[i] = action[i] * self._max_action[i]/2 + self._max_action[i]/2
        else:
            action[i] = action[i] * self._max_action[i]
    print("action:",action)
    dx = action[2] * np.cos(action[1]) * np.cos(action[0])
    dy = action[2] * np.cos(action[1]) * np.sin(action[0])
    dz = action[2] * np.sin(action[1])
    print("dx:",dx,"dy:",dy,"dz:",dz)
    
    next_ee_position = np.zeros_like(pre_ee_position)
    # next_oritation = np.zeros_like(pre_ee_oritation)
    next_ee_position[0] = pre_ee_position[0] + dx
    next_ee_position[1] = pre_ee_position[1] + dy
    next_ee_position[2] = pre_ee_position[2] + dz
    # next_oritation[0] = pre_ee_oritation[0] + action[2]
    # next_oritation[1] = pre_ee_oritation[1] + action[3]
    # next_oritation[2] = pre_ee_oritation[2] + action[4]
    print("desired position:", next_ee_position)
    # next_oritation = euler_angles_to_quat(next_oritation)
    
    action, succ = self.ik_controller.compute_inverse_kinematics(next_ee_position)

    for i in range(self._skip_frame):
        if succ:
            self.robot.apply_action(action)
            self._my_world.step(render=False)
        else:
            action, _ = self.ik_controller.compute_inverse_kinematics(pre_ee_position)
            self.robot.apply_action(action)
            self._my_world.step(render=False)
    self._my_world.render()
    
    info = {}
    done = False
    self.sim_step += 1
    if self.truncated():
        done = True
   
    current_obs = self.get_observations()   
    print("current_ee_position:", current_obs[6:9])
    reward = self.reward(
        pre_obs,
        current_obs,
    )
    
    if self.done(
        target_world_position=current_obs[0:3], 
        current_ee_position=current_obs[6:9]
    ):
        done = True   
    return current_obs, reward, done, info

def reset(self):
    self._my_world.reset()
    print("---------World reset---------")
    self._arm_dof_idxs = np.array(
        [self.robot.get_dof_index(name) for name in self.robot.dof_names]
    )    
    self.reset_counter = 0
    self.sim_step = 0 
    # randomize robot state
    arm_dof_pos = np.zeros((len(self._arm_dof_idxs), ))
    # randomize DOF positions
    gaussian_noise = np.random.normal(0, np.pi / 32, (len(self._arm_dof_idxs),))
    arm_dof_pos = self._default_arm_dof_pos + gaussian_noise   
    self.robot.set_joint_positions(arm_dof_pos, self._arm_dof_idxs)
    init_obs = self.get_observations()
    init_ee_position, init_ee_oritation= self.end_effector.get_world_pose()
    print("--------initial end_effector_posture--------")
    print("end_effector_position:", init_ee_position)
    print("end_effector_oritation:", init_ee_oritation)
    return init_obs

def get_observations(self):
    from omni.isaac.core.utils.rotations import quat_to_euler_angles
    
    target_position, target_oritation = self.target.get_world_pose()
    # target_linear_vel = self._target.get_linear_velocity()
    target_oritation = quat_to_euler_angles(target_oritation)
    ee_position, ee_oritation = self.end_effector.get_world_pose()
    ee_oritation = quat_to_euler_angles(ee_oritation)
    # obstacle_position = self._obstacle_objects[0].get_world_pose()[0]
    # obstacle_angular_vel = self._obstacle_objects[0].get_angular_velocity()
    # obstacle_linear_vel = self._obstacle_objects[0].get_linear_velocity()
    
    # collect robot joint positions and velocities for observation
    obs = np.concatenate(
        (
            target_position,            # 0:3
            target_oritation,           # 3:6
            ee_position,                # 6:9
            ee_oritation                # 9:12
        ),
    )                                    
    return obs

def reward(
    self, 
    pre_obs,
    current_obs
    ) -> float:
    pre_target_position = pre_obs[0:3] 
    current_target_position = current_obs[0:3] 
    pre_ee_position = pre_obs[6:9]
    current_ee_position = current_obs[6:9]
    last_dists = np.linalg.norm(pre_target_position-pre_ee_position)  
    current_dists = np.linalg.norm(current_target_position-current_ee_position)  
    print("last_dists: ", last_dists)
    print("current_dists: ", current_dists)

    rewards1_1 = -10 * current_dists                      
    rewards1_2 = (last_dists - current_dists)             
    rewards1_3 = rewards1_1 + rewards1_2                   
    print(rewards1_1)
    print(rewards1_2)
    
    rewards3 = -0.01 * self.sim_step
    print(rewards3)
    
    if current_dists < 0.01:
        rewards5 = 50000
    else:
        rewards5 = 0
    print(rewards5)
    
    rewards = rewards1_1 + rewards3 + rewards5
    return rewards

def render(self, mode="human"):
    return

def close(self):
    self._simulation_app.close()
    return

def seed(self, seed=None):
    self.np_random, seed = gym.utils.seeding.np_random(seed)
    np.random.seed(seed)
    return [seed]

def truncated(self):
    if self.sim_step > self._max_episode_length:
        return True
    else:
        return False

def done(
    self,
    target_world_position,
    current_ee_position, 
):
    current_dists = np.linalg.norm(target_world_position-current_ee_position)
    if current_dists < 0.01:
        return True
    else:
        return False
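For reference, a rough calculation of the command timing I suspected above (purely illustrative, reusing the constants from __init__):

import numpy as np

physics_dt = 1.0 / 60            # as passed to World above
skip_frame = 1                   # physics steps taken per env step
control_dt = physics_dt * skip_frame

max_dof_vel = np.array([2.175, 2.175, 2.175, 2.175, 2.61, 2.61])   # rad/s, from __init__
max_joint_motion = max_dof_vel * control_dt                        # rad per env step

print(f"control period: {control_dt * 1e3:.1f} ms")
print("max joint motion per env step (rad):", max_joint_motion)
print("commanded Cartesian step (m): 0.01")                        # self._max_action[2]

Even if a 1 cm Cartesian step looks feasible given these numbers, the joint-space distance of the IK solution can be much larger than the task-space step suggests, so the position controller may simply not have reached the new target by the time the observation is read.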

[image: printed output comparing the desired position sent to the IK solver with the current end-effector position read after step()]

I don’t know how to solve your problem, but this guy has done what you want:

Thank you for your reply, I’ll take a look.


@1062033103 ,

If you can provide a task space target for which the UR10 IK solver is failing to converge, we can troubleshoot why the IK solver is not working.

In general the IK solver is not designed for continuous control of a robot arm. There is no guarantee that a nearby pose in task space will generate a nearby configuration in joint space. Using a warm-start at the current joint positions will increase the likelihood of finding a nearby configuration, but it is never guaranteed.
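A minimal sketch of that idea, using only objects already defined in the environment above (self.robot, self.ik_controller, self._dt, self._max_dof_vel); it rejects IK solutions whose joint-space jump exceeds what the velocity limits allow within one control period, which is one way to catch the "nearby task-space pose, distant joint-space configuration" case:

import numpy as np

def trackable_ik_action(self, target_position):
    # Solve IK for the new task-space target (same call used in step() above).
    ik_action, success = self.ik_controller.compute_inverse_kinematics(target_position)
    if not success:
        return None
    # Current positions of the DOFs the solver commands (assumes the solver
    # fills in joint_indices; otherwise pass None to read all DOFs).
    current = self.robot.get_joint_positions(ik_action.joint_indices)
    jump = np.abs(np.asarray(ik_action.joint_positions) - current)
    # Largest joint motion the velocity limits allow in one control period
    # (assumes self._max_dof_vel is ordered like the solver's DOFs).
    allowed = self._max_dof_vel * self._dt
    if np.any(jump > allowed):
        # Valid but distant configuration: the controller cannot reach it within
        # one env step, so skip or shrink this command instead of applying it.
        return None
    return ik_action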

Thanks. Because the reinforcement learning agent generates the end-effector target position, the position command at each step is random, so it is not convenient to provide a fixed target that fails.
I found that some targets converge approximately, but others deviate a lot. For example, the desired position in the picture is the goal position sent by the reinforcement learning agent, which is then converted to joint commands by the IK solver. The current ee position is the end-effector position read after step(). As you can see, the position deviation is relatively large.
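One way to narrow down whether the error comes from the IK solution itself or from the joint controller not tracking it within one env step is to teleport the joints straight to the IK result and then read the end-effector pose back from the simulation. This is only a sketch, reusing the objects defined in the environment above:

import numpy as np

def check_ik_error(self, target_position):
    # Solve IK for the target produced by the RL agent.
    ik_action, success = self.ik_controller.compute_inverse_kinematics(target_position)
    if not success:
        print("IK did not converge for", target_position)
        return
    # Write the IK joint positions directly (no controller dynamics), keep the
    # same targets applied so the controller holds the pose, step once, and
    # read the resulting end-effector position.
    self.robot.set_joint_positions(ik_action.joint_positions, ik_action.joint_indices)
    self.robot.apply_action(ik_action)
    self._my_world.step(render=False)
    ee_position, _ = self.end_effector.get_world_pose()
    print("IK solution error (m):", np.linalg.norm(ee_position - target_position))

If this error is small while the error seen during normal stepping is large, the IK solution is fine and the deviation comes from the position controller not having enough physics steps to track each new target.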