Hi All,
I am a new user. I want to use the UR10 robot to run an RL reach task, and I set the robot action space to task space, which means the end effector should move to the desired position at the next step. When I use KinematicsSolver to compute the IK solution for the joint targets of the next step, I find the end effector cannot reach the desired position at the next step, and I don't know why this happens. I suspect the position does not converge because of the fast command frequency, so I tried adjusting the simulation step size, but the effect is not obvious. Here is my code.
import gym
from gym import spaces
import numpy as np
class Ur10Env(gym.Env):
    metadata = {"render.modes": ["human"]}

    def __init__(
        self,
        skip_frame=1,
        physics_dt=1.0 / 60,
        rendering_dt=1.0 / 60,
        max_episode_length=2000,
        seed=0,
        headless=True,
    ) -> None:
        from omni.isaac.kit import SimulationApp

        self.headless = headless
        self._simulation_app = SimulationApp({"headless": self.headless, "anti_aliasing": 3})
        self._skip_frame = skip_frame
        self._dt = physics_dt * self._skip_frame
        self._max_episode_length = max_episode_length
        self._steps_after_reset = int(rendering_dt / physics_dt)

        from omni.isaac.core import World
        from omni.isaac.core.objects import VisualSphere
        from omni.isaac.core.prims import RigidPrim
        from omni.isaac.core.utils.stage import add_reference_to_stage
        from omni.isaac.core.utils.viewports import set_camera_view
        from omni.isaac.core.utils.prims import create_prim
        from omni.isaac.core.articulations import Articulation
        from omni.isaac.universal_robots import KinematicsSolver

        self._my_world = World(physics_dt=physics_dt, rendering_dt=rendering_dt, stage_units_in_meters=1.0)
        self._my_world.scene.add_default_ground_plane()

        # add the robot to stage
        usd_path = "omniverse://localhost/Library/robots/ur10e.usd"
        add_reference_to_stage(usd_path=usd_path, prim_path="/World/ur10")
        robot = Articulation(prim_path="/World/ur10", name="ur10")
        end_effector_prim_path = "/World/ur10/tool0"
        end_effector = RigidPrim(prim_path=end_effector_prim_path, name="end_effector")
        self.robot: Articulation = self._my_world.scene.add(robot)
        self.end_effector = self._my_world.scene.add(end_effector)
        self.robot_controller = self.robot.get_articulation_controller()

        # add the target sphere to stage
        target = VisualSphere(
            prim_path="/World/Target",
            name="target",
            position=np.array([0.5, 0.5, 0.6]),
            radius=0.02,
            color=np.array([0, 0, 1]),
        )
        self.target = self._my_world.scene.add(target)

        # IK
        self.ik_controller = KinematicsSolver(self.robot, "tool0")

        # set default camera viewport position and target
        camera_position = [3, 3, 2]
        camera_target = [0, 0, 0]
        set_camera_view(eye=camera_position, target=camera_target, camera_prim_path="/OmniverseKit_Persp")

        # add cylinder_light
        create_prim(
            prim_path="/Environment/cylinder_light",
            prim_type="CylinderLight",
            position=np.array([0.0, 0.0, 1.5]),
            attributes={
                "inputs:radius": 0.02,
                "inputs:length": 0.50,
                "inputs:intensity": 5e3,
                "inputs:color": (1.0, 0.0, 1.0),
            },
        )

        self.seed(seed)
        self.reward_range = (-float("inf"), float("inf"))
        gym.Env.__init__(self)

        self._default_arm_dof_pos = np.array(
            [-np.pi / 2, -np.pi / 2, -np.pi / 2, -np.pi / 2, np.pi / 2, 0],
            dtype=np.float32,
        )
        self._max_dof_vel = np.array(
            [2.175, 2.175, 2.175, 2.175, 2.61, 2.61],
            dtype=np.float32,
        )
        self._max_action = np.array([np.pi, np.pi / 2, 0.01], dtype=np.float32)
        self.reset_counter = 0
        self._actions_num = 3
        self._observations_num = 12
        self.sim_step = 0
        self.action_space = spaces.Box(
            low=-1.0,
            high=1.0,
            shape=(self._actions_num,),
            dtype=np.float32,
        )
        self.observation_space = spaces.Box(
            low=-float("inf"),
            high=float("inf"),
            shape=(self._observations_num,),
            dtype=np.float32,
        )
        return

    def get_dt(self):
        return self._dt

    def step(self, action):
        from omni.isaac.core.utils.rotations import euler_angles_to_quat
        from omni.isaac.core.utils.types import ArticulationAction

        pre_obs = self.get_observations()
        pre_ee_position = pre_obs[6:9]
        print("pre_ee_position:", pre_ee_position)

        # rescale the normalized action to (azimuth, elevation, step length)
        action = np.array(action, dtype=np.float32)
        for i in range(len(action)):
            if i == 2:
                action[i] = action[i] * self._max_action[i] / 2 + self._max_action[i] / 2
            else:
                action[i] = action[i] * self._max_action[i]
        print("action:", action)

        # convert the spherical step (azimuth, elevation, length) to a Cartesian displacement
        dx = action[2] * np.cos(action[1]) * np.cos(action[0])
        dy = action[2] * np.cos(action[1]) * np.sin(action[0])
        dz = action[2] * np.sin(action[1])
        print("dx:", dx, "dy:", dy, "dz:", dz)

        next_ee_position = np.zeros_like(pre_ee_position)
        # next_orientation = np.zeros_like(pre_ee_orientation)
        next_ee_position[0] = pre_ee_position[0] + dx
        next_ee_position[1] = pre_ee_position[1] + dy
        next_ee_position[2] = pre_ee_position[2] + dz
        # next_orientation[0] = pre_ee_orientation[0] + action[2]
        # next_orientation[1] = pre_ee_orientation[1] + action[3]
        # next_orientation[2] = pre_ee_orientation[2] + action[4]
        print("desired position:", next_ee_position)
        # next_orientation = euler_angles_to_quat(next_orientation)

        # compute joint targets for the desired end-effector position and apply them
        action, succ = self.ik_controller.compute_inverse_kinematics(next_ee_position)
        for i in range(self._skip_frame):
            if succ:
                self.robot.apply_action(action)
                self._my_world.step(render=False)
            else:
                # IK failed: command the previous end-effector position instead
                action, _ = self.ik_controller.compute_inverse_kinematics(pre_ee_position)
                self.robot.apply_action(action)
                self._my_world.step(render=False)
        self._my_world.render()

        info = {}
        done = False
        self.sim_step += 1
        if self.truncated():
            done = True
        current_obs = self.get_observations()
        print("current_ee_position:", current_obs[6:9])
        reward = self.reward(
            pre_obs,
            current_obs,
        )
        if self.done(
            target_world_position=current_obs[0:3],
            current_ee_position=current_obs[6:9],
        ):
            done = True
        return current_obs, reward, done, info

    def reset(self):
        self._my_world.reset()
        print("---------World reset---------")
        self._arm_dof_idxs = np.array(
            [self.robot.get_dof_index(name) for name in self.robot.dof_names]
        )
        self.reset_counter = 0
        self.sim_step = 0
        # randomize robot state
        arm_dof_pos = np.zeros((len(self._arm_dof_idxs),))
        # randomize DOF positions around the default pose
        gaussian_noise = np.random.normal(0, np.pi / 32, (len(self._arm_dof_idxs),))
        arm_dof_pos = self._default_arm_dof_pos + gaussian_noise
        self.robot.set_joint_positions(arm_dof_pos, self._arm_dof_idxs)
        init_obs = self.get_observations()
        init_ee_position, init_ee_orientation = self.end_effector.get_world_pose()
        print("--------initial end_effector_posture--------")
        print("end_effector_position:", init_ee_position)
        print("end_effector_orientation:", init_ee_orientation)
        return init_obs

    def get_observations(self):
        from omni.isaac.core.utils.rotations import quat_to_euler_angles

        target_position, target_orientation = self.target.get_world_pose()
        # target_linear_vel = self._target.get_linear_velocity()
        target_orientation = quat_to_euler_angles(target_orientation)
        ee_position, ee_orientation = self.end_effector.get_world_pose()
        ee_orientation = quat_to_euler_angles(ee_orientation)
        # obstacle_position = self._obstacle_objects[0].get_world_pose()[0]
        # obstacle_angular_vel = self._obstacle_objects[0].get_angular_velocity()
        # obstacle_linear_vel = self._obstacle_objects[0].get_linear_velocity()
        # collect target and end-effector poses for the observation
        obs = np.concatenate(
            (
                target_position,     # 0:3
                target_orientation,  # 3:6
                ee_position,         # 6:9
                ee_orientation,      # 9:12
            ),
        )
        return obs

    def reward(
        self,
        pre_obs,
        current_obs,
    ) -> float:
        pre_target_position = pre_obs[0:3]
        current_target_position = current_obs[0:3]
        pre_ee_position = pre_obs[6:9]
        current_ee_position = current_obs[6:9]
        last_dists = np.linalg.norm(pre_target_position - pre_ee_position)
        current_dists = np.linalg.norm(current_target_position - current_ee_position)
        print("last_dists: ", last_dists)
        print("current_dists: ", current_dists)
        rewards1_1 = -10 * current_dists
        rewards1_2 = last_dists - current_dists
        rewards1_3 = rewards1_1 + rewards1_2
        print(rewards1_1)
        print(rewards1_2)
        rewards3 = -0.01 * self.sim_step
        print(rewards3)
        if current_dists < 0.01:
            rewards5 = 50000
        else:
            rewards5 = 0
        print(rewards5)
        rewards = rewards1_1 + rewards3 + rewards5
        return rewards

    def render(self, mode="human"):
        return

    def close(self):
        self._simulation_app.close()
        return

    def seed(self, seed=None):
        self.np_random, seed = gym.utils.seeding.np_random(seed)
        np.random.seed(seed)
        return [seed]

    def truncated(self):
        if self.sim_step > self._max_episode_length:
            return True
        else:
            return False

    def done(
        self,
        target_world_position,
        current_ee_position,
    ):
        current_dists = np.linalg.norm(target_world_position - current_ee_position)
        if current_dists < 0.01:
            return True
        else:
            return False
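
To debug this, I am thinking of measuring how far the joints actually are from the IK solution after the skip-frame loop, with a small helper like the one below. This is just a sketch (joint_tracking_error is a name I made up for illustration); the idea is to call it at the end of step() with the joint targets from the IK result (action.joint_positions) and the measured positions from self.robot.get_joint_positions(), assuming both cover the same joints in the same order. Would that be a reasonable way to check whether the arm has converged before the next command is sent, or is there a better way to handle the command frequency?

import numpy as np

def joint_tracking_error(commanded_joint_positions, measured_joint_positions):
    # max absolute difference (rad) between the joint targets from the IK
    # solution and the joint positions actually reached after stepping;
    # assumes both arrays cover the same joints in the same order
    commanded = np.asarray(commanded_joint_positions, dtype=np.float64)
    measured = np.asarray(measured_joint_positions, dtype=np.float64)
    return float(np.max(np.abs(commanded - measured)))

# intended use at the end of step(), after the skip-frame loop:
# err = joint_tracking_error(action.joint_positions, self.robot.get_joint_positions())
# print("joint tracking error (rad):", err)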