I am trying to train a robot to reach a target, following the Custom RL Example. Everything works fine when the number of environments is 1, but when I try to use 8 environments I run into a problem. I also have 8 robots in my scene. Here is the code.
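First, for context, the training side is essentially the script from the Custom RL Example with my task plugged in. A minimal sketch of what my cartpole_train.py does (the module name and PPO settings below are placeholders; only the model.learn(total_timesteps=2000000) call is exactly what I run):

# sketch of cartpole_train.py, following the structure of the Custom RL Example
# (the module name "robot_arm_task" and the PPO settings are placeholders)
from omni.isaac.gym.vec_env import VecEnvBase

# the env wrapper is created first so the simulation app is running
# before the omni.isaac.core imports inside the task module
env = VecEnvBase(headless=False)

from robot_arm_task import CartpoleTask  # the task class shown below
from stable_baselines3 import PPO

task = CartpoleTask(name="Cartpole")
env.set_task(task, backend="torch")

model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=2000000)
model.save("ppo_robot_arm")
env.close()

And here is the task code: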
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
#
from omni.isaac.cloner import GridCloner
from omni.isaac.core.utils.nucleus import get_assets_root_path
from omni.isaac.core.utils.stage import add_reference_to_stage
from omni.isaac.core.tasks.base_task import BaseTask
from omni.isaac.core.articulations import ArticulationView
from omni.isaac.core.utils.prims import create_prim
from omni.isaac.core.prims import RigidPrimView, RigidPrim, GeometryPrimView, GeometryPrim
from omni.isaac.core.utils.stage import get_current_stage
from pxr import UsdGeom
import omni.kit
from gym import spaces
import numpy as np
import torch
import math
import random
class CartpoleTask(BaseTask):
def __init__(self, name, offset=None) -> None:
# task-specific parameters
self._cartpole_position = [0.0, 0.0, 2.0]
self._reset_dist = 0.005
self._max_push_effort = 50000.0
self._currentframe = 0
# set stage
usd_context = omni.usd.get_context()
self._stage = usd_context.get_stage()
# values used for defining RL buffers
self._num_observations = 11
self._num_actions = 5
self._device = "cpu"
self.num_envs = 8
# a few class buffers to store RL-related states
self.obs = torch.zeros((self.num_envs, self._num_observations))
self.resets = torch.zeros((self.num_envs, 1))
# set the action and observation space for RL
self.action_space = spaces.Box(
np.ones(self._num_actions) * -1.0, np.ones(self._num_actions) * 1.0)
self.observation_space = spaces.Box(
np.ones(self._num_observations) * -np.Inf,
np.ones(self._num_observations) * np.Inf,
)
# trigger __init__ of parent class
BaseTask.__init__(self, name=name, offset=offset)
def set_up_scene(self, scene) -> None:
# retrieve the file path for the robot arm USD file
# assets_root_path = get_assets_root_path()
usd_path = r"G:\Omniverse\Isaac Sim\Robohouse\Modules\Robotic Arm\kr210-r3100-ultra-m.project.usd"
add_reference_to_stage(usd_path, "/World")
# we need to clone robotic arms
# create a GridCloner instance
# cloner = GridCloner(spacing=3)
# base_env_path_robotic_arm = "/World/kr210_r3100_ultra"
# UsdGeom.Cube.Define(get_current_stage(), base_env_path_robotic_arm)
# target_paths = cloner.generate_paths("/World/kr210_r3100_ultra", self.num_envs)
# cloner.clone(source_prim_path="/World/kr210_r3100_ultra", prim_paths=target_paths)
# target_paths = cloner.generate_paths("/World/Cube", self.num_envs)
# cloner.clone(source_prim_path="/World/Cube", prim_paths=target_paths)
# target_paths = cloner.generate_paths("/World/Target", self.num_envs)
# cloner.clone(source_prim_path="/World/Target", prim_paths=target_paths)
# add the Cartpole USD to our stage
# create_prim(prim_path="/World/Cartpole", prim_type="Xform", position=self._cartpole_position)
# for i in range(self.num_envs):
#
# create an ArticulationView wrapper for the robot arms - this gives batched access to all 8 arms
self._robot_arm = ArticulationView(
prim_paths_expr="/World/kr210_r3100_ultra_project*/kr210_r3100_ultra", name="robot_view")
# add the robot arm ArticulationView to the Scene
scene.add(self._robot_arm)
self._head = RigidPrimView(
prim_paths_expr="/World/kr210_r3100_ultra_project*/Cube", name="head_view", reset_xform_properties=False)
scene.add(self._head)
self._target = GeometryPrimView(
prim_paths_expr="/World/kr210_r3100_ultra_project*/Target", name="target_view", reset_xform_properties=True)
scene.add(self._target)
# set default camera viewport position and target
self.set_initial_camera_params()
def set_initial_camera_params(self, camera_position=[10, 10, 3], camera_target=[0, 0, 0]):
viewport = omni.kit.viewport_legacy.get_default_viewport_window()
viewport.set_camera_position(
"/OmniverseKit_Persp", camera_position[0], camera_position[1], camera_position[2], True
)
viewport.set_camera_target(
"/OmniverseKit_Persp", camera_target[0], camera_target[1], camera_target[2], True)
def post_reset(self):
self._axis_1 = self._robot_arm.get_dof_index(
"RevoluteJoint_Axis_1_Axis_2")
self._axis_2 = self._robot_arm.get_dof_index(
"RevoluteJoint_Axis_2_Axis_3")
self._axis_3 = self._robot_arm.get_dof_index(
"RevoluteJoint_Axis_3_Axis_4")
self._axis_4 = self._robot_arm.get_dof_index(
"RevoluteJoint_Axis_4_Axis_5")
self._axis_5 = self._robot_arm.get_dof_index(
"RevoluteJoint_Axis_5_Axis_6")
# self._cart_dof_idx = self._robot_arm.get_dof_index("cartJoint")
# self._pole_dof_idx = self._robot_arm.get_dof_index("poleJoint")
# randomize all envs
indices = torch.arange(self._robot_arm.count,
dtype=torch.int64, device=self._device)
self.reset(indices)
def reset(self, env_ids=None):
if env_ids is None:
env_ids = torch.arange(self.num_envs, device=self._device)
num_resets = len(env_ids)
print("reset called" , env_ids)
# set the target position (the random placement below is commented out for now)
print("target count is ", self._target.count)
dof_target = torch.zeros(
(num_resets, 3), device=self._device)
dof_target[:, 0] = 0
dof_target[:, 1] = 0
dof_target[:, 2] = 0
self._target.set_world_poses(positions=dof_target)
# self._target.set_world_poses(positions=torch.FloatTensor(
# [[1.4, random.uniform(-1, 1), random.uniform(.7, 2.8)]]))
self._last_distance = 2000
self._steps = 0
# randomize DOF positions
dof_pos = torch.zeros(
(num_resets, self._robot_arm.num_dof), device=self._device)
dof_pos[:, self._axis_1] = 0
dof_pos[:, self._axis_2] = 0
dof_pos[:, self._axis_3] = 0
dof_pos[:, self._axis_4] = 0
dof_pos[:, self._axis_5] = 0
# dof_pos[:, self._cart_dof_idx] = 1.0 * (1.0 - 2.0 * torch.rand(num_resets, device=self._device))
# dof_pos[:, self._pole_dof_idx] = 0.125 * math.pi * (1.0 - 2.0 * torch.rand(num_resets, device=self._device))
# randomize DOF velocities
dof_vel = torch.zeros(
(num_resets, self._robot_arm.num_dof), device=self._device)
dof_vel[:, self._axis_1] = 0
dof_vel[:, self._axis_2] = 0
dof_vel[:, self._axis_3] = 0
dof_vel[:, self._axis_4] = 0
dof_vel[:, self._axis_5] = 0
# dof_vel[:, self._cart_dof_idx] = 0.5 * (1.0 - 2.0 * torch.rand(num_resets, device=self._device))
# dof_vel[:, self._pole_dof_idx] = 0.25 * math.pi * (1.0 - 2.0 * torch.rand(num_resets, device=self._device))
# apply resets
indices = env_ids.to(dtype=torch.int32)
self._robot_arm.set_joint_positions(dof_pos, indices=indices)
self._robot_arm.set_joint_velocities(dof_vel, indices=indices)
# bookkeeping
self.resets[env_ids] = 0
print("finished reset")
def pre_physics_step(self, actions) -> None:
print("pre_physics_step started")
self._currentframe = self._currentframe + 1
reset_env_ids = self.resets.nonzero(as_tuple=False).squeeze(-1)
if len(reset_env_ids) > 0:
self.reset(reset_env_ids)
# print("pre_physics_step" , actions[0])
# increase steps
self._steps += 1
actions = torch.tensor(actions)
forces = torch.zeros((self._robot_arm.count, self._robot_arm.num_dof),
dtype=torch.float32, device=self._device)
# forces[:, self._cart_dof_idx] = self._max_push_effort * actions[0]
forces[:, self._axis_1] = self._max_push_effort * actions[0]
forces[:, self._axis_2] = self._max_push_effort * actions[1]
forces[:, self._axis_3] = self._max_push_effort * actions[2]
forces[:, self._axis_4] = self._max_push_effort * actions[3]
forces[:, self._axis_5] = self._max_push_effort * actions[4]
indices = torch.arange(self._robot_arm.count,
dtype=torch.int32, device=self._device)
self._robot_arm.set_joint_efforts(forces, indices=indices)
print("pre_physics_step ended")
def get_observations(self):
print("get_observations started")
dof_pos = self._robot_arm.get_joint_positions()
# collect pole and cart joint positions and velocities for observation
# cart_pos = dof_pos[:, self._cart_dof_idx]
# cart_vel = dof_vel[:, self._cart_dof_idx]
# pole_pos = dof_pos[:, self._pole_dof_idx]
# pole_vel = dof_vel[:, self._pole_dof_idx]
# self.obs[:, 0] = cart_pos
# self.obs[:, 1] = cart_vel
# self.obs[:, 2] = pole_pos
# self.obs[:, 3] = pole_vel
axis_1_pos = dof_pos[:, self._axis_1]
axis_2_pos = dof_pos[:, self._axis_2]
axis_3_pos = dof_pos[:, self._axis_3]
axis_4_pos = dof_pos[:, self._axis_4]
axis_5_pos = dof_pos[:, self._axis_5]
# print(axis_2_pos)
targetPose = self._target.get_world_poses()
headPose = self._head.get_local_poses()
self.obs[:, 0] = axis_1_pos
self.obs[:, 1] = axis_2_pos
self.obs[:, 2] = axis_3_pos
self.obs[:, 3] = axis_4_pos
self.obs[:, 4] = axis_5_pos
# we have to assign the values
# Head
self.obs[:, 5] = headPose[0][0][0]
self.obs[:, 6] = headPose[0][0][1]
self.obs[:, 7] = headPose[0][0][2]
# Target
self.obs[:, 8] = targetPose[0][0][0]
self.obs[:, 9] = targetPose[0][0][1]
self.obs[:, 10] = targetPose[0][0][2]
print("get_observations finished" , self.obs)
return self.obs
def calculate_metrics(self) -> None:
print("calculate_metrics started")
head_pos = np.array([self.obs[:, 5], self.obs[:, 6], self.obs[:, 7]])
target_pos = np.array(
[self.obs[:, 8], self.obs[:, 9], self.obs[:, 10]])
# add negative reward
# dist_reward = -0.01
dist = np.linalg.norm(head_pos - target_pos)
# print("distance" , dist)
dist_reward = 1.0 / (1.0 + dist ** 2)
dist_reward *= dist_reward
if dist_reward < self._reset_dist:
dist_reward = dist_reward * 2
# print("distance is ", dist)
# reward is based on the distance between the head and the target
dist_reward = torch.FloatTensor([dist_reward])
# max_distance = 2
# reward = torch.where(torch.abs(torch.FloatTensor([dist])) > max_distance, torch.ones_like(reward) * -2.0, reward)
print("calculate_metrics finished")
return dist_reward.item()
def is_done(self) -> None:
print("is_done started")
head_pos = np.array([self.obs[:, 5], self.obs[:, 6], self.obs[:, 7]])
target_pos = np.array(
[self.obs[:, 8], self.obs[:, 9], self.obs[:, 10]])
dist = np.linalg.norm(head_pos - target_pos)
dist = torch.FloatTensor([dist])
if dist < self._reset_dist:
print("Reached Target")
# reset the env if the head has reached the target or the episode has exceeded 1200 steps
resets = torch.where(dist < self._reset_dist, 1, 0)
resets = torch.where(torch.FloatTensor(
[self._steps]) > 1200, 1, resets)
# resets = torch.where(torch.abs(pole_pos) > math.pi / 2, 1, resets)
self.resets = resets
print("is_done finished")
return resets.item()
And here is the error:
reset called tensor([0, 1, 2, 3, 4, 5, 6, 7])
target count is 8
finished reset
Using cuda:0 device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
reset called tensor([0, 1, 2, 3, 4, 5, 6, 7])
target count is 8
finished reset
get_observations started
get_observations finished tensor([[ 3.2012e-08, 9.7706e-06, 6.5598e-06, 2.8677e-08, 2.3636e-14,
2.0386e+00, 0.0000e+00, 1.9845e+00, 0.0000e+00, -4.1016e-08,
0.0000e+00],
[ 3.2012e-08, 9.7706e-06, 6.5598e-06, 2.8672e-08, 1.4273e-13,
2.0386e+00, 0.0000e+00, 1.9845e+00, 0.0000e+00, -4.1016e-08,
0.0000e+00],
[ 3.2016e-08, 9.7706e-06, 6.5597e-06, 2.8500e-08, -2.3094e-14,
2.0386e+00, 0.0000e+00, 1.9845e+00, 0.0000e+00, -4.1016e-08,
0.0000e+00],
[ 3.2016e-08, 9.7706e-06, 6.5597e-06, 2.8500e-08, -2.3094e-14,
2.0386e+00, 0.0000e+00, 1.9845e+00, 0.0000e+00, -4.1016e-08,
0.0000e+00],
[ 3.2012e-08, 9.7706e-06, 6.5599e-06, 2.8653e-08, 4.2498e-14,
2.0386e+00, 0.0000e+00, 1.9845e+00, 0.0000e+00, -4.1016e-08,
0.0000e+00],
[ 3.2012e-08, 9.7706e-06, 6.5599e-06, 2.8631e-08, 1.0732e-13,
2.0386e+00, 0.0000e+00, 1.9845e+00, 0.0000e+00, -4.1016e-08,
0.0000e+00],
[ 3.2012e-08, 9.7706e-06, 6.5598e-06, 2.8479e-08, 2.2640e-14,
2.0386e+00, 0.0000e+00, 1.9845e+00, 0.0000e+00, -4.1016e-08,
0.0000e+00],
[ 3.2012e-08, 9.7706e-06, 6.5598e-06, 2.8479e-08, 2.2640e-14,
2.0386e+00, 0.0000e+00, 1.9845e+00, 0.0000e+00, -4.1016e-08,
0.0000e+00]])
Traceback (most recent call last):
File "cartpole_train.py", line 40, in <module>
model.learn(total_timesteps=2000000)
File "C:\Omniverse\Library\isaac_sim-2022.1.1\kit\python\lib\site-packages\stable_baselines3\ppo\ppo.py", line 319, in learn
reset_num_timesteps=reset_num_timesteps,
File "C:\Omniverse\Library\isaac_sim-2022.1.1\kit\python\lib\site-packages\stable_baselines3\common\on_policy_algorithm.py", line 240, in learn
total_timesteps, eval_env, callback, eval_freq, n_eval_episodes, eval_log_path, reset_num_timesteps, tb_log_name
File "C:\Omniverse\Library\isaac_sim-2022.1.1\kit\python\lib\site-packages\stable_baselines3\common\base_class.py", line 446, in _setup_learn
self._last_obs = self.env.reset() # pytype: disable=annotation-type-mismatch
File "C:\Omniverse\Library\isaac_sim-2022.1.1\kit\python\lib\site-packages\stable_baselines3\common\vec_env\dummy_vec_env.py", line 64, in reset
self._save_obs(env_idx, obs)
File "C:\Omniverse\Library\isaac_sim-2022.1.1\kit\python\lib\site-packages\stable_baselines3\common\vec_env\dummy_vec_env.py", line 94, in _save_obs
self.buf_obs[key][env_idx] = obs
ValueError: could not broadcast input array from shape (8,11) into shape (11)
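If I am reading the traceback right, DummyVecEnv treats my whole task as a single environment, so it allocates an observation buffer of shape (1, 11) from my observation_space and expects reset() to return a single (11,) observation, while get_observations() returns one row per robot, i.e. shape (8, 11). A minimal NumPy snippet (shapes taken from the error, just to illustrate where the broadcast fails):

import numpy as np

# DummyVecEnv allocates one slot of shape (11,) per wrapped env; here there is only 1 wrapped env
buf_obs = np.zeros((1, 11))

# my task returns one observation row per robot, i.e. shape (8, 11)
obs_from_task = np.zeros((8, 11))

# this mirrors self.buf_obs[key][env_idx] = obs in _save_obs and fails the same way:
buf_obs[0] = obs_from_task  # ValueError: could not broadcast input array from shape (8,11) into shape (11)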