I am creating a simpler FrankaCabinet task in IsaacGym in which I replace the cabinet with just a box and a cylinder to be reached. To do this, I deleted all the cabinet and props parts of the code and added custom box and cylinder assets.
When I try to run the training, I get this error:
RuntimeError: CUDA error: an illegal memory access was encountered
in the reset_idx() function:
self.progress_buf[env_ids] = 0
The error seems to be tied specifically to the env_ids variable: when I try printing it out on its own, I get the same error.
The problem is not:
- Incorrect device: both progress_buf and env_ids are stored on my GPU explicitly.
- Out of index: both tensors are of equal length, i.e. 4096.
- Bad values in env_ids: it does not contain any weird misplaced integers; it is a simple array of [1, 2, 3, ..., 4096].
- GPU memory: it should be enough, as I can run much more complex examples fairly fine on my RTX 3070.
Here’s my reset_idx function for reference.
def reset_idx(self, env_ids):
    """Reset the Franka arm in the selected environments to a randomized pose.

    The arm's DOF positions are set to the default pose plus uniform noise,
    clamped to the joint limits; velocities are zeroed; the new state and
    position targets are pushed to the simulation; and the per-env progress
    and reset buffers are cleared.

    Args:
        env_ids: 1-D tensor of environment indices to reset. It may live on
            any device and use any integer dtype; it is normalized below.
    """
    # Normalize the indices once, before any indexing: callers may pass a CPU
    # tensor (e.g. ``torch.arange(self.num_envs)``), while the buffers indexed
    # below are presumably on ``self.device`` -- TODO confirm.
    env_ids = env_ids.to(device=self.device, dtype=torch.long)
    if env_ids.numel() == 0:
        # Nothing to reset; avoid issuing zero-length indexed updates.
        return

    # reset franka: default pose + uniform noise in [-0.125, 0.125), clamped
    noise = 0.25 * (
        torch.rand((len(env_ids), self.num_franka_dofs), device=self.device) - 0.5
    )
    pos = tensor_clamp(
        self.franka_default_dof_pos.unsqueeze(0) + noise,
        self.franka_dof_lower_limits,
        self.franka_dof_upper_limits,
    )
    self.franka_dof_pos[env_ids, :] = pos
    self.franka_dof_vel[env_ids, :] = 0.0
    self.franka_dof_targets[env_ids, :self.num_franka_dofs] = pos

    # Column 0 of ``global_indices`` is used as the Franka actor index of each
    # env. The gym indexed setters expect a contiguous int32 index tensor.
    # NOTE(review): confirm ``global_indices`` was rebuilt for the new number
    # of actors per env after the cabinet/props were removed. A stale layout
    # would hand out-of-range actor indices to the calls below, which would
    # match the "[carb.gym.plugin] Gym cuda error" lines logged before the
    # PyTorch traceback.
    franka_actor_ids = (
        self.global_indices[env_ids, :1].flatten().to(dtype=torch.int32).contiguous()
    )
    num_actor_ids = len(franka_actor_ids)
    self.gym.set_dof_position_target_tensor_indexed(
        self.sim,
        gymtorch.unwrap_tensor(self.franka_dof_targets),
        gymtorch.unwrap_tensor(franka_actor_ids),
        num_actor_ids,
    )
    self.gym.set_dof_state_tensor_indexed(
        self.sim,
        gymtorch.unwrap_tensor(self.dof_state),
        gymtorch.unwrap_tensor(franka_actor_ids),
        num_actor_ids,
    )

    # CUDA errors are reported asynchronously, so a fault raised by the gym
    # calls above only shows up at the next synchronizing PyTorch operation.
    # The former debug ``print(env_ids[0])`` was such a sync point and has been
    # removed; it was not the cause of the error.
    buf_ids = env_ids.to(self.progress_buf.device)  # no-op when already there
    self.progress_buf[buf_ids] = 0
    self.reset_buf[buf_ids] = 0
Also attaching the complete error code for the same:
[Error] [carb.gym.plugin] Gym cuda error: an illegal memory access was encountered: ../../../source/plugins/carb/gym/impl/Gym/GymPhysX.cpp: 6340
[Error] [carb.gym.plugin] Gym cuda error: an illegal memory access was encountered: ../../../source/plugins/carb/gym/impl/Gym/GymPhysXCuda.cu: 1001
Error executing job with overrides: ['task=FrankaReach', 'headless=True', 'max_iterations=200']
/usr/lib/python3/dist-packages/apport/report.py:13: DeprecationWarning: the imp module is deprecated in favour of importlib; see the module's documentation for alternative uses
import fnmatch, glob, traceback, errno, sys, atexit, imp, stat
Traceback (most recent call last):
File "train.py", line 220, in <module>
launch_rlg_hydra()
File "/home/kudos/.local/lib/python3.8/site-packages/hydra/main.py", line 94, in decorated_main
_run_hydra(
File "/home/kudos/.local/lib/python3.8/site-packages/hydra/_internal/utils.py", line 394, in _run_hydra
_run_app(
File "/home/kudos/.local/lib/python3.8/site-packages/hydra/_internal/utils.py", line 457, in _run_app
run_and_report(
File "/home/kudos/.local/lib/python3.8/site-packages/hydra/_internal/utils.py", line 223, in run_and_report
raise ex
File "/home/kudos/.local/lib/python3.8/site-packages/hydra/_internal/utils.py", line 220, in run_and_report
return func()
File "/home/kudos/.local/lib/python3.8/site-packages/hydra/_internal/utils.py", line 458, in <lambda>
lambda: hydra.run(
File "/home/kudos/.local/lib/python3.8/site-packages/hydra/_internal/hydra.py", line 132, in run
_ = ret.return_value
File "/home/kudos/.local/lib/python3.8/site-packages/hydra/core/utils.py", line 260, in return_value
raise self._return_value
File "/home/kudos/.local/lib/python3.8/site-packages/hydra/core/utils.py", line 186, in run_job
ret.return_value = task_function(task_cfg)
File "train.py", line 211, in launch_rlg_hydra
runner.run({
File "/home/kudos/.local/lib/python3.8/site-packages/rl_games/torch_runner.py", line 133, in run
self.run_train(args)
File "/home/kudos/.local/lib/python3.8/site-packages/rl_games/torch_runner.py", line 113, in run_train
agent = self.algo_factory.create(self.algo_name, base_name='run', params=self.params)
File "/home/kudos/.local/lib/python3.8/site-packages/rl_games/common/object_factory.py", line 15, in create
return builder(**kwargs)
File "/home/kudos/.local/lib/python3.8/site-packages/rl_games/torch_runner.py", line 37, in <lambda>
self.algo_factory.register_builder('a2c_continuous', lambda **kwargs : a2c_continuous.A2CAgent(**kwargs))
File "/home/kudos/.local/lib/python3.8/site-packages/rl_games/algos_torch/a2c_continuous.py", line 15, in __init__
a2c_common.ContinuousA2CBase.__init__(self, base_name, params)
File "/home/kudos/.local/lib/python3.8/site-packages/rl_games/common/a2c_common.py", line 1143, in __init__
A2CBase.__init__(self, base_name, params)
File "/home/kudos/.local/lib/python3.8/site-packages/rl_games/common/a2c_common.py", line 129, in __init__
self.vec_env = vecenv.create_vec_env(self.env_name, self.num_actors, **self.env_config)
File "/home/kudos/.local/lib/python3.8/site-packages/rl_games/common/vecenv.py", line 224, in create_vec_env
return vecenv_config[vec_env_name](config_name, num_actors, **kwargs)
File "train.py", line 167, in <lambda>
vecenv.register('RLGPU', lambda config_name, num_actors, **kwargs: RLGPUEnv(config_name, num_actors, **kwargs))
File "/home/kudos/Documents/Honours/IsaacGymEnvs/isaacgymenvs/utils/rlgames_utils.py", line 246, in __init__
self.env = env_configurations.configurations[config_name]['env_creator'](**kwargs)
File "train.py", line 148, in <lambda>
'env_creator': lambda **kwargs: create_isaacgym_env(**kwargs),
File "train.py", line 122, in create_isaacgym_env
envs = isaacgymenvs.make(
File "/home/kudos/Documents/Honours/IsaacGymEnvs/isaacgymenvs/__init__.py", line 55, in make
return create_rlgpu_env()
File "/home/kudos/Documents/Honours/IsaacGymEnvs/isaacgymenvs/utils/rlgames_utils.py", line 115, in create_rlgpu_env
env = isaacgym_task_map[task_name](
File "/home/kudos/Documents/Honours/IsaacGymEnvs/isaacgymenvs/tasks/franka_reach.py", line 100, in __init__
self.reset_idx(torch.arange(self.num_envs))
File "/home/kudos/Documents/Honours/IsaacGymEnvs/isaacgymenvs/tasks/franka_reach.py", line 337, in reset_idx
env_ids = env_ids.to(self.progress_buf.device)
RuntimeError: CUDA error: an illegal memory access was encountered
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
Thanks in advance!
I have tried to print out the env_ids variable separately but it causes the same error.
Interestingly, when I print only the shape of the variable using env_ids.shape, it prints without any errors.
The error message is not very helpful, as it can have multiple causes and there isn’t much documentation about it.