Getting an error when trying to use PyTorch 1.12.1 with JetPack 5.1.1

When I run a PyTorch 1.12 model with CUDA 11.4 on JetPack 5.1.1, I get an error. This code:
torch::Device device_ = torch::kCUDA;
if (device_.is_cuda()) {
  c10::cuda::getCurrentCUDAStream().synchronize();
}
runs fine, but
const auto inputs =
    torch::from_blob(working_img.data,
                     {working_img.rows, working_img.cols, 3}, torch::kUInt8)
        .to(device_, torch::kFloat)
        .permute({2, 0, 1})
        .contiguous();
throws this error:
[ERROR] [1696585409.990943029]: Segmentation error: CUDA error: no kernel image is available for execution on the device
Exception raised from launch_legacy_kernel at /pytorch/aten/src/ATen/native/cuda/CUDALoops.cuh:155 (most recent call first):
frame #0: c10::Error::Error(c10::SourceLocation, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >) + 0xa0 (0xffff866e1e28 in /usr/local/lib/libc10.so)
frame #1: void at::native::gpu_kernel_impl<__nv_hdl_wrapper_t<false, true, __nv_dl_tag<void (*)(at::TensorIteratorBase&), &at::native::(anonymous namespace)::direct_copy_kernel_cuda, 7u>, float (float)> >(at::TensorIteratorBase&, __nv_hdl_wrapper_t<false, true, __nv_dl_tag<void (*)(at::TensorIteratorBase&), &at::native::(anonymous namespace)::direct_copy_kernel_cuda, 7u>, float (float)> const&) + 0xce8 (0xffff87bf0160 in /usr/local/lib/libtorch_cuda.so)
frame #2: void at::native::gpu_kernel<__nv_hdl_wrapper_t<false, true, __nv_dl_tag<void (*)(at::TensorIteratorBase&), &at::native::(anonymous namespace)::direct_copy_kernel_cuda, 7u>, float (float)> >(at::TensorIteratorBase&, __nv_hdl_wrapper_t<false, true, __nv_dl_tag<void (*)(at::TensorIteratorBase&), &at::native::(anonymous namespace)::direct_copy_kernel_cuda, 7u>, float (float)> const&) + 0x350 (0xffff87bf0840 in /usr/local/lib/libtorch_cuda.so)
frame #3: at::native::(anonymous namespace)::direct_copy_kernel_cuda(at::TensorIteratorBase&) + 0x318 (0xffff87bccdf8 in /usr/local/lib/libtorch_cuda.so)
frame #4: at::native::copy_device_to_device(at::TensorIterator&, bool) + 0x82c (0xffff87bcd8a4 in /usr/local/lib/libtorch_cuda.so)
frame #5: + 0x14a2b08 (0xffff87bcfb08 in /usr/local/lib/libtorch_cuda.so)
frame #6: + 0xc04af0 (0xffff8e677af0 in /usr/local/lib/libtorch_cpu.so)
frame #7: at::native::copy_(at::Tensor&, at::Tensor const&, bool) + 0x74 (0xffff8e678464 in /usr/local/lib/libtorch_cpu.so)
frame #8: at::_ops::copy_::call(at::Tensor&, at::Tensor const&, bool) + 0x154 (0xffff8efd250c in /usr/local/lib/libtorch_cpu.so)
frame #9: at::native::clone(at::Tensor const&, c10::optional<c10::MemoryFormat>) + 0x178 (0xffff8e90afd0 in /usr/local/lib/libtorch_cpu.so)
frame #10: + 0x1874918 (0xffff8f2e7918 in /usr/local/lib/libtorch_cpu.so)
frame #11: at::_ops::clone::redispatch(c10::DispatchKeySet, at::Tensor const&, c10::optional<c10::MemoryFormat>) + 0x9c (0xffff8ed1e4cc in /usr/local/lib/libtorch_cpu.so)
frame #12: + 0x2787574 (0xffff901fa574 in /usr/local/lib/libtorch_cpu.so)
frame #13: + 0x2787b14 (0xffff901fab14 in /usr/local/lib/libtorch_cpu.so)
frame #14: at::_ops::clone::call(at::Tensor const&, c10::optional<c10::MemoryFormat>) + 0x148 (0xffff8ed8b578 in /usr/local/lib/libtorch_cpu.so)
frame #15: at::native::contiguous(at::Tensor const&, c10::MemoryFormat) + 0x80 (0xffff8e911158 in /usr/local/lib/libtorch_cpu.so)
frame #16: + 0x19a2e48 (0xffff8f415e48 in /usr/local/lib/libtorch_cpu.so)
frame #17: at::_ops::contiguous::call(at::Tensor const&, c10::MemoryFormat) + 0x148 (0xffff8f0b68d8 in /usr/local/lib/libtorch_cpu.so)
frame #18: at::TensorBase::__dispatch_contiguous(c10::MemoryFormat) const + 0x3c (0xffff8e4ece7c in /usr/local/lib/libtorch_cpu.so)
frame #19: at::TensorBase::contiguous(c10::MemoryFormat) const + 0x64 (0xffff935f15c0 in /media/nvidia/external_drive/catkin_ws/devel/lib/libfox_rt_path_segmentation.so)
frame #20: at::Tensor::contiguous(c10::MemoryFormat) const + 0x40 (0xffff935f199c in /media/nvidia/external_drive/catkin_ws/devel/lib/libfox_rt_path_segmentation.so)
frame #21: SegmentationDriver::segmentImage(cv::Mat const&, std::vector<ObjectSegmentationProperties, std::allocator<ObjectSegmentationProperties> > const&) + 0x268 (0xffff935ebc10 in /media/nvidia/external_drive/catkin_ws/devel/lib/libfox_rt_path_segmentation.so)
frame #22: + 0x2f228 (0xaaaab5ddf228 in /media/nvidia/external_drive/catkin_ws/devel/lib/fox_rt_path_segmentation/fox_rt_path_segmentation_node)
frame #23: + 0x459b4 (0xaaaab5df59b4 in /media/nvidia/external_drive/catkin_ws/devel/lib/fox_rt_path_segmentation/fox_rt_path_segmentation_node)
frame #24: + 0x4461c (0xaaaab5df461c in /media/nvidia/external_drive/catkin_ws/devel/lib/fox_rt_path_segmentation/fox_rt_path_segmentation_node)
frame #25: + 0x433e4 (0xaaaab5df33e4 in /media/nvidia/external_drive/catkin_ws/devel/lib/fox_rt_path_segmentation/fox_rt_path_segmentation_node)
frame #26: + 0x41320 (0xaaaab5df1320 in /media/nvidia/external_drive/catkin_ws/devel/lib/fox_rt_path_segmentation/fox_rt_path_segmentation_node)
frame #27: + 0x45df0 (0xaaaab5df5df0 in /media/nvidia/external_drive/catkin_ws/devel/lib/fox_rt_path_segmentation/fox_rt_path_segmentation_node)
frame #28: + 0x44948 (0xaaaab5df4948 in /media/nvidia/external_drive/catkin_ws/devel/lib/fox_rt_path_segmentation/fox_rt_path_segmentation_node)
frame #29: + 0x494e4 (0xaaaab5df94e4 in /media/nvidia/external_drive/catkin_ws/devel/lib/fox_rt_path_segmentation/fox_rt_path_segmentation_node)
frame #30: + 0x480e4 (0xaaaab5df80e4 in /media/nvidia/external_drive/catkin_ws/devel/lib/fox_rt_path_segmentation/fox_rt_path_segmentation_node)
frame #31: ros::SubscriptionQueue::call() + 0x9a4 (0xffff934f6e6c in /opt/ros/noetic/lib/libroscpp.so)
frame #32: ros::CallbackQueue::callOneCB(ros::CallbackQueue::TLS*) + 0x274 (0xffff934a2a6c in /opt/ros/noetic/lib/libroscpp.so)
frame #33: ros::CallbackQueue::callOne(ros::WallDuration) + 0x1d8 (0xffff934a3780 in /opt/ros/noetic/lib/libroscpp.so)
frame #34: ros::AsyncSpinnerImpl::threadFunc() + 0x64 (0xffff934f98bc in /opt/ros/noetic/lib/libroscpp.so)
frame #35: + 0x10624 (0xffff85e06624 in /lib/aarch64-linux-gnu/libboost_thread.so.1.71.0)
frame #36: + 0x7624 (0xffff93342624 in /lib/aarch64-linux-gnu/libpthread.so.0)
frame #37: + 0xd149c (0xffff863fd49c in /lib/aarch64-linux-gnu/libc.so.6)

@DaneLLL I also want to know: the folder /usr/local/cuda-11.4/lib64/stubs has been renamed to /usr/local/cuda-11.4/lib64/stubs_ in JetPack 5.1. Could this be causing the error?

Hi,

CUDA error: no kernel image is available for execution on the device

This indicates that your PyTorch build does not include kernels for the Orin GPU architecture, which is sm_87.
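This also explains why the synchronize() call in your first snippet succeeds while the .to() copy fails: synchronizing a stream launches no PyTorch kernels, whereas the dtype-converting copy is the first call that runs a kernel compiled into libtorch_cuda (direct_copy_kernel_cuda in your backtrace). As a quick check, here is a minimal standalone sketch (not from this thread) that prints the compute capability libtorch sees; on Orin it should report sm_87:

#include <torch/torch.h>
#include <ATen/cuda/CUDAContext.h>
#include <iostream>

int main() {
  if (!torch::cuda::is_available()) {
    std::cout << "CUDA is not available to libtorch\n";
    return 1;
  }
  // Query the properties of the current CUDA device through libtorch.
  const cudaDeviceProp* prop = at::cuda::getCurrentDeviceProperties();
  std::cout << prop->name << " reports sm_"
            << prop->major << prop->minor << "\n";
  return 0;
}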

Did you build it from source?
If yes, please make sure you exported the correct GPU architecture (export TORCH_CUDA_ARCH_LIST="8.7") before building.
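For reference, the build environment might look like the following before invoking the build; the exact flag set is an assumption based on common Jetson source-build instructions, not something confirmed in this thread:

export TORCH_CUDA_ARCH_LIST="8.7"   # compile kernels for Orin (sm_87)
export USE_NCCL=0                   # NCCL is not supported on Jetson
export USE_DISTRIBUTED=0
export USE_QNNPACK=0
export USE_PYTORCH_QNNPACK=0
python3 setup.py bdist_wheel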

Or you can install our prebuilt wheel with the instructions shared in the document below:

Thanks.


Yes, I built it from source, and exporting TORCH_CUDA_ARCH_LIST="8.7" solved it. Thanks!

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.