Error: no kernel image is available for execution on the device

amey1695 · October 16, 2019, 1:39am

Hello all
I am trying to use a library written in CUDA in my own C++ program. As I call the functions of this library, I get the following errors -

Error: no kernel image is available for execution on the device
Error: driver shutting down

After searching a lot on the internet, I realized this error is due to mismatched compute capability settings in the library’s CMakeLists.txt. Hence I modified the library’s CMakeLists.txt as shown below, but nothing seemed to change after rebuilding with the changes; my code still gives me the same error. I have also attached the output of nvidia-smi to give more information about my system. Is the driver version 384.130 a problem for using CUDA 9.0? Should I update my driver? If so, please let me know the steps.

System specs -
intel i5 7th gen
8GB RAM
2GB GeForce 940MX GPU
installed CUDA 9.0

CMakeLists.txt

# Minimum CMake version and project name for the ndt_gpu library.
cmake_minimum_required(VERSION 2.8.3)
project(ndt_gpu)

# PCL is mandatory; CUDA is looked up without REQUIRED, so configuration
# continues when it is missing.
find_package(PCL REQUIRED)
find_package(CUDA)

# autoware_build_flags is searched for only in the explicit install-space
# directory below (NO_DEFAULT_PATH disables every default search location).
# NOTE(review): presumably this package provides the AW_CHECK_CUDA() command
# used later in this file - confirm.
find_package(
        autoware_build_flags
        PATHS /home/amey/NDT_GPU_Autoware/autoware-623c51c8dc29f2d9d66ed6cbd39bcfb1d45183b7-ros-src-computing-perception-localization-lib-ndt_gpu/ros/src/computing/perception/localization/lib/ndt_gpu/autoware_build_flags/cmake/catkin_generated/installspace
        NO_DEFAULT_PATH
)

# Locate Eigen: try the Eigen3 package quietly first, and only if that fails
# fall back to the Eigen package reached through cmake_modules.
find_package(Eigen3 QUIET)

if (EIGEN3_FOUND)
    # Expose the single include directory under the plural name used by
    # include_directories() further down.
    set(EIGEN3_INCLUDE_DIRS ${EIGEN3_INCLUDE_DIR})
else ()
    # Fallback to cmake_modules; both lookups are REQUIRED, so configuration
    # stops here if Eigen cannot be found this way either.
    find_package(cmake_modules REQUIRED)
    find_package(Eigen REQUIRED)
    # Map the EIGEN_ results onto the EIGEN3_ prefix used by the rest of the file.
    set(EIGEN3_INCLUDE_DIRS ${EIGEN_INCLUDE_DIRS})
    set(EIGEN3_LIBRARIES ${EIGEN_LIBRARIES})  # Not strictly necessary as Eigen is header only
    # Possibly map additional variables to the EIGEN3_ prefix.
endif ()

# Decide whether the CUDA build of ndt_gpu is possible.
# NOTE(review): AW_CHECK_CUDA is not defined in this file; presumably it comes
# from the autoware_build_flags package and sets USE_CUDA - confirm.
AW_CHECK_CUDA()

if (USE_CUDA)

    # Clear the directory-level compile definitions before any target is created.
    # NOTE(review): presumably this keeps definitions exported by the packages
    # found above from being forwarded to nvcc - confirm.
    set_directory_properties(PROPERTIES COMPILE_DEFINITIONS "")
    message("CUDA_ARCH is: " ${CUDA_ARCH})

    # Determine CUDA_ARCH / CUDA_CAPABILITY_VERSION.
    # NOTE: these two values are only reported in the messages below. The nvcc
    # flags use the fixed -gencode list further down and do not reference them.
    if(CMAKE_CROSSCOMPILING)
        # The target GPU cannot be probed when cross-compiling, so the caller
        # has to supply the architecture.
        if(NOT CUDA_ARCH)
            message(FATAL_ERROR "Please define the CUDA_ARCH CMake variable")
        endif()
    else()
        # Default location of the helper executable that reports the GPU's
        # compute capability; callers may predefine the variable to override it.
        if (NOT DEFINED CUDA_CAPABILITY_VERSION_CHECKER)
            set(CUDA_CAPABILITY_VERSION_CHECKER
                    "${CATKIN_DEVEL_PREFIX}/lib/capability_version_checker")
        endif ()

        execute_process(COMMAND ${CUDA_CAPABILITY_VERSION_CHECKER}
                OUTPUT_VARIABLE CUDA_CAPABILITY_VERSION
                OUTPUT_STRIP_TRAILING_WHITESPACE)

        # Accept the output only when it is a number of two or more digits
        # without a leading zero (e.g. "50"); otherwise fall back to 52.
        if ("${CUDA_CAPABILITY_VERSION}" MATCHES "^[1-9][0-9]+$")
            set(CUDA_ARCH "sm_${CUDA_CAPABILITY_VERSION}")
        else ()
            set(CUDA_ARCH "sm_52")
            set(CUDA_CAPABILITY_VERSION "52")
        endif ()
    endif()

    # nvcc flags: optimisation level, one -gencode entry per targeted
    # architecture (the 5.0 and 5.2 entries list both an sm_XX and a compute_XX
    # code target), C++11, and verbose ptxas output.
    # To target further GPUs, add more -gencode entries here; the installed
    # CUDA toolkit must support every architecture that is listed.
    list(APPEND CUDA_NVCC_FLAGS
            -O3
            -gencode arch=compute_30,code=sm_30
            -gencode arch=compute_35,code=sm_35
            -gencode arch=compute_50,code=[sm_50,compute_50]
            -gencode arch=compute_52,code=[sm_52,compute_52]
            -std=c++11
            --ptxas-options=-v
    )
    # Report the architecture determined above (the original printed the
    # never-defined variable `arch`, which always expanded to nothing).
    message("ARCH is: " ${CUDA_ARCH})

    # NOTE(review): hard-coded toolkit location; it only matches machines with
    # CUDA 9.0 installed under /usr/local - confirm before reusing elsewhere.
    link_directories(/usr/local/cuda-9.0/lib64)

    set(SUBSYS_DESC "Point cloud ndt gpu library")
    message("CUDA ARCHITECTURE: " ${CUDA_ARCH})
    message("CUDA CAPABILITY: " ${CUDA_CAPABILITY_VERSION})
    # Quoted so the flag list is printed with its ';' separators instead of
    # being concatenated into a single unreadable token.
    message("CUDA_NVCC_FLAGS: ${CUDA_NVCC_FLAGS}")
    catkin_package(
            DEPENDS PCL                                #Non-catkin CMake projects
            INCLUDE_DIRS include                        #The exported include paths
            LIBRARIES ndt_gpu                           #The exported libraries from the project
    )

    include_directories(
            ${PCL_INCLUDE_DIRS}
            ${catkin_INCLUDE_DIRS}
            ${CUDA_INCLUDE_DIRS}
            include
            ${EIGEN3_INCLUDE_DIRS}
    )

    # CUDA translation units that make up the library.
    set(srcs
            src/MatrixDevice.cu
            src/MatrixHost.cu
            src/NormalDistributionsTransform.cu
            src/Registration.cu
            src/VoxelGrid.cu
            src/SymmetricEigenSolver.cu
            )

    # Public headers, passed to cuda_add_library() together with the sources.
    set(incs
            include/ndt_gpu/common.h
            include/ndt_gpu/debug.h
            include/ndt_gpu/Matrix.h
            include/ndt_gpu/MatrixDevice.h
            include/ndt_gpu/MatrixHost.h
            include/ndt_gpu/NormalDistributionsTransform.h
            include/ndt_gpu/Registration.h
            include/ndt_gpu/SymmetricEigenSolver.h
            include/ndt_gpu/VoxelGrid.h
            )

    cuda_add_library(ndt_gpu ${srcs} ${incs})

    # The keyword-less signature is kept on purpose.
    # NOTE(review): cuda_add_library() appears to link with the plain signature
    # as well, and CMake forbids mixing the two on one target - confirm before
    # adding PUBLIC/PRIVATE here.
    target_link_libraries(ndt_gpu
            cuda
            ${CUDA_LIBRARIES}
            ${CUDA_CUBLAS_LIBRARIES}
            ${CUDA_curand_LIBRARY}
            ${PCL_LIBRARIES}
            )

    # Install the public headers (*.h only) into the catkin include destination.
    install(DIRECTORY include/${PROJECT_NAME}/
            DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION}
            FILES_MATCHING PATTERN "*.h"
            )

    # Install the built library into the catkin lib/bin destinations.
    install(TARGETS ndt_gpu
            ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
            LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
            RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
            )

else ()
    message("ndt_gpu will not be built, CUDA was not found.")
endif ()

Result of nvidia-smi

Tue Oct 15 21:20:16 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 384.130                Driver Version: 384.130                   |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|===============================+======================+======================|
|   0  GeForce 940MX       Off  | 00000000:01:00.0 Off |                  N/A |
| N/A   40C    P8    N/A /  N/A |    296MiB /  2002MiB |     19%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|=============================================================================|
|    0      1635      G   /usr/lib/xorg/Xorg                           182MiB |
|    0      4444      G   compiz                                        43MiB |
|    0      5453      G   ...quest-channel-token=4287617565245160848    13MiB |
|    0      7128      G   ...quest-channel-token=5767221912348951501    52MiB |
|    0      9002      G   /usr/lib/firefox/firefox                       1MiB |
|    0     13185      G   /usr/lib/firefox/firefox                       1MiB |
+-----------------------------------------------------------------------------+

Thanks

Robert_Crovella · October 16, 2019, 1:52am

Study the verbose output from the cmake build process to find out what architectures are being passed to nvcc at compile time. There is nothing wrong with your driver for use with CUDA 9.0.

Topic		Replies	Views
CUDA error: no kernel image is available for execution on the device Error from operator: output CUDA Setup and Installation	3	18207	January 25, 2019
RuntimeError: CUDA error: no kernel image is available for execution on the device Linux	29	79982	February 22, 2021
Need Help to get CUDA running with c++ CUDA Setup and Installation	1	523	June 25, 2019
OpenCV Cuda: No Kernel Image is Available Jetson Xavier NX opencv , cuda	8	5518	October 18, 2021
RuntimeError: CUDA error: no kernel image is available for execution on the device TensorRT cuda , cudnn	2	648	October 26, 2023
CUDA error: no kernel image is available for execution on the device CUDA Setup and Installation	0	670	September 17, 2021
addKernel launch failed: no kernel image is available for execution on the device CUDA Setup and Installation	3	535	March 7, 2024
CUDA 11.2 w/ GTX 770? CUDA Setup and Installation	3	1938	December 25, 2020
How should I use correctly the sm_XX and compute_XX? CUDA Programming and Performance	3	5028	July 14, 2022
Error: CUDA driver version is insufficient for CUDA runtime version CUDA Setup and Installation	11	16836	July 19, 2018

Error: no kernel image is available for execution on the device

Related topics