Error: no kernel image is available for execution on the device

Hello all
I am trying to use a library written in CUDA in my own C++ program. When I call functions from this library, I get the following errors:

Error: no kernel image is available for execution on the device
Error: driver shutting down

After searching a lot on the internet, I realized this error is caused by compute capability settings in the library’s CMakeLists.txt that do not match my GPU. Hence I modified the library’s CMakeLists.txt as shown below. But nothing seemed to change after building with the changes: my code still gives me the same error. I have also attached the output of nvidia-smi to give more information about my system. Is the driver version 384.130 a problem for using CUDA 9.0? Should I update my driver? If so, please let me know the steps.

System specs -
intel i5 7th gen
8GB RAM
2GB GeForce 940MX GPU
installed CUDA 9.0

CMakeLists.txt

# Oldest CMake release this script declares support for.
cmake_minimum_required(VERSION 2.8.3)
project(ndt_gpu)


# PCL is mandatory. CUDA is looked up without REQUIRED, so configuration
# continues even when no toolkit is found.
find_package(PCL REQUIRED)
find_package(CUDA)
# autoware_build_flags is searched for only in this machine-specific install
# space; NO_DEFAULT_PATH disables every other search location.
# NOTE(review): presumably this package supplies AW_CHECK_CUDA() -- confirm.
find_package(autoware_build_flags PATHS /home/amey/NDT_GPU_Autoware/autoware-623c51c8dc29f2d9d66ed6cbd39bcfb1d45183b7-ros-src-computing-perception-localization-lib-ndt_gpu/ros/src/computing/perception/localization/lib/ndt_gpu/autoware_build_flags/cmake/catkin_generated/installspace NO_DEFAULT_PATH)

# Locate Eigen and normalise the result into the EIGEN3_* variable names.
find_package(Eigen3 QUIET)

if (EIGEN3_FOUND)
    # The Eigen3 package was found: mirror its singular include variable into
    # the plural name read further down in this file.
    set(EIGEN3_INCLUDE_DIRS ${EIGEN3_INCLUDE_DIR})
else ()
    # No Eigen3 package: fall back to cmake_modules and its Eigen lookup, then
    # copy the results over to the EIGEN3_ prefix.
    find_package(cmake_modules REQUIRED)
    find_package(Eigen REQUIRED)
    set(EIGEN3_INCLUDE_DIRS ${EIGEN_INCLUDE_DIRS})
    # Not strictly necessary, because Eigen is header-only.
    set(EIGEN3_LIBRARIES ${EIGEN_LIBRARIES})
    # Further EIGEN_ variables could be mapped to the EIGEN3_ prefix here.
endif ()

# AW_CHECK_CUDA is not defined in this file.
# NOTE(review): presumably it is provided by autoware_build_flags and decides
# the USE_CUDA value tested right below -- confirm against that package.
AW_CHECK_CUDA()

if (USE_CUDA)
    
    # Reset this directory's COMPILE_DEFINITIONS property to an empty value.
    # NOTE(review): presumably this keeps definitions added by the earlier
    # find_package calls off the CUDA compile lines -- confirm.
    set_directory_properties(PROPERTIES COMPILE_DEFINITIONS "")
    # Printed before the detection logic below runs, so this shows only a
    # CUDA_ARCH value that was already set when configuration reached this line.
    message("CUDA_ARCH is: " ${CUDA_ARCH})

    # Decide CUDA_ARCH. Native builds ask the capability checker executable;
    # cross builds cannot run it and must be given CUDA_ARCH by the caller.
    if (NOT CMAKE_CROSSCOMPILING)
        # Default helper location, unless the caller already named one.
        if (NOT DEFINED CUDA_CAPABILITY_VERSION_CHECKER)
            set(CUDA_CAPABILITY_VERSION_CHECKER
                    "${CATKIN_DEVEL_PREFIX}/lib/capability_version_checker")
        endif ()

        # Capture what the helper prints on stdout, minus trailing whitespace.
        execute_process(COMMAND ${CUDA_CAPABILITY_VERSION_CHECKER}
                OUTPUT_VARIABLE CUDA_CAPABILITY_VERSION
                OUTPUT_STRIP_TRAILING_WHITESPACE)

        # Anything other than a number of two or more digits without a leading
        # zero (including empty output) is replaced by the default of 52.
        if (NOT "${CUDA_CAPABILITY_VERSION}" MATCHES "^[1-9][0-9]+$")
            set(CUDA_CAPABILITY_VERSION "52")
        endif ()
        set(CUDA_ARCH "sm_${CUDA_CAPABILITY_VERSION}")
    elseif (NOT CUDA_ARCH)
        message(FATAL_ERROR "Please define the CUDA_ARCH CMake variable")
    endif ()
    
    # Extra nvcc options. Device code is generated for a fixed list of GPU
    # generations rather than for the single detected CUDA_ARCH:
    #   * compute capability 3.0 and 3.5: machine code only (code=sm_NN)
    #   * compute capability 5.0 and 5.2: machine code plus PTX (code=compute_NN)
    # To target other GPUs, add further pairs in the same form, for example
    # "-gencode arch=compute_61,code=sm_61".
    # Each "-gencode <spec>" pair is stored as two consecutive list elements.
    list(APPEND CUDA_NVCC_FLAGS
            -O3
            -gencode arch=compute_30,code=sm_30
            -gencode arch=compute_35,code=sm_35
            -gencode arch=compute_50,code=[sm_50,compute_50]
            -gencode arch=compute_52,code=[sm_52,compute_52]
            -std=c++11
            --ptxas-options=-v
            )
    # Report the architecture chosen above. The previous message expanded an
    # undefined variable (${arch}) and therefore always printed an empty value.
    # CUDA_ARCH is informational only: it does not influence the -gencode list.
    message("ARCH is: " ${CUDA_ARCH})
	
    # Add the CUDA toolkit's lib64 directory to the linker search path.
    # Prefer the toolkit root reported by find_package(CUDA), so the libraries
    # come from the same installation as the nvcc in use; keep the previous
    # fixed CUDA 9.0 location as a fallback when no root is known.
    if (CUDA_TOOLKIT_ROOT_DIR)
        link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64")
    else ()
        link_directories(/usr/local/cuda-9.0/lib64)
    endif ()

    # Description string; it is not read anywhere else in the visible part of
    # this file.
    set(SUBSYS_DESC "Point cloud ndt gpu library")
    # Configure-time report of the architecture, capability and nvcc flags
    # selected above.
    message("CUDA ARCHITECTURE: " ${CUDA_ARCH})
    message("CUDA CAPABILITY: " ${CUDA_CAPABILITY_VERSION})
    message("CUDA_NVCC_FLAGS: " ${CUDA_NVCC_FLAGS})
    # Declare what this package exports to dependent packages.
    # NOTE(review): no find_package(catkin) call is visible in this file, so
    # catkin_package() must be made available some other way -- confirm.
    catkin_package(
            DEPENDS PCL                                #Non-catkin CMake projects
            INCLUDE_DIRS include                        #The exported include paths
            LIBRARIES ndt_gpu                           #The exported libraries from the project
    )

    # Directory-scoped header search paths, added in the order listed.
    # `include` is this package's own header directory; the other entries come
    # from variables expected to be filled in by the find_package calls above.
    # NOTE(review): catkin_INCLUDE_DIRS is not set anywhere in the visible part
    # of this file -- confirm it is populated.
    include_directories(
            ${PCL_INCLUDE_DIRS}
            ${catkin_INCLUDE_DIRS}
            ${CUDA_INCLUDE_DIRS}
            include
            ${EIGEN3_INCLUDE_DIRS}
    )

    # CUDA translation units, given by base name; each expands to src/<name>.cu.
    set(srcs "")
    foreach (ndt_gpu_unit
            MatrixDevice
            MatrixHost
            NormalDistributionsTransform
            Registration
            VoxelGrid
            SymmetricEigenSolver)
        list(APPEND srcs src/${ndt_gpu_unit}.cu)
    endforeach ()

    # Public headers, given by base name; each expands to
    # include/ndt_gpu/<name>.h.
    set(incs "")
    foreach (ndt_gpu_header
            common
            debug
            Matrix
            MatrixDevice
            MatrixHost
            NormalDistributionsTransform
            Registration
            SymmetricEigenSolver
            VoxelGrid)
        list(APPEND incs include/ndt_gpu/${ndt_gpu_header}.h)
    endforeach ()

    # Create the ndt_gpu library target from the source and header lists above.
    # cuda_add_library is not defined in this file; it is expected to come from
    # the CUDA package located by find_package(CUDA).
    cuda_add_library(ndt_gpu ${srcs} ${incs})

    # Libraries linked into ndt_gpu. `cuda` is a bare library name that the
    # linker resolves through its search path; the remaining entries are
    # variables expected to be set by the CUDA and PCL packages.
    # NOTE(review): the keyword-less signature is kept on purpose; adding
    # PUBLIC/PRIVATE here may conflict with the link call that
    # cuda_add_library makes itself -- confirm before changing.
    target_link_libraries(ndt_gpu
	    cuda
            ${CUDA_LIBRARIES}
            ${CUDA_CUBLAS_LIBRARIES}
            ${CUDA_curand_LIBRARY}
            ${PCL_LIBRARIES}
            )

    # Install the public headers: only files matching *.h below
    # include/<project name>/ are copied to the catkin include destination.
    install(DIRECTORY include/${PROJECT_NAME}/
            DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION}
            FILES_MATCHING PATTERN "*.h"
            )


    # Install the built ndt_gpu target: archives and shared libraries go to the
    # catkin library destination, runtime artifacts to the binary destination.
    install(TARGETS ndt_gpu
            ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
            LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
            RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
            )
	
	
else ()
    message("ndt_gpu will not be built, CUDA was not found.")
endif ()

Result of nvidia-smi

Tue Oct 15 21:20:16 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 384.130                Driver Version: 384.130                   |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|===============================+======================+======================|
|   0  GeForce 940MX       Off  | 00000000:01:00.0 Off |                  N/A |
| N/A   40C    P8    N/A /  N/A |    296MiB /  2002MiB |     19%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|=============================================================================|
|    0      1635      G   /usr/lib/xorg/Xorg                           182MiB |
|    0      4444      G   compiz                                        43MiB |
|    0      5453      G   ...quest-channel-token=4287617565245160848    13MiB |
|    0      7128      G   ...quest-channel-token=5767221912348951501    52MiB |
|    0      9002      G   /usr/lib/firefox/firefox                       1MiB |
|    0     13185      G   /usr/lib/firefox/firefox                       1MiB |
+-----------------------------------------------------------------------------+

Thanks

Study the verbose output from the CMake build process (for example, by building with `make VERBOSE=1`) to find out which architectures are actually being passed to nvcc at compile time. There is nothing wrong with your driver for use with CUDA 9.0.