Hello, I’ve been spending the day trouble shooting a way to add CUDA code to a TX2 host app, programmed in C++ code and compiled with Cmakelist/catkin_make.
The compile process is completing without error, but the program segfaults upon execution.
I was wondering if anyone on this forum could specify whether my usage of NVCC is correct, or whether there is a bug in my setup.
Toy example:
cuda_kernel.cu:
#include "cuda_kernel.hpp"
__global__ void test_cuda(int *a, int b)
{
a += 1;
}
cuda_kernel.hpp:
#include <cuda.h>
#include <cuda_runtime.h>
__global__ void test_cuda(int *a, int b);
Calling function in master cpp (app_source.cpp):
int one = 1;
test_cuda(&one, 1);
CmakeList:
cmake_minimum_required(VERSION 2.8.3)
cmake_policy(SET CMP0012 NEW)
project(test)
find_package(CUDA REQUIRED)
set(CUDA_NVCC_FLAGS
${CUDA_NVCC_FLAGS};
-O3 -lineinfo
-gencode=arch=compute_62,code=sm_62)
set(CUDA_VERBOSE_BUILD ON CACHE BOOL "nvcc verbose" FORCE)
#set(CUDA_SEPERABLE_COMPILATION ON)
#set(BUILD_SHARED_LIBS ON)
#set(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE OFF)
set (LIB_TYPE STATIC)
#set (LIB_TYPE SHARED)
CUDA_ADD_LIBRARY(cuda_functions ${LIB_TYPE} src/cuda_kernel.cu)
include_directories(... ${CUDA_INCLUDE_DIRS} ${catkin_INCLUDE_DIRS} ${Boost_INCLUDE_DIRS})
link_directories(... ${CUDA_CUFFT_LIBRARIES} ${CUDA_LIBRARIES})
set(CMAKE_CXX_FLAGS "-std=c++0x ${CMAKE_CXX_FLAGS} -march=native -lpthread -O3 -fopenmp")
add_executable(appname app_source.cpp)
add_dependencies(appname cuda_functions)
target_link_libraries(appname
${catkin_LIBRARIES}
${CUDA_LIBRARIES}
${CUDA_CUFFT_LIBRARIES}
cuda_functions)
I have tried compiling the cuda code as both static and shared libraries, but both appear to segfault whenever I call the test function. Any suggestions on how the cuda compiles can be better linked?