Hello, i encountered weird issue, .cu code compiled with C++/Qt code has worse accuracy than the same code compiled in terminal with nvcc compiler. The margin of error is around 2%, but in my calculation it is unacceptable. Im running this code on ubuntu 18.04, GTX 1050 Ti mobile with installed cuda toolkit 9.2. I assume it must be something with .pro file configuration. Please let me know if I made any serious mistake.
.pro file:
# CUDA settings <-- may change depending on your system
CUDA_SOURCES += $$OTHER_FILES
CUDA_SDK = "/usr/local/cuda-9.2/" # Path to cuda SDK install
CUDA_DIR = "/usr/local/cuda-9.2/" # Path to cuda toolkit install
# DO NOT EDIT BEYOND THIS UNLESS YOU KNOW WHAT YOU ARE DOING....
SYSTEM_NAME = x64 # Depending on your system either 'Win32', 'x64', or 'Win64'
SYSTEM_TYPE = 64 # '32' or '64', depending on your system
CUDA_ARCH = sm_60 # Type of CUDA architecture, for example 'compute_10', 'compute_11', 'sm_10'
NVCC_OPTIONS = --use_fast_math -O2
# include paths
INCLUDEPATH += $$CUDA_DIR/include
CUDA_OBJECTS_DIR = ./
# Add the necessary libraries
CUDA_LIBS = -lcuda -lcudart
# The following makes sure all path names (which often include spaces) are put between quotation marks
CUDA_INC = $$join(INCLUDEPATH,'" -I"','-I"','"')
#LIBS += $$join(CUDA_LIBS,'.so ', '', '.so')
LIBS += -L$$CUDA_DIR/lib64 -lcuda -lcudart
# Configuration of the Cuda compiler
CONFIG(debug, debug|release) {
# Debug mode
cuda_d.input = CUDA_SOURCES
cuda_d.output = $$CUDA_OBJECTS_DIR/${QMAKE_FILE_BASE}_cuda.o
cuda_d.commands = $$CUDA_DIR/bin/nvcc -D_DEBUG $$NVCC_OPTIONS $$CUDA_INC $$NVCC_LIBS --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH -c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
cuda_d.dependency_type = TYPE_C
QMAKE_EXTRA_COMPILERS += cuda_d
}
else {
# Release mode
cuda.input = CUDA_SOURCES
cuda.output = $$CUDA_OBJECTS_DIR/${QMAKE_FILE_BASE}_cuda.o
cuda.commands = $$CUDA_DIR/bin/nvcc $$NVCC_OPTIONS $$CUDA_INC $$NVCC_LIBS --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH -c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
cuda.dependency_type = TYPE_C
QMAKE_EXTRA_COMPILERS += cuda
}