Hello,
I have written a program with the Nsight Eclipse Edition, and if I compile and run it in RELEASE mode, it is very fast, as expected.
Now I want to write my own CMakeLists.txt file to compile all the files and detect all the settings needed for compilation automatically. The CMakeLists.txt file works (in 64-bit mode), but the resulting executable runs extremely slowly compared to the one built by Eclipse.
I use:
cmake -DCMAKE_BUILD_TYPE=Release ..
make
to build everything.
Now I wanted to try setting:
set(CUDA_64_BIT_DEVICE_CODE OFF)
add_definitions(-m32)
set (CMAKE_C_FLAGS -m32)
set (CMAKE_CXX_FLAGS -m32)
(i.e. CUDA_64_BIT_DEVICE_CODE set to OFF) in the CMakeLists.txt file (before calling find_package(CUDA)), but this gives me a linking error that I do not know how to fix:
<b>Linking CXX executable my_project</b>
/usr/local/cuda/lib64/libcudart.so: could not read symbols: File in wrong format
collect2: error: ld returned 1 exit status
I think CMake's own FindCUDA.cmake mixes 32-bit and 64-bit somewhere if I set CUDA_64_BIT_DEVICE_CODE to OFF, and the link step then fails.
Please, can anyone help me? I have been trying to fix the performance problem for two days and have not found any useful help on the web.
Thanks for all answers!
GeForce 750Ti
[Debian stable 64-bit, CUDA 5.5, CMAKE 2.8.12, CUSP 1.7 and Thrust 0.4]
My actual CMakeLists file:
cmake_minimum_required(VERSION 2.8.9)
project(my_project)

# Build for the host's native (64-bit) ABI.
# Do NOT force -m32 / CUDA_64_BIT_DEVICE_CODE OFF here: the link step still
# picks up the 64-bit CUDA runtime (/usr/local/cuda/lib64/libcudart.so), so
# 32-bit objects fail to link with "could not read symbols: File in wrong
# format". Overwriting CMAKE_C_FLAGS / CMAKE_CXX_FLAGS would also discard any
# flags supplied by the user or the toolchain.

# Third-party dependencies; configuration aborts if either one is missing.
find_package(OpenCV REQUIRED)
find_package(CUDA REQUIRED)
###############################################################################
# Find THRUST
# (from here: https://groups.google.com/forum/#!topic/thrust-users/UX7Gm4piBiU)
#
# Results:
#   THRUST_INCLUDE_DIR - directory that contains thrust/version.h
#   THRUST_VERSION     - "<major>.<minor>.<subminor>" decoded from the
#                        THRUST_VERSION macro in thrust/version.h
#   THRUST_FOUND       - set by find_package_handle_standard_args
find_path(THRUST_INCLUDE_DIR
  NAMES thrust/version.h
  HINTS ${CUDA_INCLUDE_DIRS} /usr/include/cuda /usr/local/include
  DOC "Thrust headers"
)

# Only touch version.h when the headers were actually located; otherwise
# file(STRINGS) would be pointed at "THRUST_INCLUDE_DIR-NOTFOUND/...".
if(THRUST_INCLUDE_DIR)
  include_directories(${THRUST_INCLUDE_DIR})

  # Pull the single "#define THRUST_VERSION <number>" line out of the header.
  file(STRINGS "${THRUST_INCLUDE_DIR}/thrust/version.h" thrust_version_line
    REGEX "#define THRUST_VERSION[ \t]+([0-9x]+)"
  )
  string(REGEX REPLACE "#define THRUST_VERSION[ \t]+" ""
    thrust_version_number "${thrust_version_line}")
  string(STRIP "${thrust_version_number}" thrust_version_number)

  # THRUST_VERSION encodes major * 100000 + minor * 100 + subminor. Decode it
  # arithmetically so multi-digit components are handled as well.
  if(thrust_version_number MATCHES "^[0-9]+$")
    math(EXPR thrust_major "${thrust_version_number} / 100000")
    math(EXPR thrust_minor "(${thrust_version_number} / 100) % 1000")
    math(EXPR thrust_subminor "${thrust_version_number} % 100")
    set(THRUST_VERSION "${thrust_major}.${thrust_minor}.${thrust_subminor}")
  endif()
endif()

# Check for required components; this sets THRUST_FOUND from the variables
# listed below instead of assuming success up front.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(Thrust
  REQUIRED_VARS
    THRUST_INCLUDE_DIR
  VERSION_VAR
    THRUST_VERSION
)

# The project cannot be built without the Thrust headers, so stop with a clear
# message rather than failing later on a missing include.
if(NOT THRUST_FOUND)
  message(FATAL_ERROR "Thrust headers (thrust/version.h) were not found")
endif()
######################################################################################
# Optimisation level passed to nvcc. An explicit architecture can be appended
# here as well, e.g. "-gencode arch=compute_20,code=sm_20", chosen to match the
# GPU the program will run on.
list(APPEND CUDA_NVCC_FLAGS -O3)

# Collect the project's headers, C++ sources and CUDA sources from this
# directory. NOTE: globs are evaluated at configure time only, so re-run cmake
# after adding or removing files.
file(GLOB HEADER_LIST "${CMAKE_CURRENT_SOURCE_DIR}/*.h")
file(GLOB SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp")
file(GLOB CU_LIST "${CMAKE_CURRENT_SOURCE_DIR}/*.cu")

# Directory-scoped include path for the project's own headers.
include_directories(${PROJECT_SOURCE_DIR})

# Only real files belong in the source list. THRUST_INCLUDE_DIR is a directory
# and is made available through include_directories(), so it is not passed here.
cuda_add_executable(${PROJECT_NAME} ${HEADER_LIST} ${SRC_LIST} ${CU_LIST})

# Keyword-less signature on purpose: FindCUDA's cuda_add_executable is
# understood to link the CUDA libraries with the plain signature, and keyword
# and plain signatures must not be mixed on one target (TODO confirm against
# the FindCUDA version in use).
target_link_libraries(${PROJECT_NAME}
  ${CUDA_LIBRARIES}
  ${CUDA_CUBLAS_LIBRARIES}
  ${OpenCV_LIBS}
)