Linking Error Help

Hello,

I’m trying to compile some CUDA code (NVIDIA driver 440; CUDA-10.2; gcc 7.5.0; Ubuntu 18.04 LTS), and I’m getting a linking error during one step of the build. I’m a little unsure how to fix it.

Here’s the error:

nvcc -lstdc++ -arch=sm_30 -c main_cu.cu
bison -ofasta_y.cc -pfasta -d fasta.y
bash -c 'if [[ -f fasta_y.h ]]; then mv fasta_y.h fasta_y.hh; fi'
g++ -lstdc++ -I/usr/local/cuda-10.2/include -L/usr/local/cuda-10.2/lib64 -lcudart -fopenmp -g -O2 -std=c++11 -c fasta_y.cc
bison -opairs_y.cc -ppairs -d pairs.y
bash -c 'if [[ -f pairs_y.h ]]; then mv pairs_y.h pairs_y.hh; fi'
g++ -lstdc++ -I/usr/local/cuda-10.2/include -L/usr/local/cuda-10.2/lib64 -lcudart -fopenmp -g -O2 -std=c++11 -c pairs_y.cc
bison -osm_y.cc -psm -d sm.y
bash -c 'if [[ -f sm_y.h ]]; then mv sm_y.h sm_y.hh; fi'
g++ -lstdc++ -I/usr/local/cuda-10.2/include -L/usr/local/cuda-10.2/lib64 -lcudart -fopenmp -g -O2 -std=c++11 -c sm_y.cc
flex -ofasta_l.cc -Pfasta fasta.l
g++ -lstdc++ -I/usr/local/cuda-10.2/include -L/usr/local/cuda-10.2/lib64 -lcudart -fopenmp -g -O2 -std=c++11 -c fasta_l.cc
flex -osm_l.cc -Psm sm.l
g++ -lstdc++ -I/usr/local/cuda-10.2/include -L/usr/local/cuda-10.2/lib64 -lcudart -fopenmp -g -O2 -std=c++11 -c sm_l.cc
flex -opairs_l.cc -Ppairs pairs.l
g++ -lstdc++ -I/usr/local/cuda-10.2/include -L/usr/local/cuda-10.2/lib64 -lcudart -fopenmp -g -O2 -std=c++11 -c pairs_l.cc
g++ -lstdc++ -I/usr/local/cuda-10.2/include -L/usr/local/cuda-10.2/lib64 -lcudart -fopenmp -g -O2 -std=c++11 -o grasshopper-build results_option.o \
pps_kmer_value_option.o file_option.o pairs_option.o \
pps_results_file_option.o pps_partial_size_option.o \
test_substitution_matrix_option.o paired_end_option.o \
thread_manager_runnable.o nws_single_gpu_runnable.o \
nws_algorithm_option.o gpus_value_option.o exception.o \
fasta_option.o mutex.o pps_partial_size_value_option.o \
perm_value_option.o thread.o pairs.o pps_results_option.o \
substitution_matrix_file_option.o results.o \
pps_win_size_option.o fasta_file_option.o gpus_option.o \
pps_errors_option.o promising_pairs_selection.o \
pps_win_size_value_option.o pps_errors_value_option.o \
pps_shortest_lexicographical_index_value_option.o \
results_file_option.o pps_partial_count_value_option.o \
pps_score_cutoff_value_option.o root_option.o \
pps_partial_count_option.o all_with_all_option.o \
substitution_matrix.o substitution_matrix_option.o \
pps_score_cutoff_option.o gap_penalty_option.o sequences.o \
hi_res_timer.o help_option.o thread_manager.o promising_pairs.o \
frequency_characteristics.o complete_option.o \
pps_kmer_option.o pairs_per_gpu_value_option.o \
nws_single_gpu_runnable_init.o algorithm_option.o \
paired_end_value_option.o \
pps_shortest_lexicographical_index_option.o gold_option.o \
nws_single_gpu_runnable_finalization.o \
gap_penalty_value_option.o option.o option_manager.o \
pairs_per_gpu_option.o main.o pairs_file_option.o perm_option.o \
verify_results_option.o sem.o main_cu.o fasta_y.o pairs_y.o \
sm_y.o fasta_l.o sm_l.o pairs_l.o
nws_algorithm_option.o: In function `align::NWSAlgorithmOption::deviceQuery()':
/home/cbfgws3/grasshopper-assembler-code/grasshopper-build/nws_algorithm_option.cpp:389: undefined reference to `cudaGetDeviceCount'
/home/cbfgws3/grasshopper-assembler-code/grasshopper-build/nws_algorithm_option.cpp:431: undefined reference to `cudaSetDevice'
/home/cbfgws3/grasshopper-assembler-code/grasshopper-build/nws_algorithm_option.cpp:434: undefined reference to `cudaMalloc'
/home/cbfgws3/grasshopper-assembler-code/grasshopper-build/nws_algorithm_option.cpp:435: undefined reference to `cudaMemcpy'
/home/cbfgws3/grasshopper-assembler-code/grasshopper-build/nws_algorithm_option.cpp:438: undefined reference to `cudaMemcpy'
/home/cbfgws3/grasshopper-assembler-code/grasshopper-build/nws_algorithm_option.cpp:439: undefined reference to `cudaFree'
/home/cbfgws3/grasshopper-assembler-code/grasshopper-build/nws_algorithm_option.cpp:441: undefined reference to `cudaGetDeviceProperties'

Here’s (I think) the relevant part of the MAKEFILE:

CUDA_LIB=-L/usr/local/cuda-10.2/lib64
CUDA_INC=-I/usr/local/cuda-10.2/include
EN=grasshopper-build
CC=g++
NVCC=nvcc
CC_FL=-lstdc++ $(CUDA_INC) $(CUDA_LIB) -lcudart -fopenmp -g -O2 -std=c++11
NVCC_FL=-lstdc++ -arch=sm_30

nws_algorithm_option.o: nws_algorithm_option.cpp \
	constants.h nws_algorithm_option.h option.h results.h \
	cuda_declarations.h pairs.h sequences.h sequence.h \
	substitution_matrix.h hi_res_timer.h thread_manager.h thread.h \
	exception.h sem.h mutex.h nws_single_gpu_runnable_init.h \
	thread_manager_runnable.h dev_pointers.h \
	nws_single_gpu_runnable.h dev_params.h \
	nws_single_gpu_runnable_finalization.h 
	$(CC) $(CC_FL) -c nws_algorithm_option.cpp

So, if I am interpreting the code correctly, the compiler line should work out to be:

g++ -lstdc++ -I/usr/local/cuda-10.2/include -L/usr/local/cuda-10.2/lib64 -lcudart -fopenmp -g -O2 -std=c++11

And that means that the linkage order is incorrect. But I don’t quite understand how to troubleshoot the ordering.

Is there a tool to check the order, or a guideline to structure the order of the linking?

TIA

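I was able to reproduce your error with a simple Hello World, and rearranging the linker call fixed it. The linker resolves symbols from left to right, so libraries such as -lcudart must appear after the source/object files that reference them. Hope this helps.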

g++ -lstdc++ -I/usr/local/cuda/include -L/usr/local/cuda/lib64 -lcudart -lcuda -fopenmp -g -O2 -std=c++11 helloWorld.cpp -o hello
/tmp/cc3MFpG1.o: In function `main':
/home/belt/helloWorld.cpp:119: undefined reference to `cudaSetDevice'
collect2: error: ld returned 1 exit status
g++ -o hello helloWorld.cpp -lstdc++ -fopenmp -g -O2 -std=c++11 -I/usr/local/cuda/include -L/usr/local/cuda/lib64 -lcudart -lcuda

Here’s a link for reference.
https://stackoverflow.com/questions/9421108/how-can-i-compile-cuda-code-then-link-it-to-a-c-project

Thanks; I’ll try this fix and let you know!

I think that fixed it!

I re-ordered the linking statements, as you suggested, and ran the g++ command (verbose mode) and did NOT get an error. I then re-ran the MAKE command and got a “nothing to do” response (see below).

Am I correct in assuming that this has resolved the linking issue?

TIA

$ g++ -v -o grasshopper-build results_option.o pps_kmer_value_option.o file_option.o pairs_option.o pps_results_file_option.o pps_partial_size_option.o test_substitution_matrix_option.o paired_end_option.o thread_manager_runnable.o nws_single_gpu_runnable.o nws_algorithm_option.o gpus_value_option.o exception.o fasta_option.o mutex.o pps_partial_size_value_option.o perm_value_option.o thread.o pairs.o pps_results_option.o substitution_matrix_file_option.o results.o pps_win_size_option.o fasta_file_option.o gpus_option.o pps_errors_option.o promising_pairs_selection.o pps_win_size_value_option.o pps_errors_value_option.o pps_shortest_lexicographical_index_value_option.o results_file_option.o pps_partial_count_value_option.o pps_score_cutoff_value_option.o root_option.o pps_partial_count_option.o all_with_all_option.o substitution_matrix.o substitution_matrix_option.o pps_score_cutoff_option.o gap_penalty_option.o sequences.o hi_res_timer.o help_option.o thread_manager.o promising_pairs.o frequency_characteristics.o complete_option.o pps_kmer_option.o pairs_per_gpu_value_option.o nws_single_gpu_runnable_init.o algorithm_option.o paired_end_value_option.o pps_shortest_lexicographical_index_option.o gold_option.o nws_single_gpu_runnable_finalization.o gap_penalty_value_option.o option.o option_manager.o pairs_per_gpu_option.o main.o pairs_file_option.o perm_option.o verify_results_option.o sem.o main_cu.o fasta_y.o pairs_y.o sm_y.o fasta_l.o sm_l.o pairs_l.o -lstdc++ -fopenmp -g -O2 -std=c++11 -I/usr/local/cuda-10.2/include -L/usr/local/cuda-10.2/lib64 -lcudart
Using built-in specs.
COLLECT_GCC=g++
COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/7/lto-wrapper
OFFLOAD_TARGET_NAMES=nvptx-none
OFFLOAD_TARGET_DEFAULT=1
Target: x86_64-linux-gnu
Configured with: ../src/configure -v --with-pkgversion='Ubuntu 7.5.0-3ubuntu1~18.04' --with-bugurl=file:///usr/share/doc/gcc-7/README.Bugs --enable-languages=c,ada,c++,go,brig,d,fortran,objc,obj-c++ --prefix=/usr --with-gcc-major-version-only --program-suffix=-7 --program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --enable-bootstrap --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-libmpx --enable-plugin --enable-default-pie --with-system-zlib --with-target-system-zlib --enable-objc-gc=auto --enable-multiarch --disable-werror --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic --enable-offload-targets=nvptx-none --without-cuda-driver --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu
Thread model: posix
gcc version 7.5.0 (Ubuntu 7.5.0-3ubuntu1~18.04) 
COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/7/:/usr/lib/gcc/x86_64-linux-gnu/7/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/7/:/usr/lib/gcc/x86_64-linux-gnu/
LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/7/:/usr/lib/gcc/x86_64-linux-gnu/7/../../../../x86_64-linux-gnu/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/7/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/7/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/7/../../../../x86_64-linux-gnu/lib/:/usr/lib/gcc/x86_64-linux-gnu/7/../../../:/lib/:/usr/lib/
Reading specs from /usr/lib/gcc/x86_64-linux-gnu/7/libgomp.spec
COLLECT_GCC_OPTIONS='-v' '-o' 'grasshopper-build' '-fopenmp' '-g' '-O2' '-std=c++11' '-I' '/usr/local/cuda-10.2/include' '-L/usr/local/cuda-10.2/lib64' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-pthread'
 /usr/lib/gcc/x86_64-linux-gnu/7/collect2 -plugin /usr/lib/gcc/x86_64-linux-gnu/7/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/7/lto-wrapper -plugin-opt=-fresolution=/tmp/ccaPeHIW.res -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lpthread -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc --build-id --eh-frame-hdr -m elf_x86_64 --hash-style=gnu --as-needed -dynamic-linker /lib64/ld-linux-x86-64.so.2 -pie -z now -z relro -o grasshopper-build /usr/lib/gcc/x86_64-linux-gnu/7/../../../x86_64-linux-gnu/Scrt1.o /usr/lib/gcc/x86_64-linux-gnu/7/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/7/crtbeginS.o /usr/lib/gcc/x86_64-linux-gnu/7/crtoffloadbegin.o -L/usr/local/cuda-10.2/lib64 -L/usr/lib/gcc/x86_64-linux-gnu/7 -L/usr/lib/gcc/x86_64-linux-gnu/7/../../../../x86_64-linux-gnu/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/7/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/7/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/7/../../../../x86_64-linux-gnu/lib -L/usr/lib/gcc/x86_64-linux-gnu/7/../../.. 
results_option.o pps_kmer_value_option.o file_option.o pairs_option.o pps_results_file_option.o pps_partial_size_option.o test_substitution_matrix_option.o paired_end_option.o thread_manager_runnable.o nws_single_gpu_runnable.o nws_algorithm_option.o gpus_value_option.o exception.o fasta_option.o mutex.o pps_partial_size_value_option.o perm_value_option.o thread.o pairs.o pps_results_option.o substitution_matrix_file_option.o results.o pps_win_size_option.o fasta_file_option.o gpus_option.o pps_errors_option.o promising_pairs_selection.o pps_win_size_value_option.o pps_errors_value_option.o pps_shortest_lexicographical_index_value_option.o results_file_option.o pps_partial_count_value_option.o pps_score_cutoff_value_option.o root_option.o pps_partial_count_option.o all_with_all_option.o substitution_matrix.o substitution_matrix_option.o pps_score_cutoff_option.o gap_penalty_option.o sequences.o hi_res_timer.o help_option.o thread_manager.o promising_pairs.o frequency_characteristics.o complete_option.o pps_kmer_option.o pairs_per_gpu_value_option.o nws_single_gpu_runnable_init.o algorithm_option.o paired_end_value_option.o pps_shortest_lexicographical_index_option.o gold_option.o nws_single_gpu_runnable_finalization.o gap_penalty_value_option.o option.o option_manager.o pairs_per_gpu_option.o main.o pairs_file_option.o perm_option.o verify_results_option.o sem.o main_cu.o fasta_y.o pairs_y.o sm_y.o fasta_l.o sm_l.o pairs_l.o -lstdc++ -lcudart -lstdc++ -lm -lgomp -lgcc_s -lgcc -lpthread -lc -lgcc_s -lgcc /usr/lib/gcc/x86_64-linux-gnu/7/crtendS.o /usr/lib/gcc/x86_64-linux-gnu/7/../../../x86_64-linux-gnu/crtn.o /usr/lib/gcc/x86_64-linux-gnu/7/crtoffloadend.o
COLLECT_GCC_OPTIONS='-v' '-o' 'grasshopper-build' '-fopenmp' '-g' '-O2' '-std=c++11' '-I' '/usr/local/cuda-10.2/include' '-L/usr/local/cuda-10.2/lib64' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-pthread'
$ make
make: Nothing to be done for 'all'.

Am I correct in assuming that this has resolved the linking issue?

If you’re not getting the error and your code is executing correctly, I’d believe so.

This seems more a make issue than a CUDA one.

Excellent! Thanks so much. And, yes, this is an error in the MAKEFILE by the authors of the program.