Background
I am trying to create a library of common CUDA device and host functions that are used in several places throughout my project. I then want to link against this library when building executables and other libraries so I don’t have to repeat the code (a common use case for a library).
Problem
I am able to build my library just fine; however, when I go to link an executable or another library against it using nvcc (nvlink), I get a linker error:
nvlink error : Undefined reference to ‘_ZN5iwork13CudaUtilities14bearingDiffMaxEdd’ in ‘main.o’
Steps to repeat
I’ve broken the problem down into a simple example that highlights the issue.
Structure:
- CudaUtilities.cu/.cuh - common utility code that I want to put into my library
- main.cu - code for an executable that uses CudaUtilities
- build.sh - simple build script to show the error
- libcudautilities.a - a static library that will contain the object code from CudaUtilities.cu (created by build script)
- a.out - an executable (main.cu) that uses the CudaUtilities functions and tries to link against libcudautilities.a (created by build script)
My “Cuda Utilities” class that is in my library
CudaUtilities.cuh
#ifndef CUDAUTILITIES_CUH_
#define CUDAUTILITIES_CUH_
#include <cuComplex.h>

/**
 * Scope for static helper functions that are callable from both host and
 * device code.
 *
 * The constructor and destructor are declared private, so the class cannot be
 * instantiated from outside; it is only used through its static members.
 *
 * The include guard avoids a leading underscore followed by an upper-case
 * letter, because identifiers of that form are reserved for the implementation.
 */
class CudaUtilities
{
public:
    /** Angle of the single-precision complex value x, i.e. atan2f(x.y, x.x). */
    static __device__ __host__ float cuCanglef(const cuFloatComplex x);

    /** Angle of the double-precision complex value x, i.e. atan2(x.y, x.x). */
    static __device__ __host__ double cuCangle(const cuDoubleComplex x);

private:
    // Declared private to prevent construction/destruction by users of the class.
    CudaUtilities();
    ~CudaUtilities();
};
#endif // CUDAUTILITIES_CUH_
CudaUtilities.cu
#include "CudaUtilities.cuh"
// static
float CudaUtilities::cuCanglef(const cuFloatComplex x)
{
return atan2f(x.y, x.x);
}
// static
double CudaUtilities::cuCangle(const cuDoubleComplex x)
{
return atan2(x.y, x.x);
}
Code for my executable that uses the CudaUtilities class
main.cu
#include "CudaUtilities.cuh"
#include <stdio.h>
// Test kernel: every launched thread computes the angle of the constant
// complex value (10, 20) via CudaUtilities::cuCangle and prints it with
// device-side printf.
__global__ void kernel()
{
    const cuDoubleComplex sample = make_cuDoubleComplex(10, 20);
    printf("%0.3f\n", CudaUtilities::cuCangle(sample));
}
int main(int argc, char *argv[])
{
const static dim3 blocks(1, 1);
const static dim3 threads(16, 16);
kernel<<<blocks, threads>>>();
cudaPeekAtLastError();
cudaDeviceSynchronize();
}
Simple build script (shell) that executes the commands that cause the issue
build.sh
#!/bin/bash
# Reproduction script: builds libcudautilities.a from CudaUtilities.cu, then
# tries to device-link and host-link main.cu against that library.
# Echo each script line as it is read.
set -o verbose
# build libcudautilities.a containing CudaUtilities.cu
# -dc is nvcc's --device-c (compile to an object with relocatable device code).
nvcc -arch=sm_20 -I. -dc CudaUtilities.cu
# -dlink is nvcc's --device-link.
# NOTE(review): "-I," (comma) differs from the "-I." used on every other line
# and looks like a typo - confirm.
nvcc -arch=sm_20 -I, -dlink CudaUtilities.o -o CudaUtilities.link.o
# Archive both the compiled object and its device-link object into the library.
/usr/bin/ar cr libcudautilities.a CudaUtilities.o CudaUtilities.link.o
/usr/bin/ranlib libcudautilities.a
# build an executable out of main.cu trying to link against libcudautilities.a
nvcc -arch=sm_20 -I. -dc main.cu
# Device-link step for the executable; this is the command that reports the
# nvlink "Undefined reference" error.
nvcc -arch=sm_20 -I. -L. -lcudautilities -dlink main.o -o main.link.o
# Final host link with g++ (no -o given, so the output is a.out).
g++ -L/usr/local/cuda/lib64 -L. main.o main.link.o -lcudadevrt -lcudart -lcudautilities
Error on build
$ ./build.sh
# build libcudautilities.a containing CudaUtilities.cu
nvcc -arch=sm_20 -I. -dc CudaUtilities.cu
nvcc -arch=sm_20 -I, -dlink CudaUtilities.o -o CudaUtilities.link.o
/usr/bin/ar cr libcudautilities.a CudaUtilities.o CudaUtilities.link.o
/usr/bin/ranlib libcudautilities.a
# build an executable out of main.cu trying to link against libcudautilities.a
nvcc -arch=sm_20 -I. -dc main.cu
nvcc -arch=sm_20 -I. -L. -lcudautilities -dlink main.o -o main.link.o
<b>nvlink error : Undefined reference to '_ZN13CudaUtilities8cuCangleE7double2' in 'main.o'</b>
Comments
I am able to run nm on my static library and see that my symbol is in there
$ nm libcudautilities.a
CudaUtilities.o:
0000000000000000 r _ZL15__module_id_str
0000000000000010 b _ZL20__cudaFatCubinHandle
0000000000000057 t _ZL22____nv_dummy_param_refPv
0000000000000000 b _ZL22__nv_inited_managed_rt
000000000000006c t _ZL26__cudaUnregisterBinaryUtilv
00000000000000a5 t _ZL31__nv_cudaEntityRegisterCallbackPPv
0000000000000008 b _ZL32__nv_fatbinhandle_for_managed_rt
000000000000008b t _ZL32__nv_init_managed_rt_with_modulePPv
0000000000000000 t _ZL37__nv_save_fatbinhandle_for_managed_rtPPv
00000000000000ca t _ZL85__sti____cudaRegisterAll_48_tmpxft_000014aa_00000000_6_CudaUtilities_cpp1_ii_c49f944dv
<b>0000000000000034 T _ZN13CudaUtilities8cuCangleE7double2</b>
0000000000000016 T _ZN13CudaUtilities9cuCanglefE6float2
0000000000000020 b _ZZL22____nv_dummy_param_refPvE5__ref
0000000000000018 b _ZZL31__nv_cudaEntityRegisterCallbackPPvE5__ref
U __cudaInitModule
U __cudaRegisterLinkedBinary_48_tmpxft_000014aa_00000000_6_CudaUtilities_cpp1_ii_c49f944d
U __cudaUnregisterFatBinary
0000000000000000 R __fatbinwrap_48_tmpxft_000014aa_00000000_6_CudaUtilities_cpp1_ii_c49f944d
U atan2
U atan2f
0000000000000000 r fatbinData
CudaUtilities.link.o:
0000000000000000 r _ZL15__fatDeviceText
0000000000000000 b _ZL20__cudaFatCubinHandle
0000000000000010 b _ZL22__cudaPrelinkedFatbins
000000000000005b t _ZL26__cudaRegisterLinkedBinaryPK19__fatBinC_Wrapper_tPFvPPvES2_
0000000000000000 r _ZL78def_module_id_str_48_tmpxft_000014aa_00000000_6_CudaUtilities_cpp1_ii_c49f944d
0000000000000020 b _ZZ87__cudaRegisterLinkedBinary_48_tmpxft_000014aa_00000000_6_CudaUtilities_cpp1_ii_c49f944dE3__p
0000000000000030 b _ZZL26__cudaRegisterLinkedBinaryPK19__fatBinC_Wrapper_tPFvPPvES2_E16__callback_array
0000000000000028 b _ZZL26__cudaRegisterLinkedBinaryPK19__fatBinC_Wrapper_tPFvPPvES2_E3__i
U __cudaRegisterFatBinary
0000000000000015 T __cudaRegisterLinkedBinary_48_tmpxft_000014aa_00000000_6_CudaUtilities_cpp1_ii_c49f944d
0000000000000000 t __cudaUnregisterBinaryUtil
U __cudaUnregisterFatBinary
U __fatbinwrap_48_tmpxft_000014aa_00000000_6_CudaUtilities_cpp1_ii_c49f944d
U atexit
0000000000000000 r fatbinData
However, if I modify my build script to NOT pass libcudautilities.a to the device-link step and instead pass the object file CudaUtilities.o directly, then everything works just fine (the final g++ link still uses -lcudautilities).
#!/bin/bash
# Variant of the reproduction script in which the executable's device-link
# step is given CudaUtilities.o directly instead of -lcudautilities.
# Echo each script line as it is read.
set -o verbose
# build libcudautilities.a containing CudaUtilities.cu
# -dc is nvcc's --device-c (compile to an object with relocatable device code).
nvcc -arch=sm_20 -I. -dc CudaUtilities.cu
# -dlink is nvcc's --device-link.
# NOTE(review): "-I," (comma) differs from the "-I." used on every other line
# and looks like a typo - confirm.
nvcc -arch=sm_20 -I, -dlink CudaUtilities.o -o CudaUtilities.link.o
# Archive both the compiled object and its device-link object into the library.
/usr/bin/ar cr libcudautilities.a CudaUtilities.o CudaUtilities.link.o
/usr/bin/ranlib libcudautilities.a
# build an executable out of main.cu trying to link against libcudautilities.a
nvcc -arch=sm_20 -I. -dc main.cu
# broken linker line
# nvcc -arch=sm_20 -I. -L. -lcudautilities -dlink main.o -o main.link.o
# hacked linker line
# (device-links CudaUtilities.o and main.o together; the library is not used here)
nvcc -arch=sm_20 -I. -L. -dlink CudaUtilities.o main.o -o main.link.o
# Final host link with g++; still links against libcudautilities.a.
g++ -L/usr/local/cuda/lib64 -L. main.o main.link.o -lcudadevrt -lcudart -lcudautilities
I’m sure I’m doing something wrong, because this sort of thing has to be a common use case.
Any help is greatly appreciated.
-Nick
Edit:
Ideally I would like my library to be a shared library, not a static one. However, after reading the nvcc docs I don’t think this is possible, because nvcc (nvlink) ignores .so files and only searches through .a files:
http://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/#libraries