Linking error for cuda separate compilation mode and static linking of cudart

Basically, my goal is the following: I want to create a program using the CUDA Toolkit and run it on systems that have NVIDIA GPUs and NVIDIA drivers installed, but not necessarily the CUDA driver/Toolkit. My idea was to link the CUDA libraries statically. However, I failed to do so even for a simple case, which I show below. I have three files: main.cpp, kernel.cu and header.h. Their contents are below.

main.cpp

#include <stdio.h>
#include "header.h"
// Host-side driver: adds two fixed five-element vectors through addWithCuda
// and prints the element-wise sums.
int main()
{
    const int arraySize = 5;
    const int lhs[arraySize] = { 1, 2, 3, 4, 5 };
    const int rhs[arraySize] = { 10, 20, 30, 40, 50 };

    // Starts zeroed; addWithCuda is expected to overwrite every element.
    int sum[arraySize] = {};

    // The addition itself is done by the routine declared in header.h.
    addWithCuda(sum, lhs, rhs, arraySize);

    printf("{1,2,3,4,5} + {10,20,30,40,50} = {%d,%d,%d,%d,%d}\n",
        sum[0], sum[1], sum[2], sum[3], sum[4]);

    return 0;
}

header.h

#pragma once
// Element-wise vector addition: writes a[i] + b[i] into c[i] for `size` ints.
// c, a and b are host buffers; the implementation (kernel.cu) copies them to
// and from the GPU itself. Nothing is returned: the implementation reports
// CUDA failures on stderr only.
void addWithCuda(int *c, const int *a, const int *b, unsigned int size);

kernel.cu

#include "header.h"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>

// Element-wise integer addition: each thread writes c[t] = a[t] + b[t], where
// t is the thread's threadIdx.x.
//
// Only threadIdx.x is used for indexing and there is no bounds check, so the
// caller must launch no more threads than the buffers have elements.
__global__ void addKernel(int *c, const int *a, const int *b)
{
    const int tid = threadIdx.x;
    const int total = a[tid] + b[tid];
    c[tid] = total;
}

// Adds two host integer vectors on the GPU: c[i] = a[i] + b[i] for i in [0, size).
//
// Parameters:
//   c    - host output buffer with room for `size` ints.
//   a, b - host input buffers holding `size` ints each.
//   size - number of elements; 0 is a no-op.
//
// Runs on device 0. Every CUDA failure is reported on stderr together with
// CUDA's description of the error; the function returns nothing, so after a
// failure the contents of `c` should not be relied upon.
//
// addKernel indexes its arguments by threadIdx.x only, so a single launch can
// cover at most one thread block. Vectors longer than the device's
// threads-per-block limit are therefore processed in consecutive slices, one
// single-block launch per slice with the device pointers advanced by the
// slice offset.
void addWithCuda(int *c, const int *a, const int *b, unsigned int size)
{
    // All locals are declared before the first `goto Error` so that no jump
    // crosses an initialization.
    int *dev_a = 0;
    int *dev_b = 0;
    int *dev_c = 0;
    int maxThreadsPerBlock = 0;
    unsigned int offset = 0;
    const size_t bytes = static_cast<size_t>(size) * sizeof(int);
    cudaError_t cudaStatus;

    // Nothing to add; this also avoids an invalid <<<1, 0>>> launch configuration.
    if (size == 0) {
        return;
    }

    // Choose which GPU to run on, change this on a multi-GPU system.
    cudaStatus = cudaSetDevice(0);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaSetDevice failed!  Do you have a CUDA-capable GPU installed? (%s)\n",
                cudaGetErrorString(cudaStatus));
        goto Error;
    }

    // Upper bound on the number of elements a single-block launch can handle.
    cudaStatus = cudaDeviceGetAttribute(&maxThreadsPerBlock, cudaDevAttrMaxThreadsPerBlock, 0);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceGetAttribute failed: %s\n", cudaGetErrorString(cudaStatus));
        goto Error;
    }
    if (maxThreadsPerBlock < 1) {
        // Guards the slicing loop below against a zero-sized step.
        fprintf(stderr, "cudaDeviceGetAttribute reported an unusable block size: %d\n", maxThreadsPerBlock);
        goto Error;
    }

    // Allocate GPU buffers for three vectors (two input, one output).
    cudaStatus = cudaMalloc((void**)&dev_c, bytes);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(cudaStatus));
        goto Error;
    }

    cudaStatus = cudaMalloc((void**)&dev_a, bytes);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(cudaStatus));
        goto Error;
    }

    cudaStatus = cudaMalloc((void**)&dev_b, bytes);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(cudaStatus));
        goto Error;
    }

    // Copy input vectors from host memory to GPU buffers.
    cudaStatus = cudaMemcpy(dev_a, a, bytes, cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(cudaStatus));
        goto Error;
    }

    cudaStatus = cudaMemcpy(dev_b, b, bytes, cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(cudaStatus));
        goto Error;
    }

    // Launch the kernel with one thread per element, one slice at a time.
    // For inputs that fit in a single block this is exactly one launch.
    while (offset < size) {
        unsigned int chunk = size - offset;
        if (chunk > static_cast<unsigned int>(maxThreadsPerBlock)) {
            chunk = static_cast<unsigned int>(maxThreadsPerBlock);
        }

        addKernel<<<1, chunk>>>(dev_c + offset, dev_a + offset, dev_b + offset);

        // Launch-configuration errors are only visible through cudaGetLastError.
        cudaStatus = cudaGetLastError();
        if (cudaStatus != cudaSuccess) {
            fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
            goto Error;
        }

        offset += chunk;
    }

    // cudaDeviceSynchronize waits for the kernel(s) to finish, and returns
    // any errors encountered during execution.
    cudaStatus = cudaDeviceSynchronize();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching addKernel! (%s)\n",
                cudaStatus, cudaGetErrorString(cudaStatus));
        goto Error;
    }

    // Copy output vector from GPU buffer to host memory.
    cudaStatus = cudaMemcpy(c, dev_c, bytes, cudaMemcpyDeviceToHost);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(cudaStatus));
        goto Error;
    }

Error:
    // Reached on both success and failure; pointers that were never
    // allocated are still null here.
    cudaFree(dev_c);
    cudaFree(dev_a);
    cudaFree(dev_b);
}

While I do not think the content of the code is really important, I decided to include it nevertheless.

Then I have two files: run and run_static.

run

# Builds the executable `test`:
#   1. compile the host source with g++,
#   2. compile the CUDA source with nvcc,
#   3. link both objects with g++ against cudart and cudadevrt from the
#      CUDA 10.2 lib64 directory.
g++ -c main.cpp;
nvcc -c kernel.cu;
g++ main.o kernel.o -o test -L/usr/local/cuda-10.2/lib64 -lcudart -lcudadevrt

run_static

# Same steps as `run`, but the link line names cudart_static instead of
# cudart and writes the result to `test_static`.
# NOTE(review): no library on this link line provides the dl*, pthread_*,
# sem_* or shm_* symbols that libcudart_static.a references.
g++ -c main.cpp;
nvcc -c kernel.cu;
g++ main.o kernel.o -o test_static -L/usr/local/cuda-10.2/lib64 -lcudart_static -lcudadevrt

I could use make but I have decided to simplify everything.

Well, ./run creates the test executable, which in turn produces the correct output. This is the dynamic-linking case. However, ./run_static does not create test_static. Instead, the link step fails with the following errors:

/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::globalState::initializeDriverEntrypoints()':
(.text+0x10990): undefined reference to `dlsym'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::globalState::initializeDriverEntrypoints()':
(.text+0x109ba): undefined reference to `dlsym'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::globalState::initializeDriverEntrypoints()':
(.text+0x109e6): undefined reference to `dlsym'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::globalState::initializeDriverEntrypoints()':
(.text+0x10a12): undefined reference to `dlsym'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::globalState::initializeDriverEntrypoints()':
(.text+0x10a3e): undefined reference to `dlsym'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o):(.text+0x10a6a): more undefined references to `dlsym' follow
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::globalState::loadDriverInternal()':
(.text+0x13c52): undefined reference to `dlopen'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::globalState::loadDriverInternal()':
(.text+0x13c84): undefined reference to `dlclose'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::globalState::initializeDriverInternal()':
(.text+0x15f55): undefined reference to `dlclose'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosSemaphoreSignal(sem_t*)':
(.text+0x4e1e5): undefined reference to `sem_post'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosSemaphoreWait(sem_t*, unsigned int)':
(.text+0x4e215): undefined reference to `sem_trywait'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosSemaphoreWait(sem_t*, unsigned int)':
(.text+0x4e22a): undefined reference to `sem_wait'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosSemaphoreWait(sem_t*, unsigned int)':
(.text+0x4e2ec): undefined reference to `sem_timedwait'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosSemaphoreDestroy(sem_t*)':
(.text+0x4e325): undefined reference to `sem_destroy'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosSemaphoreCreate(sem_t*, int)':
(.text+0x4e349): undefined reference to `sem_init'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `__tcf_4':
cuosLinux.cpp:(.text+0x4f4b1): undefined reference to `dlclose'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `__tcf_0':
cuosLinux.cpp:(.text+0x4f4e1): undefined reference to `dlclose'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `__tcf_1':
cuosLinux.cpp:(.text+0x4f511): undefined reference to `dlclose'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `__tcf_2':
cuosLinux.cpp:(.text+0x4f541): undefined reference to `dlclose'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `__tcf_3':
cuosLinux.cpp:(.text+0x4f571): undefined reference to `dlclose'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInit()':
(.text+0x4fedc): undefined reference to `dlerror'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInit()':
(.text+0x4fee8): undefined reference to `dlopen'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInit()':
(.text+0x4ff0a): undefined reference to `dlvsym'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInit()':
(.text+0x4ff16): undefined reference to `dlerror'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInit()':
(.text+0x4ff2c): undefined reference to `dlclose'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInit()':
(.text+0x4ff7c): undefined reference to `dlerror'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInit()':
(.text+0x4ff88): undefined reference to `dlopen'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInit()':
(.text+0x4ffaa): undefined reference to `dlvsym'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInit()':
(.text+0x4ffb6): undefined reference to `dlerror'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInit()':
(.text+0x4ffcc): undefined reference to `dlclose'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInit()':
(.text+0x5001c): undefined reference to `dlerror'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInit()':
(.text+0x50028): undefined reference to `dlopen'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInit()':
(.text+0x5004a): undefined reference to `dlvsym'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInit()':
(.text+0x50056): undefined reference to `dlerror'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInit()':
(.text+0x5006c): undefined reference to `dlclose'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInit()':
(.text+0x500bc): undefined reference to `dlerror'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInit()':
(.text+0x500c8): undefined reference to `dlopen'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInit()':
(.text+0x500ea): undefined reference to `dlvsym'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInit()':
(.text+0x500f6): undefined reference to `dlerror'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInit()':
(.text+0x5010c): undefined reference to `dlclose'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInit()':
(.text+0x50153): undefined reference to `dlerror'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInit()':
(.text+0x5015f): undefined reference to `dlopen'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInit()':
(.text+0x50181): undefined reference to `dlvsym'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInit()':
(.text+0x5018d): undefined reference to `dlerror'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInit()':
(.text+0x501a3): undefined reference to `dlclose'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosHasThreadExited(cudart::CUOSthread_st*)':
(.text+0x51ddb): undefined reference to `pthread_kill'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosThreadDetach(cudart::CUOSthread_st*)':
(.text+0x51df9): undefined reference to `pthread_detach'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosThreadCreateWithName(cudart::CUOSthread_st**, int (*)(void*), void*, char const*)':
(.text+0x51eb6): undefined reference to `pthread_create'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosCondCreateWithSharedFlag(pthread_cond_t*, int)':
(.text+0x52135): undefined reference to `pthread_condattr_setpshared'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosOnce(int*, void (*)())':
(.text+0x52155): undefined reference to `pthread_once'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosTryAcquireWriterLock(void**)':
(.text+0x52198): undefined reference to `pthread_rwlock_trywrlock'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosTryAcquireReaderLock(void**)':
(.text+0x521c8): undefined reference to `pthread_rwlock_tryrdlock'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInitRWLockEx(void**, void*, unsigned long)':
(.text+0x52249): undefined reference to `pthread_rwlockattr_init'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInitRWLockEx(void**, void*, unsigned long)':
(.text+0x5225a): undefined reference to `pthread_rwlockattr_setpshared'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInitRWLockEx(void**, void*, unsigned long)':
(.text+0x52269): undefined reference to `pthread_rwlock_init'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosTryEnterCriticalSection(pthread_mutex_t*)':
(.text+0x52295): undefined reference to `pthread_mutex_trylock'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInitializeCriticalSectionWithSharedFlag(pthread_mutex_t*, int)':
(.text+0x522fc): undefined reference to `pthread_mutexattr_init'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInitializeCriticalSectionWithSharedFlag(pthread_mutex_t*, int)':
(.text+0x52321): undefined reference to `pthread_mutexattr_settype'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInitializeCriticalSectionWithSharedFlag(pthread_mutex_t*, int)':
(.text+0x5232f): undefined reference to `pthread_mutexattr_setpshared'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInitializeCriticalSectionWithSharedFlag(pthread_mutex_t*, int)':
(.text+0x5234a): undefined reference to `pthread_mutexattr_destroy'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosTlsSetValue(unsigned int, void*)':
(.text+0x52418): undefined reference to `pthread_setspecific'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosTlsAlloc(void (*)(void*))':
(.text+0x52460): undefined reference to `pthread_key_create'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosFreeLibrary(void*)':
(.text+0x52585): undefined reference to `dlclose'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosLoadLibrary(char const*)':
(.text+0x525a5): undefined reference to `dlerror'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosPosixInit()':
(.text+0x527ac): undefined reference to `dlerror'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosPosixInit()':
(.text+0x527b8): undefined reference to `dlopen'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosPosixInit()':
(.text+0x527da): undefined reference to `dlvsym'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosPosixInit()':
(.text+0x527e6): undefined reference to `dlerror'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosPosixInit()':
(.text+0x527fc): undefined reference to `dlclose'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `__tcf_0':
cuos_common_posix.cpp:(.text+0x52ba1): undefined reference to `dlclose'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosDestroyRWLock(void**)':
(.text+0x52bc8): undefined reference to `pthread_rwlock_destroy'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosLoadLibraryUnsafe(char const*)':
(.text+0x52c05): undefined reference to `dlerror'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInitRWLock(void**)':
(.text+0x52c51): undefined reference to `pthread_rwlockattr_init'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInitRWLock(void**)':
(.text+0x52c89): undefined reference to `pthread_rwlockattr_setpshared'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosInitRWLock(void**)':
(.text+0x52c98): undefined reference to `pthread_rwlock_init'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosThreadJoin(cudart::CUOSthread_st*, int*)':
(.text+0x52ccf): undefined reference to `pthread_join'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosCondCreateShared(pthread_cond_t*)':
(.text+0x533dc): undefined reference to `pthread_condattr_setpshared'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosCondCreate(pthread_cond_t*)':
(.text+0x53439): undefined reference to `pthread_condattr_setpshared'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosShmCloseEx(cudart::cuosShmInfoEx_st*, unsigned int, unsigned int)':
(.text+0x534e6): undefined reference to `shm_unlink'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosShmOpenNamedEx(void*, char const*, unsigned long, cudart::cuosShmInfoEx_st**)':
(.text+0x535b1): undefined reference to `shm_open'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosShmCreateNamedEx(void*, char const*, unsigned long, cudart::cuosShmInfoEx_st**)':
(.text+0x537cf): undefined reference to `shm_open'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosShmCreateNamedEx(void*, char const*, unsigned long, cudart::cuosShmInfoEx_st**)':
(.text+0x537e9): undefined reference to `shm_unlink'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosVirtualReserveInRange(unsigned long, void*, void*, unsigned long)':
(.text+0x53a31): undefined reference to `pthread_once'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosDestroyRWLockEx(void**)':
(.text+0x52174): undefined reference to `pthread_rwlock_destroy'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosReleaseReaderLock(void**)':
(.text+0x52184): undefined reference to `pthread_rwlock_unlock'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosAcquireWriterLock(void**)':
(.text+0x521f4): undefined reference to `pthread_rwlock_wrlock'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosAcquireReaderLock(void**)':
(.text+0x52204): undefined reference to `pthread_rwlock_rdlock'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosTlsGetValue(unsigned int)':
(.text+0x52434): undefined reference to `pthread_getspecific'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosTlsFree(unsigned int)':
(.text+0x52444): undefined reference to `pthread_key_delete'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosGetProcAddress(void*, char const*)':
(.text+0x52571): undefined reference to `dlsym'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosLoadLibrary(char const*)':
(.text+0x525b3): undefined reference to `dlopen'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosReleaseWriterLock(void**)':
(.text+0x52a44): undefined reference to `pthread_rwlock_unlock'
/usr/local/cuda-10.2/lib64/libcudart_static.a(libcudart_static.a.o): In function `cudart::cuosLoadLibraryUnsafe(char const*)':
(.text+0x52c13): undefined reference to `dlopen'
collect2: error: ld returned 1 exit status

Clearly, I am missing something simple.

Hi Alex, I just met the same problem, have you solved it?

for the code that is shown here:

  1. don’t link against -lcudadevrt

the code shown here does not require that

  2. add:

     -lculibos -lpthread -lrt -ldl
    

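cudart_static requires those libraries: the undefined dlopen/dlsym/dlclose/dlerror references are resolved by -ldl, the pthread_* and sem_* references by -lpthread, and shm_open/shm_unlink by -lrt. Put them after -lcudart_static on the link line, since the linker resolves static libraries from left to right.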

Another alternative is to use only nvcc for both compilation and linking steps. That should resolve the issues, and nvcc links against cudart_static by default.