Can I use more than one CUDA shared library at once?

I had an assignment to implement two algorithms, each in a separate shared library. Both libraries are used by another program.

Environment: Ubuntu Linux, T4 GPU (compute capability 7.5), CUDA Toolkit 11.8, Eclipse. Everything is compiled and linked with PTX for compute capability 7.5.

Each shared library on its own works fine.

However, when both of them are used together, we get CUDA error 300 (`cudaErrorInvalidSource`) as soon as the program starts. (It seems to occur as soon as the second library is loaded.)

We removed all the .cu files, leaving only .cpp files, and the error still occurs.

  1. The same scenario with static libraries works.
  2. When SASS for compute capability 7.5 is added to the shared libraries, it works.

I created three projects: a shared library with a single .cpp file, compiled and linked with nvcc; another shared library; and a program that uses the two libraries. Each library was compiled and linked with PTX 7.5.

When executing the third program, we get CUDA error 300 as soon as we call any CUDA runtime API function.

I don’t seem to have any trouble with it:

$ cat l1.cu
#include <cstdio>
// Device kernel of library 1: every thread that runs it prints "k1"
// through device-side printf.
__global__ void k1()
{
  printf("k1\n");
}

// Host entry point of library 1.
// Launches k1 with one block of one thread and waits for the device to finish.
// Any launch or execution failure is reported on stderr instead of being
// silently dropped, so a problem with this library's device code is visible.
void f1(){

  k1<<<1,1>>>();
  // A kernel launch returns no status; launch errors are read back here.
  const cudaError_t launchErr = cudaGetLastError();
  if (launchErr != cudaSuccess) {
    fprintf(stderr, "f1: kernel launch failed: error %d (%s)\n",
            static_cast<int>(launchErr), cudaGetErrorString(launchErr));
  }
  // Still synchronize unconditionally, as before; errors raised while the
  // kernel executes surface at this call.
  const cudaError_t syncErr = cudaDeviceSynchronize();
  if (syncErr != cudaSuccess) {
    fprintf(stderr, "f1: cudaDeviceSynchronize failed: error %d (%s)\n",
            static_cast<int>(syncErr), cudaGetErrorString(syncErr));
  }
}


$ cat l2.cu
#include <cstdio>
// Device kernel of library 2: every thread that runs it prints "k2"
// through device-side printf.
__global__ void k2()
{
  printf("k2\n");
}

// Host entry point of library 2.
// Launches k2 with one block of one thread and waits for the device to finish.
// Any launch or execution failure is reported on stderr instead of being
// silently dropped, so a problem with this library's device code is visible.
void f2(){

  k2<<<1,1>>>();
  // A kernel launch returns no status; launch errors are read back here.
  const cudaError_t launchErr = cudaGetLastError();
  if (launchErr != cudaSuccess) {
    fprintf(stderr, "f2: kernel launch failed: error %d (%s)\n",
            static_cast<int>(launchErr), cudaGetErrorString(launchErr));
  }
  // Still synchronize unconditionally, as before; errors raised while the
  // kernel executes surface at this call.
  const cudaError_t syncErr = cudaDeviceSynchronize();
  if (syncErr != cudaSuccess) {
    fprintf(stderr, "f2: cudaDeviceSynchronize failed: error %d (%s)\n",
            static_cast<int>(syncErr), cudaGetErrorString(syncErr));
  }
}


$ cat main.cpp
#include <cuda_runtime.h>
#include <iostream>

void f1();
void f2();

int main(){
  int d;
  cudaError_t err = cudaGetDeviceCount(&d);
  if (err != cudaSuccess) std::cout << cudaGetErrorString(err) << std::endl;
  f1();
  f2();
}
$ nvcc -shared -Xcompiler -fPIC -o libl1.so l1.cu              
$ nvcc -shared -Xcompiler -fPIC -o libl2.so l2.cu
$ g++ -I/usr/local/cuda/include -o test main.cpp -L. -ll1 -ll2 -L/usr/local/cuda/lib64 -lcudart
$ ./test
k1
k2
$ compute-sanitizer ./test
========= COMPUTE-SANITIZER
k1
k2
========= ERROR SUMMARY: 0 errors
$

My suggestion would be that you provide a short, complete example, just like the one that I have provided, that demonstrates the error.

I do note that with the following compile sequence:

$ nvcc -gencode arch=compute_75,code=compute_75 -shared -Xcompiler -fPIC -o libl1.so l1.cu
$ nvcc -gencode arch=compute_75,code=compute_75 -shared -Xcompiler -fPIC -o libl2.so l2.cu
$ g++ -I/usr/local/cuda/include -o test main.cpp -L. -ll1 -ll2 -L/usr/local/cuda/lib64 -lcudart

(basically creating each library so that it has only cc7.5 PTX)

I experience errors on CUDA 11.4 but not on CUDA 12.0

I suggest moving your environment forward to CUDA 12.0 (or newer)

Toolkit 11.4, RTX A4000 (compute capability 8.6) - but it's the same with Toolkit 11.8 on the T4 (compute capability 7.5).

Device Id : 0
Device Name : NVIDIA RTX A4000
MultiProcessorCount : 48
maxThreadsPerMultiProcessor : 1536
ClockRate(kilohertz) : 1560000
compute Capability : 8.6
Global Memory : 16504128(KB)
Const Memory : 64(KB)
shared Memory per Block : 48(KB)
MaxGridSize : 2147483647.65535.65535
maxThreadsDim : 1024.1024.64
MaxThreadsPerBlock : 1024
warpSize : 32
concurrentKernels : 1
streamPrioritiesSupported : Supported
StreamPriorityRange :(0–5)
MemoryClockRate(kilohertz) : 7001000
MemoryBusWidth : 256

a.h

void lib1rt();

a.cpp

#include
#include “a.h”
void lib1rt(){

std::cout << "lib1" <<std::endl;

}

make all
Building file: ../a.cpp
Invoking: NVCC Compiler
/usr/local/cuda/bin/nvcc --debug -gencode arch=compute_86,code=compute_86 -Xcompiler -fPIC -ccbin g++ -c -o "a.o" "../a.cpp"
Finished building: ../a.cpp

Building target: libNirSharedLib1.so
Invoking: NVCC linker
/usr/local/cuda/bin/nvcc --cudart=static -ccbin g++ --shared -gencode arch=compute_86,code=compute_86 -o "libNirSharedLib1.so" ./a.o
Finished building target: libNirSharedLib1.so

b.h

void lib2rt();

b.cpp

#include
#include “b.h”
void lib2rt(){

std::cout << "lib2" <<std::endl;

}

Building file: ../b.cpp
Invoking: NVCC Compiler
/usr/local/cuda/bin/nvcc --device-debug --debug -gencode arch=compute_86,code=compute_86 -Xcompiler -fPIC -ccbin g++ -c -o "b.o" "../b.cpp"
Finished building: ../b.cpp

Building target: libNirSharedLib2.so
Invoking: NVCC linker
/usr/local/cuda/bin/nvcc --cudart=static -ccbin g++ --shared -gencode arch=compute_86,code=compute_86 -o "libNirSharedLib2.so" ./b.o
Finished building target: libNirSharedLib2.so

LibTest.cpp

#include <iostream>
#include <cuda_runtime.h>
#include <a.h>
#include <b.h>

// Reproducer program: makes one CUDA runtime call, reports its failure (if
// any) on stdout, then calls the entry point of each of the two shared
// libraries. Always exits with status 0.
int main() {
  std::cout << "!!!Hello World!!!" << std::endl; // prints !!!Hello World!!!

  // The only CUDA runtime call made directly by the executable.
  const cudaError_t err = cudaDeviceSynchronize();
  if (err != cudaSuccess)
  {
    // Print the numeric code and its description so the failure can be
    // identified from the output alone.
    std::cout << "error " << static_cast<int>(err) << ": "
              << cudaGetErrorString(err) << std::endl;
  }
  lib1rt();
  lib2rt();
  return 0;

}

Building file: ../src/LibTest.cpp
Invoking: NVCC Compiler
/usr/local/cuda/bin/nvcc -I"/home/zinir/eclipse-workspaceLib/NirSharedLib1" -I"/home/zinir/eclipse-workspaceLib/NirSharedLib2" --debug -gencode arch=compute_86,code=compute_86 -ccbin g++ -c -o "src/LibTest.o" "../src/LibTest.cpp"
Finished building: ../src/LibTest.cpp

Building target: LibTest
Invoking: NVCC linker
/usr/local/cuda/bin/nvcc --cudart=static -L"/home/zinir/eclipse-workspaceLib/NirSharedLib1/Debug" -L"/home/zinir/eclipse-workspaceLib/NirSharedLib2/Debug" -ccbin g++ -gencode arch=compute_86,code=compute_86 -o "LibTest" ./src/LibTest.o -lNirSharedLib1 -lNirSharedLib2
Finished building target: LibTest

Switch to CUDA 12.0 or newer.