I want to hook cudaMalloc and cudaFree with an LD_PRELOAD library.
I am testing inside a Kubernetes container.
Here is my test code.
mylib.cpp
#include <stdio.h>
#include <unistd.h>
#include <dlfcn.h>
#include <cuda_runtime.h>

cudaError_t cudaMalloc(void** devPtr, size_t count)
{
    // Look up the real cudaMalloc further down the link chain.
    cudaError_t (*lcudaMalloc)(void**, size_t) =
        (cudaError_t (*)(void**, size_t))dlsym(RTLD_NEXT, "cudaMalloc");
    printf("cudaMalloc hooked=> %p\n", devPtr);
    return lcudaMalloc(devPtr, count);
}

cudaError_t cudaFree(void* devPtr)
{
    // Look up the real cudaFree further down the link chain.
    cudaError_t (*lcudaFree)(void*) =
        (cudaError_t (*)(void*))dlsym(RTLD_NEXT, "cudaFree");
    printf("cudaFree hooked=> %p\n", devPtr);
    return lcudaFree(devPtr);
}
I compiled it into a shared library with:
g++ -I/usr/local/cuda/include -fPIC -shared -o lib.so mylib.cpp -ldl -L/usr/local/cuda/lib64 -lcudart
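If I understand the preload mechanism correctly, lib.so has to export cudaMalloc and cudaFree as dynamic symbols for the interposition to work; I assume this can be checked with something like:

nm -D lib.so | grep -E 'cudaMalloc|cudaFree'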
t1.cu
#include <stdio.h>
#include <unistd.h>

int main()
{
    int* d_a;
    cudaMalloc(&d_a, 1024 * 1024);
    cudaFree(d_a);
}
I compiled it with:
nvcc -o t1 t1.cu -cudart shared
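I pass -cudart shared so that t1 links the CUDA runtime dynamically; as far as I understand, with the default static cudart the preloaded hooks would never be called. ldd should list libcudart.so if the dynamic runtime is in use:

ldd ./t1 | grep cudart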
Then I run:
LD_PRELOAD=./lib.so ./t1
and I get this output:
cudaMalloc hooked=> 0x7ffdf4993fb0
cudaFree hooked=> 0x7f1287000000
Why are the two printed addresses different?
How can I get the actual allocated device address inside the hook?
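My guess is that in the cudaMalloc hook I am printing the address of the caller's pointer variable rather than the device address that cudaMalloc stores into it, so I would have to dereference devPtr after forwarding the call. A rough sketch of what I mean (untested; the extra printf and the assumption that *devPtr is only valid after the real call returns are mine):

cudaError_t cudaMalloc(void** devPtr, size_t count)
{
    cudaError_t (*lcudaMalloc)(void**, size_t) =
        (cudaError_t (*)(void**, size_t))dlsym(RTLD_NEXT, "cudaMalloc");
    cudaError_t err = lcudaMalloc(devPtr, count);
    // Assumption: after the real cudaMalloc returns, *devPtr holds the device address.
    printf("cudaMalloc hooked=> %p (allocated %p)\n", (void*)devPtr, *devPtr);
    return err;
}

Is this the right way to get the allocated address, or am I missing something?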
Environment:
TensorFlow = 1.14
CUDA = 10.1
NVIDIA driver = 470.57.02
GPU = Tesla T4