cudaMalloc, cudaFree address

I want to hook cudaMalloc and cudaFree.
I am testing in a Kubernetes container.

Here is my test code.
mylib.cpp

#include <stdio.h>
#include <unistd.h>
#include <dlfcn.h>
#include <cuda_runtime.h>

/*
 * LD_PRELOAD interposer for cudaMalloc.
 *
 * Logs the devPtr argument exactly as it was received, then forwards the call
 * to the next cudaMalloc definition found by dlsym(RTLD_NEXT, ...).
 *
 * devPtr - forwarded unchanged to the next cudaMalloc
 * count  - forwarded unchanged to the next cudaMalloc
 *
 * Returns whatever the forwarded call returns, or cudaErrorUnknown when no
 * further cudaMalloc definition could be resolved.
 */
cudaError_t cudaMalloc( void** devPtr, size_t count )
{
    typedef cudaError_t (*cudaMalloc_fn)( void**, size_t );

    // Resolve the next definition once and reuse it on later calls.
    static cudaMalloc_fn lcudaMalloc = (cudaMalloc_fn)dlsym(RTLD_NEXT, "cudaMalloc");

    printf("cudaMalloc hooked=> %p\n", (void*)devPtr);

    if (lcudaMalloc == NULL) {
        // dlsym yields NULL when the symbol cannot be found; calling through
        // it would crash, so report the failure to the caller instead.
        const char* why = dlerror();
        fprintf(stderr, "cudaMalloc hook: dlsym failed: %s\n", why ? why : "unknown error");
        return cudaErrorUnknown;
    }

    return lcudaMalloc(devPtr, count);
}

/*
 * LD_PRELOAD interposer for cudaFree.
 *
 * Logs the pointer being released, then forwards the call to the next
 * cudaFree definition found by dlsym(RTLD_NEXT, ...).
 *
 * devPtr - forwarded unchanged to the next cudaFree
 *
 * Returns whatever the forwarded call returns, or cudaErrorUnknown when no
 * further cudaFree definition could be resolved.
 */
cudaError_t cudaFree( void* devPtr)
{
    typedef cudaError_t (*cudaFree_fn)( void* );

    // Resolve the next definition once and reuse it on later calls.
    static cudaFree_fn lcudaFree = (cudaFree_fn)dlsym(RTLD_NEXT, "cudaFree");

    printf("cudaFree   hooked=> %p\n", devPtr);

    if (lcudaFree == NULL) {
        // dlsym yields NULL when the symbol cannot be found; calling through
        // it would crash, so report the failure to the caller instead.
        const char* why = dlerror();
        fprintf(stderr, "cudaFree hook: dlsym failed: %s\n", why ? why : "unknown error");
        return cudaErrorUnknown;
    }

    return lcudaFree(devPtr);
}

I compiled the C++ code with:

g++ -I/usr/local/cuda/include -fPIC -shared -o lib.so mylib.cpp -ldl -L/usr/local/cuda/lib64 -lcudart

t1.cu

#include <stdio.h>
#include <unistd.h>

// Allocates 1024*1024 bytes with cudaMalloc and releases them with cudaFree.
// Returns 0 on success and 1 if either CUDA call reports an error.
int main(){
  // Start from NULL so the pointer is never indeterminate if allocation fails.
  int* d_a = NULL;

  cudaError_t err = cudaMalloc(&d_a, 1024*1024);
  if (err != cudaSuccess) {
    fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(err));
    return 1;
  }

  err = cudaFree(d_a);
  if (err != cudaSuccess) {
    fprintf(stderr, "cudaFree failed: %s\n", cudaGetErrorString(err));
    return 1;
  }

  return 0;
}

nvcc -o t1 t1.cu -cudart shared

Then I ran this command:

LD_PRELOAD=./lib.so ./t1

I got the following messages:

cudaMalloc hooked=> 0x7ffdf4993fb0
cudaFree hooked=> 0x7f1287000000

Why are they different?
How can I get the allocated address in cudaMalloc?

tensorflow = 1.14
cuda = 10.1
nvidia driver = 470.57.02
gpu = Tesla T4

First, in the hooked cudaMalloc function you print devPtr itself, which is the address of the caller's pointer variable (&d_a in your test), not the pointer value stored in it (*devPtr).

Second, if you want to print the allocated address, you have to print it after allocation, i.e. after the call to lcudaMalloc.

// Forward to the real cudaMalloc first, so the allocation has happened before anything is printed.
cudaError_t result = lcudaMalloc(devPtr, count);
// Print the value stored through devPtr (the allocated address), not devPtr itself.
printf("cudaMalloc hooked=> %p\n", *devPtr);
return result;
1 Like

wow… I solved it thanks to you

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.