I will make hook cudaMalloc and cudaFree.
I test in kubernetes container
Here are my test code.
mylib.cpp
#include <stdio.h>
#include <unistd.h>
#include <dlfcn.h>
#include <cuda_runtime.h>
cudaError_t cudaMalloc( void** devPtr, size_t count )
{
cudaError_t (*lcudaMalloc) ( void**, size_t) = (cudaError_t (*) ( void**, size_t))dlsym(RTLD_NEXT, "cudaMalloc");
printf("ptr: %u\n", &devPtr);
printf("cudaMalloc hooked\n");
return lcudaMalloc(devPtr, count);
}
cudaError_t cudaFree( void* devPtr)
{
cudaError_t (*lcudaFree) ( void*) = (cudaError_t (*) ( void*))dlsym(RTLD_NEXT, "cudaFree");
printf("ptr: %u\n", &devPtr);
printf("cudaFree hooked\n");
return lcudaFree(devPtr);
}
and I compiled C++ code
g++ -I/usr/local/cuda/include -fPIC -shared -o lib.so mylib.cpp -ldl -L/usr/local/cuda/lib64 -lcudart
test_src.py
import tensorflow as tf
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
model = tf.keras.models.Sequential([
tf.keras.layers.Flatten(input_shape=(28, 28)),
tf.keras.layers.Dense(128, activation='relu'),
tf.keras.layers.Dropout(0.2),
tf.keras.layers.Dense(10, activation='softmax')
])
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
model.fit(x_train, y_train, epochs=20)
model.evaluate(x_test, y_test, verbose=2)
and then I run this command
LD_PRELOAD=lib.so python3 test_src.py
Then I got the message.
cudaMalloc hooked
Segmentation fault (core dumped)
But if I run without “LD_PRELOAD=lib.so”, I get no error
env
tensorflow = 1.14
cuda = 10.1
nvidia driver = 470.57.02
gpu = Tesla T4
What is the problem??