Segmentation fault (core dumped)

I want to hook cudaMalloc and cudaFree.
I am testing inside a Kubernetes container.

Here is my test code.
mylib.cpp

#include <stdio.h>
#include <unistd.h>
#include <dlfcn.h>
#include <cuda_runtime.h>

/*
 * LD_PRELOAD interposer for cudaMalloc().
 *
 * Logs the call, then forwards it to the next definition of "cudaMalloc"
 * that the dynamic loader can find after this library (RTLD_NEXT).
 *
 * devPtr: out-parameter forwarded unchanged to the real cudaMalloc.
 * count:  requested allocation size in bytes, forwarded unchanged.
 *
 * Returns whatever the real cudaMalloc returns, or cudaErrorUnknown when no
 * underlying implementation could be resolved.
 */
cudaError_t cudaMalloc( void** devPtr, size_t count )
{
    typedef cudaError_t (*cudaMalloc_fn)( void**, size_t );

    /* Clear any stale loader error so dlerror() below describes this lookup. */
    dlerror();
    cudaMalloc_fn real_cudaMalloc = (cudaMalloc_fn)dlsym(RTLD_NEXT, "cudaMalloc");

    /*
     * dlsym() returns NULL when no later object in the lookup order exports
     * the symbol (e.g. the CUDA runtime is not a dependency of this library
     * or was loaded privately by the host program). Calling through that
     * NULL pointer is what turns a failed lookup into a segmentation fault,
     * so report the failure and return an error instead.
     */
    if (real_cudaMalloc == NULL) {
        const char* reason = dlerror();
        fprintf(stderr,
                "cudaMalloc hook: dlsym(RTLD_NEXT, \"cudaMalloc\") failed: %s\n",
                reason ? reason : "symbol not found");
        return cudaErrorUnknown;
    }

    /* %p is the only valid conversion for a pointer; print the value the
     * caller passed rather than the address of our own parameter slot. */
    printf("ptr: %p\n", (void*)devPtr);
    printf("cudaMalloc hooked\n");
    return real_cudaMalloc(devPtr, count);
}

/*
 * LD_PRELOAD interposer for cudaFree().
 *
 * Logs the call, then forwards it to the next definition of "cudaFree"
 * that the dynamic loader can find after this library (RTLD_NEXT).
 *
 * devPtr: pointer forwarded unchanged to the real cudaFree.
 *
 * Returns whatever the real cudaFree returns, or cudaErrorUnknown when no
 * underlying implementation could be resolved.
 */
cudaError_t cudaFree( void* devPtr)
{
    typedef cudaError_t (*cudaFree_fn)( void* );

    /* Clear any stale loader error so dlerror() below describes this lookup. */
    dlerror();
    cudaFree_fn real_cudaFree = (cudaFree_fn)dlsym(RTLD_NEXT, "cudaFree");

    /*
     * A failed lookup yields NULL; calling through it would crash the host
     * process, so report the failure and return an error instead.
     */
    if (real_cudaFree == NULL) {
        const char* reason = dlerror();
        fprintf(stderr,
                "cudaFree hook: dlsym(RTLD_NEXT, \"cudaFree\") failed: %s\n",
                reason ? reason : "symbol not found");
        return cudaErrorUnknown;
    }

    /* %p is the only valid conversion for a pointer; print the pointer being
     * freed rather than the address of our own parameter slot. */
    printf("ptr: %p\n", devPtr);
    printf("cudaFree hooked\n");
    return real_cudaFree(devPtr);
}

I compiled the C++ code with:

g++ -I/usr/local/cuda/include -fPIC -shared -o lib.so mylib.cpp -ldl -L/usr/local/cuda/lib64 -lcudart

test_src.py

import tensorflow as tf

# MNIST handwritten-digit dataset as exposed by tf.keras.
mnist = tf.keras.datasets.mnist

# Load the train/test split, then rescale the pixel values by 1/255.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train / 255.0
x_test = x_test / 255.0

# Small fully-connected classifier, assembled layer by layer:
# flatten 28x28 input -> 128 ReLU units -> dropout -> 10-way softmax.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(10, activation='softmax'))

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 20 epochs, then report loss/accuracy on the test split.
model.fit(x_train, y_train, epochs=20)

model.evaluate(x_test, y_test, verbose=2)

Then I ran this command:

LD_PRELOAD=lib.so python3 test_src.py

Then I got this output:

cudaMalloc hooked
Segmentation fault (core dumped)

But if I run it without “LD_PRELOAD=lib.so”, I get no error.

Environment:

tensorflow = 1.14
cuda = 10.1
nvidia driver = 470.57.02
gpu = Tesla T4

What is the problem?