TF-TRT does not free up memory when converting a SavedModel.
I am using TF-TRT to convert my model for inference in my project. I load the model and convert it in a loop, and after a few iterations my system runs out of memory.
I found that simply converting the same model several times already accumulates a large amount of memory that I do not know how to release. Is there a way to free this memory?
Environment
TensorRT Version: 8.6.1.6
GPU Type: Nvidia GeForce GTX 1660 SUPER
Nvidia Driver Version: 536.67
Operating System + Version: WSL2 Ubuntu 22.04.2 LTS, Windows 10 22H2
Python Version (if applicable): 3.10.6
TensorFlow Version (if applicable): 2.12.0
Baremetal or Container (if container which image + tag): nvcr.io/nvidia/tensorflow:23.07-tf2-py3
Steps To Reproduce
import numpy as np
import psutil
import tensorflow as tf
from tensorflow.python.compiler.tensorrt import trt_convert


def create_flat_model():
    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(54, 9, 7), name='layers_flatten'),
        tf.keras.layers.Dense(512, activation='relu', name='layers_dense'),
        tf.keras.layers.Dense(512, activation='relu', name='layers_dense1'),
        tf.keras.layers.Dense(512, activation='relu', name='layers_dense2'),
        tf.keras.layers.Dense(512, activation='relu', name='layers_dense3'),
        tf.keras.layers.Dense(512, activation='relu', name='layers_dense4'),
        tf.keras.layers.Dense(512, activation='relu', name='layers_dense5'),
        tf.keras.layers.Dense(512, activation='relu', name='layers_dense6'),
        tf.keras.layers.Dense(512, activation='relu', name='layers_dense7'),
        tf.keras.layers.Dense(512, activation='relu', name='layers_dense8'),
        tf.keras.layers.Dense(512, activation='relu', name='layers_dense9'),
        tf.keras.layers.Dense(512, activation='relu', name='layers_dense10'),
        tf.keras.layers.Dense(512, activation='relu', name='layers_dense11'),
        tf.keras.layers.Dense(512, activation='relu', name='layers_dense12'),
        tf.keras.layers.Dense(512, activation='relu', name='layers_dense13'),
        tf.keras.layers.Dropout(0.2, name='layers_dropout'),
        tf.keras.layers.Dense(10, activation='softmax', name='layers_dense_14')
    ])
    model.compile(tf.keras.optimizers.SGD(learning_rate=0.02, momentum=0.1),
                  loss=['categorical_crossentropy'])
    return model


def input_fn():
    # Yield two batches (batch sizes 20 and 80) so converter.build() sees a
    # range of batch sizes for the dynamic-shape profiles.
    model_input = np.zeros((54, 9, 7), dtype=np.float32)
    input_shapes = (20, 80)
    for shape in input_shapes:
        yield [np.array([model_input for _ in range(shape)])]


def convert_tf_trt_model(model_path):
    converter = trt_convert.TrtGraphConverterV2(
        input_saved_model_dir=model_path,
        precision_mode=trt_convert.TrtPrecisionMode.FP32,
        use_dynamic_shape=True,
        dynamic_shape_profile_strategy="Optimal",
    )
    converter.convert()
    converter.build(input_fn=input_fn)


def main():
    tmp_path = "tf_saved_model"
    model = create_flat_model()
    model.save(tmp_path, save_format='tf')

    ram_info = []
    ram_info.append(psutil.virtual_memory())
    print(f"\n*****Before convert_tf_trt_model, available Ram: {ram_info[-1].available / 1024 / 1024 / 1024:.2f} GB\n")

    # Convert the same SavedModel repeatedly and record the available RAM after each run.
    for i in range(15):
        convert_tf_trt_model(tmp_path)
        ram_info.append(psutil.virtual_memory())
        print(f"\n*****After {i + 1} convert_tf_trt_model, available Ram: {ram_info[-1].available / 1024 / 1024 / 1024:.2f} GB\n")
        # tf.keras.backend.clear_session()
        # tmp_info = psutil.virtual_memory()
        # print(f"\n*****After {i + 1} tf.keras.backend.clear_session(), available Ram: {tmp_info.available / 1024 / 1024 / 1024:.2f} GB\n")
        # tf.compat.v1.reset_default_graph()
        # tmp_info = psutil.virtual_memory()
        # print(f"\n*****After {i + 1} tf.compat.v1.reset_default_graph(), available Ram: {tmp_info.available / 1024 / 1024 / 1024:.2f} GB\n")

    for idx, info in enumerate(ram_info):
        print(f"available Ram: {info.available / 1024 / 1024 / 1024:.2f} GB")
        if idx != 0:
            print(f"Change: {info.available / 1024 / 1024 / 1024 - ram_info[idx - 1].available / 1024 / 1024 / 1024:.2f} GB")


if __name__ == "__main__":
    main()
The output:
*****Before convert_tf_trt_model, available Ram: 5.86 GB
*****After 1 convert_tf_trt_model, available Ram: 4.19 GB
*****After 2 convert_tf_trt_model, available Ram: 3.85 GB
*****After 3 convert_tf_trt_model, available Ram: 2.88 GB
*****After 4 convert_tf_trt_model, available Ram: 1.81 GB
*****After 5 convert_tf_trt_model, available Ram: 1.62 GB
*****After 6 convert_tf_trt_model, available Ram: 1.15 GB
*****After 7 convert_tf_trt_model, available Ram: 1.05 GB
*****After 8 convert_tf_trt_model, available Ram: 1.02 GB
*****After 9 convert_tf_trt_model, available Ram: 1.06 GB
*****After 10 convert_tf_trt_model, available Ram: 1.10 GB
*****After 11 convert_tf_trt_model, available Ram: 0.79 GB
*****After 12 convert_tf_trt_model, available Ram: 0.72 GB
*****After 13 convert_tf_trt_model, available Ram: 0.45 GB
*****After 14 convert_tf_trt_model, available Ram: 0.13 GB
*****After 15 convert_tf_trt_model, available Ram: 0.08 GB
available Ram: 5.86 GB
available Ram: 4.19 GB
Change: -1.67 GB
available Ram: 3.85 GB
Change: -0.34 GB
available Ram: 2.88 GB
Change: -0.97 GB
available Ram: 1.81 GB
Change: -1.06 GB
available Ram: 1.62 GB
Change: -0.20 GB
available Ram: 1.15 GB
Change: -0.46 GB
available Ram: 1.05 GB
Change: -0.10 GB
available Ram: 1.02 GB
Change: -0.03 GB
available Ram: 1.06 GB
Change: 0.04 GB
available Ram: 1.10 GB
Change: 0.04 GB
available Ram: 0.79 GB
Change: -0.31 GB
available Ram: 0.72 GB
Change: -0.07 GB
available Ram: 0.45 GB
Change: -0.27 GB
available Ram: 0.13 GB
Change: -0.32 GB
available Ram: 0.08 GB
Change: -0.05 GB
I have tried tf.keras.backend.clear_session() and tf.compat.v1.reset_default_graph() (commented out in the code above), but they have no effect.
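The only workaround I can think of is to isolate each conversion in a short-lived child process, so the operating system reclaims whatever the converter keeps alive when the process exits. A rough, untested sketch (the convert_in_subprocess helper name is my own, and I have not verified how it interacts with the CUDA context in this container):

import multiprocessing as mp

def convert_in_subprocess(model_path):
    # Run the conversion in a fresh process; "spawn" avoids forking a process
    # that has already initialized CUDA. Memory allocated during the conversion
    # is returned to the OS when the child exits.
    ctx = mp.get_context("spawn")
    p = ctx.Process(target=convert_tf_trt_model, args=(model_path,))
    p.start()
    p.join()

# In main(), the loop body would call convert_in_subprocess(tmp_path)
# instead of convert_tf_trt_model(tmp_path).

This only sidesteps the problem, though, so I would still like to know whether TF-TRT itself can release the memory it holds after a conversion.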