Converting a DeepLab TensorFlow model to a TensorRT model dramatically increases inference time. What am I doing wrong in my code?
Here I convert the TensorFlow frozen graph to a TensorRT-optimized graph and save the new TRT model:
OUTPUT_NAME = ["SemanticPredictions"]
# read Tensorflow frozen graph
with gfile.FastGFile('frozen_inference_graph.pb', 'rb') as tf_model:
tf_graphf = tensorflow.GraphDef()
tf_graphf.ParseFromString(tf_model.read())
# convert (optimize) frozen model to TensorRT model
trt_graph = trt.create_inference_graph(input_graph_def=tf_graphf, outputs=OUTPUT_NAME,
max_batch_size=2, max_workspace_size_bytes=2 * (10 ** 9), precision_mode="INT8")
# write the TensorRT model to be used later for inference
with gfile.FastGFile("TensorRT_model.pb", 'wb') as f:
f.write(trt_graph.SerializeToString())
print("TensorRT model is successfully stored!")
In another script, I load this TRT model again and run semantic segmentation prediction with it, but it is about 7 to 8 times slower than the original model! Here is the second script:
import cv2
import numpy as np
import tensorflow
from tensorflow.python.platform import gfile

gpu_options = tensorflow.GPUOptions(per_process_gpu_memory_fraction=0.50)
with tensorflow.Session(config=tensorflow.ConfigProto(gpu_options=gpu_options)) as sess:
    img_array = cv2.imread('test.png', 1)

    # read the TensorRT frozen graph
    with gfile.FastGFile('TensorRT_model.pb', 'rb') as trt_model:
        trt_graph = tensorflow.GraphDef()
        trt_graph.ParseFromString(trt_model.read())

    # import the graph and obtain the corresponding input/output tensors
    tensorflow.import_graph_def(trt_graph, name='')
    input_tensor = sess.graph.get_tensor_by_name('ImageTensor:0')
    output_tensor = sess.graph.get_tensor_by_name('SemanticPredictions:0')

    # perform inference
    batch_seg_map = sess.run(output_tensor, feed_dict={input_tensor: [img_array]})
    seg_map = batch_seg_map[0]
    # label_to_color_image is the helper from the DeepLab demo that maps class labels to colors
    seg_img = label_to_color_image(seg_map).astype(np.uint8)
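For what it's worth, a timing sketch along the following lines (assuming sess, input_tensor, output_tensor and img_array from the script above are still in scope, and discarding the first run as warm-up so one-time graph initialization is not counted) is how I would compare the two graphs:

import time

# warm-up run (not timed) so one-time graph initialization is excluded
sess.run(output_tensor, feed_dict={input_tensor: [img_array]})

start = time.time()
for _ in range(10):
    sess.run(output_tensor, feed_dict={input_tensor: [img_array]})
print("average inference time: %.3f s" % ((time.time() - start) / 10))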
Any ideas on how to perform the conversion properly so that it actually speeds up inference?