I trained a “pornographic image recognition” model with TensorFlow and saved it as a SavedModel. Then I used TF-TRT from the nvidia/tensorflow:19.02-py3 container to optimize the model. Finally, I used the tensorflow/serving:1.14.0-devel-gpu container to serve both the original TF SavedModel and the TF-TRT optimized SavedModel. When I tested with a TF Serving client, I found that both models took the same time per request.
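The timing test is basically a gRPC Predict call in a loop. A minimal sketch of it, assuming the default gRPC port 8500, a model served under the name porn_model, and a 1x224x224x3 float input (the actual model name and input shape come from my deployment, so treat those as placeholders):

import time

import grpc
import numpy as np
import tensorflow as tf
from tensorflow_serving.apis import predict_pb2, prediction_service_pb2_grpc

# Assumed endpoint and model name -- adjust to the actual serving config.
channel = grpc.insecure_channel('localhost:8500')
stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)

request = predict_pb2.PredictRequest()
request.model_spec.name = 'porn_model'          # assumed model name
request.model_spec.signature_name = 'predict_image'
# Assumed input shape; the real preprocessing may differ.
image = np.random.rand(1, 224, 224, 3).astype(np.float32)
request.inputs['image'].CopyFrom(tf.make_tensor_proto(image))

# Warm up once (TRT engines may be built lazily on the first request),
# then time repeated calls.
stub.Predict(request, 30.0)
start = time.time()
for _ in range(100):
    stub.Predict(request, 30.0)
print('avg latency: %.2f ms' % ((time.time() - start) / 100 * 1000))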
Here is my TF-TRT optimization code:
import os
import tensorflow as tf
import tensorflow.contrib.tensorrt as trt

flags = tf.app.flags
FLAGS = flags.FLAGS

# Inference with TF-TRT `SavedModel` workflow:
batch_size = 8
max_workspace_size = (1 << 32)
flags.DEFINE_string("export_dir", '/tftrt_serving_model/', "export_dir")

graph = tf.Graph()
with graph.as_default():
    with tf.Session() as sess:
        # Create a TensorRT inference graph from a SavedModel:
        trt_graph = trt.create_inference_graph(
            input_graph_def=None,
            outputs=None,
            input_saved_model_dir='/tf_serving_model',
            input_saved_model_tags=['serve'],
            max_batch_size=batch_size,
            max_workspace_size_bytes=max_workspace_size,
            precision_mode='FP32')
        # Import the converted graph into the current session's graph.
        tf.import_graph_def(trt_graph, name='')
        for i, n in enumerate(trt_graph.node):
            print("Name of the node - [%s]" % n.name)
        x = sess.graph.get_tensor_by_name("data:0")
        prob = sess.graph.get_tensor_by_name("out/softmax:0")
        # label = sess.graph.get_tensor_by_name("out/output:0")
        print(x)
        print(prob)
        # print(label)
        values, indices = tf.nn.top_k(prob, 15)
        print(values, indices)
        # Create the SavedModel builder for the exported model
        builder = tf.saved_model.builder.SavedModelBuilder(FLAGS.export_dir)
        tensor_info_x = tf.saved_model.utils.build_tensor_info(x)
        tensor_info_pro = tf.saved_model.utils.build_tensor_info(tf.reshape(values, [15]))
        tensor_info_classify = tf.saved_model.utils.build_tensor_info(tf.reshape(indices, [15]))
        signature_def_map = {
            "predict_image": tf.saved_model.signature_def_utils.build_signature_def(
                inputs={"image": tensor_info_x},
                outputs={"pro": tensor_info_pro,
                         "classify": tensor_info_classify},
                method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME
            )}
        builder.add_meta_graph_and_variables(sess,
                                             [tf.saved_model.tag_constants.SERVING],
                                             signature_def_map=signature_def_map)
        builder.save()
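One sanity check worth adding right after the conversion: count the TRTEngineOp nodes in trt_graph. If the count is zero, TF-TRT did not actually replace any subgraph with a TensorRT engine, and identical latency between the two served models would be expected. A minimal check using the trt_graph variable from the script above:

# Count TensorRT engine nodes in the converted graph; zero engines
# means TF-TRT fell back to plain TensorFlow for the whole model.
trt_engine_nodes = [n.name for n in trt_graph.node if n.op == 'TRTEngineOp']
print('TRTEngineOp nodes: %d' % len(trt_engine_nodes))
for name in trt_engine_nodes:
    print('  %s' % name)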
Here is a comparison of the model files:
TF model (size 23M):
tf_serving_model/
├── saved_model.pb
└── variables
├── variables.data-00000-of-00001
└── variables.index
1 directory, 3 files
TF-TRT optimized model (size 65M):
tftrt_serving_model
├── saved_model.pb
└── variables
1 directory, 1 file
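The empty variables directory suggests the converter froze the variables into constants inside saved_model.pb, which (together with any serialized TRT engine data) would account for the 23M to 65M growth. The same TRTEngineOp check can be run offline against the exported file; a rough sketch, assuming the default meta graph is the first entry:

import os
from tensorflow.core.protobuf import saved_model_pb2

# Parse the serialized SavedModel and count TRT engine ops in its graph.
saved_model = saved_model_pb2.SavedModel()
with open(os.path.join('/tftrt_serving_model', 'saved_model.pb'), 'rb') as f:
    saved_model.ParseFromString(f.read())

graph_def = saved_model.meta_graphs[0].graph_def
ops = [n.op for n in graph_def.node]
print('TRTEngineOp count: %d' % ops.count('TRTEngineOp'))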