I have successfully converted the facenet MTCNN and Inception-ResNet v1 models to TensorRT graphs with TensorRT 4, see https://github.com/JerryJiaGit/facenet_trt

But I found a strange issue with the checkpoint frozen-graph conversion: there is a runtime difference between the SavedModel and the checkpoint frozen graph. I see a 30% improvement with the SavedModel, but no improvement with the frozen graph.

I also compared the frozen graph with the SavedModel and they look identical, so I am not sure whether there is a known issue in TensorRT.

For quick reference, here is the conversion code for both the SavedModel and the checkpoint paths:

```
# Workspace budget handed to TensorRT for engine building.
# BUG FIX: the original passed 2 << 20 (= 2 MiB) while the comment claimed
# "2GB mem assigned to TRT". A 2 MiB workspace is far too small for TensorRT
# to build engines for a network of this size, which can silently leave
# segments unconverted — a likely cause of the "no improvement" observation.
# 2 << 30 is the 2 GiB the comment intended.
MAX_WORKSPACE_SIZE_BYTES = 2 << 30  # 2 GiB

if os.path.isfile(model_exp):
    # Frozen-graph (.pb) path: load the serialized GraphDef directly.
    print('Model filename: %s' % model_exp)
    with gfile.FastGFile(model_exp, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        # JJia TensorRT enable
        print('TensorRT Enabled', MAX_WORKSPACE_SIZE_BYTES)
        # NOTE(review): this branch passes 'embeddings:0' (tensor name) while
        # the checkpoint branch passes "embeddings" (op name). TF-TRT's
        # `outputs` conventionally takes op names; confirm both branches
        # should use the same form.
        trt_graph = trt.create_inference_graph(
            input_graph_def=graph_def,
            outputs=['embeddings:0'],
            max_batch_size=1,
            max_workspace_size_bytes=MAX_WORKSPACE_SIZE_BYTES,
            precision_mode="FP16",  # "FP32", "FP16" or "INT8"
            minimum_segment_size=1)
        # trt.calib_graph_to_infer_graph is only needed after INT8 calibration.
        tf.import_graph_def(trt_graph, input_map=input_map, name='')
else:
    # Checkpoint path: restore variables from the meta graph + checkpoint,
    # freeze them to constants, then hand the frozen graph to TF-TRT.
    print('Model directory: %s' % model_exp)
    meta_file, ckpt_file = get_model_filenames(model_exp)
    print('Metagraph file: %s' % meta_file)
    print('Checkpoint file: %s' % ckpt_file)
    saver = tf.train.import_meta_graph(os.path.join(model_exp, meta_file), input_map=input_map)
    saver.restore(tf.get_default_session(), os.path.join(model_exp, ckpt_file))
    # JJia TensorRT enable
    print('TensorRT Enabled', MAX_WORKSPACE_SIZE_BYTES)
    frozen_graph = tf.graph_util.convert_variables_to_constants(
        tf.get_default_session(),
        tf.get_default_graph().as_graph_def(),
        output_node_names=["embeddings"])
    # Rewrite stateful reference ops left over from training (e.g. batch-norm
    # moving-average updates) into their stateless equivalents so the frozen
    # graph is a valid pure-inference graph for TensorRT.
    for node in frozen_graph.node:
        if node.op == 'RefSwitch':
            node.op = 'Switch'
        elif node.op == 'AssignSub':
            node.op = 'Sub'
            if 'use_locking' in node.attr:
                del node.attr['use_locking']
    trt_graph = trt.create_inference_graph(
        input_graph_def=frozen_graph,
        outputs=["embeddings"],
        max_batch_size=1,
        max_workspace_size_bytes=MAX_WORKSPACE_SIZE_BYTES,
        precision_mode="FP16",
        minimum_segment_size=1)
    tf.import_graph_def(trt_graph, return_elements=["embeddings:0"])
```