I am trying to convert the ssdlite_mobilenet_v2 model to UFF and run it on a Jetson TX2 with TensorRT, but building the engine fails, and I need some help.
My environment: JetPack 4.0.1, Ubuntu 18.04, TensorRT 5.0.6.3.
Here are the steps I performed:
- Download the TensorFlow model ssdlite_mobilenet_v2: http://download.tensorflow.org/models/object_detection/ssdlite_mobilenet_v2_coco_2018_05_09.tar.gz
- Unpack the archive to get frozen_inference_graph.pb and rename it to ssd_frozen_inference_graph.pb (a Python sketch of these two steps follows the script below).
- Write a pb2uff.py script to convert the frozen graph to a UFF file:
import os
import sys
import tarfile
import requests
import tensorflow as tf
import tensorrt as trt
import graphsurgeon as gs
import ctypes
import uff

def ssd_unsupported_nodes_to_plugin_nodes(ssd_graph):
    # Find and remove all Assert Tensorflow nodes from the graph
    all_assert_nodes = ssd_graph.find_nodes_by_op("Assert")
    ssd_graph.remove(all_assert_nodes, remove_exclusive_dependencies=True)
    # Find all Identity nodes and forward their inputs
    all_identity_nodes = ssd_graph.find_nodes_by_op("Identity")
    ssd_graph.forward_inputs(all_identity_nodes)
    # Create TRT plugin nodes to replace unsupported ops in Tensorflow graph
    channels = 3
    height = 300
    width = 300
    Input = gs.create_plugin_node(name="Input",
                                  op="Placeholder",
                                  dtype=tf.float32,
                                  shape=[1, channels, height, width])
    PriorBox = gs.create_plugin_node(name="GridAnchor", op="GridAnchor_TRT",
                                     minSize=0.2,
                                     maxSize=0.95,
                                     aspectRatios=[1.0, 2.0, 0.5, 3.0, 0.33],
                                     variance=[0.1, 0.1, 0.2, 0.2],
                                     featureMapShapes=[19, 10, 5, 3, 2, 1],
                                     numLayers=6)
    NMS = gs.create_plugin_node(
        name="NMS",
        op="NMS_TRT",
        shareLocation=1,
        varianceEncodedInTarget=0,
        backgroundLabelId=0,
        confidenceThreshold=1e-8,
        nmsThreshold=0.6,
        topK=100,
        keepTopK=100,
        numClasses=91,
        inputOrder=[1, 0, 2],
        confSigmoid=1,
        isNormalized=1,
        scoreConverter="SIGMOID")
    concat_priorbox = gs.create_node(
        "concat_priorbox",
        op="ConcatV2",
        dtype=tf.float32,
        axis=2)
    concat_box_loc = gs.create_plugin_node(
        "concat_box_loc",
        op="FlattenConcat_TRT",
        dtype=tf.float32)
    concat_box_conf = gs.create_plugin_node(
        "concat_box_conf",
        output_nodes='NMS',
        op="FlattenConcat_TRT",
        dtype=tf.float32)
    # Create a mapping of namespace names -> plugin nodes.
    namespace_plugin_map = {
        "MultipleGridAnchorGenerator": PriorBox,
        "Postprocessor": NMS,
        "Preprocessor": Input,
        "ToFloat": Input,
        "image_tensor": Input,
        "MultipleGridAnchorGenerator/Concatenate": concat_priorbox,
        "concat": concat_box_loc,
        "concat_1": concat_box_conf
    }
    # Create a new graph by collapsing namespaces
    ssd_graph.collapse_namespaces(namespace_plugin_map)
    # Remove the outputs, so we just have a single output node (NMS).
    # If remove_exclusive_dependencies is True, the whole graph will be removed!
    ssd_graph.remove(ssd_graph.graph_outputs, remove_exclusive_dependencies=False)
    return ssd_graph

def model_to_uff(model_path, output_uff_path, silent=False):
    dynamic_graph = gs.DynamicGraph(model_path)
    dynamic_graph = ssd_unsupported_nodes_to_plugin_nodes(dynamic_graph)
    uff.from_tensorflow(
        dynamic_graph.as_graph_def(),
        output_filename=output_uff_path,
        text=True)

def main():
    model_path = './ssd_frozen_inference_graph.pb'
    uff_path = './ssd_frozen_inference_graph.uff'
    # Load the custom FlattenConcat plugin before conversion.
    ctypes.CDLL('./libflattenconcat.so')
    model_to_uff(model_path, uff_path)

if __name__ == '__main__':
    main()
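As an aside, the tarfile and requests imports above were meant for fetching and unpacking the archive from steps 1 and 2 directly in Python. A minimal sketch of that, assuming the tarball extracts to a directory named after the archive (which is the usual layout for the TensorFlow detection model zoo, but I have not verified it here):

import os
import tarfile
import requests

url = ('http://download.tensorflow.org/models/object_detection/'
       'ssdlite_mobilenet_v2_coco_2018_05_09.tar.gz')
archive = 'ssdlite_mobilenet_v2_coco_2018_05_09.tar.gz'

# Download the archive (step 1).
with open(archive, 'wb') as f:
    f.write(requests.get(url).content)

# Unpack it and rename the frozen graph (step 2); assumes the tarball
# extracts to a directory named after the archive.
with tarfile.open(archive) as tar:
    tar.extractall()
os.rename('ssdlite_mobilenet_v2_coco_2018_05_09/frozen_inference_graph.pb',
          'ssd_frozen_inference_graph.pb')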
I run python3 pb2uff.py, and the command-line log is:
WARNING:tensorflow:From /usr/lib/python3.6/dist-packages/graphsurgeon/StaticGraph.py:123: FastGFile.__init__ (from tensorflow.python.platform.gfile) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.gfile.GFile.
WARNING: To create TensorRT plugin nodes, please use the `create_plugin_node` function instead.
UFF Version 0.5.5
=== Automatically deduced input nodes ===
[name: "Input"
op: "Placeholder"
attr {
key: "dtype"
value {
type: DT_FLOAT
}
}
attr {
key: "shape"
value {
shape {
dim {
size: 1
}
dim {
size: 3
}
dim {
size: 300
}
dim {
size: 300
}
}
}
}
]
=========================================
=== Automatically deduced output nodes ===
[name: "NMS"
op: "NMS_TRT"
input: "Input"
input: "Squeeze"
input: "concat_box_conf"
attr {
key: "backgroundLabelId_u_int"
value {
i: 0
}
}
attr {
key: "confSigmoid_u_int"
value {
i: 1
}
}
attr {
key: "confidenceThreshold_u_float"
value {
f: 1e-08
}
}
attr {
key: "inputOrder_u_ilist"
value {
list {
i: 1
i: 0
i: 2
}
}
}
attr {
key: "isNormalized_u_int"
value {
i: 1
}
}
attr {
key: "keepTopK_u_int"
value {
i: 100
}
}
attr {
key: "nmsThreshold_u_float"
value {
f: 0.6
}
}
attr {
key: "numClasses_u_int"
value {
i: 91
}
}
attr {
key: "scoreConverter_u_str"
value {
s: "SIGMOID"
}
}
attr {
key: "shareLocation_u_int"
value {
i: 1
}
}
attr {
key: "topK_u_int"
value {
i: 100
}
}
attr {
key: "varianceEncodedInTarget_u_int"
value {
i: 0
}
}
]
==========================================
Using output node NMS
Converting to UFF graph
Warning: No conversion function registered for layer: NMS_TRT yet.
Converting NMS as custom op: NMS_TRT
Warning: No conversion function registered for layer: FlattenConcat_TRT yet.
Converting concat_box_conf as custom op: FlattenConcat_TRT
Warning: No conversion function registered for layer: FlattenConcat_TRT yet.
Converting concat_box_loc as custom op: FlattenConcat_TRT
No. nodes: 589
UFF Output written to ./ssd_frozen_inference_graph.uff
UFF Text Output written to ./ssd_frozen_inference_graph.pbtxt
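Note that the deduced NMS inputs above are "Input", "Squeeze", and "concat_box_conf", and the inputOrder indices refer to these. A minimal sketch to print those inputs directly from the collapsed graph (the file name and the import from pb2uff.py are my own assumptions; it just reuses the graphsurgeon calls already in the script):

import graphsurgeon as gs
from pb2uff import ssd_unsupported_nodes_to_plugin_nodes

# Rebuild the collapsed graph and print what actually feeds the NMS node,
# so the indices in inputOrder can be checked against the real tensors.
dynamic_graph = gs.DynamicGraph('./ssd_frozen_inference_graph.pb')
dynamic_graph = ssd_unsupported_nodes_to_plugin_nodes(dynamic_graph)
for node in dynamic_graph.find_nodes_by_op('NMS_TRT'):
    print(node.name, list(node.input))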
- Write engine.py and build_engine.py to build a TensorRT engine from the UFF file:

engine.py:
import sys
import os
import tensorrt as trt
import pycuda.driver as cuda
import numpy as np

def build_engine(uff_model_path, trt_logger, trt_engine_datatype=trt.DataType.FLOAT, batch_size=1, silent=False):
    with trt.Builder(trt_logger) as builder, builder.create_network() as network, trt.UffParser() as parser:
        builder.max_workspace_size = 1 << 30
        if trt_engine_datatype == trt.DataType.HALF:
            builder.fp16_mode = True
        builder.max_batch_size = batch_size
        # Register the NCHW input and the UFF-generated output marker.
        parser.register_input("Input", (3, 300, 300))
        parser.register_output("MarkOutput_0")
        parser.parse(uff_model_path, network)
        if not silent:
            print("Building TensorRT engine. This may take few minutes.")
        return builder.build_cuda_engine(network)

def save_engine(engine, engine_dest_path):
    # Serialize the engine and write it to disk.
    buf = engine.serialize()
    with open(engine_dest_path, 'wb') as f:
        f.write(buf)

def load_engine(trt_runtime, engine_path):
    # Read a serialized engine back from disk and deserialize it.
    with open(engine_path, 'rb') as f:
        engine_data = f.read()
    engine = trt_runtime.deserialize_cuda_engine(engine_data)
    return engine
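For completeness, a minimal sketch of how I intend to use load_engine to deserialize the saved engine later (assuming the FlattenConcat plugin library must be loaded and the built-in plugins registered before deserialization, as in build_engine.py below):

import ctypes
import tensorrt as trt
import engine

# The custom FlattenConcat plugin must be loaded before deserialization,
# and init_libnvinfer_plugins registers the NMS_TRT/GridAnchor_TRT plugins.
ctypes.CDLL('./libflattenconcat.so')
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
trt.init_libnvinfer_plugins(TRT_LOGGER, '')
trt_runtime = trt.Runtime(TRT_LOGGER)
trt_engine = engine.load_engine(trt_runtime, './ssd_frozen_inference_graph.buf')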
build_engine.py:

import tensorrt as trt
import tensorflow as tf
import engine
import os
import ctypes

def main():
    uff_model_path = './ssd_frozen_inference_graph.uff'
    trt_engine_path = './ssd_frozen_inference_graph.buf'
    # Load the custom FlattenConcat plugin.
    ctypes.CDLL('./libflattenconcat.so')
    # TensorRT logger singleton
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    # Register the built-in plugins; some of them will be needed during inference
    trt.init_libnvinfer_plugins(TRT_LOGGER, '')
    # Initialize runtime needed for loading TensorRT engine from file
    trt_runtime = trt.Runtime(TRT_LOGGER)
    # TRT engine placeholder
    trt_engine = None
    if not os.path.exists(trt_engine_path):
        trt_engine = engine.build_engine(
            uff_model_path, TRT_LOGGER,
            trt_engine_datatype=trt.DataType.FLOAT,
            batch_size=1
        )
        # Save the engine to file
        engine.save_engine(trt_engine, trt_engine_path)

if __name__ == '__main__':
    main()
I run python3 build_engine.py, and the log is:
Building TensorRT engine. This may take few minutes.
python3: nmsPlugin.cpp:135: virtual void nvinfer1::plugin::DetectionOutput::configureWithFormat(const nvinfer1::Dims*, int, const nvinfer1::Dims*, int, nvinfer1::DataType, nvinfer1::PluginFormat, int): Assertion `numPriors * numLocClasses * 4 == inputDims[param.inputOrder[0]].d[0]' failed.
Aborted (core dumped)
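For reference, the failing assertion requires that the tensor at inputOrder[0] has leading dimension numPriors * numLocClasses * 4. A rough arithmetic sketch of what the plugin should expect under my settings (assuming, as in the standard SSD anchor setup, that the first GridAnchor layer emits 3 boxes per cell and the remaining layers 6 each; this is an assumption I have not verified):

# Sketch: expected leading dimension of the loc-data input to the NMS plugin.
feature_map_shapes = [19, 10, 5, 3, 2, 1]   # from the GridAnchor_TRT node
boxes_per_cell = [3, 6, 6, 6, 6, 6]         # assumption: first layer 3, rest 6
num_priors = sum(s * s * b for s, b in zip(feature_map_shapes, boxes_per_cell))
num_loc_classes = 1                         # shareLocation=1
print(num_priors)                           # 1917
print(num_priors * num_loc_classes * 4)     # 7668, the d[0] the assertion checks

So it looks like the tensor that ends up at inputOrder[0] (here index 1, i.e. "Squeeze") does not have this shape. Any help would be appreciated.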