Sample UFF_SSD with ssdlite_mobilenet_v2 model

I am trying to convert the ssdlite_mobilenet_v2 model to UFF and run it on a Jetson TX2 with TensorRT, but building the engine fails, and I need some help.
My environment: JetPack 4.0.1, Ubuntu 18.04, TensorRT 5.0.6.3.

Here are the steps that I performed:

  1. Download the TensorFlow model ssdlite_mobilenet_v2 from http://download.tensorflow.org/models/object_detection/ssdlite_mobilenet_v2_coco_2018_05_09.tar.gz

  2. Unpack the archive to get frozen_inference_graph.pb and rename it to ssd_frozen_inference_graph.pb (a minimal sketch of these two steps follows).
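
For reference, a minimal sketch of steps 1 and 2 in Python, assuming the archive has the usual model-zoo layout with the frozen graph inside a ssdlite_mobilenet_v2_coco_2018_05_09/ directory:

import os
import tarfile
import urllib.request

URL = ('http://download.tensorflow.org/models/object_detection/'
       'ssdlite_mobilenet_v2_coco_2018_05_09.tar.gz')

# Download the archive, extract the frozen graph, and rename it
urllib.request.urlretrieve(URL, 'ssdlite_mobilenet_v2.tar.gz')
with tarfile.open('ssdlite_mobilenet_v2.tar.gz') as tar:
    member = 'ssdlite_mobilenet_v2_coco_2018_05_09/frozen_inference_graph.pb'
    tar.extract(member, path='.')
os.rename(member, './ssd_frozen_inference_graph.pb')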

  3. I wrote a pb2uff.py script to convert the frozen graph to a UFF file:

import ctypes

import graphsurgeon as gs
import tensorflow as tf
import uff

def ssd_unsupported_nodes_to_plugin_nodes(ssd_graph):

    # Find and remove all Assert Tensorflow nodes from the graph
    all_assert_nodes = ssd_graph.find_nodes_by_op("Assert")
    ssd_graph.remove(all_assert_nodes, remove_exclusive_dependencies=True)
    # Find all Identity nodes and forward their inputs
    all_identity_nodes = ssd_graph.find_nodes_by_op("Identity")
    ssd_graph.forward_inputs(all_identity_nodes)

    # Create TRT plugin nodes to replace unsupported ops in Tensorflow graph
    channels = 3
    height = 300
    width = 300

    Input = gs.create_plugin_node(name="Input",
        op="Placeholder",
        dtype=tf.float32,
        shape=[1, channels, height, width])
    PriorBox = gs.create_plugin_node(name="GridAnchor", op="GridAnchor_TRT",
        minSize=0.2,
        maxSize=0.95,
        aspectRatios=[1.0, 2.0, 0.5, 3.0, 0.33],
        variance=[0.1,0.1,0.2,0.2],
        featureMapShapes=[19, 10, 5, 3, 2, 1], 
        numLayers=6
    )
    NMS = gs.create_plugin_node(
        name="NMS",
        op="NMS_TRT",
        shareLocation=1,
        varianceEncodedInTarget=0,
        backgroundLabelId=0,
        confidenceThreshold=1e-8,
        nmsThreshold=0.6,
        topK=100,
        keepTopK=100,
        numClasses=91,
        inputOrder=[1, 0, 2],
        confSigmoid=1,
        isNormalized=1,
        scoreConverter="SIGMOID"
    )
    concat_priorbox = gs.create_node(
        "concat_priorbox",
        op="ConcatV2",
        dtype=tf.float32,
        axis=2
    )
    concat_box_loc = gs.create_plugin_node(
        "concat_box_loc",
        op="FlattenConcat_TRT",
        dtype=tf.float32,
    )
    concat_box_conf = gs.create_plugin_node(
        "concat_box_conf",
        output_nodes='NMS',
        op="FlattenConcat_TRT",
        dtype=tf.float32,
    )

    # Create a mapping of namespace names -> plugin nodes.
    namespace_plugin_map = {
        "MultipleGridAnchorGenerator": PriorBox,
        "Postprocessor": NMS,
        "Preprocessor": Input,
        "ToFloat": Input,
        "image_tensor": Input,
        "MultipleGridAnchorGenerator/Concatenate": concat_priorbox,
        "concat": concat_box_loc,
        "concat_1": concat_box_conf
    }

    # Create a new graph by collapsing namespaces
    ssd_graph.collapse_namespaces(namespace_plugin_map)
    # Remove the outputs, so we just have a single output node (NMS).
    # If remove_exclusive_dependencies is True, the whole graph will be removed!
    ssd_graph.remove(ssd_graph.graph_outputs, remove_exclusive_dependencies=False)
    return ssd_graph

def model_to_uff(model_path, output_uff_path, silent=False):

    dynamic_graph = gs.DynamicGraph(model_path)
    dynamic_graph = ssd_unsupported_nodes_to_plugin_nodes(dynamic_graph)
    
    uff.from_tensorflow(
        dynamic_graph.as_graph_def(),
        output_filename=output_uff_path,
        text=True
    )

def main():
    model_path = './ssd_frozen_inference_graph.pb'
    uff_path = './ssd_frozen_inference_graph.uff'
    # Load the FlattenConcat plugin so the FlattenConcat_TRT op is available
    ctypes.CDLL('./libflattenconcat.so')
    model_to_uff(model_path, uff_path)

if __name__ == '__main__':
    main()

I ran python3 pb2uff.py, and the command-line log is:

WARNING:tensorflow:From /usr/lib/python3.6/dist-packages/graphsurgeon/StaticGraph.py:123: FastGFile.__init__ (from tensorflow.python.platform.gfile) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.gfile.GFile.
WARNING: To create TensorRT plugin nodes, please use the `create_plugin_node` function instead.
UFF Version 0.5.5
=== Automatically deduced input nodes ===
[name: "Input"
op: "Placeholder"
attr {
  key: "dtype"
  value {
    type: DT_FLOAT
  }
}
attr {
  key: "shape"
  value {
    shape {
      dim {
        size: 1
      }
      dim {
        size: 3
      }
      dim {
        size: 300
      }
      dim {
        size: 300
      }
    }
  }
}
]
=========================================

=== Automatically deduced output nodes ===
[name: "NMS"
op: "NMS_TRT"
input: "Input"
input: "Squeeze"
input: "concat_box_conf"
attr {
  key: "backgroundLabelId_u_int"
  value {
    i: 0
  }
}
attr {
  key: "confSigmoid_u_int"
  value {
    i: 1
  }
}
attr {
  key: "confidenceThreshold_u_float"
  value {
    f: 1e-08
  }
}
attr {
  key: "inputOrder_u_ilist"
  value {
    list {
      i: 1
      i: 0
      i: 2
    }
  }
}
attr {
  key: "isNormalized_u_int"
  value {
    i: 1
  }
}
attr {
  key: "keepTopK_u_int"
  value {
    i: 100
  }
}
attr {
  key: "nmsThreshold_u_float"
  value {
    f: 0.6
  }
}
attr {
  key: "numClasses_u_int"
  value {
    i: 91
  }
}
attr {
  key: "scoreConverter_u_str"
  value {
    s: "SIGMOID"
  }
}
attr {
  key: "shareLocation_u_int"
  value {
    i: 1
  }
}
attr {
  key: "topK_u_int"
  value {
    i: 100
  }
}
attr {
  key: "varianceEncodedInTarget_u_int"
  value {
    i: 0
  }
}
]
==========================================

Using output node NMS
Converting to UFF graph
Warning: No conversion function registered for layer: NMS_TRT yet.
Converting NMS as custom op: NMS_TRT
Warning: No conversion function registered for layer: FlattenConcat_TRT yet.
Converting concat_box_conf as custom op: FlattenConcat_TRT
Warning: No conversion function registered for layer: FlattenConcat_TRT yet.
Converting concat_box_loc as custom op: FlattenConcat_TRT
No. nodes: 589
UFF Output written to ./ssd_frozen_inference_graph.uff
UFF Text Output written to ./ssd_frozen_inference_graph.pbtxt
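
Note that the automatically deduced NMS inputs are Input, Squeeze and concat_box_conf, so I am not sure my inputOrder=[1, 0, 2] actually matches. A minimal inspection sketch to re-check which tensors feed the plugin nodes, assuming pb2uff.py from step 3 is importable:

import graphsurgeon as gs

from pb2uff import ssd_unsupported_nodes_to_plugin_nodes

# Re-run the graph surgery and print the inputs of each plugin node,
# to verify the loc/conf/priorbox order behind inputOrder
graph = gs.DynamicGraph('./ssd_frozen_inference_graph.pb')
graph = ssd_unsupported_nodes_to_plugin_nodes(graph)
for op in ('NMS_TRT', 'FlattenConcat_TRT'):
    for node in graph.find_nodes_by_op(op):
        print(node.name, node.op, 'inputs:', list(node.input))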
  4. I wrote engine.py and build_engine.py to build and save a TensorRT engine from the UFF file. First, engine.py:
import tensorrt as trt

def build_engine(uff_model_path, trt_logger, trt_engine_datatype=trt.DataType.FLOAT, batch_size=1, silent=False):
    with trt.Builder(trt_logger) as builder, builder.create_network() as network, trt.UffParser() as parser:
        builder.max_workspace_size = 1 << 30
        if trt_engine_datatype == trt.DataType.HALF:
            builder.fp16_mode = True
        builder.max_batch_size = batch_size

        # Register the model's input/output and parse the UFF graph into the network
        parser.register_input("Input", (3, 300, 300))
        parser.register_output("MarkOutput_0")
        if not parser.parse(uff_model_path, network):
            raise RuntimeError("Failed to parse UFF model: %s" % uff_model_path)

        if not silent:
            print("Building TensorRT engine. This may take few minutes.")

        return builder.build_cuda_engine(network)

def save_engine(engine, engine_dest_path):
    buf = engine.serialize()
    with open(engine_dest_path, 'wb') as f:
        f.write(buf)

def load_engine(trt_runtime, engine_path):
    with open(engine_path, 'rb') as f:
        engine_data = f.read()
    engine = trt_runtime.deserialize_cuda_engine(engine_data)
    return engine

And build_engine.py:

import os
import ctypes

import tensorrt as trt

import engine

def main():

    uff_model_path = './ssd_frozen_inference_graph.uff'
    trt_engine_path = './ssd_frozen_inference_graph.buf'
    # Load the FlattenConcat plugin before parsing the UFF model
    ctypes.CDLL('./libflattenconcat.so')
    
    # TensorRT logger singleton
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    # Load all standard TensorRT plugins; some of them are needed during inference
    trt.init_libnvinfer_plugins(TRT_LOGGER, '')
    # Initialize runtime needed for loading TensorRT engine from file
    trt_runtime = trt.Runtime(TRT_LOGGER)
    # TRT engine placeholder
    trt_engine = None
    
    if not os.path.exists(trt_engine_path):
        trt_engine = engine.build_engine(
            uff_model_path, TRT_LOGGER,
            trt_engine_datatype=trt.DataType.FLOAT,
            batch_size=1
        )
        # Save the engine to file
        engine.save_engine(trt_engine, trt_engine_path)

if __name__ == '__main__':
    main()
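
Once the engine builds, my plan is to load it back and sanity-check the bindings like this (a minimal sketch reusing load_engine from engine.py above; untested, since the build currently fails):

import ctypes

import tensorrt as trt

import engine

ctypes.CDLL('./libflattenconcat.so')
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
trt.init_libnvinfer_plugins(TRT_LOGGER, '')
trt_runtime = trt.Runtime(TRT_LOGGER)

# Deserialize the saved engine and print its bindings as a sanity check
trt_engine = engine.load_engine(trt_runtime, './ssd_frozen_inference_graph.buf')
for i in range(trt_engine.num_bindings):
    print(trt_engine.get_binding_name(i), trt_engine.get_binding_shape(i))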

I ran python3 build_engine.py, and the log is:

Building TensorRT engine. This may take few minutes.
python3: nmsPlugin.cpp:135: virtual void nvinfer1::plugin::DetectionOutput::configureWithFormat(const nvinfer1::Dims*, int, const nvinfer1::Dims*, int, nvinfer1::DataType, nvinfer1::PluginFormat, int): Assertion `numPriors * numLocClasses * 4 == inputDims[param.inputOrder[0]].d[0]' failed.
Aborted (core dumped)
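
To make sense of the assertion, here is a back-of-the-envelope check of numPriors, assuming the GridAnchor plugin generates 3 boxes per cell on the first feature map and len(aspectRatios) + 1 = 6 on each of the others (my reading of the sample, not verified):

feature_map_shapes = [19, 10, 5, 3, 2, 1]
boxes_per_cell = [3, 6, 6, 6, 6, 6]
num_priors = sum(s * s * b for s, b in zip(feature_map_shapes, boxes_per_cell))
print(num_priors)          # 1917
# shareLocation=1, so numLocClasses is 1; the loc input's d[0] should then be
print(num_priors * 1 * 4)  # 7668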

From the assertion, it looks like the input at inputOrder[0] does not have the expected box-location shape, so I suspect my inputOrder=[1, 0, 2] does not match the deduced NMS inputs (Input, Squeeze, concat_box_conf), but I do not know how to fix it.

Any solutions? Thank you!