python3: TensorRT nmsPlugin.cpp:54: error

I am trying to run ssdlite_mobilenet_v2_coco on a Jetson Nano. I was able to run ssd_inception_v2_coco (2017), ssd_mobilenet_v1_coco, and ssd_mobilenet_v2_coco successfully by following the https://github.com/taka-wang/trt_example repo. But when I try to convert the .pb file of the ssdlite_mobilenet_v2_coco model to tmp.uff and tmp.bin, I get this error:

python3: nmsPlugin.cpp:54: virtual nvinfer1::Dims nvinfer1::plugin::DetectionOutput::getOutputDimensions(int, const nvinfer1::Dims*, int): Assertion `nbInputDims == 3' failed.
Aborted (core dumped)

This is the config file for ssdlite:

import graphsurgeon as gs

path = 'frozen_inference_graph.pb'
TRTbin = 'tmp.bin'
output_name = ['NMS']
dims = [3,300,300]
layout = 7
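# 7 values per NMS detection: image id, label, confidence, xmin, ymin, xmax, ymax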

def add_plugin(graph):
    all_assert_nodes = graph.find_nodes_by_op("Assert")
    graph.remove(all_assert_nodes, remove_exclusive_dependencies=True)

    all_identity_nodes = graph.find_nodes_by_op("Identity")
    graph.forward_inputs(all_identity_nodes)

    Input = gs.create_plugin_node(
        name="Input",
        op="Placeholder",
        shape=[1, 3, 300, 300]
    )

    PriorBox = gs.create_plugin_node(
        name="GridAnchor",
        op="GridAnchor_TRT",
        minSize=0.2,
        maxSize=0.95,
        aspectRatios=[1.0, 2.0, 0.5, 3.0, 0.33],
        variance=[0.1, 0.1, 0.2, 0.2],
        featureMapShapes=[19, 10, 5, 3, 2, 1],
        numLayers=6
    )

    NMS = gs.create_plugin_node(
        name="NMS",
        op="NMS_TRT",
        shareLocation=1,
        varianceEncodedInTarget=0,
        backgroundLabelId=0,
        confidenceThreshold=1e-8,
        nmsThreshold=0.6,
        topK=100,
        keepTopK=100,
        numClasses=91,
        inputOrder=[1, 2, 0],  # my change
        confSigmoid=1,
        isNormalized=1
    )

    concat_priorbox = gs.create_node(
        "concat_priorbox",
        op="ConcatV2",
        axis=2
    )

    concat_box_loc = gs.create_plugin_node(
        "concat_box_loc",
        op="FlattenConcat_TRT",
    )

    concat_box_conf = gs.create_plugin_node(
        "concat_box_conf",
        op="FlattenConcat_TRT",
    )

    namespace_plugin_map = {
        "MultipleGridAnchorGenerator": PriorBox,
        "Postprocessor": NMS,
        "Preprocessor": Input,
        "ToFloat": Input,
        "image_tensor": Input,
        "Concatenate": concat_priorbox,
        "concat": concat_box_loc,
        "concat_1": concat_box_conf
    }

    graph.collapse_namespaces(namespace_plugin_map)
    graph.remove(graph.graph_outputs, remove_exclusive_dependencies=False)
    graph.find_nodes_by_op("NMS_TRT")[0].input.remove("Input")

    return graph
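
For context: the nbInputDims == 3 assertion is raised by the NMS plugin (DetectionOutput), which expects exactly three input tensors (box locations, class confidences, prior boxes). If the namespace collapse above wires a different number of inputs into the NMS node, the assertion fires in getOutputDimensions. A minimal sketch to inspect what actually ends up connected (it reuses path and add_plugin from this config file; the check itself is not part of the repo):

import graphsurgeon as gs

# Build the modified graph the same way convert.py does, then look at the NMS node.
graph = add_plugin(gs.DynamicGraph(path))
nms_node = graph.find_nodes_by_op("NMS_TRT")[0]
print("NMS inputs:", list(nms_node.input))  # the plugin asserts there are exactly 3 of these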

Hi,

The output layers of ssdlite_mobilenet_v2 and ssd_mobilenet_v2 are different.
Please update the output name to 'BoxPredictor_0/ClassPredictor_depthwise/Relu6'.

Here is the change we use for the model ssdlite_mobilenet_v2:

diff --git a/config/model_ssd_mobilenet_v2_coco_2018_03_29.py b/config/model_ssd_mobilenet_v2_coco_2018_03_29.py
index 3c9f3b8..7d8ff82 100644
--- a/config/model_ssd_mobilenet_v2_coco_2018_03_29.py
+++ b/config/model_ssd_mobilenet_v2_coco_2018_03_29.py
@@ -1,8 +1,8 @@
 import graphsurgeon as gs
 
-path = 'model/ssd_mobilenet_v2_coco_2018_03_29/frozen_inference_graph.pb'
-TRTbin = 'TRT_ssd_mobilenet_v2_coco_2018_03_29.bin'
-output_name = ['NMS']
+path = 'model/ssdlite_mobilenet_v2_coco_2018_05_09/frozen_inference_graph.pb'
+TRTbin = 'TRT_ssdlite_mobilenet_v2_coco_2018_05_09.bin'
+output_name = ['BoxPredictor_0/ClassPredictor_depthwise/Relu6']
 dims = [3,300,300]
 layout = 7
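
To double-check that this node actually exists in your frozen graph before converting, one option (a small sketch using graphsurgeon, not part of the repo) is to dump the matching node names:

import graphsurgeon as gs

# List the ops/names under BoxPredictor_0 so you can confirm the suggested output node is present.
graph = gs.DynamicGraph('model/ssdlite_mobilenet_v2_coco_2018_05_09/frozen_inference_graph.pb')
for node in graph.as_graph_def().node:
    if 'BoxPredictor_0' in node.name:
        print(node.op, node.name)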

Thanks.

I tried what you suggested, but the conversion ends with the traceback below. For the other three models mentioned in my previous post, it runs successfully.

Traceback (most recent call last):
File "convert.py", line 88, in
cuda.memcpy_dtoh_async(host_outputs[1], cuda_outputs[1], stream)
IndexError: list index out of range

Below is the convert.py file:

import os
import sys
import cv2
import time
import ctypes
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda

import uff
import tensorrt as trt
import graphsurgeon as gs
#from config import model_ssd_inception_v2_coco_2017_11_17 as model
#from config import model_ssd_mobilenet_v1_coco_2018_01_28 as model
#from config import model_ssd_mobilenet_v2_coco_2018_03_29 as model
from config import model_ssdlite_mobilenet_v2_coco_2018_05_09 as model
import utils.coco as coco_utils # COCO dataset descriptors
COCO_LABELS = coco_utils.COCO_CLASSES_LIST # change this

ctypes.CDLL("lib/libflattenconcat.so")

# initialize
TRT_LOGGER = trt.Logger(trt.Logger.INFO)
trt.init_libnvinfer_plugins(TRT_LOGGER, '')
runtime = trt.Runtime(TRT_LOGGER)

# compile model into TensorRT
if not os.path.isfile(model.TRTbin):
    dynamic_graph = model.add_plugin(gs.DynamicGraph(model.path))
    uff_model = uff.from_tensorflow(dynamic_graph.as_graph_def(), model.output_name, output_filename='tmp.uff')

    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
        builder.max_workspace_size = 1 << 28
        builder.max_batch_size = 1
        builder.fp16_mode = True

        parser.register_input('Input', model.dims)
        parser.register_output('MarkOutput_0')
        parser.parse('tmp.uff', network)
        engine = builder.build_cuda_engine(network)

        buf = engine.serialize()
        with open(model.TRTbin, 'wb') as f:
            f.write(buf)

# create engine
with open(model.TRTbin, 'rb') as f:
    buf = f.read()
    engine = runtime.deserialize_cuda_engine(buf)

# create buffer
host_inputs = []
cuda_inputs = []
host_outputs = []
cuda_outputs = []
bindings = []
stream = cuda.Stream()

for binding in engine:
    size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
    host_mem = cuda.pagelocked_empty(size, np.float32)
    cuda_mem = cuda.mem_alloc(host_mem.nbytes)

    bindings.append(int(cuda_mem))
    if engine.binding_is_input(binding):
        host_inputs.append(host_mem)
        cuda_inputs.append(cuda_mem)
    else:
        host_outputs.append(host_mem)
        cuda_outputs.append(cuda_mem)

context = engine.create_execution_context()

# inference

#TODO enable video pipeline
#TODO using pyCUDA for preprocess
ori = cv2.imread(sys.argv[1])
image = cv2.cvtColor(ori, cv2.COLOR_BGR2RGB)
image = cv2.resize(image, (model.dims[2],model.dims[1]))
image = (2.0/255.0) * image - 1.0
image = image.transpose((2, 0, 1))
np.copyto(host_inputs[0], image.ravel())

start_time = time.time()
cuda.memcpy_htod_async(cuda_inputs[0], host_inputs[0], stream)
context.execute_async(bindings=bindings, stream_handle=stream.handle)
cuda.memcpy_dtoh_async(host_outputs[1], cuda_outputs[1], stream)
cuda.memcpy_dtoh_async(host_outputs[0], cuda_outputs[0], stream)
stream.synchronize()
print("execute times "+str(time.time()-start_time))

output = host_outputs[0]
height, width, channels = ori.shape
for i in range(int(len(output)/model.layout)):
    prefix = i*model.layout
    index = int(output[prefix+0])
    label = int(output[prefix+1])
    conf = output[prefix+2]
    xmin = int(output[prefix+3]*width)
    ymin = int(output[prefix+4]*height)
    xmax = int(output[prefix+5]*width)
    ymax = int(output[prefix+6]*height)

    if conf > 0.7:
        print("Detected {} with confidence {}".format(COCO_LABELS[label], "{0:.0%}".format(conf)))
        cv2.rectangle(ori, (xmin,ymin), (xmax, ymax), (0,0,255), 3)
        cv2.putText(ori, COCO_LABELS[label], (xmin+10,ymin+10), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)

cv2.imwrite("result.jpg", ori)
cv2.imshow("result", ori)
cv2.waitKey(0)
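
For reference, a likely explanation of the IndexError above (not confirmed in this thread): with output_name set to a single node, the parsed engine exposes only one output binding, so host_outputs has length 1 and host_outputs[1] does not exist. A minimal diagnostic sketch, assuming engine and host_outputs were created as in the script above:

# Print every binding and whether it is an input, to see how many output bindings the engine really has.
for idx, binding in enumerate(engine):
    kind = "input" if engine.binding_is_input(binding) else "output"
    print(idx, kind, binding, engine.get_binding_shape(binding))
print("number of host outputs:", len(host_outputs))

If only one output binding shows up, the cuda.memcpy_dtoh_async(host_outputs[1], ...) call at line 88 will raise exactly this error.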

Hi @kanakmanjari,
I encountered the same error. Were you able to solve the problem?

I followed your change for the ssdlite_mobilenet_v2 model, but I got the wrong result below. Can you check it? Thanks!

=== Automatically deduced input nodes ===
[name: "Input"
op: "Placeholder"
attr {
  key: "dtype"
  value {
    type: DT_FLOAT
  }
}
attr {
  key: "shape"
  value {
    shape {
      dim {
        size: 1
      }
      dim {
        size: 3
      }
      dim {
        size: 300
      }
      dim {
        size: 300
      }
    }
  }
}
]
=========================================

Using output node BoxPredictor_0/ClassPredictor_depthwise/Relu6
Converting to UFF graph
DEBUG [/usr/lib/python3.6/dist-packages/uff/converters/tensorflow/converter.py:96] Marking ['BoxPredictor_0/ClassPredictor_depthwise/Relu6'] as outputs
No. nodes: 231
UFF Output written to tmp.uff
[TensorRT] INFO: Some tactics do not have sufficient workspace memory to run. Increasing workspace size may increase performance, please check verbose output.
[TensorRT] INFO: Detected 1 inputs and 1 output network tensors.
infer times 0.00864863395690918
Detected bicycle with confidence 84%
Detected unlabeled with confidence 79%
Detected unlabeled with confidence 138%
Detected unlabeled with confidence 81%
Detected unlabeled with confidence 147%
Detected unlabeled with confidence 171%
Detected car with confidence 130%
Detected person with confidence 252%
Detected person with confidence 90%
Detected person with confidence 76%
Detected person with confidence 230%
[... about 170 further detections omitted, almost all "unlabeled", with confidence values ranging from 71% up to 384% ...]
Detected bicycle with confiden