Inferencing a pretrained custom TensorFlow SSD-MobileNetV2 model using TensorRT and DetectNet

I have converted the pretrained SSD-MobileNetV2 model to a UFF file using the following script. It has 11 classes and the input resolution is 500x500. When I tried to run inference on the model using DetectNet, it works fine and an engine file is created without any issue, but the F1 score of the model decreases from 85% to 75%, even when FP32 is used as the precision, and there is no large difference between the F1 measures of the FP16 and FP32 models. Is this normal for a converted model?

import os
import sys
import tarfile

import requests
import tensorflow as tf
import tensorrt as trt
import graphsurgeon as gs
import uff

def ssd_unsupported_nodes_to_plugin_nodes(ssd_graph):
    """Rewrite TensorFlow ops that the UFF parser cannot handle into TensorRT plugin nodes.

    Takes a graphsurgeon DynamicGraph wrapping a frozen SSD-MobileNetV2
    detection graph and modifies it in place: Assert nodes are stripped, and
    the preprocessing / anchor-generation / postprocessing namespaces are
    collapsed into TensorRT plugin nodes (GridAnchor_TRT, NMS_TRT,
    FlattenConcat_TRT) so the resulting graph can be serialized to UFF.

    Args:
        ssd_graph: gs.DynamicGraph of the frozen inference graph.

    Returns:
        The same graph object, modified in place.
    """
    # Find and remove all Assert TensorFlow nodes from the graph — they are
    # debug-only checks with no TensorRT equivalent.
    all_assert_nodes = ssd_graph.find_nodes_by_op("Assert")
    ssd_graph.remove(all_assert_nodes, remove_exclusive_dependencies=True)
    # Find all MultipleGridAnchorGenerator nodes and forward their inputs
    #all_identity_nodes = ssd_graph.find_nodes_by_op("Identity[0-5]")
    #ssd_graph.forward_inputs(all_identity_nodes)

    # Create TRT plugin nodes to replace unsupported ops in Tensorflow graph.
    # NOTE(review): TensorRT expects CHW layout; the 500x500 values must match
    # the resolution the network was trained/exported with.
    channels = 3
    height = 500
    width = 500

    # Single static input binding (batch size 1) replacing the TF placeholder
    # and preprocessing subgraph.
    Input = gs.create_plugin_node(name="Input",
        op="Placeholder",
        dtype=tf.float32,
        shape=[1, channels, height, width])
    # Anchor-box generator replacing MultipleGridAnchorGenerator.
    # NOTE(review): minSize/maxSize/aspectRatios/featureMapShapes must match
    # the anchor settings of the TF pipeline config — a mismatch here silently
    # degrades detection accuracy; verify against the training config.
    PriorBox = gs.create_plugin_node(name="GridAnchor", op="GridAnchor_TRT",
        minSize=0.2,
        maxSize=0.95,
        aspectRatios=[1.0, 2.0, 0.5, 3.0, 0.33],
        variance=[0.1,0.1,0.2,0.2],
        featureMapShapes=[32, 16, 8, 4, 2, 1], 
        numLayers=6
    )
    # Non-maximum-suppression plugin replacing the TF Postprocessor namespace.
    # numClasses=12 — presumably the 11 object classes plus background
    # (backgroundLabelId=0); confirm against the label map.
    NMS = gs.create_plugin_node(
        name="NMS",
        op="NMS_TRT",
        shareLocation=1,
        varianceEncodedInTarget=0,
        backgroundLabelId=0,
        confidenceThreshold=1e-8,
        nmsThreshold=0.3,
        topK=100,
        keepTopK=100,
        numClasses=12,
        inputOrder=[0, 2, 1],
        confSigmoid=1,
        IsNormalized=1,
        scoreConverter="SIGMOID"
    )
    # Concatenation of the per-layer prior boxes feeding the NMS plugin.
    concat_priorbox = gs.create_node(
        "concat_priorbox",
        op="ConcatV2",
        dtype=tf.float32,
        axis=2
    )
    # Flatten-and-concat of the box location predictions.
    concat_box_loc = gs.create_plugin_node(
        "concat_box_loc",
        op="FlattenConcat_TRT",
        dtype=tf.float32,
        axis=1,
        ignoreBatch=0
    )
    # Flatten-and-concat of the class confidence predictions.
    concat_box_conf = gs.create_plugin_node(
        "concat_box_conf",
        op="FlattenConcat_TRT",
        dtype=tf.float32,
        axis=1,
        ignoreBatch=0
    )
    # BUG FIX: the continuation line below had lost its leading '#' in the
    # original paste, leaving a bare unbalanced-bracket line (SyntaxError).
    # The whole statement is intentionally disabled, so both lines are
    # commented out.
    # NMS.input.extend([tensor.op.name for tensor in
    #     [concat_box_loc, concat_box_conf, concat_priorbox]])

    # Create a mapping of namespace names -> plugin nodes.
    namespace_plugin_map = {
        "Postprocessor": NMS,
        "Preprocessor": Input,
        "ToFloat": Input,
        "image_tensor": Input,
        "MultipleGridAnchorGenerator": PriorBox,
        "Concatenate/concat": concat_priorbox,
        "Squeeze": concat_box_loc,
        "concat_1": concat_box_conf
    }

    # Create a new graph by collapsing namespaces
    ssd_graph.collapse_namespaces(namespace_plugin_map)
    # Remove the outputs, so we just have a single output node (NMS).
    # If remove_exclusive_dependencies is True, the whole graph will be removed!
    ssd_graph.remove(ssd_graph.graph_outputs, remove_exclusive_dependencies=False)
    # Disconnect the Input node from NMS, as it expects to have only 3 inputs
    ssd_graph.find_nodes_by_op('NMS_TRT')[0].input.remove('Input')

    return ssd_graph

def model_to_uff(model_path, output_uff_path, silent=False):
    """Convert a frozen TensorFlow SSD graph to a serialized UFF file.

    Loads the frozen graph, rewrites its unsupported ops into TensorRT plugin
    nodes, then serializes the result with the 'NMS' node as the sole output.

    Args:
        model_path: path to the frozen_inference_graph.pb file.
        output_uff_path: destination path for the .uff file.
        silent: when True, suppress the UFF converter's log output.
    """
    dynamic_graph = gs.DynamicGraph(model_path)
    dynamic_graph = ssd_unsupported_nodes_to_plugin_nodes(dynamic_graph)

    uff.from_tensorflow(
        dynamic_graph.as_graph_def(),
        ['NMS'],
        output_filename=output_uff_path,
        text=False,
        # BUG FIX: `quiet` was hard-coded to False, so the `silent` parameter
        # had no effect. Default (silent=False) behavior is unchanged.
        quiet=silent
    )

def main():
    """Convert the frozen SSD-MobileNetV2 graph below to a UFF file."""
    frozen_graph_path = 'ssd500/frozen_inference_graph.pb'
    output_path = 'ssd_mobilenet_v2_road_500.uff'
    model_to_uff(frozen_graph_path, output_path)


if __name__ == '__main__':
    main()

@dusty_nv, can you help me with this case?

How are you measuring the F1 score? Is it computed from the coordinates of the post-processed bounding boxes, or the raw outputs from the model? If the former, the clustering that I implemented is probably not the same as what was being done in TensorFlow (NMS). Also, you will want to check that the pre-processing is being done in the same way (i.e. using the same pixel normalization coefficients, any scaling/cropping, etc.).

@dusty_nv I use the post-processed output coordinates of the bounding boxes that get returned by the detection Python function in detectnet.py (the first one you mentioned). So is there any workaround so that I can get a higher F1 score?

You could try re-implementing my clustering (which is optimized for speed) with the same algorithm that TF is using. Here is where it is in my code:

https://github.com/dusty-nv/jetson-inference/blob/2fb798e3e4895b51ce7315826297cf321f4bd577/c/detectNet.cpp#L902

@dusty_nv Thank you for the reply. I’ll try that method. Can you also direct me to the location of the preprocessing in your repository?

The preprocessing is found here: https://github.com/dusty-nv/jetson-inference/blob/2fb798e3e4895b51ce7315826297cf321f4bd577/c/detectNet.cpp#L716