How to adapt a TensorFlow Object Detection model trained on a custom dataset to DeepStream 5.0

Please provide complete information as applicable to your setup.
**• Hardware Platform (Jetson / GPU) GPU
**• DeepStream Version 5.0
**• JetPack Version (valid for Jetson only) None
**• TensorRT Version Same as deepstream 5.0 devel docker
**• NVIDIA GPU Driver Version (valid for GPU only) Same as deepstream 5.0 devel docker
First, I successfully ran the SSD example in DeepStream from /opt/nvidia/deepstream/deepstream-5.0/sources/objectDetector_SSD with ssd_inception_v2_coco_2017_11_17.tar.gz (91-class COCO). Then I wanted to retrain the model on my custom dataset (2 classes), using the TensorFlow Object Detection API (GitHub - tensorflow/models: Models and examples built with TensorFlow; version r1.13.0, TF 1.14). I retrained and exported the model successfully and obtained a trained model.
My ssd_inception_v2_coco config

model {
ssd {
num_classes: 2
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
}
}
similarity_calculator {
iou_similarity {
}
}
anchor_generator {
ssd_anchor_generator {
num_layers: 6
min_scale: 0.2
max_scale: 0.95
aspect_ratios: 1.0
aspect_ratios: 2.0
aspect_ratios: 0.5
aspect_ratios: 3.0
aspect_ratios: 0.3333
reduce_boxes_in_lowest_layer: true
}
}
image_resizer {
fixed_shape_resizer {
height: 300
width: 300
}
}
box_predictor {
convolutional_box_predictor {
min_depth: 0
max_depth: 0
num_layers_before_predictor: 0
use_dropout: true
dropout_keep_probability: 0.8
kernel_size: 3
box_code_size: 4
apply_sigmoid_to_scores: false
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.001
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
}
}
}
feature_extractor {
type: ‘ssd_inception_v2’
min_depth: 16
depth_multiplier: 1.0
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.001
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
train: true,
scale: true,
center: true,
decay: 0.9997,
epsilon: 0.001,
}
}
override_base_feature_extractor_hyperparams: true
}
loss {
classification_loss {
weighted_sigmoid {
}
}
localization_loss {
weighted_smooth_l1 {
}
}
hard_example_miner {
num_hard_examples: 3000
iou_threshold: 0.99
loss_type: CLASSIFICATION
max_negatives_per_positive: 3
min_negatives_per_image: 0
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}

train_config: {
batch_size: 16
optimizer {
rms_prop_optimizer: {
learning_rate: {
exponential_decay_learning_rate {
initial_learning_rate: 0.004
decay_steps: 800720
decay_factor: 0.95
}
}
momentum_optimizer_value: 0.9
decay: 0.9
epsilon: 1.0
}
}
fine_tune_checkpoint: “/home/pham_t/Detection/models/research/pretrained_model/model.ckpt”
from_detection_checkpoint: true

# Note: The below line limits the training process to 200K steps, which we
# empirically found to be sufficient enough to train the pets dataset. This
# effectively bypasses the learning rate schedule (the learning rate will
# never decay). Remove the below line to train indefinitely.

num_steps: 2000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
ssd_random_crop {
}
}
}

train_input_reader: {
tf_record_input_reader {
input_path: “/home/pham_t/Detection/data/train_labels.record”
}
label_map_path: “/home/pham_t/Detection/data/label_map.pbtxt”
}

eval_config: {
num_examples: 1839

# Note: The below line limits the evaluation process to 10 evaluations.
# Remove the below line to evaluate indefinitely.

max_evals: 10
}

eval_input_reader: {
tf_record_input_reader {
input_path: “/home/pham_t/Detection/data/test_labels.record”
}
label_map_path: “/home/pham_t/Detection/data/label_map.pbtxt”
shuffle: false
num_readers: 1
}

I ran inference with the trained model successfully and got good results on the custom dataset, so I want to adapt the trained model to DeepStream.
I converted it to a UFF model with: python3 /usr/lib/python2.7/dist-packages/uff/bin/convert_to_uff.py frozen_inference_graph.pb -O NMS -p /usr/src/tensorrt/samples/sampleUffSSD/config.py -o model.uff. The only change I made in /usr/src/tensorrt/samples/sampleUffSSD/config.py was setting the number of classes to 3 (numClasses=3):

import graphsurgeon as gs
import tensorflow as tf

Input = gs.create_node(“Input”,
op=“Placeholder”,
dtype=tf.float32,
shape=[1, 3, 300, 300])
PriorBox = gs.create_plugin_node(name=“GridAnchor”, op=“GridAnchor_TRT”,
numLayers=6,
minSize=0.2,
maxSize=0.95,
aspectRatios=[1.0, 2.0, 0.5, 3.0, 0.33],
variance=[0.1,0.1,0.2,0.2],
featureMapShapes=[19, 10, 5, 3, 2, 1])
NMS = gs.create_plugin_node(name=“NMS”, op=“NMS_TRT”,
shareLocation=1,
varianceEncodedInTarget=0,
backgroundLabelId=0,
confidenceThreshold=1e-8,
nmsThreshold=0.6,
topK=100,
keepTopK=100,
numClasses=3,
inputOrder=[0, 2, 1],
confSigmoid=1,
isNormalized=1)
concat_priorbox = gs.create_node(name=“concat_priorbox”, op=“ConcatV2”, dtype=tf.float32, axis=2)
concat_box_loc = gs.create_plugin_node(“concat_box_loc”, op=“FlattenConcat_TRT”, dtype=tf.float32, axis=1, ignoreBatch=0)
concat_box_conf = gs.create_plugin_node(“concat_box_conf”, op=“FlattenConcat_TRT”, dtype=tf.float32, axis=1, ignoreBatch=0)

namespace_plugin_map = {
“MultipleGridAnchorGenerator”: PriorBox,
“Postprocessor”: NMS,
“Preprocessor”: Input,
“ToFloat”: Input,
“image_tensor”: Input,
“MultipleGridAnchorGenerator/Concatenate”: concat_priorbox,
“MultipleGridAnchorGenerator/Identity”: concat_priorbox,
“concat”: concat_box_loc,
“concat_1”: concat_box_conf
}

def preprocess(dynamic_graph):
# Now create a new graph by collapsing namespaces
dynamic_graph.collapse_namespaces(namespace_plugin_map)
# Remove the outputs, so we just have a single output node (NMS).
dynamic_graph.remove(dynamic_graph.graph_outputs, remove_exclusive_dependencies=False

After generating the UFF model, I tried to use it in DeepStream, but I got the following errors:

ERROR: …/nvdsinfer/nvdsinfer_func_utils.cpp:31 [TRT]: Parameter check failed at: …/builder/Layers.h::setAxis::367, condition: axis >= 0
ERROR: …/nvdsinfer/nvdsinfer_func_utils.cpp:31 [TRT]: Concatenate/concat: all concat input tensors must have the same dimensions except on the concatenation axis (0), but dimensions mismatched at input 1 at index 1. Input 0 shape: [2,7668,1], Input 1 shape: [2,4332,1]
ERROR: …/nvdsinfer/nvdsinfer_func_utils.cpp:31 [TRT]: Concatenate/concat: all concat input tensors must have the same dimensions except on the concatenation axis (0), but dimensions mismatched at input 1 at index 1. Input 0 shape: [2,7668,1], Input 1 shape: [2,4332,1]
ERROR: …/nvdsinfer/nvdsinfer_func_utils.cpp:31 [TRT]: Concatenate/concat: all concat input tensors must have the same dimensions except on the concatenation axis (0), but dimensions mismatched at input 1 at index 1. Input 0 shape: [2,7668,1], Input 1 shape: [2,4332,1]
ERROR: …/nvdsinfer/nvdsinfer_func_utils.cpp:31 [TRT]: Concatenate/concat: all concat input tensors must have the same dimensions except on the concatenation axis (0), but dimensions mismatched at input 1 at index 1. Input 0 shape: [2,7668,1], Input 1 shape: [2,4332,1]
ERROR: …/nvdsinfer/nvdsinfer_func_utils.cpp:31 [TRT]: Concatenate/concat: all concat input tensors must have the same dimensions except on the concatenation axis (0), but dimensions mismatched at input 1 at index 1. Input 0 shape: [2,7668,1], Input 1 shape: [2,4332,1]
ERROR: …/nvdsinfer/nvdsinfer_func_utils.cpp:31 [TRT]: UffParser: Parser error: BoxPredictor_0/ClassPredictor/BiasAdd: The input to the Scale Layer is required to have a minimum of 3 dimensions.
ERROR: …/nvdsinfer/nvdsinfer_model_builder.cpp:370 Failed to parse UFF file: /root/ssd_deepstream/model.uff, incorrect file or incorrect input/output blob names
ERROR: …/nvdsinfer/nvdsinfer_model_builder.cpp:971 failed to build network since parsing model errors.
ERROR: …/nvdsinfer/nvdsinfer_model_builder.cpp:872 failed to build network.

My infer_primary_ssd config

[pipeline]
height=720
width=1280
fd_height=360
fd_width=640
batch_size=1
framerate=30

[property]
gpu-id=0
net-scale-factor=0.0039215697906911373

onnx-file=models/model.onnx

model-engine-file=models/mm_ssd_b1_fp32.engine
labelfile-path=coco_ssd.txt
src_uri=file:///root/ssd_deepstream/video/gun.mp4
uff-file=model.uff
uff-input-dims=3;300;300;0
uff-input-blob-name=Input

# 0=Group Rectangles, 1=DBSCAN, 2=NMS, 3=DBSCAN+NMS Hybrid, 4=None (no clustering)

cluster-mode=4
parse-bbox-func-name=NvDsInferParseCustomSSD
custom-lib-path=libs/libnvds_infermmparser.so

batch-size=1
process-mode=1
model-color-format=0

# 0=FP32, 1=INT8, 2=FP16 mode

network-mode=0
num-detected-classes=2
interval=0
gie-unique-id=1
output-blob-names=MarkOutput_0

The difference between my trained model and the original model is num_classes. Can anyone give me some recommendations to fix this error? Thanks!

My trained model .pb: https://drive.google.com/file/d/11QdMl_n5UmQ0Iior3Jb4wMXtoF_twwDu/view?usp=sharing

Hi,

It looks like this issue comes from TensorRT rather than DeepStream.
Would you mind trying your model with trtexec first to see whether the same error occurs?

$ /usr/src/tensorrt/bin/trtexec [your model config]

Thanks.

Hi @AastaLLL, Thank you for your feedback.
I ran /usr/src/tensorrt/bin/trtexec --uff=model.uff and got this error:
image
My CUDA version:
image
My TensorRT version:


Do you know how to solve it? Maybe I need to downgrade TensorRT to version 6.
Thank you!

I tried /usr/src/tensorrt/bin/trtexec with the ONNX format and it works, so the problem is specific to the UFF format.

hi @AastaLLL. Can you check it? Thank you!

Hi,

A UFF model requires more input arguments than an ONNX model.
Please specify the --uffInput and --output arguments as well.
https://github.com/NVIDIA/TensorRT/tree/master/samples/opensource/trtexec
For example:

$ /usr/src/tensorrt/bin/trtexec --uff=./sample_ssd_relu6.uff --uffInput=Input,3,300,300 --output=NMS

Thanks.

hi @AastaLLL. The issue is from TensorRT.


Can you kindly check it!

Hi @AastaLLL. Do you have any update? Thank you!

Hi,

Could you first try the config.py shared in this comment to see if it works?

Thanks.

hi @AastaLLL. I already used your config but the above error still happens. Can you kindly check it? My trained model .pb: https://drive.google.com/file/d/11QdMl_n5UmQ0Iior3Jb4wMXtoF_twwDu/view?usp=sharing

HI,

We are checking this for you.
How many classes are you training for (including the background class)?

Thanks.

Thanks @AastaLLL
I trained with 3 classes (including background class)

Hi,

There are some extra auxiliary input tensors in your model.
This requires a corresponding update to config.py to make it compatible.

We can run the model with this config.py.txt (2.6 KB) without issue.
Please help to give it a try.

$ sudo python3 /usr/lib/python3.6/dist-packages/uff/bin/convert_to_uff.py frozen_inference_graph.pb -o sample_ssd_relu6.uff -O NMS -p config.py
$ /usr/src/tensorrt/bin/trtexec --uff=./sample_ssd_relu6.uff --uffInput=Input,3,300,300 --output=NMS

Thanks.

Hi @AastaLLL. It now runs successfully. Thank you for your help!

I modified the input size when I fine-tuned the model.
fixed_shape_resizer {
height: 600
width: 600
and with same config.py:
import graphsurgeon as gs
import tensorflow as tf

Input = gs.create_node(“Input”,
op=“Placeholder”,
dtype=tf.float32,
shape=[1, 3, 600, 600])
PriorBox = gs.create_plugin_node(name=“GridAnchor”, op=“GridAnchor_TRT”,
numLayers=6,
minSize=0.2,
maxSize=0.95,
aspectRatios=[1.0, 2.0, 0.5, 3.0, 0.33],
variance=[0.1,0.1,0.2,0.2],
featureMapShapes=[19, 10, 5, 3, 2, 1])
NMS = gs.create_plugin_node(name=“NMS”, op=“NMS_TRT”,
shareLocation=1,
varianceEncodedInTarget=0,
backgroundLabelId=0,
confidenceThreshold=1e-8,
nmsThreshold=0.6,
topK=100,
keepTopK=100,
numClasses=32,
inputOrder=[0, 2, 1],
confSigmoid=1,
isNormalized=1)
concat_priorbox = gs.create_node(name=“concat_priorbox”, op=“ConcatV2”, dtype=tf.float32, axis=2)
concat_box_loc = gs.create_plugin_node(“concat_box_loc”, op=“FlattenConcat_TRT”, dtype=tf.float32, axis=1, ignoreBatch=0)
concat_box_conf = gs.create_plugin_node(“concat_box_conf”, op=“FlattenConcat_TRT”, dtype=tf.float32, axis=1, ignoreBatch=0)

namespace_plugin_map = {
“MultipleGridAnchorGenerator”: PriorBox,
“Postprocessor”: NMS,
“Preprocessor”: Input,
“Cast”: Input,
“ToFloat”: Input,
“image_tensor”: Input,
“MultipleGridAnchorGenerator/Concatenate”: concat_priorbox,
“MultipleGridAnchorGenerator/Identity”: concat_priorbox,
“Concatenate/concat”: concat_priorbox,
“concat”: concat_box_loc,
“concat_1”: concat_box_conf
}

def preprocess(dynamic_graph):

dynamic_graph.remove(dynamic_graph.find_nodes_by_path(namespace_remove), remove_exclusive_dependencies=False)

# Now create a new graph by collapsing namespaces
dynamic_graph.collapse_namespaces(namespace_plugin_map)
# Remove the outputs, so we just have a single output node (NMS).
dynamic_graph.remove(dynamic_graph.graph_outputs, remove_exclusive_dependencies=False)

Squeeze = dynamic_graph.find_nodes_by_op('Squeeze')
dynamic_graph.forward_inputs(Squeeze)
dynamic_graph.find_nodes_by_op("NMS_TRT")[0].input.remove("Input")

Then I get the following error:

[07/22/2021-14:19:16] [I] [TRT] Some tactics do not have sufficient workspace memory to run. Increasing workspace size may increase performance, please check verbose output.
[07/22/2021-14:20:42] [I] [TRT] Detected 1 inputs and 2 output network tensors.
#assertionnmsPlugin.cpp,249
Aborted (core dumped)
Do the relevant parameters in config.py need to be adjusted?

Hi 407139362,

Please open a new topic for this issue. Thanks.