I have trained a 512x512 ssd-inception-v2 model with TensorFlow and also converted its .pb file into UFF format. The problem is that an error occurs when running this UFF model:
Begin parsing model...
End parsing model...
Begin building engine...
sample_uff_ssd: nmsPlugin.cpp:135: virtual void nvinfer1::plugin::DetectionOutput::configureWithFormat(const nvinfer1::Dims*, int, const nvinfer1::Dims*, int, nvinfer1::DataType, nvinfer1::PluginFormat, int): Assertion `numPriors * numLocClasses * 4 == inputDims[param.inputOrder[0]].d[0]' failed.
Aborted (core dumped)
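For context, that assertion compares the number of priors produced by the GridAnchor plugin against the flattened size of the tensor wired into the loc input of NMS. A rough back-of-the-envelope check of my numbers (a sketch, assuming the standard SSD layout of 3 boxes per cell on the lowest feature map when reduce_boxes_in_lowest_layer is true, and 6 on the others):

# Back-of-the-envelope check of the NMS assertion (assumption: 3 boxes
# per cell on the first feature map, 6 on the rest).
feature_map_shapes = [33, 18, 9, 6, 4, 2]   # values from my config.py
boxes_per_cell = [3, 6, 6, 6, 6, 6]

num_priors = sum(s * s * b for s, b in zip(feature_map_shapes, boxes_per_cell))
print(num_priors)      # 6033

# With shareLocation=1, numLocClasses == 1, so the assertion reduces to
#   num_priors * 4 == inputDims[param.inputOrder[0]].d[0]
print(num_priors * 4)  # 24132 -- the loc input's leading dimension must match this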
I know that the official sample in sampleUffSSD is a 300x300 model, so I changed the properties in config.py. Here is my config file:
import graphsurgeon as gs
import tensorflow as tf

Input = gs.create_node("Input",
    op="Placeholder",
    dtype=tf.float32,
    shape=[1, 3, 512, 512])

PriorBox = gs.create_plugin_node(name="GridAnchor", op="GridAnchor_TRT",
    numLayers=6,
    minSize=0.2,
    maxSize=0.95,
    aspectRatios=[1.0, 2.0, 0.5, 3.0, 0.33],
    variance=[0.1, 0.1, 0.2, 0.2],
    # featureMapShapes=[19, 10, 5, 3, 2, 1])
    featureMapShapes=[33, 18, 9, 6, 4, 2])

NMS = gs.create_plugin_node(name="NMS", op="NMS_TRT",
    shareLocation=1,
    varianceEncodedInTarget=0,
    backgroundLabelId=0,
    confidenceThreshold=1e-8,
    nmsThreshold=0.6,
    topK=100,
    keepTopK=100,
    numClasses=9,
    inputOrder=[0, 2, 1],
    # inputOrder=[0, 1, 2],
    confSigmoid=1,
    isNormalized=1,
    scoreConverter="SIGMOID")

concat_priorbox = gs.create_node(name="concat_priorbox", op="ConcatV2", dtype=tf.float32, axis=2)

concat_box_loc = gs.create_plugin_node("concat_box_loc", op="FlattenConcat_TRT", dtype=tf.float32, axis=1,
    ignoreBatch=0)

concat_box_conf = gs.create_plugin_node("concat_box_conf", op="FlattenConcat_TRT", dtype=tf.float32, axis=1,
    ignoreBatch=0)

namespace_plugin_map = {
    "MultipleGridAnchorGenerator": PriorBox,
    "Postprocessor": NMS,
    "Preprocessor": Input,
    # "ToFloat": Input,
    # "image_tensor": Input,
    "MultipleGridAnchorGenerator/Concatenate": concat_priorbox,
    # "Concatenate/concat": concat_priorbox,
    "concat": concat_box_loc,
    "concat_1": concat_box_conf,
}

namespace_remove = {
    "ToFloat",
    "image_tensor",
    "Preprocessor/map/TensorArrayStack_1/TensorArrayGatherV3",
}

def preprocess(dynamic_graph):
    # Remove the unrelated or error layers.
    dynamic_graph.remove(dynamic_graph.find_nodes_by_path(namespace_remove), remove_exclusive_dependencies=False)
    # Now create a new graph by collapsing namespaces.
    dynamic_graph.collapse_namespaces(namespace_plugin_map)
    # Remove the outputs, so we just have a single output node (NMS).
    dynamic_graph.remove(dynamic_graph.graph_outputs, remove_exclusive_dependencies=False)
    # Remove the Squeeze to avoid "Assertion `isPlugin(layerName)' failed".
    Squeeze = dynamic_graph.find_node_inputs_by_name(dynamic_graph.graph_outputs[0], 'Squeeze')
    dynamic_graph.forward_inputs(Squeeze)
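For what it's worth, the commented-out featureMapShapes=[19, 10, 5, 3, 2, 1] from the 300x300 sample follow a simple ceil-division pattern, so here is how I sanity-check candidate values for 512x512 (a sketch, assuming 'SAME' padding with a stride-16 first feature map followed by stride-2 reductions; the real sizes should be read off the frozen graph):

import math

# Expected SSD feature-map sizes for a square input, assuming 'SAME'
# padding, a stride-16 first feature map, and stride-2 reductions after
# that (an assumption -- verify against the actual graph).
def expected_feature_map_shapes(input_size, num_layers=6, first_stride=16):
    shapes = []
    size = math.ceil(input_size / first_stride)
    for _ in range(num_layers):
        shapes.append(size)
        size = math.ceil(size / 2)
    return shapes

print(expected_feature_map_shapes(300))  # [19, 10, 5, 3, 2, 1] -- matches the sample
print(expected_feature_map_shapes(512))  # [32, 16, 8, 4, 2, 1]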
And this is my training pipeline config:
# SSD with Inception v2 configuration for MSCOCO Dataset.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.

model {
  ssd {
    num_classes: 8
    box_coder {
      faster_rcnn_box_coder {
        y_scale: 10.0
        x_scale: 10.0
        height_scale: 5.0
        width_scale: 5.0
      }
    }
    matcher {
      argmax_matcher {
        matched_threshold: 0.5
        unmatched_threshold: 0.5
        ignore_thresholds: false
        negatives_lower_than_unmatched: true
        force_match_for_each_row: true
      }
    }
    similarity_calculator {
      iou_similarity {
      }
    }
    anchor_generator {
      ssd_anchor_generator {
        num_layers: 6
        min_scale: 0.2
        max_scale: 0.95
        aspect_ratios: 1.0
        aspect_ratios: 2.0
        aspect_ratios: 0.5
        aspect_ratios: 3.0
        aspect_ratios: 0.3333
        reduce_boxes_in_lowest_layer: true
      }
    }
    image_resizer {
      fixed_shape_resizer {
        height: 300
        width: 300
      }
    }
    box_predictor {
      convolutional_box_predictor {
        min_depth: 0
        max_depth: 0
        num_layers_before_predictor: 0
        use_dropout: true
        dropout_keep_probability: 0.5
        kernel_size: 3
        box_code_size: 4
        apply_sigmoid_to_scores: false
        conv_hyperparams {
          activation: RELU_6,
          regularizer {
            l2_regularizer {
              weight: 0.00004
            }
          }
          initializer {
            truncated_normal_initializer {
              stddev: 0.03
              mean: 0.0
            }
          }
        }
      }
    }
    feature_extractor {
      type: 'ssd_inception_v2'
      min_depth: 16
      depth_multiplier: 1.0
      conv_hyperparams {
        activation: RELU_6,
        regularizer {
          l2_regularizer {
            weight: 0.00004
          }
        }
        initializer {
          truncated_normal_initializer {
            stddev: 0.03
            mean: 0.0
          }
        }
        batch_norm {
          train: true,
          scale: true,
          center: true,
          decay: 0.9997,
          epsilon: 0.001,
        }
      }
    }
    loss {
      classification_loss {
        weighted_sigmoid {
          anchorwise_output: true
        }
      }
      localization_loss {
        weighted_smooth_l1 {
          anchorwise_output: true
        }
      }
      hard_example_miner {
        num_hard_examples: 3000
        iou_threshold: 0.99
        loss_type: CLASSIFICATION
        max_negatives_per_positive: 3
        min_negatives_per_image: 0
      }
      classification_weight: 1.0
      localization_weight: 1.0
    }
    normalize_loss_by_num_matches: true
    post_processing {
      batch_non_max_suppression {
        score_threshold: 1e-8
        iou_threshold: 0.6
        max_detections_per_class: 100
        max_total_detections: 100
      }
      score_converter: SIGMOID
    }
  }
}

train_config: {
  batch_size: 16
  optimizer {
    rms_prop_optimizer: {
      learning_rate: {
        exponential_decay_learning_rate {
          initial_learning_rate: 0.001
          decay_steps: 150720
          decay_factor: 0.95
        }
      }
      momentum_optimizer_value: 0.9
      decay: 0.9
      epsilon: 1.0
    }
  }
  fine_tune_checkpoint: "/home/hite/Downloads/oldverison_TFmodel/models/research/object_detection/ssd_model/ssd_inception_v2_coco_2017_11_17/model.ckpt"
  from_detection_checkpoint: true
  # Note: The below line limits the training process to 500K steps, which we
  # empirically found to be sufficient to train the pets dataset. This
  # effectively bypasses the learning rate schedule (the learning rate will
  # never decay). Remove the below line to train indefinitely.
  num_steps: 500000
  data_augmentation_options {
    random_horizontal_flip {
    }
  }
  data_augmentation_options {
    ssd_random_crop {
    }
  }
}

train_input_reader: {
  tf_record_input_reader {
    input_path: "/home/hite/Downloads/oldverison_TFmodel/models/research/object_detection/ssd_model/pascal_train.record"
  }
  label_map_path: "/home/hite/Downloads/oldverison_TFmodel/models/research/object_detection/ssd_model/pascal_label_map.pbtxt"
}

eval_config: {
  num_examples: 8000
  # Note: The below line limits the evaluation process to 10 evaluations.
  # Remove the below line to evaluate indefinitely.
  max_evals: 10
}

eval_input_reader: {
  tf_record_input_reader {
    input_path: "/home/hite/Downloads/oldverison_TFmodel/models/research/object_detection/ssd_model/pascal_train.record"
  }
  label_map_path: "/home/hite/Downloads/oldverison_TFmodel/models/research/object_detection/ssd_model/pascal_label_map.pbtxt"
  shuffle: false
  num_readers: 1
  num_epochs: 1
}
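In case it helps with diagnosing the inputOrder setting: this is how I try to print the tensors that actually end up wired into the collapsed NMS node after preprocessing (a sketch using graphsurgeon; the .pb path is a placeholder and preprocess is the function from my config.py above):

import graphsurgeon as gs
from config import preprocess  # the preprocess() from config.py above

# Placeholder path to my frozen model.
dynamic_graph = gs.DynamicGraph("frozen_inference_graph.pb")
preprocess(dynamic_graph)

# inputOrder in config.py must list (loc, conf, priorbox) in whatever
# order they appear here.
nms = dynamic_graph.find_nodes_by_name("NMS")[0]
for i, name in enumerate(nms.input):
    print(i, name)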
Does anyone have a method to fix this problem?