Please provide complete information as applicable to your setup.
• Hardware Platform (Jetson / GPU) : Jetson
• DeepStream Version : DeepStream 6
• JetPack Version (valid for Jetson only) : Jetpack 4.6 (32.6.1)
• TensorRT Version : 8.0.6.1
• Issue Type (questions, new requirements, bugs) : Question
Hello everyone, when I perform inference on the video stream, I get multiple bounding boxes for the same detection.
However, this behavior started after switching to a different model file, which was trained on a comparatively larger number of images than the previous model file.
For the previous model file, the following inference configuration was used:
[property]
gpu-id=0
gie-unique-id=1
## 0=FP32, 1=INT8, 2=FP16 mode
network-mode=1
network-type=0
process-mode=1
#force-implicit-batch-dim=1
#batch-size=1
model-color-format=0
#maintain-aspect-ratio=1
net-scale-factor=0.0039215697906911373
## 1=DBSCAN, 2=NMS, 3= DBSCAN+NMS Hybrid, 4 = None(No clustering)
cluster-mode=3
infer-dims=3;544;960
uff-input-blob-name=input_1
output-blob-names=output_bbox/BiasAdd;output_cov/Sigmoid
num-detected-classes=3
interval=3
enable-dla=0
use-dla-core=0
[class-attrs-all]
group-threshold=1
pre-cluster-threshold=0.23
#post-cluster-threshold=0.1
#nms-iou-threshold=0.2
nms-iou-threshold=0.6
minBoxes=3
dbscan-min-score=1.1
eps=0.1
detected-min-w=20
detected-min-h=20
I would like to know which parameters can be tweaked so that I get only one bounding box per detection with the new model file.
The spec file used for training the new model file with a large number of images is as follows:
random_seed: 42
dataset_config {
data_sources {
tfrecords_path: "/workspace/tao-experiments/data/tfrecords/kitti_trainval/*"
image_directory_path: "/workspace/tao-experiments/data/training_coco"
}
image_extension: "jpg"
target_class_mapping {
key: "person"
value: "person"
}
target_class_mapping {
key: "face"
value: "face"
}
target_class_mapping {
key: "bag"
value: "bag"
}
validation_fold: 0
}
augmentation_config {
preprocessing {
output_image_width: 960
output_image_height: 544
crop_right: 960
crop_bottom: 544
min_bbox_width: 1.0
min_bbox_height: 1.0
output_image_channel: 3
}
spatial_augmentation {
hflip_probability: 0.5
zoom_min: 1.0
zoom_max: 1.0
translate_max_x: 8.0
translate_max_y: 8.0
rotate_rad_max: 0.174
}
color_augmentation {
color_shift_stddev: 0.0
hue_rotation_max: 25.0
saturation_shift_max: 0.20000000298
contrast_scale_max: 0.10000000149
contrast_center: 0.5
}
}
postprocessing_config {
target_class_config {
key: "person"
value {
clustering_config {
coverage_threshold: 0.00499999988824
dbscan_eps: 0.20
dbscan_min_samples: 0.0500000007451
minimum_bounding_box_height: 20
}
}
}
target_class_config {
key: "bag"
value {
clustering_config {
coverage_threshold: 0.00499999988824
dbscan_eps: 0.15000000596
dbscan_min_samples: 0.0500000007451
minimum_bounding_box_height: 20
}
}
}
target_class_config {
key: "face"
value {
clustering_config {
coverage_threshold: 0.005
dbscan_eps: 0.15
dbscan_min_samples: 0.0500000007451
minimum_bounding_box_height: 20
}
}
}
}
model_config {
pretrained_model_file: "/workspace/tao-experiments/detectnet_v2/tlt_peoplenet_vunpruned_v2.1/resnet34_peoplenet.tlt"
arch: "resnet"
num_layers: 34
load_graph: true
all_projections: true
# use_pooling: false
use_batch_norm: true
# freeze_bn = true
freeze_blocks: 0
freeze_blocks: 1
freeze_blocks: 2
freeze_blocks: 3
freeze_blocks: 4
objective_set {
bbox {
scale: 35.0
offset: 0.5
}
cov {
}
}
training_precision {
backend_floatx: FLOAT32
}
}
evaluation_config {
validation_period_during_training: 10
first_validation_epoch: 1
minimum_detection_ground_truth_overlap {
key: "person"
value: 0.5
}
minimum_detection_ground_truth_overlap {
key: "bag"
value: 0.5
}
minimum_detection_ground_truth_overlap {
key: "face"
value: 0.5
}
evaluation_box_config {
key: "person"
value {
minimum_height: 20
maximum_height: 9999
minimum_width: 10
maximum_width: 9999
}
}
evaluation_box_config {
key: "bag"
value {
minimum_height: 20
maximum_height: 9999
minimum_width: 10
maximum_width: 9999
}
}
evaluation_box_config {
key: "face"
value {
minimum_height: 20
maximum_height: 9999
minimum_width: 10
maximum_width: 9999
}
}
average_precision_mode: INTEGRATE
}
cost_function_config {
target_classes {
name: "person"
class_weight: 10.0
coverage_foreground_weight: 0.0500000007451
objectives {
name: "cov"
initial_weight: 1.0
weight_target: 1.0
}
objectives {
name: "bbox"
initial_weight: 10.0
weight_target: 10.0
}
}
target_classes {
name: "bag"
class_weight: 1.0
coverage_foreground_weight: 0.0500000007451
objectives {
name: "cov"
initial_weight: 1.0
weight_target: 1.0
}
objectives {
name: "bbox"
initial_weight: 10.0
weight_target: 10.0
}
}
target_classes {
name: "face"
class_weight: 1.0
coverage_foreground_weight: 0.0500000007451
objectives {
name: "cov"
initial_weight: 1.0
weight_target: 1.0
}
objectives {
name: "bbox"
initial_weight: 10.0
weight_target: 10.0
}
}
enable_autoweighting: true
max_objective_weight: 0.999899983406
min_objective_weight: 9.99999974738e-05
}
training_config {
batch_size_per_gpu: 32
num_epochs: 120
learning_rate {
soft_start_annealing_schedule {
min_learning_rate: 2e-07
max_learning_rate: 5e-04
soft_start: 0.1
annealing: 0.699999988079
}
}
regularizer {
type: L1
weight: 3.00000002618e-09
}
optimizer {
adam {
epsilon: 9.99999993923e-09
beta1: 0.899999976158
beta2: 0.999000012875
}
}
cost_scaling {
initial_exponent: 20.0
increment: 0.005
decrement: 1.0
}
checkpoint_interval: 5
}
bbox_rasterizer_config {
target_class_config {
key: "person"
value {
cov_center_x: 0.5
cov_center_y: 0.5
cov_radius_x: 1.0
cov_radius_y: 1.0
bbox_min_radius: 1.0
}
}
target_class_config {
key: "bag"
value {
cov_center_x: 0.5
cov_center_y: 0.5
cov_radius_x: 1.0
cov_radius_y: 1.0
bbox_min_radius: 1.0
}
}
target_class_config {
key: "face"
value {
cov_center_x: 0.5
cov_center_y: 0.5
cov_radius_x: 1.0
cov_radius_y: 1.0
bbox_min_radius: 1.0
}
}
deadzone_radius: 0.400000154972
}
Looking forward to your responses!