TLT DetectNet with Stanford Drone Dataset: Low Average Precision

Hi,

I trained TLT DetectNet_v2 with the Stanford Drone Dataset. Before training, I converted the Stanford Drone Dataset’s annotations to KITTI format, and then I resized all of the images to 960*540 and scaled the corresponding bounding boxes accordingly. All of the strings (labels) are in lower case, as recommended in the TLT Metropolis Documentation. I also made the necessary changes in the spec folder’s detectnet_v2_train_resnet18_kitti.txt, with 250 epochs and a batch size of 8.
However, here are the training results:

Validation cost: 0.000267
Mean average_precision (in %): 1.5709

class name average precision (in %)


biker 7.64553
bus 0
car 0
pedestrian 0.208886
skater 0

I do not understand why this happened. Could you please help me?
Thanks in advance

2 Likes

I also suffered the same issue. Somehow tlt transfer learning is really unsuccessful at training detectNet. I have a Jetson Nano with DS 5.0.

2 Likes

@ucuzovaesra
Can you share your training spec?

Sure. Here is detectnet_v2_train_resnet18_kitti.txt
###START#####
random_seed: 42
dataset_config {
data_sources {
tfrecords_path: “/workspace/tlt-experiments/data/tfrecords/kitti_trainval/*”
image_directory_path: “/workspace/tlt-experiments/data/training”
}
image_extension: “png”
target_class_mapping {
key: “pedestrian”
value: “pedestrian”
}
target_class_mapping {
key: “biker”
value: “biker”
}
target_class_mapping {
key: “car”
value: “car”
}
target_class_mapping {
key: “skater”
value: “skater”
}
target_class_mapping {
key: “bus”
value: “bus”
}
validation_fold: 0
}
augmentation_config {
preprocessing {
output_image_width: 1248
output_image_height: 384
min_bbox_width: 1.0
min_bbox_height: 1.0
output_image_channel: 3
}
spatial_augmentation {
hflip_probability: 0.5
zoom_min: 1.0
zoom_max: 1.0
translate_max_x: 8.0
translate_max_y: 8.0
}
color_augmentation {
hue_rotation_max: 25.0
saturation_shift_max: 0.20000000298
contrast_scale_max: 0.10000000149
contrast_center: 0.5
}
}
postprocessing_config {
target_class_config {
key: “pedestrian”
value {
clustering_config {
coverage_threshold: 0.00499999988824
dbscan_eps: 0.20000000298
dbscan_min_samples: 0.0500000007451
minimum_bounding_box_height: 20
}
}
}
target_class_config {
key: “biker”
value {
clustering_config {
coverage_threshold: 0.00499999988824
dbscan_eps: 0.15000000596
dbscan_min_samples: 0.0500000007451
minimum_bounding_box_height: 20
}
}
}
target_class_config {
key: “car”
value {
clustering_config {
coverage_threshold: 0.00749999983236
dbscan_eps: 0.230000004172
dbscan_min_samples: 0.0500000007451
minimum_bounding_box_height: 20
}
}
}
target_class_config {
key: “skater”
value {
clustering_config {
coverage_threshold: 0.00749999983236
dbscan_eps: 0.230000004172
dbscan_min_samples: 0.0500000007451
minimum_bounding_box_height: 20
}
}
}
target_class_config {
key: “bus”
value {
clustering_config {
coverage_threshold: 0.00749999983236
dbscan_eps: 0.230000004172
dbscan_min_samples: 0.0500000007451
minimum_bounding_box_height: 20
}
}
}
}
model_config {
pretrained_model_file: “/workspace/tlt-experiments/detectnet_v2/pretrained_resnet18/tlt_pretrained_detectnet_v2_vresnet18/resnet18.hdf5”
num_layers: 18
use_batch_norm: true
objective_set {
bbox {
scale: 35.0
offset: 0.5
}
cov {
}
}
training_precision {
backend_floatx: FLOAT32
}
arch: “resnet”
}
evaluation_config {
validation_period_during_training: 10
first_validation_epoch: 30
minimum_detection_ground_truth_overlap {
key: “pedestrian”
value: 0.699999988079
}
minimum_detection_ground_truth_overlap {
key: “biker”
value: 0.5
}
minimum_detection_ground_truth_overlap {
key: “car”
value: 0.5
}
minimum_detection_ground_truth_overlap {
key: “skater”
value: 0.5
}
minimum_detection_ground_truth_overlap {
key: “bus”
value: 0.5
}
evaluation_box_config {
key: “pedestrian”
value {
minimum_height: 20
maximum_height: 9999
minimum_width: 10
maximum_width: 9999
}
}
evaluation_box_config {
key: “biker”
value {
minimum_height: 20
maximum_height: 9999
minimum_width: 10
maximum_width: 9999
}
}
evaluation_box_config {
key: “car”
value {
minimum_height: 20
maximum_height: 9999
minimum_width: 10
maximum_width: 9999
}
}
evaluation_box_config {
key: “skater”
value {
minimum_height: 20
maximum_height: 9999
minimum_width: 10
maximum_width: 9999
}
}
evaluation_box_config {
key: “bus”
value {
minimum_height: 20
maximum_height: 9999
minimum_width: 10
maximum_width: 9999
}
}
average_precision_mode: INTEGRATE
}
cost_function_config {
target_classes {
name: “pedestrian”
class_weight: 1.0
coverage_foreground_weight: 0.0500000007451
objectives {
name: “cov”
initial_weight: 1.0
weight_target: 1.0
}
objectives {
name: “bbox”
initial_weight: 10.0
weight_target: 10.0
}
}
target_classes {
name: “biker”
class_weight: 8.0
coverage_foreground_weight: 0.0500000007451
objectives {
name: “cov”
initial_weight: 1.0
weight_target: 1.0
}
objectives {
name: “bbox”
initial_weight: 10.0
weight_target: 1.0
}
}
target_classes {
name: “car”
class_weight: 4.0
coverage_foreground_weight: 0.0500000007451
objectives {
name: “cov”
initial_weight: 1.0
weight_target: 1.0
}
objectives {
name: “bbox”
initial_weight: 10.0
weight_target: 10.0
}
}
target_classes {
name: “skater”
class_weight: 4.0
coverage_foreground_weight: 0.0500000007451
objectives {
name: “cov”
initial_weight: 1.0
weight_target: 1.0
}
objectives {
name: “bbox”
initial_weight: 10.0
weight_target: 10.0
}
}
target_classes {
name: “bus”
class_weight: 4.0
coverage_foreground_weight: 0.0500000007451
objectives {
name: “cov”
initial_weight: 1.0
weight_target: 1.0
}
objectives {
name: “bbox”
initial_weight: 10.0
weight_target: 10.0
}
}
enable_autoweighting: true
max_objective_weight: 0.999899983406
min_objective_weight: 9.99999974738e-05
}
training_config {
batch_size_per_gpu: 8
num_epochs: 250
learning_rate {
soft_start_annealing_schedule {
min_learning_rate: 5e-06
max_learning_rate: 5e-04
soft_start: 0.10000000149
annealing: 0.699999988079
}
}
regularizer {
type: L1
weight: 3.00000002618e-09
}
optimizer {
adam {
epsilon: 9.99999993923e-09
beta1: 0.899999976158
beta2: 0.999000012875
}
}
cost_scaling {
initial_exponent: 20.0
increment: 0.005
decrement: 1.0
}
checkpoint_interval: 10
}
bbox_rasterizer_config {
target_class_config {
key: “pedestrian”
value {
cov_center_x: 0.5
cov_center_y: 0.5
cov_radius_x: 0.40000000596
cov_radius_y: 0.40000000596
bbox_min_radius: 1.0
}
}
target_class_config {
key: “biker”
value {
cov_center_x: 0.5
cov_center_y: 0.5
cov_radius_x: 1.0
cov_radius_y: 1.0
bbox_min_radius: 1.0
}
}
target_class_config {
key: “car”
value {
cov_center_x: 0.5
cov_center_y: 0.5
cov_radius_x: 1.0
cov_radius_y: 1.0
bbox_min_radius: 1.0
}
}
target_class_config {
key: “skater”
value {
cov_center_x: 0.5
cov_center_y: 0.5
cov_radius_x: 1.0
cov_radius_y: 1.0
bbox_min_radius: 1.0
}
}
target_class_config {
key: “bus”
value {
cov_center_x: 0.5
cov_center_y: 0.5
cov_radius_x: 1.0
cov_radius_y: 1.0
bbox_min_radius: 1.0
}
}
deadzone_radius: 0.400000154972
}
####END####

The following settings in your spec are not what is expected. They need to be changed to 960x540, which is the resolution of your resized images/labels.

output_image_width: 1248
output_image_height: 384

Moreover, the objects in the Stanford Drone Dataset are very small.
I suggest modifying the spec as below.

  1. Set a lower “minimum_bounding_box_height” for all the classes. For example,
    minimum_bounding_box_height: 4

  2. Set lower “minimum_width” and “minimum_height”. For example,
    minimum_width: 4
    minimum_height: 4

1 Like

Thank you I’ll try them all :)

Hi again,
Although I did everything that you mentioned before, the model still has really low accuracy.
Here is the spec file:

####START####
random_seed: 42
dataset_config {
data_sources {
tfrecords_path: “/workspace/tlt-experiments/data/tfrecords/kitti_trainval/*”
image_directory_path: “/workspace/tlt-experiments/data/training”
}
image_extension: “png”
target_class_mapping {
key: “pedestrian”
value: “pedestrian”
}
target_class_mapping {
key: “biker”
value: “biker”
}
target_class_mapping {
key: “car”
value: “car”
}
target_class_mapping {
key: “skater”
value: “skater”
}
target_class_mapping {
key: “bus”
value: “bus”
}
validation_fold: 0
}
augmentation_config {
preprocessing {
output_image_width: 960
output_image_height: 544
min_bbox_width: 1.0
min_bbox_height: 1.0
output_image_channel: 3
}
spatial_augmentation {
hflip_probability: 0.5
zoom_min: 1.0
zoom_max: 1.0
translate_max_x: 8.0
translate_max_y: 8.0
}
color_augmentation {
hue_rotation_max: 25.0
saturation_shift_max: 0.20000000298
contrast_scale_max: 0.10000000149
contrast_center: 0.5
}
}
postprocessing_config {
target_class_config {
key: “pedestrian”
value {
clustering_config {
coverage_threshold: 0.00499999988824
dbscan_eps: 0.20000000298
dbscan_min_samples: 0.0500000007451
minimum_bounding_box_height: 4
}
}
}
target_class_config {
key: “biker”
value {
clustering_config {
coverage_threshold: 0.00499999988824
dbscan_eps: 0.15000000596
dbscan_min_samples: 0.0500000007451
minimum_bounding_box_height: 4
}
}
}
target_class_config {
key: “car”
value {
clustering_config {
coverage_threshold: 0.00749999983236
dbscan_eps: 0.230000004172
dbscan_min_samples: 0.0500000007451
minimum_bounding_box_height: 4
}
}
}
target_class_config {
key: “skater”
value {
clustering_config {
coverage_threshold: 0.00749999983236
dbscan_eps: 0.230000004172
dbscan_min_samples: 0.0500000007451
minimum_bounding_box_height: 4
}
}
}
target_class_config {
key: “bus”
value {
clustering_config {
coverage_threshold: 0.00749999983236
dbscan_eps: 0.230000004172
dbscan_min_samples: 0.0500000007451
minimum_bounding_box_height: 4
}
}
}
}
model_config {
pretrained_model_file: “/workspace/tlt-experiments/detectnet_v2/pretrained_resnet18/tlt_pretrained_detectnet_v2_vresnet18/resnet18.hdf5”
num_layers: 18
use_batch_norm: true
objective_set {
bbox {
scale: 35.0
offset: 0.5
}
cov {
}
}
training_precision {
backend_floatx: FLOAT32
}
arch: “resnet”
}
evaluation_config {
validation_period_during_training: 10
first_validation_epoch: 30
minimum_detection_ground_truth_overlap {
key: “pedestrian”
value: 0.699999988079
}
minimum_detection_ground_truth_overlap {
key: “biker”
value: 0.5
}
minimum_detection_ground_truth_overlap {
key: “car”
value: 0.5
}
minimum_detection_ground_truth_overlap {
key: “skater”
value: 0.5
}
minimum_detection_ground_truth_overlap {
key: “bus”
value: 0.5
}
evaluation_box_config {
key: “pedestrian”
value {
minimum_height: 4
maximum_height: 9999
minimum_width: 4
maximum_width: 9999
}
}
evaluation_box_config {
key: “biker”
value {
minimum_height: 4
maximum_height: 9999
minimum_width: 4
maximum_width: 9999
}
}
evaluation_box_config {
key: “car”
value {
minimum_height: 4
maximum_height: 9999
minimum_width: 4
maximum_width: 9999
}
}
evaluation_box_config {
key: “skater”
value {
minimum_height: 4
maximum_height: 9999
minimum_width: 4
maximum_width: 9999
}
}
evaluation_box_config {
key: “bus”
value {
minimum_height: 4
maximum_height: 9999
minimum_width: 4
maximum_width: 9999
}
}
average_precision_mode: INTEGRATE
}
cost_function_config {
target_classes {
name: “pedestrian”
class_weight: 1.0
coverage_foreground_weight: 0.0500000007451
objectives {
name: “cov”
initial_weight: 1.0
weight_target: 1.0
}
objectives {
name: “bbox”
initial_weight: 10.0
weight_target: 10.0
}
}
target_classes {
name: “biker”
class_weight: 8.0
coverage_foreground_weight: 0.0500000007451
objectives {
name: “cov”
initial_weight: 1.0
weight_target: 1.0
}
objectives {
name: “bbox”
initial_weight: 10.0
weight_target: 1.0
}
}
target_classes {
name: “car”
class_weight: 4.0
coverage_foreground_weight: 0.0500000007451
objectives {
name: “cov”
initial_weight: 1.0
weight_target: 1.0
}
objectives {
name: “bbox”
initial_weight: 10.0
weight_target: 10.0
}
}
target_classes {
name: “skater”
class_weight: 4.0
coverage_foreground_weight: 0.0500000007451
objectives {
name: “cov”
initial_weight: 1.0
weight_target: 1.0
}
objectives {
name: “bbox”
initial_weight: 10.0
weight_target: 10.0
}
}
target_classes {
name: “bus”
class_weight: 4.0
coverage_foreground_weight: 0.0500000007451
objectives {
name: “cov”
initial_weight: 1.0
weight_target: 1.0
}
objectives {
name: “bbox”
initial_weight: 10.0
weight_target: 10.0
}
}
enable_autoweighting: true
max_objective_weight: 0.999899983406
min_objective_weight: 9.99999974738e-05
}
training_config {
batch_size_per_gpu: 8
num_epochs: 250
learning_rate {
soft_start_annealing_schedule {
min_learning_rate: 5e-06
max_learning_rate: 5e-04
soft_start: 0.10000000149
annealing: 0.699999988079
}
}
regularizer {
type: L1
weight: 3.00000002618e-09
}
optimizer {
adam {
epsilon: 9.99999993923e-09
beta1: 0.899999976158
beta2: 0.999000012875
}
}
cost_scaling {
initial_exponent: 20.0
increment: 0.005
decrement: 1.0
}
checkpoint_interval: 10
}
bbox_rasterizer_config {
target_class_config {
key: “pedestrian”
value {
cov_center_x: 0.5
cov_center_y: 0.5
cov_radius_x: 0.40000000596
cov_radius_y: 0.40000000596
bbox_min_radius: 1.0
}
}
target_class_config {
key: “biker”
value {
cov_center_x: 0.5
cov_center_y: 0.5
cov_radius_x: 1.0
cov_radius_y: 1.0
bbox_min_radius: 1.0
}
}
target_class_config {
key: “car”
value {
cov_center_x: 0.5
cov_center_y: 0.5
cov_radius_x: 1.0
cov_radius_y: 1.0
bbox_min_radius: 1.0
}
}
target_class_config {
key: “skater”
value {
cov_center_x: 0.5
cov_center_y: 0.5
cov_radius_x: 1.0
cov_radius_y: 1.0
bbox_min_radius: 1.0
}
}
target_class_config {
key: “bus”
value {
cov_center_x: 0.5
cov_center_y: 0.5
cov_radius_x: 1.0
cov_radius_y: 1.0
bbox_min_radius: 1.0
}
}
deadzone_radius: 0.400000154972
}

####END####

Could you please help me about this issue?
Thanks

What is the mAP now? Can you paste the metric here?

Moreover, if detectnet_v2 is not a must, you can try training with the yolo_v3 network.
The Stanford Drone Dataset seems to have lots of small objects.

1 Like

Here is the score:

Validation cost: 0.000346
Mean average_precision (in %): 1.2702

class name average precision (in %)


biker 5.46505
bus 0.852878
car 0
pedestrian 0.0330187
skater 0

I wanted to train my dataset with DetectNet because I realized that DashCamNet and PeopleNet are both trained with ResNet18. Hence, I am not sure if YOLOv3 will be better. Moreover, YOLOv3 is really slow on Jetson platforms, especially on the Jetson Nano.
Thanks for your help by the way :)

I will dig further into the Stanford Drone Dataset.

1 Like

Thanks, I am waiting for news from you.

Hi again,
Is there any progress with the Stanford Drone Dataset? I am waiting to hear from you.

Best

Hi @ucuzovaesra,
Sorry for the late reply. As we know, the objects in the Stanford Drone Dataset are very small. If your images are resized to 960*540, the objects become even smaller. DetectNet_v2 generates 2 tensors, cov and bbox. The image is divided into 16x16 grid cells. If objects are very small, training becomes difficult.
From the slide https://info.nvidia.com/rs/156-OFN-742/images/FEDTLTWebinarFinal.pdf, it just crops the image instead of resizing.
So I suggest using a larger image resolution and trying to train with detectnet_v2 again.
Currently, I am also running training on a small part of the dataset’s videos (using deathCircle/video0).
I am training at 1632x1952 resolution (the video is 1630x1948).
I will post the result once I have it.

1 Like

Hi @Morganh
Thanks for your attention and support. I will try again with 1632x1952 resolution. Moreover, I will be waiting for your training result :)

Sharing an experimental result with you.

  • Select a small part of jpgs( Using scene—deathCircle – video0 )
  • Train with 2gpus, bs4, 1632x1952 resolution , 80epochs
  • Below is the result.
    Mean average_precision (in %): 38.0967
    class name average precision (in %)
    ------------ --------------------------
    biker 46.4008
    car 94.6011
    cart 5.43446
    pedestrian 30.3374
    skater 13.7094
1 Like

Hi,
Did you train the model with frames or video?
Did you convert the annotation format to KITTI format? If so, could you please share an example of the annotation format? I’m not sure whether I converted it the right way.
Also, is it possible to share your spec files?
Thanks in advance

Yes, I converted one video to jpgs via ffmpeg. Then I converted the annotations into KITTI format, which looks like below.

biker 0.0 0 0.0 0.0 1051.0 18.0 1132.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
pedestrian 0.0 0 0.0 914.0 876.0 959.0 937.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
biker 0.0 0 0.0 1607.0 1111.0 1625.0 1180.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
biker 0.0 0 0.0 0.0 1059.0 47.0 1111.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
pedestrian 0.0 0 0.0 672.0 1387.0 706.0 1444.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
biker 0.0 0 0.0 0.0 1103.0 33.0 1168.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
pedestrian 0.0 0 0.0 1224.0 1233.0 1263.0 1306.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
biker 0.0 0 0.0 955.0 0.0 989.0 44.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
biker 0.0 0 0.0 1018.0 0.0 1061.0 60.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
biker 0.0 0 0.0 774.0 1866.0 794.0 1943.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
biker 0.0 0 0.0 1582.0 1095.0 1625.0 1168.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
biker 0.0 0 0.0 1582.0 1136.0 1625.0 1205.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
pedestrian 0.0 0 0.0 1068.0 1416.0 1102.0 1477.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0

The spec is attached below for your reference.

random_seed: 42
dataset_config {
data_sources {
tfrecords_path: “/workspace/standford_campus_data/videos/deathCircle/tfrecords/*”
image_directory_path: “/workspace/standford_campus_data/videos/deathCircle”
}
image_extension: “jpg”
target_class_mapping {
key: “pedestrian”
value: “pedestrian”
}
target_class_mapping {
key: “biker”
value: “biker”
}
target_class_mapping {
key: “car”
value: “car”
}
target_class_mapping {
key: “skater”
value: “skater”
}
target_class_mapping {
key: “cart”
value: “cart”
}
validation_fold: 0
}
augmentation_config {
preprocessing {
output_image_width: 1632
output_image_height: 1952
min_bbox_width: 1.0
min_bbox_height: 1.0
output_image_channel: 3
}
spatial_augmentation {
hflip_probability: 0.5
zoom_min: 1.0
zoom_max: 1.0
translate_max_x: 8.0
translate_max_y: 8.0
}
color_augmentation {
hue_rotation_max: 25.0
saturation_shift_max: 0.20000000298
contrast_scale_max: 0.10000000149
contrast_center: 0.5
}
}
postprocessing_config {
target_class_config {
key: “pedestrian”
value {
clustering_config {
coverage_threshold: 0.00499999988824
dbscan_eps: 0.20000000298
dbscan_min_samples: 0.0500000007451
minimum_bounding_box_height: 4
}
}
}
target_class_config {
key: “biker”
value {
clustering_config {
coverage_threshold: 0.00499999988824
dbscan_eps: 0.20000000298
dbscan_min_samples: 0.0500000007451
minimum_bounding_box_height: 4
}
}
}
target_class_config {
key: “car”
value {
clustering_config {
coverage_threshold: 0.00499999988824
dbscan_eps: 0.20000000298
dbscan_min_samples: 0.0500000007451
minimum_bounding_box_height: 4
}
}
}
target_class_config {
key: “skater”
value {
clustering_config {
coverage_threshold: 0.00499999988824
dbscan_eps: 0.20000000298
dbscan_min_samples: 0.0500000007451
minimum_bounding_box_height: 4
}
}
}
target_class_config {
key: “cart”
value {
clustering_config {
coverage_threshold: 0.00499999988824
dbscan_eps: 0.20000000298
dbscan_min_samples: 0.0500000007451
minimum_bounding_box_height: 4
}
}
}
}
model_config {
pretrained_model_file: “/workspace/lpd/resnet18.hdf5”
num_layers: 18
use_batch_norm: true
objective_set {
bbox {
scale: 35.0
offset: 0.5
}
cov {
}
}
training_precision {
backend_floatx: FLOAT32
}
arch: “resnet”
}
evaluation_config {
validation_period_during_training: 40
first_validation_epoch: 30
minimum_detection_ground_truth_overlap {
key: “pedestrian”
value: 0.5
}
minimum_detection_ground_truth_overlap {
key: “biker”
value: 0.5
}
minimum_detection_ground_truth_overlap {
key: “car”
value: 0.5
}
minimum_detection_ground_truth_overlap {
key: “skater”
value: 0.5
}
minimum_detection_ground_truth_overlap {
key: “cart”
value: 0.5
}
evaluation_box_config {
key: “pedestrian”
value {
minimum_height: 4
maximum_height: 9999
minimum_width: 4
maximum_width: 9999
}
}
evaluation_box_config {
key: “biker”
value {
minimum_height: 4
maximum_height: 9999
minimum_width: 4
maximum_width: 9999
}
}
evaluation_box_config {
key: “car”
value {
minimum_height: 4
maximum_height: 9999
minimum_width: 4
maximum_width: 9999
}
}
evaluation_box_config {
key: “skater”
value {
minimum_height: 4
maximum_height: 9999
minimum_width: 4
maximum_width: 9999
}
}
evaluation_box_config {
key: “cart”
value {
minimum_height: 4
maximum_height: 9999
minimum_width: 4
maximum_width: 9999
}
}
average_precision_mode: INTEGRATE
}
cost_function_config {
target_classes {
name: “pedestrian”
class_weight: 1.0
coverage_foreground_weight: 0.0500000007451
objectives {
name: “cov”
initial_weight: 1.0
weight_target: 1.0
}
objectives {
name: “bbox”
initial_weight: 10.0
weight_target: 10.0
}
}
target_classes {
name: “biker”
class_weight: 1.0
coverage_foreground_weight: 0.0500000007451
objectives {
name: “cov”
initial_weight: 1.0
weight_target: 1.0
}
objectives {
name: “bbox”
initial_weight: 10.0
weight_target: 10.0
}
}
target_classes {
name: “car”
class_weight: 6.0
coverage_foreground_weight: 0.0500000007451
objectives {
name: “cov”
initial_weight: 1.0
weight_target: 1.0
}
objectives {
name: “bbox”
initial_weight: 10.0
weight_target: 10.0
}
}
target_classes {
name: “skater”
class_weight: 12.0
coverage_foreground_weight: 0.0500000007451
objectives {
name: “cov”
initial_weight: 1.0
weight_target: 1.0
}
objectives {
name: “bbox”
initial_weight: 10.0
weight_target: 10.0
}
}
target_classes {
name: “cart”
class_weight: 20.0
coverage_foreground_weight: 0.0500000007451
objectives {
name: “cov”
initial_weight: 1.0
weight_target: 1.0
}
objectives {
name: “bbox”
initial_weight: 10.0
weight_target: 10.0
}
}
enable_autoweighting: true
max_objective_weight: 0.999899983406
min_objective_weight: 9.99999974738e-05
}
training_config {
batch_size_per_gpu: 4
num_epochs: 80
learning_rate {
soft_start_annealing_schedule {
min_learning_rate: 5e-06
max_learning_rate: 5e-04
soft_start: 0.10000000149
annealing: 0.699999988079
}
}
regularizer {
type: L1
weight: 3.00000002618e-09
}
optimizer {
adam {
epsilon: 9.99999993923e-09
beta1: 0.899999976158
beta2: 0.999000012875
}
}
cost_scaling {
initial_exponent: 20.0
increment: 0.005
decrement: 1.0
}
checkpoint_interval: 10
}
bbox_rasterizer_config {
target_class_config {
key: “pedestrian”
value {
cov_center_x: 0.5
cov_center_y: 0.5
cov_radius_x: 0.40000000596
cov_radius_y: 0.40000000596
bbox_min_radius: 1.0
}
}
target_class_config {
key: “biker”
value {
cov_center_x: 0.5
cov_center_y: 0.5
cov_radius_x: 0.40000000596
cov_radius_y: 0.40000000596
bbox_min_radius: 1.0
}
}
target_class_config {
key: “car”
value {
cov_center_x: 0.5
cov_center_y: 0.5
cov_radius_x: 0.40000000596
cov_radius_y: 0.40000000596
bbox_min_radius: 1.0
}
}
target_class_config {
key: “skater”
value {
cov_center_x: 0.5
cov_center_y: 0.5
cov_radius_x: 0.40000000596
cov_radius_y: 0.40000000596
bbox_min_radius: 1.0
}
}
target_class_config {
key: “cart”
value {
cov_center_x: 0.5
cov_center_y: 0.5
cov_radius_x: 0.40000000596
cov_radius_y: 0.40000000596
bbox_min_radius: 1.0
}
}
deadzone_radius: 0.400000154972
}

1 Like

Hi,
Thank you very much for your help and response.
My model’s mAP scores have increased thanks to your help :)