I’m working with TrafficCamNet and have a couple of questions:
1) I am testing TrafficCamNet on a part of the COCO dataset that contains only the car category; however, some images in this part contain no cars. I evaluated the model on this dataset and got AP = 4.1 at IoU = 0.5. This AP seems lower than it is supposed to be. Could anyone shed light on the cause of the problem?
Evaluation config:
random_seed: 42
dataset_config {
data_sources {
tfrecords_path: “/workspace/detector/data/mscoco_with_labels/tfrecords/*”
image_directory_path: “/workspace/detector/data/mscoco_with_labels/”
}
image_extension: “jpg”
target_class_mapping {
key: “car”
value: “car”
}
target_class_mapping {
key: “two-weeler”
value: “two-weeler”
}
target_class_mapping {
key: “person”
value: “person”
}
target_class_mapping {
key: “road_sign”
value: “road_sign”
}
validation_fold: 0
}
augmentation_config {
preprocessing {
output_image_width: 960
output_image_height: 544
min_bbox_width: 1.0
min_bbox_height: 1.0
output_image_channel: 3
}
spatial_augmentation {
hflip_probability: 0.5
zoom_min: 1.0
zoom_max: 1.0
translate_max_x: 8.0
translate_max_y: 8.0
}
color_augmentation {
hue_rotation_max: 25.0
saturation_shift_max: 0.20000000298
contrast_scale_max: 0.10000000149
contrast_center: 0.5
}
}
postprocessing_config {
target_class_config {
key: “car”
value {
clustering_config {
coverage_threshold: 0.00499999988824
dbscan_eps: 0.20000000298
dbscan_min_samples: 0.0500000007451
minimum_bounding_box_height: 20
}
}
}
target_class_config {
key: “two-weeler”
value {
clustering_config {
coverage_threshold: 0.00499999988824
dbscan_eps: 0.15000000596
dbscan_min_samples: 0.0500000007451
minimum_bounding_box_height: 20
}
}
}
target_class_config {
key: “person”
value {
clustering_config {
coverage_threshold: 0.00749999983236
dbscan_eps: 0.230000004172
dbscan_min_samples: 0.0500000007451
minimum_bounding_box_height: 20
}
}
}
target_class_config {
key: “road_sign”
value {
clustering_config {
coverage_threshold: 0.00749999983236
dbscan_eps: 0.230000004172
dbscan_min_samples: 0.0500000007451
minimum_bounding_box_height: 20
}}}}
model_config {
pretrained_model_file: “/workspace/detector/models/tlt_trafficcamnet_vunpruned_v1.0/resnet18_trafficcamnet.tlt”
num_layers: 18
use_batch_norm: true
objective_set {
bbox {
scale: 35.0
offset: 0.5
}
cov {
}
}
training_precision {
backend_floatx: FLOAT32
}
arch: “resnet”
}
evaluation_config {
validation_period_during_training: 10
first_validation_epoch: 30
minimum_detection_ground_truth_overlap {
key: “car”
value: 0.699999988079
}
minimum_detection_ground_truth_overlap {
key: “two-weeler”
value: 0.5
}
minimum_detection_ground_truth_overlap {
key: “person”
value: 0.5
}
minimum_detection_ground_truth_overlap {
key: “road_sign”
value: 0.5
}
evaluation_box_config {
key: “car”
value {
minimum_height: 20
maximum_height: 9999
minimum_width: 10
maximum_width: 9999
}
}
evaluation_box_config {
key: “two-weeler”
value {
minimum_height: 20
maximum_height: 9999
minimum_width: 10
maximum_width: 9999
}
}
evaluation_box_config {
key: “person”
value {
minimum_height: 20
maximum_height: 9999
minimum_width: 10
maximum_width: 9999
}
}
evaluation_box_config {
key: “road_sign”
value {
minimum_height: 20
maximum_height: 9999
minimum_width: 10
maximum_width: 9999
}
}
average_precision_mode: INTEGRATE
}
2) Then I fine-tuned the model on the KITTI dataset (car category only, as in the dataset above). The model reaches AP = 77.56 on the KITTI validation set. However, evaluating the trained model on the KITTI test set with the config above produces the following results:
What could cause the difference in AP between the validation data and the test data?
Train config:
random_seed: 42
dataset_config {
data_sources {
tfrecords_path: “/workspace//kitti/tfrecords/”
image_directory_path: “/workspace/*/kitti/train/”
}
image_extension: “png”
target_class_mapping {
key: “car”
value: “car”
}
validation_fold: 0
}
augmentation_config {
preprocessing {
output_image_width: 960
output_image_height: 544
min_bbox_width: 1.0
min_bbox_height: 1.0
output_image_channel: 3
}
spatial_augmentation {
hflip_probability: 0.5
zoom_min: 1.0
zoom_max: 1.0
translate_max_x: 8.0
translate_max_y: 8.0
}
color_augmentation {
hue_rotation_max: 25.0
saturation_shift_max: 0.20000000298
contrast_scale_max: 0.10000000149
contrast_center: 0.5
}
}
postprocessing_config {
target_class_config {
key: “car”
value {
clustering_config {
coverage_threshold: 0.00499999988824
dbscan_eps: 0.20000000298
dbscan_min_samples: 0.0500000007451
minimum_bounding_box_height: 20
}}}}
model_config {
num_layers: 18
pretrained_model_file: “/workspace/ssd/m.ageeva/nvidia_toolkit/detector/tlt_trafficcamnet_vunpruned_v1.0/resnet18_trafficcamnet.tlt”
use_batch_norm: true
objective_set {
bbox {
scale: 35.0
offset: 0.5
}
cov {
}
}
training_precision {
backend_floatx: FLOAT32
}
arch: “resnet”
all_projections: true
}
evaluation_config {
validation_period_during_training: 5
first_validation_epoch: 5
minimum_detection_ground_truth_overlap {
key: “car”
value: 0.699999988079
}
evaluation_box_config {
key: “car”
value {
minimum_height: 20
maximum_height: 9999
minimum_width: 10
maximum_width: 9999
}
}
average_precision_mode: INTEGRATE
}
cost_function_config {
target_classes {
name: “car”
class_weight: 1.0
coverage_foreground_weight: 0.0500000007451
objectives {
name: “cov”
initial_weight: 1.0
weight_target: 1.0
}
objectives {
name: “bbox”
initial_weight: 10.0
weight_target: 10.0
}
}
enable_autoweighting: true
max_objective_weight: 0.999899983406
min_objective_weight: 9.99999974738e-05
}
training_config {
batch_size_per_gpu: 4
num_epochs: 120
learning_rate {
soft_start_annealing_schedule {
min_learning_rate: 1e-05
max_learning_rate: 1e-03
soft_start: 0.10000000149
annealing: 0.699999988079
}
}
regularizer {
type: L1
weight: 3.00000002618e-09
}
optimizer {
adam {
epsilon: 9.99999993923e-09
beta1: 0.899999976158
beta2: 0.999000012875
}
}
cost_scaling {
initial_exponent: 20.0
increment: 0.005
decrement: 1.0
}
checkpoint_interval: 10
}
bbox_rasterizer_config {
target_class_config {
key: “car”
value {
cov_center_x: 0.5
cov_center_y: 0.5
cov_radius_x: 0.40000000596
cov_radius_y: 0.40000000596
bbox_min_radius: 1.0
}
}
deadzone_radius: 0.400000154972
}
Inference config:
inferencer_config{
target_classes: “car”
image_width: 960
image_height: 544
image_channels: 3
batch_size: 16
gpu_index: 2
tlt_config{
model: “/workspace/detector/models/kitti_1class.tlt”
}
}
bbox_handler_config{
kitti_dump: true
disable_overlay: false
overlay_linewidth: 2
classwise_bbox_handler_config{
key:“car”
value: {
confidence_model: “aggregate_cov”
output_map: “car”
confidence_threshold: 0.9
bbox_color{
R: 255
G: 255
B: 0
}
clustering_config{
coverage_threshold: 0.00
dbscan_eps: 0.3
dbscan_min_samples: 0.05
minimum_bounding_box_height: 4
}}}
classwise_bbox_handler_config{
key:“default”
value: {
confidence_model: “aggregate_cov”
confidence_threshold: 0.9
bbox_color{
R: 255
G: 0
B: 0
}
clustering_config{
coverage_threshold: 0.00
dbscan_eps: 0.3
dbscan_min_samples: 0.05
minimum_bounding_box_height: 4
}}}}