Thanks, Morgan.
I started another round of training that includes part of the data from the public Open-Image dataset. This public data only contains the classes Bicycle and People, so the summary of my mixed dataset is:
- Total images count
3296
- Images source
2296 from proprietary data
1000 from Open-Image
- Overall labels distribution
“electric_bicycle”: 2041,
“people”: 4439, 30% from Open-Image
“another_custom_obj”: 2242,
“bicycle”: 1975, 99% from Open-Image
Training specs
detectnet_v2_tfrecords_kitti_trainval.txt:
TFrecords conversion spec file for kitti training
kitti_config {
root_directory_path: “/workspace/tao-experiments/data/training”
image_dir_name: “image_2”
label_dir_name: “label_2”
image_extension: “.jpg”
partition_mode: “random”
num_partitions: 2
val_split: 8
num_shards: 10
}
image_directory_path: “/workspace/tao-experiments/data/training”
detectnet_v2_train_resnet18_kitti.txt:
random_seed: 42
dataset_config {
data_sources {
tfrecords_path: “/workspace/tao-experiments/data/tfrecords/kitti_trainval/*”
image_directory_path: “/workspace/tao-experiments/data/training”
}
image_extension: “jpg”
target_class_mapping {
key: “another_custom_obj”
value: “another_custom_obj”
}
target_class_mapping {
key: “people”
value: “people”
}
target_class_mapping {
key: “electric_bicycle”
value: “electric_bicycle”
}
target_class_mapping {
key: “bicycle”
value: “bicycle”
}
validation_fold: 0
}
augmentation_config {
preprocessing {
output_image_width: 960
output_image_height: 1280
min_bbox_width: 1.0
min_bbox_height: 1.0
output_image_channel: 3
}
spatial_augmentation {
hflip_probability: 0.5
zoom_min: 1.0
zoom_max: 1.0
translate_max_x: 8.0
translate_max_y: 8.0
}
color_augmentation {
hue_rotation_max: 25.0
saturation_shift_max: 0.20000000298
contrast_scale_max: 0.10000000149
contrast_center: 0.5
}
}
postprocessing_config {
target_class_config {
key: “another_custom_obj”
value {
clustering_config {
clustering_algorithm: DBSCAN
dbscan_confidence_threshold: 0.9
coverage_threshold: 0.00499999988824
dbscan_eps: 0.20000000298
dbscan_min_samples: 0.0500000007451
minimum_bounding_box_height: 10
}
}
}
target_class_config {
key: “people”
value {
clustering_config {
clustering_algorithm: DBSCAN
dbscan_confidence_threshold: 0.9
coverage_threshold: 0.00499999988824
dbscan_eps: 0.15000000596
dbscan_min_samples: 0.0500000007451
minimum_bounding_box_height: 20
}
}
}
target_class_config {
key: “electric_bicycle”
value {
clustering_config {
clustering_algorithm: DBSCAN
dbscan_confidence_threshold: 0.9
coverage_threshold: 0.00749999983236
dbscan_eps: 0.230000004172
dbscan_min_samples: 0.0500000007451
minimum_bounding_box_height: 20
}
}
}
target_class_config {
key: “bicycle”
value {
clustering_config {
clustering_algorithm: DBSCAN
dbscan_confidence_threshold: 0.9
coverage_threshold: 0.00749999983236
dbscan_eps: 0.230000004172
dbscan_min_samples: 0.0500000007451
minimum_bounding_box_height: 20
}
}
}
}
model_config {
pretrained_model_file: “/workspace/tao-experiments/detectnet_v2/pretrained_resnet18/pretrained_detectnet_v2_vresnet18/resnet18.hdf5”
num_layers: 18
use_batch_norm: true
objective_set {
bbox {
scale: 35.0
offset: 0.5
}
cov {
}
}
arch: “resnet”
}
evaluation_config {
validation_period_during_training: 10
first_validation_epoch: 20
minimum_detection_ground_truth_overlap {
key: “another_custom_obj”
value: 0.4
}
minimum_detection_ground_truth_overlap {
key: “people”
value: 0.5
}
minimum_detection_ground_truth_overlap {
key: “electric_bicycle”
value: 0.5
}
minimum_detection_ground_truth_overlap {
key: “bicycle”
value: 0.5
}
evaluation_box_config {
key: “another_custom_obj”
value {
minimum_height: 10
maximum_height: 9999
minimum_width: 14
maximum_width: 9999
}
}
evaluation_box_config {
key: “people”
value {
minimum_height: 20
maximum_height: 9999
minimum_width: 20
maximum_width: 9999
}
}
evaluation_box_config {
key: “electric_bicycle”
value {
minimum_height: 20
maximum_height: 9999
minimum_width: 20
maximum_width: 9999
}
}
evaluation_box_config {
key: “bicycle”
value {
minimum_height: 20
maximum_height: 9999
minimum_width: 20
maximum_width: 9999
}
}
average_precision_mode: INTEGRATE
}
cost_function_config {
target_classes {
name: “another_custom_obj”
class_weight: 10.0
coverage_foreground_weight: 0.0500000007451
objectives {
name: “cov”
initial_weight: 1.0
weight_target: 1.0
}
objectives {
name: “bbox”
initial_weight: 10.0
weight_target: 10.0
}
}
target_classes {
name: “people”
class_weight: 5.0
coverage_foreground_weight: 0.0500000007451
objectives {
name: “cov”
initial_weight: 1.0
weight_target: 1.0
}
objectives {
name: “bbox”
initial_weight: 10.0
weight_target: 1.0
}
}
target_classes {
name: “electric_bicycle”
class_weight: 10.0
coverage_foreground_weight: 0.0500000007451
objectives {
name: “cov”
initial_weight: 1.0
weight_target: 1.0
}
objectives {
name: “bbox”
initial_weight: 10.0
weight_target: 10.0
}
}
target_classes {
name: “bicycle”
class_weight: 10.0
coverage_foreground_weight: 0.0500000007451
objectives {
name: “cov”
initial_weight: 1.0
weight_target: 1.0
}
objectives {
name: “bbox”
initial_weight: 10.0
weight_target: 10.0
}
}
enable_autoweighting: true
max_objective_weight: 0.999899983406
min_objective_weight: 9.99999974738e-05
}
training_config {
batch_size_per_gpu: 8
num_epochs: 120
learning_rate {
soft_start_annealing_schedule {
min_learning_rate: 5e-06
max_learning_rate: 5e-04
soft_start: 0.10000000149
annealing: 0.699999988079
}
}
regularizer {
type: L1
weight: 3.00000002618e-09
}
optimizer {
adam {
epsilon: 9.99999993923e-09
beta1: 0.899999976158
beta2: 0.999000012875
}
}
cost_scaling {
initial_exponent: 20.0
increment: 0.005
decrement: 1.0
}
checkpoint_interval: 10
}
bbox_rasterizer_config {
target_class_config {
key: “another_custom_obj”
value {
cov_center_x: 0.5
cov_center_y: 0.5
cov_radius_x: 0.40000000596
cov_radius_y: 0.40000000596
bbox_min_radius: 1.0
}
}
target_class_config {
key: “people”
value {
cov_center_x: 0.5
cov_center_y: 0.5
cov_radius_x: 1.0
cov_radius_y: 1.0
bbox_min_radius: 1.0
}
}
target_class_config {
key: “electric_bicycle”
value {
cov_center_x: 0.5
cov_center_y: 0.5
cov_radius_x: 1.0
cov_radius_y: 1.0
bbox_min_radius: 1.0
}
}
target_class_config {
key: “bicycle”
value {
cov_center_x: 0.5
cov_center_y: 0.5
cov_radius_x: 1.0
cov_radius_y: 1.0
bbox_min_radius: 1.0
}
}
deadzone_radius: 0.400000154972
}
After training, the AP is:
…
…
…
2022-03-03 05:41:11,786 [INFO] tensorflow: epoch = 119.96052631578947, learning_rate = 5.025307e-06, loss = 0.0001229787, step = 45585 (5.471 sec)
INFO:tensorflow:Saving checkpoints for step-45600.
2022-03-03 05:41:14,930 [INFO] tensorflow: Saving checkpoints for step-45600.
WARNING:tensorflow:Ignoring: /tmp/tmp15bx64zr; No such file or directory
2022-03-03 05:41:15,051 [WARNING] tensorflow: Ignoring: /tmp/tmp15bx64zr; No such file or directory
2022-03-03 05:41:17,648 [INFO] iva.detectnet_v2.evaluation.evaluation: step 0 / 32, 0.00s/step
2022-03-03 05:41:25,002 [INFO] iva.detectnet_v2.evaluation.evaluation: step 10 / 32, 0.74s/step
2022-03-03 05:41:30,688 [INFO] iva.detectnet_v2.evaluation.evaluation: step 20 / 32, 0.57s/step
2022-03-03 05:41:34,682 [INFO] iva.detectnet_v2.evaluation.evaluation: step 30 / 32, 0.40s/step
Matching predictions to ground truth, class 1/4.: 100%|█| 280/280 [00:00<00:00, 40967.14it/s]
Matching predictions to ground truth, class 2/4.: 100%|█| 4877/4877 [00:00<00:00, 48597.64it/s]
Matching predictions to ground truth, class 3/4.: 100%|█| 384/384 [00:00<00:00, 52199.41it/s]
Matching predictions to ground truth, class 4/4.: 100%|█| 1038/1038 [00:00<00:00, 43879.58it/s]
Epoch 120/120
Validation cost: 0.000135
Mean average_precision (in %): 69.6327
class name average precision (in %)
bicycle 34.853
another_custom_obj 95.5677
electric_bicycle 82.0849
people 66.0253
After retraining with the same spec (only the number of epochs reduced to 80), the AP is:
validation cost: 0.000956
Mean average_precision (in %): 69.2883
class name average precision (in %)
bicycle 33.8405
another_custom_obj 95.7922
electric_bicycle 82.9376
people 64.5828
Before this round, I actually ran another training with the same dataset structure, except that bicycle had only 500 labels, and that time the AP was actually much better for all classes:
Epoch 120/120
Validation cost: 0.000101
Mean average_precision (in %): 74.1200
class name average precision (in %)
bicycle 43.7898
another_custom_obj 96.8945
electric_bicycle 80.209
people 75.5866
question 1:
Does it make sense that adding more data results in a worse AP?
question 2:
Why does bicycle have such a low AP? As I understand it, both training and validation for this class are run almost entirely against data from the public dataset.