Hello all,
I’m trying to train a detectnet_v2 model with a resnet10 backbone on a small dataset of 171 images with three classes: “car”, “bus”, and “twowheeler”. When I run the tlt-train command, I get the following error at the first step of the validation evaluation:
2019-09-30 22:23:39,841 [INFO] tensorflow: global_step/sec: 6.59319
INFO:tensorflow:global_step/sec: 6.65591
2019-09-30 22:23:40,292 [INFO] tensorflow: global_step/sec: 6.65591
INFO:tensorflow:global_step/sec: 6.76914
2019-09-30 22:23:40,735 [INFO] tensorflow: global_step/sec: 6.76914
2019-09-30 22:23:41,048 [INFO] iva.detectnet_v2.evaluation.evaluation: step 0 / 4, 0.00s/step
Traceback (most recent call last):
File "/usr/local/bin/tlt-train-g1", line 10, in <module>
sys.exit(main())
File "./common/magnet_train.py", line 37, in main
File "</usr/local/lib/python2.7/dist-packages/decorator.pyc:decorator-gen-2>", line 2, in main
File "./detectnet_v2/utilities/timer.py", line 46, in wrapped_fn
File "./detectnet_v2/scripts/train.py", line 632, in main
File "./detectnet_v2/scripts/train.py", line 556, in run_experiment
File "./detectnet_v2/scripts/train.py", line 490, in train_gridbox
File "./detectnet_v2/scripts/train.py", line 136, in run_training_loop
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 676, in run
run_metadata=run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 1270, in run
raise six.reraise(*original_exc_info)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 1255, in run
return self._sess.run(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 1335, in run
run_metadata=run_metadata))
File "./detectnet_v2/tfhooks/validation_hook.py", line 69, in after_run
File "./detectnet_v2/tfhooks/validation_hook.py", line 75, in validate
File "./detectnet_v2/evaluation/evaluation.py", line 164, in evaluate
File "./detectnet_v2/postprocessor/postprocessing.py", line 146, in cluster_predictions
File "./detectnet_v2/postprocessor/cluster.py", line 43, in cluster_predictions
AssertionError
My specification file for the training task is as follows:
# Sample model config to instantiate a resnet10 model with pretrained weights and freeze block 0
# with all shortcuts having projection layers.
dataset_config {
data_sources: {
tfrecords_path: "/workspace/tlt-docker/stock_vid_model/tfRecords/*"
image_directory_path: "/workspace/tlt-docker/stock_vid_model/stockVidDatasetNew"
}
image_extension: "jpg"
target_class_mapping {
key: "car"
value: "car"
}
target_class_mapping {
key: "bus"
value: "bus"
}
target_class_mapping {
key: "twowheeler"
value: "twowheeler"
}
validation_fold: 0
}
augmentation_config {
preprocessing {
output_image_width: 640
output_image_height: 368
output_image_channel: 3
min_bbox_width: 1.0
min_bbox_height: 1.0
}
spatial_augmentation {
hflip_probability: 0.1
zoom_min: 1.0
zoom_max: 1.0
translate_max_x: 8.0
translate_max_y: 8.0
}
color_augmentation {
hue_rotation_max: 10.0
saturation_shift_max: 0.2
contrast_scale_max: 0.1
contrast_center: 0.5
}
}
model_config {
pretrained_model_file: "/workspace/tlt-docker/lmu_street_model/tlt_resnet10_detectnet_v2_v1/resnet10.hdf5"
num_layers: 10
freeze_blocks: 0
arch: "resnet"
use_batch_norm: true
objective_set {
bbox {
scale: 35.0
offset: 0.5
}
cov {
}
}
training_precision {
backend_floatx: FLOAT32
}
}
training_config {
batch_size_per_gpu: 8
num_epochs: 30
learning_rate {
soft_start_annealing_schedule {
min_learning_rate: 5e-06
max_learning_rate: 0.0005
soft_start: 0.1
annealing: 0.7
}
}
regularizer {
type: L1
weight: 3e-09
}
optimizer {
adam {
epsilon: 9.9e-09
beta1: 0.9
beta2: 0.999
}
}
cost_scaling {
initial_exponent: 20.0
increment: 0.005
decrement: 1.0
}
checkpoint_interval: 10
}
evaluation_config {
average_precision_mode: INTEGRATE
validation_period_during_training: 10
first_validation_epoch: 1
minimum_detection_ground_truth_overlap {
key: "car"
value: 0.8
}
evaluation_box_config {
key: "car"
value: {
minimum_height: 4
maximum_height: 9999
minimum_width: 4
maximum_width: 9999
}
}
minimum_detection_ground_truth_overlap {
key: "bus"
value: 0.8
}
evaluation_box_config {
key: "bus"
value: {
minimum_height: 4
maximum_height: 9999
minimum_width: 4
maximum_width: 9999
}
}
minimum_detection_ground_truth_overlap {
key: "twowheeler"
value: 0.8
}
evaluation_box_config {
key: "twowheeler"
value: {
minimum_height: 4
maximum_height: 9999
minimum_width: 4
maximum_width: 9999
}
}
}
bbox_rasterizer_config {
target_class_config {
key: "car"
value: {
cov_center_x: 0.5
cov_center_y: 0.5
cov_radius_x: 0.4
cov_radius_y: 0.4
bbox_min_radius: 1.0
}
}
target_class_config {
key: "bus"
value: {
cov_center_x: 0.5
cov_center_y: 0.5
cov_radius_x: 0.4
cov_radius_y: 0.4
bbox_min_radius: 1.0
}
}
target_class_config {
key: "twowheeler"
value: {
cov_center_x: 0.5
cov_center_y: 0.5
cov_radius_x: 1.0
cov_radius_y: 1.0
bbox_min_radius: 1.0
}
}
deadzone_radius: 0.67
}
postprocessing_config {
target_class_config {
key: "car"
value: {
clustering_config {
coverage_threshold: 0.005
dbscan_eps: 0.1
dbscan_min_samples: 0.05
minimum_bounding_box_height: 4
}
}
}
target_class_config {
key: "bus"
value: {
clustering_config {
coverage_threshold: 0.005
dbscan_eps: 0.1
dbscan_min_samples: 0.05
minimum_bounding_box_height: 4
}
}
}
target_class_config {
key: "twowheeler"
value: {
clustering_config {
coverage_threshold: 0.005
dbscan_eps: 0.1
dbscan_min_samples: 0.05
minimum_bounding_box_height: 4
}
}
}
}
cost_function_config {
target_classes {
name: "car"
class_weight: 1.0
coverage_foreground_weight: 0.05
objectives {
name: "cov"
initial_weight: 1.0
weight_target: 1.0
}
objectives {
name: "bbox"
initial_weight: 10.0
weight_target: 10.0
}
}
target_classes {
name: "bus"
class_weight: 1.0
coverage_foreground_weight: 0.05
objectives {
name: "cov"
initial_weight: 1.0
weight_target: 1.0
}
objectives {
name: "bbox"
initial_weight: 10.0
weight_target: 10.0
}
}
target_classes {
name: "twowheeler"
class_weight: 1.0
coverage_foreground_weight: 0.05
objectives {
name: "cov"
initial_weight: 1.0
weight_target: 1.0
}
objectives {
name: "bbox"
initial_weight: 10.0
weight_target: 10.0
}
}
}
Any ideas on tackling the issue are welcome. Thanks :)