Tlt detectnet training focusing on a particular class?

Hi,
I’m currently trying to fine-tune the TrafficCamNet object detector on a custom dataset. However, the issue I currently have is that, for some reason, the model is only training for a single class. Here is an example of the evaluation after the 10th epoch:

Epoch 10/10
=========================

Validation cost: 0.000024
Mean average_precision (in %): 14.6148

class name      average precision (in %)
------------  --------------------------
bike                              0
car                              43.8443
pedestrian                        0

Is there something wrong with the training configuration file that I’m currently using? I’m linking the file below.
detectnet_v2_train_resnet18_kitti.txt (5.5 KB)

random_seed: 42
dataset_config {
data_sources {
    tfrecords_path: "/workspace/tlt-experiments/Detector/dataset/tfrecords/kitti_trainval/*"
    image_directory_path: "/workspace/tlt-experiments/Detector/Dataset/train/"
 }
 image_extension: "jpg"
 target_class_mapping {
    key: "autorickshaw"
    value: "car"
 }
 target_class_mapping {
    key: "bus"
    value: "car"
 }
 target_class_mapping {
    key: "car"
    value: "car"
 }
 target_class_mapping {
    key: "truck"
    value: "car"
 }
 target_class_mapping {
    key: "bicycle"
    value: "bike"
 }
 target_class_mapping {
    key: "motorcycle"
    value: "bike"
  }
  target_class_mapping {
     key: "person"
     value: "pedestrian"
  }
  target_class_mapping {
     key: "rider"
     value: "pedestrian"
  }
  validation_fold: 0
}
 augmentation_config {
   preprocessing {
    output_image_width: 960
    output_image_height: 544
    crop_right: 960
    crop_bottom: 544
    min_bbox_width: 1.0
    min_bbox_height: 1.0
    output_image_channel: 3
  }
  spatial_augmentation {
    hflip_probability: 0.5
    zoom_min: 1.0
    zoom_max: 1.0
    translate_max_x: 8.0
    translate_max_y: 8.0
  }
  color_augmentation {
    hue_rotation_max: 25.0
    saturation_shift_max: 0.20000000298
    contrast_scale_max: 0.10000000149
    contrast_center: 0.5
  }
}
 postprocessing_config {
 target_class_config {
    key: "car"
    value {
       clustering_config {
       coverage_threshold: 0.00499999988824
        dbscan_eps: 0.20000000298
        dbscan_min_samples: 0.0500000007451
       minimum_bounding_box_height: 20
     }
  }
 }
  target_class_config {
   key: "bike"
   value {
  clustering_config {
    coverage_threshold: 0.00499999988824
    dbscan_eps: 0.20000000298
    dbscan_min_samples: 0.0500000007451
    minimum_bounding_box_height: 20
   }
  }
 }
  target_class_config {
    key: "pedestrian"
    value {
     clustering_config {
        coverage_threshold: 0.00749999983236
        dbscan_eps: 0.230000004172
        dbscan_min_samples: 0.0500000007451
        minimum_bounding_box_height: 20
      }
    }
  }
 }
model_config {
  pretrained_model_file: "/workspace/tlt-experiments/Detector/pretrained_resnet18/tlt_pretrained_detectnet_v2_vresnet18/resnet18.tlt"
 num_layers: 18
 freeze_blocks: [0, 1, 2, 3]
 use_batch_norm: true
 objective_set {
    bbox {
     scale: 35.0
     offset: 0.5
}
cov {
}
  }
  training_precision {
backend_floatx: FLOAT32
  }
  arch: "resnet"
}
evaluation_config {
  validation_period_during_training: 10
  first_validation_epoch: 2
  minimum_detection_ground_truth_overlap {
key: "car"
value: 0.699999988079
  }
  minimum_detection_ground_truth_overlap {
key: "bike"
value: 0.5
  }
  minimum_detection_ground_truth_overlap {
key: "pedestrian"
value: 0.5
  }
  evaluation_box_config {
key: "car"
value {
  minimum_height: 20
  maximum_height: 9999
  minimum_width: 10
  maximum_width: 9999
}
  }
  evaluation_box_config {
key: "bike"
value {
  minimum_height: 20
  maximum_height: 9999
  minimum_width: 10
  maximum_width: 9999
}
  }
  evaluation_box_config {
key: "pedestrian"
value {
  minimum_height: 20
  maximum_height: 9999
  minimum_width: 10
  maximum_width: 9999
}
  }
  average_precision_mode: INTEGRATE
}
cost_function_config {
  target_classes {
name: "car"
class_weight: 1.0
coverage_foreground_weight: 0.0500000007451
objectives {
  name: "cov"
  initial_weight: 1.0
  weight_target: 1.0
}
objectives {
  name: "bbox"
  initial_weight: 10.0
  weight_target: 10.0
}
  }
  target_classes {
name: "bike"
class_weight: 8.0
coverage_foreground_weight: 0.0500000007451
objectives {
  name: "cov"
  initial_weight: 1.0
  weight_target: 1.0
}
objectives {
  name: "bbox"
  initial_weight: 10.0
  weight_target: 1.0
}
  }
  target_classes {
name: "pedestrian"
class_weight: 4.0
coverage_foreground_weight: 0.0500000007451
objectives {
  name: "cov"
  initial_weight: 1.0
  weight_target: 1.0
}
objectives {
  name: "bbox"
  initial_weight: 10.0
  weight_target: 10.0
}
  }
  enable_autoweighting: true
  max_objective_weight: 0.999899983406
  min_objective_weight: 9.99999974738e-05
}
training_config {
  batch_size_per_gpu: 8
  num_epochs: 10
  learning_rate {
    soft_start_annealing_schedule {
      min_learning_rate: 5e-06
      max_learning_rate: 5e-04
      soft_start: 0.10000000149
      annealing: 0.699999988079
    }
  }
  regularizer {
    type: L1
    weight: 3.00000002618e-09
  }
  optimizer {
    adam {
      epsilon: 9.99999993923e-09
      beta1: 0.899999976158
      beta2: 0.999000012875
    }
  }
  cost_scaling {
    initial_exponent: 20.0
    increment: 0.005
    decrement: 1.0
  }
  checkpoint_interval: 10
}
bbox_rasterizer_config {
  target_class_config {
    key: "car"
    value {
      cov_center_x: 0.5
      cov_center_y: 0.5
      cov_radius_x: 0.40000000596
      cov_radius_y: 0.40000000596
      bbox_min_radius: 1.0
    }
  }
  target_class_config {
    key: "bike"
    value {
      cov_center_x: 0.5
      cov_center_y: 0.5
      cov_radius_x: 1.0
      cov_radius_y: 1.0
      bbox_min_radius: 1.0
    }
  }
  target_class_config {
    key: "pedestrian"
    value {
      cov_center_x: 0.5
      cov_center_y: 0.5
      cov_radius_x: 1.0
      cov_radius_y: 1.0
      bbox_min_radius: 1.0
    }
  }
  deadzone_radius: 0.400000154972
}

In case this gets pointed out,

  • I’ve verified the labels in the tfrecords with my train specification file, they match.

  • Regarding the aspect ratio of the bboxes relative to the images, I’ve resized all images to 960*544 and scaled the bboxes accordingly.

My dataset distribution is as follows,

b'person': 70319
b'truck': 20759
b'autorickshaw': 24498
b'rider': 73108
b'motorcycle': 78119
b'bus': 13829
b'car': 65676
b'bicycle': 2573

Please refer to the specification file attached; I’ve combined some classes so that the network is eventually trained on only three classes.

What is /workspace/tlt-experiments/Detector/pretrained_resnet18/tlt_pretrained_detectnet_v2_vresnet18/resnet18.tlt ?

Hey @Morganh, I think there’s a bug in parsing the specification file. I’m not sure. But these are my findings.
The resnet18.tlt is an unpruned traffic camnet tlt model.
I think the target class mapping used in the spec file doesn’t work, judging by the way it behaved, or else my understanding of it is different.
My guess was this: say the default trained model has two classes named pedestrian and person sitting, both mapped to pedestrian. I expected it to merge both classes into a single class for training, because that is what the default specification file appeared to do.
So in my case I was planning to merge all 4-wheelers into a single class, and the other classes appropriately. The cost functions for the target classes are defined for the value fields of the key-value mappings above. But that is not what happened. When I was training, the reason only the car class was being considered for precision was (entirely by luck) that both the key and the value for the car class matched, i.e. there was a key car and a value car, but any other mapping (bus -> car, person -> pedestrian, etc.) wasn’t being considered.
Could this be a possible bug or am I mistaken?

Hi @beefshepherd,
The traffic camnet tlt model should be downloaded from https://ngc.nvidia.com/catalog/models/nvidia:tlt_trafficcamnet/files

Can you double check?

I did not find 4-wheelers in your spec. Please share your latest training spec. Thanks.

@Morganh, I believe I should’ve been clearer: what I was trying to do in the above config file is map bus, truck, car (all vehicles with 4 wheels) to a single class, car. The config file attached is the latest one.

It is fine to merge all 4-wheelers into a single class.
Reference in tlt user guide.

target_class_mapping : This parameter maps the class names in the tfrecords to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For eg: car, van, heavy_truck etc may be grouped under automobile. The “key” field is the value of the class name in the tfrecords file, and “value” field corresponds to the value that the network is expected to learn.

Hey @Morganh, I think that is the issue that I’m trying to point out. I feel there is some issue with the mapping in TLT. The classes don’t get mapped when I use it. Could you please check if there is any issue in the cfg I had attached earlier? Thanks.

@Morganh, what approaches could be taken when the validation accuracy isn’t improving or is stuck at a local minimum? My current accuracy is varying between 71 and 58 to 68 and 50 respectively for two classes. Are there any approaches that I could take to improve it?

Hey @Morganh, I think that is the issue that I’m trying to point out. I feel there is some issue with the mapping in TLT. The classes don’t get mapped when I use it. Could you please check if there is any issue in the cfg I had attached earlier? Thanks.

No issue in your spec’s target_class_mapping. It is ok.

@Morganh, what approaches could be taken when the validation accuracy isn’t improving or is stuck at a local minimum? My current accuracy is varying between 71 and 58 to 68 and 50 respectively for two classes. Are there any approaches that I could take to improve it?

Please try to finetune the hyper-parameters.
You can try a batch size of 16, and increase the number of epochs. It seems that 10 epochs is a bit short.

You can also modify the learning rate. More experiments are needed.

Hmm, yes, about increasing the batch size: right now I’m constrained by my machine. I’ve increased the epochs to 70, and from the 30th epoch I’ve noticed this behaviour: the precision jumping around the global optimum. Right now I can change the batch size from 4-8 but for some reason I can change it only during start when trying to stop and change batch size the train process seems to fail. Any idea on it?

I cannot understand the meaning of “but for some reason I can change it only during start when trying to stop and change batch size the train process seems to fail.”
Can you share the failed log? Thanks.

@Morganh, here is the log that I get when I start the training let’s say with batch size 4 and then stop the train process and restart the training process with an increased batch size 8.

Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/client/session.py", line 1365, in _do_call
    return fn(*args)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/client/session.py", line 1350, in _run_fn
    target_list, run_metadata)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/client/session.py", line 1443, in _call_tf_sessionrun
    run_metadata)
tensorflow.python.framework.errors_impl.InvalidArgumentError: 2 root error(s) found.
  (0) Invalid argument: assertion failed: [2.66666675]
	 [[{{node Assert/AssertGuard/Assert}}]]
	 [[resnet18_nopool_bn_detectnet_v2/block_4b_bn_2/AssignMovingAvg/_4217]]
  (1) Invalid argument: assertion failed: [2.66666675]
	 [[{{node Assert/AssertGuard/Assert}}]]
0 successful operations.
0 derived errors ignored.

The moment I revert the batch size the training continues from where it was left off.

Please create a new result folder and retry.
Refer to TLT training error : Key cost_sums/cyclist-bbox not found in checkpoint