I am training a custom Faster RCNN model on TLT 3.0. I previously trained this model with identical data and identical network configurations on TLT 2.0 and got an mAP of 96.5%. Now the network either learns nothing and plateaus at a loss of ~1.1 (the earlier experiment plateaued at 0.04), or the loss becomes NaN and training fails. Here is the experiment spec for the TLT 3.0 experiment:
random_seed: 42
enc_key: 'cPPm_vUm4qGaRpd6kgQX5Dp5S-RKRgh9vp1Y_rQYX2U'
verbose: True
model_config {
input_image_config {
image_type: RGB
image_channel_order: 'bgr'
size_height_width {
height: 540
width: 960
image_channel_mean {
key: 'b'
value: 114.54486766972353
image_channel_mean {
key: 'g'
value: 118.13145483368518
image_channel_mean {
key: 'r'
value: 117.67608453228597
image_scaling_factor: 1
max_objects_num_per_image: 10
arch: "resnet:34"
anchor_box_config {
scale: 20
scale: 40
scale: 90
ratio: 1.0
ratio: 0.5
ratio: 2.0
freeze_bn: False
roi_mini_batch: 256
rpn_stride: 16
use_bias: True
roi_pooling_config {
pool_size: 7
pool_size_2x: False
all_projections: True
dataset_config {
data_sources: {
tfrecords_path: "/workspace/TLT/T_23/tfrecords/tfrecord*"
image_directory_path: "/workspace/DAB/D_7"
image_extension: 'jpg'
target_class_mapping {
key: 'p_1'
value: 'p'
target_class_mapping {
key: 'p_2'
value: 'p'
target_class_mapping {
key: 'p_3'
value: 'p'
target_class_mapping {
key: 'p_4'
value: 'p'
target_class_mapping {
key: 'p_5'
value: 'p'
target_class_mapping {
key: 'p_6'
value: 'p'
target_class_mapping {
key: 'p_7'
value: 'p'
target_class_mapping {
key: 'p_8'
value: 'p'
target_class_mapping {
key: 'r_1'
value: 'r'
target_class_mapping {
key: 'r_2'
value: 'r'
target_class_mapping {
key: 'r_3'
value: 'r'
target_class_mapping {
key: 'r_4'
value: 'r'
target_class_mapping {
key: 'r_5'
value: 'r'
target_class_mapping {
key: 'r_6'
value: 'r'
target_class_mapping {
key: 'r_7'
value: 'r'
target_class_mapping {
key: 'r_8'
value: 'r'
validation_fold: 0
augmentation_config {
preprocessing {
output_image_width: 960
output_image_height: 540
output_image_channel: 3
min_bbox_width: 1.0
min_bbox_height: 1.0
enable_auto_resize: True
spatial_augmentation {
hflip_probability: 0.1
vflip_probability: 0.1
zoom_min: 0.9
zoom_max: 1.1
translate_max_x: 96
translate_max_y: 54
color_augmentation {
hue_rotation_max: 0
saturation_shift_max: 0.0
contrast_scale_max: 0
contrast_center: 0.5
training_config {
checkpoint_interval: 1
output_model: "/workspace/TLT/T_23/weights/faster_rcnn_resnet_34.tlt"
enable_augmentation: True
enable_qat: False
batch_size_per_gpu: 16
num_epochs: 20
rpn_min_overlap: 0.3
rpn_max_overlap: 0.7
classifier_min_overlap: 0.0
classifier_max_overlap: 0.5
gt_as_roi: False
std_scaling: 1.0
classifier_regr_std {
key: 'x'
value: 10
classifier_regr_std {
key: 'y'
value: 10
classifier_regr_std {
key: 'w'
value: 5
classifier_regr_std {
key: 'h'
value: 5
rpn_mini_batch: 256
rpn_pre_nms_top_N: 12000
rpn_nms_max_boxes: 2000
rpn_nms_overlap_threshold: 0.7
regularizer {
type: L2
weight: 0.0001
optimizer {
adam {
lr: 0.00001
beta_1: 0.9
beta_2: 0.999
decay: 0.0
learning_rate {
soft_start {
start_lr: 0.0001
base_lr: 0.0001
soft_start: 0.0001
annealing_points: [0.05, 0.1, 0.15]
annealing_divider: 1.5
lambda_rpn_regr: 1.0
lambda_rpn_class: 1.0
lambda_cls_regr: 1.0
lambda_cls_class: 1.0
inference_config {
images_dir: '/workspace/DAB/D_7/test/images'
model: 'weights/fasterrcnn_resnet34_epoch_001.tlt'
batch_size: 2
detection_image_output_dir: '/workspace/TLT/T_23/infer/images'
labels_dump_dir: '/workspace/TLT/T_23/infer/labels'
rpn_pre_nms_top_N: 6000
rpn_nms_max_boxes: 300
rpn_nms_overlap_threshold: 0.7
object_confidence_thres: 0.0001
bbox_visualize_threshold: 0.6
classifier_nms_max_boxes: 100
classifier_nms_overlap_threshold: 0.3
evaluation_config {
model: 'weights/fasterrcnn_resnet34_epoch_001.tlt'
batch_size: 16
validation_period_during_training: 1
rpn_pre_nms_top_N: 6000
rpn_nms_max_boxes: 300
rpn_nms_overlap_threshold: 0.7
classifier_nms_max_boxes: 100
classifier_nms_overlap_threshold: 0.3
object_confidence_thres: 0.0001
use_voc07_11point_metric: False
gt_matching_iou_threshold: 0.5
And here is the corresponding TLT 2.0 spec:
random_seed: 42
enc_key: "tlt"
verbose: True
network_config {
input_image_config {
image_type: RGB
image_channel_order: "bgr"
size_height_width {
height: 540
width: 960
image_channel_mean {
key: 'b'
value: 114.54486766972353
image_channel_mean {
key: 'g'
value: 118.13145483368518
image_channel_mean {
key: 'r'
value: 117.67608453228597
image_scaling_factor: 1.0
max_objects_num_per_image: 10
feature_extractor: "resnet:34"
anchor_box_config {
scale: 20
scale: 40
scale: 90
ratio: 1
ratio: 0.5
ratio: 2
freeze_bn: False
roi_mini_batch: 256
rpn_stride: 16
conv_bn_share_bias: True
roi_pooling_config: {
pool_size: 7
pool_size_2x: False
all_projections: True
use_pooling: False
training_config {
kitti_data_config {
data_sources: {
tfrecords_path: "/ze/data/Experiments/TLT/T_113/tfrecords/tfrecords*"
image_directory_path: "/ze/data/Experiments/DAB/D_30"
image_extension: 'jpg'
target_class_mapping {
key: 'r_2'
value: 'R'
target_class_mapping {
key: 'p_8'
value: 'P'
target_class_mapping {
key: 'r_8'
value: 'R'
target_class_mapping {
key: 'r_4'
value: 'R'
target_class_mapping {
key: 'r_3'
value: 'R'
target_class_mapping {
key: 'p_2'
value: 'P'
target_class_mapping {
key: 'p_7'
value: 'P'
target_class_mapping {
key: 'p_3'
value: 'P'
target_class_mapping {
key: 'r_6'
value: 'R'
target_class_mapping {
key: 'p_6'
value: 'P'
target_class_mapping {
key: 'r_7'
value: 'R'
target_class_mapping {
key: 'p_4'
value: 'P'
target_class_mapping {
key: 'p_1'
value: 'P'
target_class_mapping {
key: 'p_5'
value: 'P'
target_class_mapping {
key: 'r_1'
value: 'R'
target_class_mapping {
key: 'r_5'
value: 'R'
validation_fold: 0
data_augmentation {
preprocessing {
output_image_width: 960
output_image_height: 540
output_image_channel: 3
min_bbox_width: 0.0
min_bbox_height: 0.0
spatial_augmentation {
hflip_probability: 0.1
vflip_probability: 0.1
zoom_min: 0.9
zoom_max: 1.1
translate_max_x: 96
translate_max_y: 54
rotate_rad_max: 0.261799
color_augmentation {
hue_rotation_max: 0.0
saturation_shift_max: 0.0
contrast_scale_max: 0.0
contrast_center: 0.0
enable_augmentation: True
batch_size_per_gpu: 2
num_epochs: 20
pretrained_weights: "/ze/data/pretrained_models/resnet_34.hdf5"
output_model: "/ze/data/Experiments/TLT/T_113/models/model.tlt"
rpn_min_overlap: 0.3
rpn_max_overlap: 0.7
classifier_min_overlap: 0
classifier_max_overlap: 0.5
gt_as_roi: False
std_scaling: 1
classifier_regr_std {
key: 'x'
value: 10
classifier_regr_std {
key: 'y'
value: 10
classifier_regr_std {
key: 'w'
value: 5
classifier_regr_std {
key: 'h'
value: 5
rpn_mini_batch: 256
rpn_pre_nms_top_N: 3000
rpn_nms_max_boxes: 500
rpn_nms_overlap_threshold: 0.6
reg_config {
type: L2
weight: 0.0001
optimizer {
adam {
lr: 0.0001
beta_1: 0.9
beta_2: 0.999
decay: 0.0
lr_scheduler {
soft_start {
base_lr: 0.0001
start_lr: 0.0001
soft_start: 0.0001
annealing_points: 0.05
annealing_points: 0.1
annealing_points: 0.15
annealing_points: 0.2
lambda_rpn_regr: 1.0
lambda_rpn_class: 1.0
lambda_cls_regr: 1.0
lambda_cls_class: 1.0
inference_config {
images_dir: '/ze/data/Experiments/DAB/D_30/test/images'
model: '/ze/data/Experiments/TLT/T_113/models/model.epoch17.tlt'
detection_image_output_dir: '/ze/data/Experiments/DAB/D_30/infer/images'
labels_dump_dir: '/ze/data/Experiments/DAB/D_30/infer/labels'
rpn_pre_nms_top_N: 6000
rpn_nms_max_boxes: 300
rpn_nms_overlap_threshold: 0.7
bbox_visualize_threshold: 0.6
classifier_nms_max_boxes: 300
classifier_nms_overlap_threshold: 0.3
evaluation_config {
model: '/ze/data/Experiments/TLT/T_113/models/model.epoch17.tlt'
labels_dump_dir: '/ze/data/Experiments/TLT/T_113/eval/labels'
rpn_pre_nms_top_N: 3000
rpn_nms_max_boxes: 500
rpn_nms_overlap_threshold: 0.6
classifier_nms_max_boxes: 300
classifier_nms_overlap_threshold: 0.3
object_confidence_thres: 0.0001
use_voc07_11point_metric: False