BBox is slightly off to the upper left when running on DeepStream

Hi,

I did transfer learning with DetectNet v2.
The results looked very good when I checked them with tlt-infer.
However, when I export the model and run it with DeepStream, the bounding boxes are slightly shifted toward the upper left.

How can I get results as accurate as tlt-infer's?

We are using the following:

TLT v1.0.1
DetectNet v2 (ResNet18)
DeepStream SDK 4.0.2
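
In case it helps to quantify the shift, a rough sketch like the one below could compare matched detections from tlt-infer and DeepStream on the same frame. How the boxes are exported from each tool is omitted; it only assumes two lists of (x1, y1, x2, y2) boxes in input-image pixels, so treat it as a sketch rather than working tooling.

# Rough sketch: average centre shift between two sets of boxes for the same
# frame (e.g. tlt-infer output vs. DeepStream output). Box export/parsing is
# not shown; boxes are plain (x1, y1, x2, y2) tuples in input-image pixels.

def iou(a, b):
    ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])
    ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    return inter / (area_a + area_b - inter) if inter > 0 else 0.0

def mean_center_shift(ref_boxes, test_boxes, iou_threshold=0.5):
    """Match each reference box to its best-overlapping test box and return
    the mean (dx, dy) of the box centres; negative values mean the test
    boxes sit up/left of the reference boxes."""
    shifts = []
    for r in ref_boxes:
        best = max(test_boxes, key=lambda t: iou(r, t), default=None)
        if best is None or iou(r, best) < iou_threshold:
            continue
        rcx, rcy = (r[0] + r[2]) / 2, (r[1] + r[3]) / 2
        tcx, tcy = (best[0] + best[2]) / 2, (best[1] + best[3]) / 2
        shifts.append((tcx - rcx, tcy - rcy))
    if not shifts:
        return None
    n = len(shifts)
    return (sum(dx for dx, _ in shifts) / n, sum(dy for _, dy in shifts) / n)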

@m.kawakami
Could you please paste your findings with more details? Thanks.

BTW, do you have a chance to check TLT 2.0_dp too?

I’m using the following configs.
I'll try TLT 2.0.

Training config:

# Sample model config to instantiate a resnet18 model with pretrained weights and freeze blocks 0, 1
# with all shortcuts having projection layers.
model_config {
  pretrained_model_file: "/root/work/tlt_work/pretrained_models/tlt_resnet18_detectnet_v2_v1/resnet18.hdf5"
  num_layers: 18
  #load_graph: True
  #freeze_blocks: 0
  #freeze_blocks: 1
  #freeze_blocks: 2
  #freeze_blocks: 3
  arch: "resnet"
  use_batch_norm: True
  activation {
    activation_type: "relu"
  }
  dropout_rate: 0.1
  objective_set: {
    cov {}
    bbox {
      scale: 35.0
      offset: 0.5
    }
  }
  training_precision: {
    backend_floatx: FLOAT32
  }
}

# Sample rasterizer configs to instantiate a 1 class bbox rasterizer
bbox_rasterizer_config {
  target_class_config {
    key: "object"
    value: {
      cov_center_x: 0.5
      cov_center_y: 0.5
      cov_radius_x: 0.4
      cov_radius_y: 0.4
      bbox_min_radius: 1.0
    }
  }
  deadzone_radius: 0.67
}

postprocessing_config {
  target_class_config {
    key: "object"
    value: {
      clustering_config {
        coverage_threshold: 0.005
        dbscan_eps: 0.15
        #dbscan_eps: 1.00
        dbscan_min_samples: 0.05
        minimum_bounding_box_height: 20
      }
    }
  }
}

cost_function_config {
  target_classes {
    name: "object"
    class_weight: 1.0
    coverage_foreground_weight: 0.05
    objectives {
      name: "cov"
      initial_weight: 1.0
      weight_target: 1.0
    }
    objectives {
      name: "bbox"
      initial_weight: 10.0
      weight_target: 10.0
    }
  }
  enable_autoweighting: True
  max_objective_weight: 0.9999
  min_objective_weight: 0.0001
}

training_config {
  batch_size_per_gpu: 16
  num_epochs: 80
  learning_rate {
    soft_start_annealing_schedule {
      min_learning_rate: 5e-6
      max_learning_rate: 5e-4
      soft_start: 0.1
      annealing: 0.7
    }
  }
  regularizer {
    type: L1
    weight: 3e-9
  }
  optimizer {
    adam {
      epsilon: 1e-08
      beta1: 0.9
      beta2: 0.999
    }
  }
  cost_scaling {
    enabled: False
    initial_exponent: 20.0
    increment: 0.005
    decrement: 1.0
  }
  checkpoint_interval: 10
}

# Sample augmentation config
augmentation_config {
  preprocessing {
    output_image_width: 512
    output_image_height: 288
    output_image_channel: 3
    min_bbox_width: 1.0
    min_bbox_height: 1.0
  }
  spatial_augmentation {
    hflip_probability: 0.5
    vflip_probability: 0.0
    zoom_min: 1.0
    zoom_max: 1.0
    translate_max_x: 8.0
    translate_max_y: 8.0
  }
  color_augmentation {
    color_shift_stddev: 0.3
    hue_rotation_max: 25.0
    saturation_shift_max: 0.2
    contrast_scale_max: 0.1
    contrast_center: 0.5
  }
}

# Sample evaluation config to run evaluation in integrate mode for the given model,
# at every 10th epoch starting from epoch 1.
evaluation_config {
  average_precision_mode: INTEGRATE
  validation_period_during_training: 10
  first_validation_epoch: 1
  minimum_detection_ground_truth_overlap {
    key: "object"
    value: 0.5
  }
  evaluation_box_config {
    key: "object"
    value {
      minimum_height: 4
      maximum_height: 9999
      minimum_width: 4
      maximum_width: 9999
    }
  }
}

dataset_config {
  data_sources: {
    tfrecords_path: "/root/work/tlt_work/tf_records/*"
    image_directory_path: "/root/work/tlt_work/resized"
  }
  image_extension: "png"
  target_class_mapping {
      key: "object"
      value: "object"
  }
  validation_fold: 0
}

Training config after pruning:

model_config {
  pretrained_model_file: "/root/work/tlt_work/pruned_model/resnet18_nopool_bn_detectnet_v2_pruned.tlt"
  num_layers: 18
  load_graph: True
  #freeze_blocks: 0
  #freeze_blocks: 1
  #freeze_blocks: 2
  #freeze_blocks: 3
  arch: "resnet"
  use_batch_norm: True
  activation {
    activation_type: "relu"
  }
  dropout_rate: 0.1
  objective_set: {
    cov {}
    bbox {
      scale: 35.0
      offset: 0.5
    }
  }
  training_precision: {
    backend_floatx: FLOAT32
  }
}

# Sample rasterizer configs to instantiate a 1 class bbox rasterizer
bbox_rasterizer_config {
  target_class_config {
    key: "object"
    value: {
      cov_center_x: 0.5
      cov_center_y: 0.5
      cov_radius_x: 0.4
      cov_radius_y: 0.4
      bbox_min_radius: 1.0
    }
  }
  deadzone_radius: 0.67
}

postprocessing_config {
  target_class_config {
    key: "object"
    value: {
      clustering_config {
        coverage_threshold: 0.005
        dbscan_eps: 0.15
        dbscan_min_samples: 0.05
        minimum_bounding_box_height: 20
      }
    }
  }
}

cost_function_config {
  target_classes {
    name: "object"
    class_weight: 1.0
    coverage_foreground_weight: 0.05
    objectives {
      name: "cov"
      initial_weight: 1.0
      weight_target: 1.0
    }
    objectives {
      name: "bbox"
      initial_weight: 10.0
      weight_target: 10.0
    }
  }
  enable_autoweighting: True
  max_objective_weight: 0.9999
  min_objective_weight: 0.0001
}

training_config {
  batch_size_per_gpu: 16
  num_epochs: 80
  learning_rate {
    soft_start_annealing_schedule {
      min_learning_rate: 5e-6
      max_learning_rate: 5e-4
      soft_start: 0.1
      annealing: 0.7
    }
  }
  regularizer {
    type: L1
    weight: 3e-9
  }
  optimizer {
    adam {
      epsilon: 1e-08
      beta1: 0.9
      beta2: 0.999
    }
  }
  cost_scaling {
    enabled: False
    initial_exponent: 20.0
    increment: 0.005
    decrement: 1.0
  }
  checkpoint_interval: 10
}

# Sample augmentation config
augmentation_config {
  preprocessing {
    output_image_width: 512
    output_image_height: 288
    output_image_channel: 3
    min_bbox_width: 1.0
    min_bbox_height: 1.0
  }
  spatial_augmentation {
    hflip_probability: 0.5
    vflip_probability: 0.0
    zoom_min: 1.0
    zoom_max: 1.0
    translate_max_x: 8.0
    translate_max_y: 8.0
  }
  color_augmentation {
    color_shift_stddev: 0.3
    hue_rotation_max: 25.0
    saturation_shift_max: 0.2
    contrast_scale_max: 0.1
    contrast_center: 0.5
  }
}

# Sample evaluation config to run evaluation in integrate mode for the given model,
# at every 10th epoch starting from epoch 1.
evaluation_config {
  validation_period_during_training: 10
  first_validation_epoch: 1
  minimum_detection_ground_truth_overlap {
    key: "object"
    value: 0.5
  }
  evaluation_box_config {
    key: "object"
    value {
      minimum_height: 4
      maximum_height: 9999
      minimum_width: 4
      maximum_width: 9999
    }
  }
}

dataset_config {
  data_sources: {
    tfrecords_path: "/root/work/tlt_work/tf_records/*"
    image_directory_path: "/root/work/tlt_work/resized"
  }
  image_extension: "png"
  target_class_mapping {
      key: "object"
      value: "object"
  }
  validation_fold: 0
}

Inference config:

{
    "dbscan_criterion": "IOU",
    "dbscan_eps": {
        "object": 0.25,
        "default": 0.15
    },
    "dbscan_min_samples": {
        "object": 0.05,
        "default": 0.0
    },
    "min_cov_to_cluster": {
        "object": 0.075,
        "default": 0.005
    },
    "min_obj_height": {
        "object": 4,
        "default": 2
    },
    "target_classes": ["object"],
    "confidence_th": {
        "object": 0.3
    },
    "confidence_model": {
        "object": { "kind": "aggregate_cov"},
        "default": { "kind": "aggregate_cov"}
    },
    "output_map": {
        "object" : "object"
    },
    "color": {
        "object": "green"
    },
    "postproc_classes": ["object"],
    "image_height": 288,
    "image_width": 512,
    "stride": 16
}

DeepStream config:

[property]
gpu-id=0
net-scale-factor=0.0039215697906911373
model-engine-file=object_detect_b1_fp16.engine
labelfile-path=labels.txt
#int8-calib-file=calibration.bin
uff-input-blob-name=input_1
batch-size=1
input-dims=3;288;512;0
process-mode=1
model-color-format=0
## 0=FP32, 1=INT8, 2=FP16 mode
network-mode=2
num-detected-classes=1
interval=0
gie-unique-id=1
output-blob-names=output_cov/Sigmoid;output_bbox/BiasAdd
is-classifier=0
#maintain-aspect-ratio=1

[class-attrs-all]
eps=0.15
group-threshold=1
minBoxes=0
detected-min-w=80
detected-min-h=40
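
For what it's worth, below is a sketch of how I understand the raw DetectNet_v2 grid outputs map back to pixel boxes, using the stride (16), bbox scale (35.0), and offset (0.5) from the specs above. The cell-centre convention here is my own assumption, not the actual tlt-infer or DeepStream parser code, and DBSCAN clustering would still run on top of these raw boxes, so treat it purely as an illustration.

import numpy as np

# Illustrative sketch of grid-cell box decoding for a DetectNet_v2-style head.
# Assumption (not taken from tlt-infer or DeepStream source): each cell of the
# 32x18 output grid (512x288 input, stride 16) predicts the box edges relative
# to the cell centre, normalised by SCALE.

STRIDE = 16      # input pixels per output grid cell (from the configs above)
SCALE = 35.0     # bbox scale from the training spec
OFFSET = 0.5     # bbox offset from the training spec

def decode(bbox_out, cov_out, cov_threshold=0.075):
    """bbox_out: (4, H, W) raw bbox head; cov_out: (H, W) coverage head.
    Returns the unclustered boxes whose coverage exceeds cov_threshold
    (0.075 mirrors min_cov_to_cluster in the inference spec above)."""
    h, w = cov_out.shape
    cols, rows = np.meshgrid(np.arange(w), np.arange(h))
    cx = (cols + OFFSET) * STRIDE   # assumed cell-centre convention
    cy = (rows + OFFSET) * STRIDE
    x1 = cx - bbox_out[0] * SCALE
    y1 = cy - bbox_out[1] * SCALE
    x2 = cx + bbox_out[2] * SCALE
    y2 = cy + bbox_out[3] * SCALE
    keep = cov_out > cov_threshold
    return np.stack([x1[keep], y1[keep], x2[keep], y2[keep]], axis=1)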

Incidentally, can a model created with TLT 2.0 also work with DeepStream 4.0?

No, a model created with TLT 2.0 is compatible with DeepStream 5, not DeepStream 4.