Hey, thanks for the reply!
Since my last message I’ve managed to run the model (ONNX) and get results! I had to do a couple of things, though. One of them was to skip the DBSCAN step: it asks for a dbscan_min_samples parameter > 1, while your proto file says it has to be between 0 and 1. When I tried setting it to 1, it didn’t produce any candidates…
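For reference, this is roughly the clusterer construction that trips me up (a sketch using the same values as my configure() below; as far as I can tell sklearn's DBSCAN wants min_samples as an integer count of boxes, which seems to clash with the 0-1 range the proto describes):

from sklearn.cluster import DBSCAN

# min_samples is a count of neighbouring boxes, and recent sklearn versions
# reject fractional values outright, so the 0-1 proto value can't be passed
# through directly.
clusterer = DBSCAN(
    eps=0.3,               # max (1 - IoU) distance between clustered boxes
    min_samples=1,         # must be an integer >= 1 in sklearn
    metric="precomputed",  # fit_predict gets a pairwise (1 - IoU) matrix
)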
The next thing I did was lower the confidence thresholds to something like 0.002 just to get 2 detections… do you have any idea why my confidences are so low?
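In case it's useful, this is the quick check I did on the raw coverage blob before thresholding (just a debug sketch; results here is the raw ONNX output list, with the coverage blob first, matching output_names in the class below):

import numpy as np

# Peek at the strongest raw coverage scores; on my image they top out barely
# above the 0.002 threshold mentioned above.
cov = results[0]  # "output_cov/Sigmoid:0", roughly (batch, classes, H, W)
print("top-10 coverage:", np.sort(cov.flatten())[-10:])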
In the end, from a picture with 3 cars, I got 2 bounding boxes on only one of the license plates, and neither is quite tight. This makes me believe that the DBSCAN step is used as some sort of NMS, which right now I’m skipping.
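To illustrate what I mean by NMS: since I'm skipping the clustering, every raw candidate survives to the output, so I'd expect something like this greedy IoU suppression to be needed in its place (just a sketch; iou_vectorized is the same pairwise-IoU helper used in the code below):

import numpy as np

def greedy_nms(boxes, scores, iou_threshold=0.5):
    """Keep the highest-scoring box, drop anything overlapping it too much."""
    order = np.argsort(scores)[::-1]  # highest confidence first
    ious = iou_vectorized(boxes)      # pairwise IoU matrix
    keep = []
    while order.size > 0:
        best = order[0]
        keep.append(best)
        # Drop remaining boxes that overlap the winner above the threshold.
        order = order[1:][ious[best, order[1:]] < iou_threshold]
    return keep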
This is my postprocessor class; I’ve made some changes to the original code you linked to:
class LPDNetPostprocessor(object):
"""Post processor for LPDNet ONNX outputs."""
def __init__(self, batch_size, frames,
output_path, data_format, classes, target_shape):
"""Initialize a post processor class for a classification model.
Args:
batch_size (int): Number of images in the batch.
frames (list): List of images.
output_path (str): Unix path to the output rendered images and labels.
data_format (str): Order of the input model dimensions.
"channels_first": CHW order.
"channels_last": HWC order.
classes (list): List of the class names.
target_shape (tuple): Shape of the model input.
"""
# self.pproc_config = load_clustering_config(postprocessing_config)
self.classes = classes
self.output_names = ["output_cov/Sigmoid:0",
"output_bbox/BiasAdd:0"]
        self.bbox_norm = [35., 35.]
self.offset = 0.5
self.scale_h = 1
self.scale_w = 1
self.target_shape = target_shape
self.stride = 16
self.linewidth = 4
# super().__init__(batch_size, frames, output_path, data_format)
self.batch_size = batch_size
self.frames = frames
self.output_path = output_path
self.data_format = data_format
        os.makedirs(self.output_path, exist_ok=True)
self.initialized = True
# Format the dbscan elements into classwise configurations for rendering.
self.configure()
def configure(self):
"""Configure the post processor object."""
self.dbscan_elements = {}
self.coverage_thresholds = {}
self.box_color = {}
self.classwise_clustering_config = {
"LicensePlate": {
'coverage_threshold': 0.002,
'minimum_bounding_box_height': 4,
'dbscan_config': {
'dbscan_eps': 0.3,
'dbscan_min_samples': 1,
'dbscan_confidence_threshold': 0.002
},
'bbox_color':{
'R': 0,
'G': 255,
'B': 0
}
}
}
for class_name in self.classes:
if class_name not in self.classwise_clustering_config.keys():
raise KeyError("Cannot find class name {} in {}".format(
class_name, self.classwise_clustering_config.keys()
))
            # fit_predict below is fed a precomputed (1 - IoU) distance matrix,
            # so metric='precomputed' is needed; otherwise sklearn would treat
            # the matrix rows as raw feature vectors.
            self.dbscan_elements[class_name] = dbscan(
                eps=self.classwise_clustering_config[class_name]['dbscan_config']['dbscan_eps'],
                min_samples=self.classwise_clustering_config[class_name]['dbscan_config']['dbscan_min_samples'],
                metric='precomputed',
            )
self.coverage_thresholds[class_name] = self.classwise_clustering_config[class_name]['coverage_threshold']
self.box_color[class_name] = self.classwise_clustering_config[class_name]['bbox_color']
def apply(self, results, this_id, render=True):
"""Apply the post processing to the outputs tensors.
This function takes the raw output tensors from the detectnet_v2 model
and performs the following steps:
1. Denormalize the output bbox coordinates
2. Threshold the coverage output to get the valid indices for the bboxes.
3. Filter out the bboxes from the "output_bbox/BiasAdd" blob.
4. Cluster the filterred boxes using DBSCAN.
5. Render the outputs on images and save them to the output_path/images
6. Serialize the output bboxes to KITTI Format label files in output_path/labels.
"""
output_array = {}
this_id = int(this_id)
        for i, output_name in enumerate(self.output_names):
            # NOTE: transpose(0, 1, 3, 2) swaps the last two axes, i.e. it assumes
            # the runtime returns (N, C, W, H); if the ONNX output is already
            # (N, C, H, W), this swap scrambles the detection grid.
            output_array[output_name] = results[i].transpose(0, 1, 3, 2)
        assert len(self.classes) == output_array["output_cov/Sigmoid:0"].shape[1], (
            "Number of classes {} != number of channels in output_cov/Sigmoid: {}".format(
                len(self.classes), output_array["output_cov/Sigmoid:0"].shape[1]
            )
        )
abs_bbox = denormalize_bounding_bboxes(
output_array["output_bbox/BiasAdd:0"], self.stride,
self.offset, self.bbox_norm, len(self.classes), self.scale_w,
self.scale_h, self.data_format, self.target_shape, self.frames,
this_id - 1
)
valid_indices = thresholded_indices(
output_array["output_cov/Sigmoid:0"], len(self.classes),
self.classes,
self.coverage_thresholds
)
batchwise_boxes = []
for image_idx, indices in enumerate(valid_indices):
covs = output_array["output_cov/Sigmoid:0"][image_idx, :, :, :]
bboxes = abs_bbox[image_idx, :, :, :]
imagewise_boxes = []
for class_idx in range(len(self.classes)):
clustered_boxes = []
cw_config = self.classwise_clustering_config[
self.classes[class_idx]
]
classwise_covs = covs[class_idx, :, :].flatten()
classwise_covs = classwise_covs[indices[class_idx]]
if classwise_covs.size == 0:
continue
classwise_bboxes = bboxes[4*class_idx:4*class_idx+4, :, :]
classwise_bboxes = classwise_bboxes.reshape(
classwise_bboxes.shape[:1] + (-1,)
).T[indices[class_idx]]
                # Clustering distance: boxes that overlap more are "closer".
                pairwise_dist = 1.0 - iou_vectorized(classwise_bboxes)
                # Original DBSCAN clustering, skipped for now because of the
                # dbscan_min_samples issue described above:
                # labeling = self.dbscan_elements[self.classes[class_idx]].fit_predict(
                #     X=pairwise_dist,
                #     sample_weight=classwise_covs
                # )
                # Fallback: every candidate becomes its own cluster, i.e. no
                # merging at all, which is why overlapping raw boxes survive.
                labeling = np.arange(len(classwise_covs))
labels = np.unique(labeling[labeling >= 0])
for label in labels:
                    w = classwise_covs[labeling == label]
                    aggregated_w = np.sum(w)
                    w_norm = w / aggregated_w
                    b = classwise_bboxes[labeling == label]
                    # Confidence-weighted mean of the member boxes.
                    mean_bbox = np.sum((b.T * w_norm).T, axis=0)
                    # Weighted-mean box dimensions, used for the validity checks below.
                    mean_box_w = mean_bbox[2] - mean_bbox[0]
                    mean_box_h = mean_bbox[3] - mean_bbox[1]
valid_box = aggregated_w > cw_config['dbscan_config']['dbscan_confidence_threshold'] \
and mean_box_h > cw_config['minimum_bounding_box_height']
if valid_box:
clustered_boxes.append(
KittiBbox(
self.classes[class_idx], 0, 0, 0,
mean_bbox, 0, 0, 0, 0,
0, 0, 0, confidence_score=aggregated_w
)
)
imagewise_boxes.extend(clustered_boxes)
batchwise_boxes.append(imagewise_boxes)
if render:
processes = []
with pool_context(self.batch_size) as pool:
for image_idx in range(self.batch_size):
current_idx = (this_id - 1) * self.batch_size + image_idx
if current_idx >= len(self.frames):
break
current_frame = self.frames[current_idx]
                    # NOTE: hard-coding 'tmp.png' makes every image in the batch
                    # overwrite the same label/image files; deriving the name from
                    # the frame (e.g. its source path) would avoid that.
                    filename = os.path.basename('tmp.png')
output_label_file = os.path.join(
self.output_path, "infer_labels",
"{}.txt".format(os.path.splitext(filename)[0])
)
output_image_file = os.path.join(
self.output_path, "infer_images",
"{}.jpg".format(os.path.splitext(filename)[0])
)
                    os.makedirs(os.path.dirname(output_label_file), exist_ok=True)
                    os.makedirs(os.path.dirname(output_image_file), exist_ok=True)
processes.append(
pool.apply_async(
write_kitti_annotation, (output_label_file, batchwise_boxes[image_idx])
)
)
processes.append(
pool.apply_async(
render_image,
(current_frame, batchwise_boxes[image_idx],
output_image_file, self.box_color,
self.linewidth)
)
)
for p in processes:
p.wait()
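And for completeness, this is roughly how I drive the class (names and shapes here are placeholders for my actual setup, not taken from your repo):

# Hypothetical driver; `frames` and `results` mirror the variables used in
# apply() above, and target_shape is whatever my ONNX model's input is.
postprocessor = LPDNetPostprocessor(
    batch_size=1,
    frames=frames,                 # list of input frames
    output_path="./outputs",
    data_format="channels_first",  # CHW order
    classes=["LicensePlate"],
    target_shape=(3, 480, 640),    # placeholder input shape
)
postprocessor.apply(results, this_id=1, render=True)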