Issue in LPD while running with custom python script

Hi @Morganh,

I am trying to run the LPD model with a custom Python script. I pass in a car image that contains a single number plate, but with the script I am getting 1200 number plates.

Please find the image and script below:

Script :

import os
import time

import cv2
#import matplotlib.pyplot as plt
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
import tensorrt as trt
from PIL import Image
import pdb
import codecs
import glob
import datetime
import shutil

# Path of the serialized TensorRT engine file; it is handed to model_loading()
# further down, which reads and deserializes it.
trt_engine_path = "/NumberPlateDetection/number_plate_detection_b8_int8.engine"

class HostDeviceMem(object):
    """Pair a host-side buffer with its matching device-side buffer.

    Attributes:
        host: The host buffer (in this script, a page-locked NumPy array).
        device: The device buffer (in this script, a PyCUDA allocation).
    """

    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)

    def __repr__(self):
        return self.__str__()

def load_engine(trt_runtime, engine_path):
    """Read a serialized engine from disk and deserialize it.

    Args:
        trt_runtime: Object exposing ``deserialize_cuda_engine(bytes)``
            (a ``trt.Runtime`` in this script).
        engine_path: Path of the serialized engine file.

    Returns:
        Whatever ``trt_runtime.deserialize_cuda_engine`` returns for the
        file's bytes.
    """
    # The engine is an opaque binary blob, so read it in binary mode.
    with open(engine_path, "rb") as f:
        engine_data = f.read()
    engine = trt_runtime.deserialize_cuda_engine(engine_data)
    return engine

# Allocates all buffers required for an engine, i.e. host/device inputs/outputs.
def allocate_buffers(engine, batch_size=1):
    """Allocate a host/device buffer pair for every binding of ``engine``.

    Args:
        engine: TensorRT engine; iterating it yields its bindings.
        batch_size: Multiplier applied to each binding's volume when sizing
            the buffers.

    Returns:
        Tuple ``(inputs, outputs, bindings, stream)`` where ``inputs`` and
        ``outputs`` are lists of ``HostDeviceMem``, ``bindings`` is the list
        of device addresses (as ints) in binding order, and ``stream`` is a
        new ``cuda.Stream``.
    """
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers.
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer address to the device bindings; the
        # execution call receives this list.
        bindings.append(int(device_mem))
        # Each binding belongs to exactly one of the two lists.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream

def do_model_1_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    """Run one inference pass and return the host-side output buffers.

    Args:
        context: Execution context created from the engine.
        bindings: Device addresses, in binding order.
        inputs: ``HostDeviceMem`` list whose ``host`` arrays hold the input data.
        outputs: ``HostDeviceMem`` list that receives the results.
        stream: CUDA stream on which the copies and the execution are queued.
        batch_size: Batch size forwarded to ``context.execute_async``.

    Returns:
        List with the ``host`` array of every entry in ``outputs``.
    """
    # Transfer input data to the GPU.
    for inp in inputs:
        cuda.memcpy_htod_async(inp.device, inp.host, stream)
    # Run inference.
    # NOTE(review): passing batch_size presumably means the engine was built
    # with an implicit batch dimension - confirm against the engine.
    context.execute_async(
        batch_size=batch_size, bindings=bindings, stream_handle=stream.handle
    )
    # Transfer predictions back from the GPU.
    for out in outputs:
        cuda.memcpy_dtoh_async(out.host, out.device, stream)
    # Synchronize the stream so the host buffers are complete before returning.
    stream.synchronize()
    # Return only the host outputs.
    return [out.host for out in outputs]

def model_loading(trt_engine_path):
    """Deserialize the engine at ``trt_engine_path`` and prepare it for inference.

    Returns:
        Tuple ``(inputs, outputs, bindings, stream, context)``: the four values
        produced by ``allocate_buffers`` followed by the execution context.
    """
    # NOTE(review): pycuda.autoinit is imported at the top of the file, so the
    # CUDA context presumably already exists when this assignment runs -
    # confirm whether it still selects the intended GPU.
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"

    # Logger that reports warnings and above, used by the runtime.
    logger = trt.Logger(trt.Logger.WARNING)
    runtime = trt.Runtime(logger)

    engine = load_engine(runtime, trt_engine_path)

    # An execution context is required to run the engine.
    context = engine.create_execution_context()

    # Binding 0 is given a channels-first 3 x 480 x 640 shape.
    context.set_binding_shape(0, (3, 480, 640))

    # Host and device buffers for every binding, plus the stream to run on.
    host_dev_inputs, host_dev_outputs, device_bindings, cuda_stream = allocate_buffers(engine)
    return host_dev_inputs, host_dev_outputs, device_bindings, cuda_stream, context

# Load the engine once at module level; execute_model_1() below reads these
# module-level buffers, stream and context on every call.
inputs_model_1, outputs_model_1, bindings_model_1, stream_model_1, context_model_1 = model_loading(trt_engine_path)

def execute_model_1(image):
    """Copy ``image`` into the first input buffer and run model 1.

    Args:
        image: NumPy array; it is flattened with ``ravel()`` before being
            copied into ``inputs_model_1[0].host``.

    Returns:
        The list returned by ``do_model_1_inference``.
    """
    flat_pixels = image.ravel()
    np.copyto(inputs_model_1[0].host, flat_pixels)
    return do_model_1_inference(
        context_model_1,
        bindings=bindings_model_1,
        inputs=inputs_model_1,
        outputs=outputs_model_1,
        stream=stream_model_1,
    )

image_path = "/home/smarg/Documents/Pritam/VEHICLE-INFER/DATA/INPUT-IMAGE/HYD/Test22---3_HR03W5626.jpg"

# Network input geometry; must agree with the (3, 480, 640) channels-first
# shape that model_loading() sets on binding 0.
input_height, input_width = 480, 640

frame = cv2.imread(image_path)
if frame is None:
    # cv2.imread signals an unreadable/missing file by returning None.
    raise FileNotFoundError(f"Could not read image: {image_path}")

# cv2.resize takes dsize as (width, height); the original (480, 640) produced
# a 640-high, 480-wide image, i.e. the transpose of what the network expects.
frame = cv2.resize(frame, (input_width, input_height))
# OpenCV loads images as BGR.
# NOTE(review): the model is assumed to expect RGB input - confirm against the
# model's training/inference spec.
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# Reorder HWC -> CHW and add the batch axis so the flattened buffer has the
# layout of the (3, 480, 640) input binding, then scale to [0, 1].
image = np.ascontiguousarray(frame.transpose(2, 0, 1)[np.newaxis], dtype=np.float32) / 255.0
output = execute_model_1(image)

# The two outputs are dense grid maps, not a list of detections:
# 1200 = (480 / 16) * (640 / 16) and 4800 = 4 * 1200.
# NOTE(review): presumably output[1] is a 1 x 30 x 40 per-cell confidence map
# and output[0] the matching 4 x 30 x 40 box-coordinate map - confirm against
# the engine's binding shapes. Detections are obtained by thresholding the
# confidence map, decoding the box values of the surviving cells and
# clustering/NMS-ing the resulting boxes.
print("output[0] : ",len(output[0]))
print("output[1] : ",len(output[1]))

print("Output : ",output)

Output :

output[0] :  4800
output[1] :  1200
Output :  [array([0.30908203, 0.1352539 , 0.11230469, ..., 0.69091797, 0.67041016,
       0.6381836 ], dtype=float32), array([5.9301865e-08, 2.7535691e-05, 8.1396920e-06, ..., 8.6465229e-07,
       1.8020886e-06, 1.2501529e-09], dtype=float32)]

Please let me know where the mistake is and what the post-processing steps should be.


Officially, please see Integrating TAO CV Models with Triton Inference Server — TAO Toolkit 3.0 documentation and then leverage tao-toolkit-triton-apps/ at main · NVIDIA-AI-IOT/tao-toolkit-triton-apps · GitHub and tao-toolkit-triton-apps/ at fc7e222c036354498e53a8ed11b5cf7c0a3e5239 · NVIDIA-AI-IOT/tao-toolkit-triton-apps · GitHub

Also, there are similar topics shared by other customers in the TAO forum.

Thanks @Morganh.