LPRNet with TensorRT

I’m working on running LPRNet with TensorRT. I am able to run my model, but I am having trouble understanding the output. How can I decode the output of this model?

I used the TAO Toolkit to create my engine file.

# Export in FP32 mode.
# First create the export directory, then run `tao lprnet export` on the
# epoch-24 checkpoint: -m is the trained .tlt model, -k the key, -e the spec
# file, -o the exported .etlt path, and --engine_file the path of the engine
# file that the inference code loads.
!mkdir -p $LOCAL_EXPERIMENT_DIR/export 
!tao lprnet export --gpu_index=$GPU_INDEX -m $USER_EXPERIMENT_DIR/experiment_dir_unpruned/weights/lprnet_epoch-24.tlt \
                   -k $KEY \
                   -e $SPECS_DIR/tutorial_spec.txt \
                   -o $USER_EXPERIMENT_DIR/export/lprnet_epoch-24.etlt \
                   --data_type fp32 \
                   --engine_file $USER_EXPERIMENT_DIR/export/lprnet_epoch-24.engine

Below is the code I am using for inferencing on the engine file.

import os
import time

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
import tensorrt as trt
from PIL import Image

class HostDeviceMem:
    """Pair a host-side buffer with its device-side counterpart.

    Attributes:
        host: The buffer object kept for the host side.
        device: The buffer object (or handle) kept for the device side.
    """

    def __init__(self, host_mem, device_mem):
        self.host, self.device = host_mem, device_mem

    def __str__(self):
        # A label line followed by str() of each stored buffer.
        return f"Host:\n{self.host!s}\nDevice:\n{self.device!s}"

    def __repr__(self):
        # repr() mirrors str() so that printing a list of pairs shows the
        # buffers rather than bare object addresses.
        return self.__str__()

def load_engine(trt_runtime, engine_path):
    """Read a serialized engine file and deserialize it.

    Args:
        trt_runtime: Object providing ``deserialize_cuda_engine(bytes)``.
        engine_path: Path of the serialized engine file, opened in binary mode.

    Returns:
        Whatever ``trt_runtime.deserialize_cuda_engine`` returns for the
        file's bytes.
    """
    with open(engine_path, "rb") as engine_file:
        serialized = engine_file.read()
    return trt_runtime.deserialize_cuda_engine(serialized)

# Allocates all buffers required for an engine, i.e. host/device inputs/outputs.
def allocate_buffers(engine):
    """Allocate a host/device buffer pair for every binding of ``engine``.

    Relies on the module-level ``context`` (the execution context) already
    existing: the input binding shape is set on it before the buffers are
    sized.

    Args:
        engine: Deserialized TensorRT engine whose bindings are iterated.

    Returns:
        A tuple ``(inputs, outputs, bindings, stream)``. ``inputs`` and
        ``outputs`` are lists of ``HostDeviceMem``; ``bindings`` holds the
        device buffer of every binding as an ``int``, in binding order;
        ``stream`` is the CUDA stream created for the transfers.
    """
    # presumably (batch, channels, height, width) — TODO confirm against the
    # exported model's input spec.
    input_shape = (1, 3, 48, 96)
    context.set_binding_shape(0, input_shape)
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings. Every binding (input
        # and output alike) needs an entry, otherwise inference is launched
        # with an empty bindings list.
        bindings.append(int(device_mem))
        # Append to the appropriate list: a binding is either an input or an
        # output, never both.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream

def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    """Copy inputs to the device, run the engine, and copy outputs back.

    Args:
        context: Execution context used to launch inference.
        bindings: Device buffer addresses, one per engine binding.
        inputs: ``HostDeviceMem`` pairs whose ``host`` data is sent to the device.
        outputs: ``HostDeviceMem`` pairs whose ``host`` buffers receive results.
        stream: CUDA stream on which the copies and the inference are queued.
        batch_size: Batch size passed to ``execute_async``.

    Returns:
        The ``host`` buffer of every entry in ``outputs``, in order.
    """
    # Transfer input data to the GPU.
    for inp in inputs:
        cuda.memcpy_htod_async(inp.device, inp.host, stream)
    # Run inference.
    context.execute_async(
        batch_size=batch_size, bindings=bindings, stream_handle=stream.handle
    )
    # Transfer predictions back from the GPU.
    for out in outputs:
        cuda.memcpy_dtoh_async(out.host, out.device, stream)
    # Synchronize the stream: everything above is only queued, so wait here
    # before the host buffers are read.
    stream.synchronize()
    # Return only the host outputs.
    return [out.host for out in outputs]

# TensorRT logger singleton
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
# presumably the engine file produced by the export command shown earlier —
# verify the path matches your experiment directory.
trt_engine_path = "lprnet/data/tao-experiments/lprnet/export/lprnet_epoch-24.engine"

trt_runtime = trt.Runtime(TRT_LOGGER)
trt_engine = load_engine(trt_runtime, trt_engine_path)
# Execution context is needed for inference
context = trt_engine.create_execution_context()
# This allocates memory for network inputs/outputs on both CPU and GPU
inputs, outputs, bindings, stream = allocate_buffers(trt_engine)

# Load one validation image and resize it; cv2.resize takes (width, height).
image = cv2.imread("lprnet/data/tao-experiments/data/openalpr/val/image/car12.jpg")
image = cv2.resize(image, (96, 48))

# NOTE(review): the resized image is flattened and copied into the input
# buffer as-is; no value scaling or axis reordering happens here — confirm
# this matches the layout and range the engine expects.
np.copyto(inputs[0].host, image.ravel())

# do_inference returns the host-side buffer of each output binding.
outputs1 = do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)


Output and image being inferred:

When I ran this in the TAO Toolkit, the output of this image was “5UVR090”

Could you please help me make sense of the model output?

Thank you!

We recommend that you raise this query in the TLT forum for better assistance.


Here is a solution I found, in case it helps someone else:

Working Code

import os
import time

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
import tensorrt as trt
from PIL import Image

class HostDeviceMem:
    """Hold the host buffer and the device buffer that belong to one binding.

    Attributes:
        host: The host-side buffer object.
        device: The device-side buffer object (or handle).
    """

    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        # Four lines: a label, then str() of the buffer, for each side.
        parts = ("Host:", str(self.host), "Device:", str(self.device))
        return "\n".join(parts)

    def __repr__(self):
        # Keep repr() identical to str() so containers of pairs print readably.
        return self.__str__()

def load_engine(trt_runtime, engine_path):
    """Deserialize the engine stored at ``engine_path``.

    Args:
        trt_runtime: Object providing ``deserialize_cuda_engine(bytes)``.
        engine_path: Path of the serialized engine file, read in binary mode.

    Returns:
        The result of ``trt_runtime.deserialize_cuda_engine`` applied to the
        file's contents.
    """
    with open(engine_path, "rb") as fh:
        blob = fh.read()
    # The file is closed before deserialization starts.
    return trt_runtime.deserialize_cuda_engine(blob)

# Allocates all buffers required for an engine, i.e. host/device inputs/outputs.
def allocate_buffers(engine):
    """Allocate a host/device buffer pair for every binding of ``engine``.

    Prints the shape and dtype of each binding as it is classified as an
    input or an output.

    Args:
        engine: Deserialized TensorRT engine whose bindings are iterated.

    Returns:
        A tuple ``(inputs, outputs, bindings, stream)``. ``inputs`` and
        ``outputs`` are lists of ``HostDeviceMem``; ``bindings`` holds the
        device buffer of every binding as an ``int``, in binding order;
        ``stream`` is the CUDA stream created for the transfers.
    """
    # The input binding shape is deliberately not set here; the calling
    # script sets it on the execution context before running inference.
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings. Every binding (input
        # and output alike) needs an entry, otherwise inference is launched
        # with an empty bindings list.
        bindings.append(int(device_mem))
        # Append to the appropriate list: a binding is either an input or an
        # output, never both.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
            print(f"input: shape:{engine.get_binding_shape(binding)} dtype:{engine.get_binding_dtype(binding)}")
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
            print(f"output: shape:{engine.get_binding_shape(binding)} dtype:{engine.get_binding_dtype(binding)}")
    return inputs, outputs, bindings, stream

def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    """Copy inputs to the device, run the engine, and copy outputs back.

    Args:
        context: Execution context used to launch inference.
        bindings: Device buffer addresses, one per engine binding.
        inputs: ``HostDeviceMem`` pairs whose ``host`` data is sent to the device.
        outputs: ``HostDeviceMem`` pairs whose ``host`` buffers receive results.
        stream: CUDA stream on which the copies and the inference are queued.
        batch_size: Batch size passed to ``execute_async``.

    Returns:
        The ``host`` buffer of every entry in ``outputs``, in order.
    """
    # Transfer input data to the GPU.
    for inp in inputs:
        cuda.memcpy_htod_async(inp.device, inp.host, stream)
    # Run inference.
    context.execute_async(
        batch_size=batch_size, bindings=bindings, stream_handle=stream.handle
    )
    # Transfer predictions back from the GPU.
    for out in outputs:
        cuda.memcpy_dtoh_async(out.host, out.device, stream)
    # Synchronize the stream: everything above is only queued, so wait here
    # before the host buffers are read.
    stream.synchronize()
    # Return only the host outputs.
    return [out.host for out in outputs]

# TensorRT logger singleton
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
trt_engine_path = "lprnet/data/tao-experiments/lprnet/export/lprnet_epoch-24.engine"

trt_runtime = trt.Runtime(TRT_LOGGER)
trt_engine = load_engine(trt_runtime, trt_engine_path)
# Execution context is needed for inference
context = trt_engine.create_execution_context()
# This allocates memory for network inputs/outputs on both CPU and GPU
inputs, outputs, bindings, stream = allocate_buffers(trt_engine)
print("Inputs: " + str(inputs))

# Sample validation images; uncomment a different line to try another one.
#imageFile = "lprnet/data/tao-experiments/data/openalpr/val/image/car1.jpg"
#imageFile = "lprnet/data/tao-experiments/data/openalpr/val/image/car12.jpg"
imageFile = "lprnet/data/tao-experiments/data/openalpr/val/image/car19.jpg"
#imageFile = "lprnet/data/tao-experiments/data/openalpr/val/image/car21.jpg"
#imageFile = "lprnet/data/tao-experiments/data/openalpr/val/image/car3.jpg"
#imageFile = "lprnet/data/tao-experiments/data/openalpr/val/image/car7.jpg"
#imageFile = "lprnet/data/tao-experiments/data/openalpr/val/image/car8.jpg"
#imageFile = "lprnet/data/tao-experiments/data/openalpr/val/image/car18.jpg"

# The image is read once and wrapped in a one-element list (a batch of one).
image = [cv2.imread(imageFile)]
# cv2.imread signals an unreadable file by returning None rather than
# raising, so fail here with a clear message instead of inside cv2.resize.
if image[0] is None:
    raise FileNotFoundError(f"Could not read image: {imageFile}")

# Preprocess: resize to width 96 / height 48, scale pixel values by 1/255,
# and stack into a float32 array.
newImage = np.array([cv2.resize(img, (96, 48)) / 255.0 for img in image], dtype=np.float32)
# Move the channel axis forward, (N, H, W, C) -> (N, C, H, W), to match the
# (1, 3, 48, 96) input shape set below.
newImage = newImage.transpose(0, 3, 1, 2)

input_shape = (1, 3, 48, 96)
context.set_binding_shape(0, input_shape)

np.copyto(inputs[0].host, newImage.ravel())
modelOutputs = do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
print(modelOutputs)

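Use this table to decode the output. Each row gives a character followed by its class index (note that the letter O has no entry):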

0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
A 10
B 11
C 12
D 13
E 14
F 15
G 16
H 17
I 18
J 19
K 20
L 21
M 22
N 23

P 24
Q 25
R 26
S 27
T 28
U 29
V 30
W 31
X 32
Y 33
Z 34
1 Like