LPRNet with TensorRT

afisk · September 16, 2021, 7:59pm

Hi
I’m working on running LPRNet with TensorRT. I am able to run my model, but I am having trouble understanding the output. How can I decode the output of this model?

I used the TAO Toolkit to create my engine file.


# Export in FP32 mode.
# Jupyter shell cell: creates the export directory, then runs `tao lprnet export`
# on the epoch-24 .tlt checkpoint, writing an .etlt file and (via --engine_file)
# an .engine file. The engine file name matches the one loaded by the Python
# code further down.
# NOTE(review): mkdir uses $LOCAL_EXPERIMENT_DIR while the export paths use
# $USER_EXPERIMENT_DIR — presumably two views of the same directory; confirm.
!mkdir -p $LOCAL_EXPERIMENT_DIR/export 
!tao lprnet export --gpu_index=$GPU_INDEX -m $USER_EXPERIMENT_DIR/experiment_dir_unpruned/weights/lprnet_epoch-24.tlt \
                   -k $KEY \
                   -e $SPECS_DIR/tutorial_spec.txt \
                   -o $USER_EXPERIMENT_DIR/export/lprnet_epoch-24.etlt \
                   --data_type fp32 \
                   --engine_file $USER_EXPERIMENT_DIR/export/lprnet_epoch-24.engine

Below is the code I am using for inferencing on the engine file.


import os
import time

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
import tensorrt as trt
from PIL import Image


class HostDeviceMem(object):
    """Hold a host buffer together with its matching device buffer."""

    def __init__(self, host_mem, device_mem):
        # Both objects are stored as given; nothing is copied or validated.
        self.host, self.device = host_mem, device_mem

    def __str__(self):
        # Two labelled sections, each followed by the str() of the buffer.
        return f"Host:\n{self.host!s}\nDevice:\n{self.device!s}"

    def __repr__(self):
        # The repr is intentionally the same text as str().
        return str(self)


def load_engine(trt_runtime, engine_path):
    """Read the file at *engine_path* and deserialize it with *trt_runtime*.

    The file is opened in binary mode and read in full; the bytes are then
    handed to ``trt_runtime.deserialize_cuda_engine`` and its result is
    returned unchanged.
    """
    with open(engine_path, "rb") as engine_file:
        serialized = engine_file.read()
    return trt_runtime.deserialize_cuda_engine(serialized)

# Allocates all buffers required for an engine, i.e. host/device inputs/outputs.
def allocate_buffers(engine):
    """Create a host/device buffer pair for every binding of *engine*.

    NOTE: this function reads the module-level ``context`` (it is not a
    parameter) and sets binding 0 to shape ``(1, 3, 48, 96)`` before any
    buffer is allocated, so ``context`` must already exist when it is called.

    Returns:
        ``(inputs, outputs, bindings, stream)`` where ``inputs`` and
        ``outputs`` are lists of ``HostDeviceMem``, ``bindings`` is the list
        of device buffers converted with ``int()`` in iteration order, and
        ``stream`` is a new ``cuda.Stream``.
    """
    context.set_binding_shape(0, (1, 3, 48, 96))

    inputs, outputs, bindings = [], [], []
    stream = cuda.Stream()
    for binding_key in engine:
        # Element count: volume of the binding shape times the engine's max batch size.
        element_count = trt.volume(engine.get_binding_shape(binding_key)) * engine.max_batch_size
        np_dtype = trt.nptype(engine.get_binding_dtype(binding_key))
        # Page-locked host array plus a device allocation of the same byte size.
        host_buf = cuda.pagelocked_empty(element_count, np_dtype)
        dev_buf = cuda.mem_alloc(host_buf.nbytes)
        bindings.append(int(dev_buf))
        # File the pair under inputs or outputs depending on the binding kind.
        target = inputs if engine.binding_is_input(binding_key) else outputs
        target.append(HostDeviceMem(host_buf, dev_buf))
    return inputs, outputs, bindings, stream



def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    """Run one inference pass and return the host-side output buffers.

    Steps, all queued on *stream*: copy every input's host buffer to its
    device buffer, call ``context.execute_async``, copy every output's device
    buffer back to its host buffer, then block until the stream has finished.

    Returns:
        A list with the ``host`` attribute of each entry in *outputs*.
    """
    # Host -> device for each input.
    for src in inputs:
        cuda.memcpy_htod_async(src.device, src.host, stream)
    # Launch the engine.
    context.execute_async(
        batch_size=batch_size, bindings=bindings, stream_handle=stream.handle
    )
    # Device -> host for each output.
    for dst in outputs:
        cuda.memcpy_dtoh_async(dst.host, dst.device, stream)
    # Wait for the queued copies and execution to complete.
    stream.synchronize()
    return [dst.host for dst in outputs]

# TensorRT logger singleton
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
# Path of the serialized engine file to load.
trt_engine_path = "lprnet/data/tao-experiments/lprnet/export/lprnet_epoch-24.engine"

trt_runtime = trt.Runtime(TRT_LOGGER)
trt_engine = load_engine(trt_runtime, trt_engine_path)
# Execution context is needed for inference
# It must be created before allocate_buffers() runs, because that function
# reads this module-level `context` to set the shape of binding 0.
context = trt_engine.create_execution_context()
# This allocates memory for network inputs/outputs on both CPU and GPU
inputs, outputs, bindings, stream = allocate_buffers(trt_engine)

# Load the test image and resize it with cv2.resize(image, (96, 48)).
image = cv2.imread("lprnet/data/tao-experiments/data/openalpr/val/image/car12.jpg")
image = cv2.resize(image, (96, 48))

# Copy the flattened image into the first input's host buffer.
# NOTE(review): unlike the later "Working Code" listing, the image is copied
# here without dividing by 255.0 and without transpose(0, 3, 1, 2); this is
# presumably why the output could not be interpreted — confirm.
np.copyto(inputs[0].host, image.ravel())

# Run the engine and print the returned host output buffers.
outputs1 = do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
print(outputs1)

# Show the resized image that was fed to the engine.
plt.imshow(image)

Output and image being inferred:

When I ran this in the TAO Toolkit, the output of this image was “5UVR090”

Please help me make sense of the model output?

Thank you!

spolisetty · September 17, 2021, 10:30am

Hi,
We recommend raising this query in the TLT forum for better assistance.

Thanks!

afisk · September 17, 2021, 1:39pm

Here is the solution I found, in case it helps someone else:

Working Code



import os
import time

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
import tensorrt as trt
from PIL import Image


class HostDeviceMem(object):
    """Bundle one host-side buffer with its device-side buffer."""

    def __init__(self, host_mem, device_mem):
        # Kept by reference, exactly as passed in.
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        # Label line, then the str() of each buffer, one per line.
        parts = ("Host:", str(self.host), "Device:", str(self.device))
        return "\n".join(parts)

    def __repr__(self):
        # repr() deliberately mirrors str().
        return str(self)


def load_engine(trt_runtime, engine_path):
    """Deserialize the engine stored at *engine_path*.

    Args:
        trt_runtime: Object providing ``deserialize_cuda_engine(bytes)``.
        engine_path: Path of the serialized engine file.

    Returns:
        The engine object returned by ``deserialize_cuda_engine``.

    Raises:
        OSError: If the file cannot be opened or read.
        RuntimeError: If deserialization yields ``None`` (for example when the
            engine was built with an incompatible TensorRT version or GPU).
    """
    with open(engine_path, "rb") as f:
        engine_data = f.read()
    engine = trt_runtime.deserialize_cuda_engine(engine_data)
    # Fail here with a clear message instead of letting a later
    # `engine.create_execution_context()` raise an opaque AttributeError.
    if engine is None:
        raise RuntimeError(f"Failed to deserialize TensorRT engine from {engine_path!r}")
    return engine

# Allocates all buffers required for an engine, i.e. host/device inputs/outputs.
def allocate_buffers(engine):
    """Allocate and report a host/device buffer pair for each engine binding.

    This version does not set any binding shape itself. For every binding it
    prints one line with the binding's shape and dtype, prefixed with
    ``input:`` or ``output:``.

    Returns:
        ``(inputs, outputs, bindings, stream)`` where ``inputs`` and
        ``outputs`` are lists of ``HostDeviceMem``, ``bindings`` is the list
        of device buffers converted with ``int()`` in iteration order, and
        ``stream`` is a new ``cuda.Stream``.
    """
    inputs, outputs, bindings = [], [], []
    stream = cuda.Stream()
    for binding_key in engine:
        shape = engine.get_binding_shape(binding_key)
        trt_dtype = engine.get_binding_dtype(binding_key)
        # Element count: shape volume times the engine's max batch size.
        element_count = trt.volume(shape) * engine.max_batch_size
        # Page-locked host array plus a device allocation of the same byte size.
        host_buf = cuda.pagelocked_empty(element_count, trt.nptype(trt_dtype))
        dev_buf = cuda.mem_alloc(host_buf.nbytes)
        bindings.append(int(dev_buf))
        # Decide which list receives the pair and which label gets printed.
        if engine.binding_is_input(binding_key):
            role, target = "input", inputs
        else:
            role, target = "output", outputs
        target.append(HostDeviceMem(host_buf, dev_buf))
        print(f"{role}: shape:{shape} dtype:{trt_dtype}")
    return inputs, outputs, bindings, stream



def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    """Execute the engine once and hand back the outputs' host buffers.

    Every copy and the execution itself are queued on *stream*; the function
    returns only after ``stream.synchronize()`` has completed.

    Returns:
        ``[out.host for out in outputs]``.
    """
    # Upload: each input's host buffer goes to its device buffer.
    for pair in inputs:
        cuda.memcpy_htod_async(pair.device, pair.host, stream)

    # Run inference on the queued inputs.
    context.execute_async(
        batch_size=batch_size,
        bindings=bindings,
        stream_handle=stream.handle,
    )

    # Download: each output's device buffer goes back to its host buffer.
    for pair in outputs:
        cuda.memcpy_dtoh_async(pair.host, pair.device, stream)

    # Block until everything queued above has finished.
    stream.synchronize()

    host_results = []
    for pair in outputs:
        host_results.append(pair.host)
    return host_results

# TensorRT logger singleton
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
trt_engine_path = "lprnet/data/tao-experiments/lprnet/export/lprnet_epoch-24.engine"

trt_runtime = trt.Runtime(TRT_LOGGER)
trt_engine = load_engine(trt_runtime, trt_engine_path)
# Execution context is needed for inference
context = trt_engine.create_execution_context()
# This allocates memory for network inputs/outputs on both CPU and GPU
inputs, outputs, bindings, stream = allocate_buffers(trt_engine)
print("Inputs: " + str(inputs))


# Alternative validation images; uncomment one to try a different plate.
#imageFile = "lprnet/data/tao-experiments/data/openalpr/val/image/car1.jpg"
#imageFile = "lprnet/data/tao-experiments/data/openalpr/val/image/car12.jpg"
imageFile = "lprnet/data/tao-experiments/data/openalpr/val/image/car19.jpg"
#imageFile = "lprnet/data/tao-experiments/data/openalpr/val/image/car21.jpg"
#imageFile = "lprnet/data/tao-experiments/data/openalpr/val/image/car3.jpg"
#imageFile = "lprnet/data/tao-experiments/data/openalpr/val/image/car7.jpg"
#imageFile = "lprnet/data/tao-experiments/data/openalpr/val/image/car8.jpg"
#imageFile = "lprnet/data/tao-experiments/data/openalpr/val/image/car18.jpg"

# Read the file once. cv2.imread signals an unreadable path by returning None
# rather than raising, so check explicitly instead of failing later in resize.
image = cv2.imread(imageFile)
if image is None:
    raise FileNotFoundError(f"Could not read image: {imageFile}")

# OpenCV stores channels as BGR while matplotlib expects RGB; convert for
# display only. The array fed to the network below keeps the channel order
# that cv2.imread produced.
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))


# Preprocess: resize with cv2.resize(..., (96, 48)), scale to [0, 1] as
# float32, add a leading batch axis, then move channels first so the array
# matches the (1, 3, 48, 96) binding shape set below.
resized = cv2.resize(image, (96, 48))
newImage = np.array([resized / 255.0], dtype=np.float32)
newImage = newImage.transpose(0, 3, 1, 2)

input_shape = (1, 3, 48, 96)
context.set_binding_shape(0, input_shape)

# ravel() flattens the transposed array into the flat host input buffer.
np.copyto(inputs[0].host, newImage.ravel())
modelOutputs = do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
print(modelOutputs)

Use this to decode the output:

Topic		Replies	Views
Running nvidia pretrained models in Tensorrt inference TAO Toolkit	14	892	October 6, 2022
Very bad result on tlt mobilenetv2 tensorrt TensorRT	5	1034	January 5, 2022
Falure to do inference TAO Toolkit tensorrt	9	1068	January 11, 2022
Not Getting Correct output while running inference using TensorRT on LPRnet fp16 Model TAO Toolkit	6	674	July 22, 2021
Issue in LPD while running with custom python script TAO Toolkit	3	1050	December 11, 2021
Tensorrt lpdnet output Jetson Nano jetson-inference	4	1448	September 12, 2021
Cannot use TensorRT model exported by NVIDIA TAO TAO Toolkit	8	1089	May 17, 2022
Help on python Tensorrt Inference for yolov4_tiny model trained on custom dataset TAO Toolkit tensorrt , yolo , tao	3	331	March 25, 2024
Tensorrt Batch Inference TensorRT tensorrt	8	1561	December 1, 2020
TensoRT export of PoseNet batch size problems TAO Toolkit tensorrt	7	671	April 20, 2023

LPRNet with TensorRT

Output and image being inferred:

Working Code

Use this to decode the output:

Related topics