Error while running inference with OpenCV-Python on a model generated through TLT

Hi,

I am getting an error while running inference using a custom Python script.

Below is the code:

import os
import time

import cv2
#import matplotlib.pyplot as plt
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
import tensorrt as trt
from PIL import Image
import pdb
import codecs
import glob
import datetime
import shutil

NPR_LABEL = ['0-15', '16-35', '36-55', '55+']  # class index -> age-group label (a set is not indexable)
input_shape = (3, 224, 224)

class HostDeviceMem(object):
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)

    def __repr__(self):
        return self.__str__()


def load_engine(trt_runtime, engine_path):
    with open(engine_path, "rb") as f:
        engine_data = f.read()
    engine = trt_runtime.deserialize_cuda_engine(engine_data)
    return engine

# Allocates all buffers required for an engine, i.e. host/device inputs/outputs.
# def allocate_buffers(engine, batch_size=-1):
def allocate_buffers(engine, batch_size=4):
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        # pdb.set_trace()
        size = trt.volume(engine.get_binding_shape(binding)) * batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
            # print(f"input: shape:{engine.get_binding_shape(binding)} dtype:{engine.get_binding_dtype(binding)}")
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
            # print(f"output: shape:{engine.get_binding_shape(binding)} dtype:{engine.get_binding_dtype(binding)}")
    return inputs, outputs, bindings, stream



def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    # Transfer input data to the GPU.
    [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
    # Run inference.
    context.execute_async(
        batch_size=batch_size, bindings=bindings, stream_handle=stream.handle
    )
    # Transfer predictions back from the GPU.
    [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
    # Synchronize the stream
    stream.synchronize()
    # Return only the host outputs.
    return [out.host for out in outputs]

def post_processing(label_ids):
    # Map the predicted class ids to their labels.
    number = ''
    i = 0
    for label in label_ids[0]:
        if str(label) != '35':  # skip label id 35 (carried over from the LPR sample)
            number = number + NPR_LABEL[int(label)]
            print("Label : {}  Confidence : {}".format(NPR_LABEL[int(label)], label_ids[1][i]))
        i += 1
    return number

def model_loading(trt_engine_path):
    # TensorRT logger singleton
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
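    # NOTE: `import pycuda.autoinit` has already created the CUDA context at
    # import time, so setting CUDA_VISIBLE_DEVICES here no longer affects which
    # GPU PyCUDA uses; set it before the pycuda imports instead.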
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    # trt_engine_path = "/opt/smarg/surveillance_gateway_prod/surveillance_ai_model/x86_64/Secondary_NumberPlateClassification/lpr_us_onnx_b16.engine"

    trt_runtime = trt.Runtime(TRT_LOGGER)
    # pdb.set_trace()
    trt_engine = load_engine(trt_runtime, trt_engine_path)
    # Execution context is needed for inference
    context = trt_engine.create_execution_context()
    # NPR input shape
    # input_shape = (3,48,96)
    context.set_binding_shape(0, input_shape)
    # This allocates memory for network inputs/outputs on both CPU and GPU
    inputs, outputs, bindings, stream = allocate_buffers(trt_engine)
    return inputs, outputs, bindings, stream, context

trt_engine_path = "/home/smarg/Documents/Pritam/AGE-GROUP-MODEL-ANALYSIS/MODELS/Age_Epoch_117_b4_FP16_224_224.engine"
inputs, outputs, bindings, stream, context = model_loading(trt_engine_path)


# pdb.set_trace()
# image = [cv2.imread("/home/smarg/Downloads/Images/resized/img/IMG_20210719_160022_cropped_batch_code_image_imgGB3_BATO007_.jpg")]

# Run inference on folder 
image_folder_path = "/home/smarg/Documents/Pritam/AGE-GROUP-MODEL-ANALYSIS/INPUT-IMAGES/gender_image_9Aug/gender_image/"
# output_folder_path = "/home/smarg/Documents/Pritam/TRT-INFER-NPR/OutPutImg/Cropped/"
image_count = 0
start_time = datetime.datetime.now()
for image_path in glob.glob(image_folder_path + "*.jpg"):
    
    print("Image name :",image_path)
    image = [cv2.imread(image_path)]

    image = np.array([(cv2.resize(img, ( 224 , 224 )))/ 255.0 for img in image], dtype=np.float32)
    image= image.transpose( 0 , 3 , 1 , 2 )

    np.copyto(inputs[0].host, image.ravel())

    output = do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
    # number = post_processing(output)
    image_count += 1
    # shutil.copy(image_path,output_folder_path+str(image_count)+"_"+number+".jpg")

end_time = datetime.datetime.now()
total_time = end_time - start_time

print("Total image processed : {} Total Time : {} ".format(image_count,total_time))

Error:

Traceback (most recent call last):
  File "inference_trt_age_classification.py", line 128, in <module>
    np.copyto(inputs[0].host, image.ravel())
  File "<__array_function__ internals>", line 6, in copyto
ValueError: could not broadcast input array from shape (150528) into shape (602112)

Can someone please suggest where I am going wrong?

Thanks.

Could you try

def allocate_buffers(engine, batch_size=1)
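
The default batch_size=4 makes allocate_buffers size the input host buffer for 4 x 3 x 224 x 224 = 602112 floats, while the loop copies a single preprocessed image of 3 x 224 x 224 = 150528 floats -- exactly the two shapes in the broadcast error. A minimal sketch of the matching allocation and copy, assuming one image per inference call as in the loop above:

# Size the buffers for a single image so they match what np.copyto receives.
inputs, outputs, bindings, stream = allocate_buffers(trt_engine, batch_size=1)

# inputs[0].host now holds 1 x 3 x 224 x 224 = 150528 elements, equal to
# image.ravel().size, so the copy no longer has to broadcast.
np.copyto(inputs[0].host, image.ravel())

If you want to keep batch_size=4, stack four preprocessed images into one (4, 3, 224, 224) array before the copy instead.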

Thanks, @Morganh.