Unable to batch TensorRT

yoshifumi_watanabe_aa · March 23, 2021, 7:49am

I want to modify onnx_resnet50.py in tensorrt/samples/python/introductory_parser_samples so that it supports batch processing. I ran the program shown below.

The batch size is set to 100 images.

The model was converted to ONNX with `python3 -m tf2onnx.convert --saved-model /home/effi_colab/effi_saved_model --output effi_saved_model.onnx --inputs input_1:0[1,224,224,3]`. The model's output is a softmax over two classes, OK and NG.

# This sample uses an ONNX ResNet50 Model to create a TensorRT Inference Engine

import random
from PIL import Image
import numpy as np

import pycuda.driver as cuda

# This import causes pycuda to automatically manage CUDA context creation and cleanup.

import pycuda.autoinit

import tensorrt as trt

import sys, os, glob
# Put the parent of the script directory on the module search path;
# presumably that is where the samples' common.py lives -- confirm.
sys.path.insert(1, os.path.join(sys.path[0], ".."))
import common

import time

class ModelData(object):
    """Static description of the ONNX model that is turned into an engine."""

    # File name of the ONNX model; main() resolves it via common.find_sample_data().
    MODEL_PATH = "effi_saved_model.onnx"
    # Unpacked as (c, h, w) by the image-loading helpers, which resize to (w, h).
    # NOTE(review): the optimization profile and the engine's reported input
    # binding use (1, 224, 224, 3), i.e. channels-last -- confirm that a
    # channels-first shape is really what this model expects.
    INPUT_SHAPE = (3, 224, 224)
    # We can convert TensorRT data types to numpy types with trt.nptype()
    DTYPE = trt.float32

# You can set the logger severity higher to suppress messages (or lower to display more messages).

# Logger instance handed to both trt.Builder and trt.OnnxParser in build_engine_onnx().
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

# The Onnx path is used for Onnx models.

def build_engine_onnx(model_file):
    """Parse an ONNX file and build a TensorRT engine from it.

    The network is created with the explicit-batch flag and a single
    optimization profile is registered for the input 'input_1:0'.

    NOTE(review): min, opt and max of that profile are all (1, 224, 224, 3),
    so the resulting engine is fixed to one image per execution regardless of
    max_batch_size. Running 100 images at once presumably needs an ONNX model
    with a dynamic batch dimension and a profile whose max shape is
    (100, 224, 224, 3) -- confirm against the TensorRT dynamic-shapes docs.

    Args:
        model_file: Path of the ONNX model to parse.

    Returns:
        The built engine, or None when the ONNX file could not be parsed
        (the parser errors are printed in that case).
    """
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network(common.EXPLICIT_BATCH) as network, \
            trt.OnnxParser(network, TRT_LOGGER) as parser, \
            builder.create_builder_config() as config:
        # The engine is built from `config` below, so the workspace limit has
        # to be set on the config rather than on the builder.
        config.max_workspace_size = common.GiB(1)
        builder.max_batch_size = 100

        # Reuse the config opened by the `with` statement; creating a second
        # one here would leave the first one unused.
        profile = builder.create_optimization_profile()
        profile.set_shape('input_1:0', (1, 224, 224, 3), (1, 224, 224, 3), (1, 224, 224, 3))
        config.add_optimization_profile(profile)

        # Load the Onnx model and parse it in order to populate the TensorRT network.
        with open(model_file, 'rb') as model:
            if not parser.parse(model.read()):
                print('ERROR: Failed to parse the ONNX file.')
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None

        # Build with the config so that the profile and workspace limit set
        # above actually take part in the build.
        return builder.build_engine(network, config)
def load_normalized_test_case(test_image, pagelocked_buffer):
    """Load one image file into the page-locked host input buffer.

    Args:
        test_image: Path of the image file to load.
        pagelocked_buffer: Host array that receives the flattened pixel data
            through np.copyto, so it must be broadcast-compatible with the
            flattened image.

    Returns:
        test_image, unchanged.
    """
    # Converts the input image to a CHW Numpy array
    def normalize_image(image):
        # Resize, antialias and transpose the image to CHW.
        c, h, w = ModelData.INPUT_SHAPE
        # Image.LANCZOS is the same filter as the deprecated Image.ANTIALIAS alias.
        resized = image.convert('RGB').resize((w, h), Image.LANCZOS)
        image_arr = np.asarray(resized).transpose([2, 0, 1]).astype(trt.nptype(ModelData.DTYPE)).ravel()
        # The ResNet50 sample's mean normalization, (image_arr / 255.0 - 0.45) / 0.225,
        # is deliberately not applied here; raw pixel values are returned.
        return image_arr

    # Normalize the image and copy to pagelocked memory; the file is closed
    # as soon as its pixels have been copied.
    with Image.open(test_image) as image:
        np.copyto(pagelocked_buffer, normalize_image(image))
    return test_image
def img_load(pagelocked_buffer, pattern='/home/effi_study_demo/val/*.png'):
    """Load every image matching `pattern` into the host input buffer.

    Each image is resized to the model's height/width, transposed to CHW,
    cast to the model dtype and flattened; the flattened images are laid out
    back to back and copied into `pagelocked_buffer`.

    NOTE(review): the engine's input binding is reported as (1, 224, 224, 3),
    i.e. channels-last, while this helper writes channels-first data --
    confirm which layout the model expects.

    Args:
        pagelocked_buffer: Host array receiving the pixel data; np.copyto
            requires it to match the total number of loaded values.
        pattern: Glob pattern selecting the image files.

    Raises:
        ValueError: If no file matches `pattern`, or if the loaded data
            cannot be copied into `pagelocked_buffer` because of a size
            mismatch.
    """
    c, h, w = ModelData.INPUT_SHAPE
    dtype = trt.nptype(ModelData.DTYPE)

    flat_images = []
    # Sorted so that the position of an image in the batch is reproducible.
    for path in sorted(glob.glob(pattern)):
        with Image.open(path) as image:
            # Image.LANCZOS is the same filter as the deprecated Image.ANTIALIAS alias.
            resized = image.convert('RGB').resize((w, h), Image.LANCZOS)
            flat_images.append(np.asarray(resized).transpose([2, 0, 1]).astype(dtype).ravel())

    if not flat_images:
        raise ValueError('no image files match {}'.format(pattern))

    # Concatenate whole arrays instead of extending a Python list value by
    # value, which would create one Python object per pixel channel.
    np.copyto(pagelocked_buffer, np.concatenate(flat_images))

def main():
    """Build the engine, run one inference call and print a label per image."""
    # Number of images passed to common.do_inference and expected in the output.
    batch_size = 100

    # Set the data path to the directory that contains the trained models and test images for inference.
    _, data_files = common.find_sample_data(description="Runs a ResNet50 network with a TensorRT inference engine.", subfolder="efficientnet2", find_files=["T1TBB420C183001_R_3_9_001.png", "T1TBB420C183001_R_3_9_001.png", "T1TBB420C183001_R_3_9_001.png", ModelData.MODEL_PATH, "class_labels.txt"])
    # Get models and labels; the first three entries are the test images.
    onnx_model_file, labels_file = data_files[3:]
    with open(labels_file, 'r') as labels_fh:
        labels = labels_fh.read().split('\n')

    # Build a TensorRT engine.
    engine = build_engine_onnx(onnx_model_file)
    if engine is None:
        # build_engine_onnx has already printed the parser errors.
        raise RuntimeError('Failed to build a TensorRT engine from {}'.format(onnx_model_file))

    with engine:
        # Inference is the same regardless of which parser is used to build the engine, since the model architecture is the same.
        # Allocate buffers and create a CUDA stream.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        print('inputs:{}'.format(inputs))
        print('outputs:{}'.format(outputs))
        print('bindings:{}'.format(bindings))
        print('stream:{}'.format(stream))

        print('engine.get_binding_shape(0):{}'.format(engine.get_binding_shape(0)))

        # Contexts are used to perform inference.
        with engine.create_execution_context() as context:
            print('context:{}'.format(context))

            # Load the test images into the host input page-locked buffer.
            img_load(inputs[0].host)

            # NOTE(review): a logged run printed an input binding shape of
            # (1, 224, 224, 3) and only the first score pair was non-zero, so
            # the engine presumably processes a single image per execution
            # even with batch_size=100. Confirm against common.do_inference
            # and rebuild the engine with a dynamic batch dimension if so.
            start = time.perf_counter()
            trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream, batch_size=batch_size)
            # Stop the clock before any printing so only inference is timed.
            elapsed_time = time.perf_counter() - start
            print('\ntrt_outputs:\n{}'.format(trt_outputs[0]))

            # The first output is a flat array with the class scores of every
            # image back to back; view it as one row of scores per image.
            scores = np.asarray(trt_outputs[0]).reshape(batch_size, -1)
            # We use the highest probability as our prediction. Its index corresponds to the predicted label.
            for ii, image_scores in enumerate(scores):
                pred = labels[np.argmax(image_scores)]
                print('count:{0} pred:{1}'.format(ii, pred))

            print("elapsed_time:{0}".format(elapsed_time) + "[sec]")
# Run the sample only when the file is executed as a script.
if __name__ == '__main__':
    main()

The result is shown below, but it seems that only one image is actually classified.
Is there something wrong with the program?

root@0681adfc9360:/home/tensorrt/samples/python/introductory_parser_samples# python3 onnx_effi2_1.py
1
[TensorRT] WARNING: onnx2trt_utils.cpp:220: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32.
binding:input_1:0
engine.max_batch_size:100
size:15052800
binding:Identity:0
engine.max_batch_size:100
size:200
inputs:[Host:
[0. 0. 0. … 0. 0. 0.]
Device:
<pycuda._driver.DeviceAllocation object at 0x7f70146350>]
outputs:[Host:
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0.]
Device:
<pycuda._driver.DeviceAllocation object at 0x7f70146440>]
bindings:[8974196736, 8901396480]
stream:<pycuda._driver.Stream object at 0x7f701485e0>
engine.get_binding_shape(0):(1, 224, 224, 3)
context:<tensorrt.tensorrt.IExecutionContext object at 0x7f701484c8>

trt_outputs:
[0.9023158 0.09768426 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0.
0. 0. ]
count:0 pred:NG
count:1 pred:NG
count:2 pred:NG
count:3 pred:NG
count:4 pred:NG
count:5 pred:NG
count:6 pred:NG
count:7 pred:NG
count:8 pred:NG
count:9 pred:NG
count:10 pred:NG
count:11 pred:NG
count:12 pred:NG
count:13 pred:NG
count:14 pred:NG
count:15 pred:NG
count:16 pred:NG
count:17 pred:NG
count:18 pred:NG
count:19 pred:NG
count:20 pred:NG
count:21 pred:NG
count:22 pred:NG
count:23 pred:NG
count:24 pred:NG
count:25 pred:NG
count:26 pred:NG
count:27 pred:NG
count:28 pred:NG
count:29 pred:NG
count:30 pred:NG
count:31 pred:NG
count:32 pred:NG
count:33 pred:NG
count:34 pred:NG
count:35 pred:NG
count:36 pred:NG
count:37 pred:NG
count:38 pred:NG
count:39 pred:NG
count:40 pred:NG
count:41 pred:NG
count:42 pred:NG
count:43 pred:NG
count:44 pred:NG
count:45 pred:NG
count:46 pred:NG
count:47 pred:NG
count:48 pred:NG
count:49 pred:NG
count:50 pred:NG
count:51 pred:NG
count:52 pred:NG
count:53 pred:NG
count:54 pred:NG
count:55 pred:NG
count:56 pred:NG
count:57 pred:NG
count:58 pred:NG
count:59 pred:NG
count:60 pred:NG
count:61 pred:NG
count:62 pred:NG
count:63 pred:NG
count:64 pred:NG
count:65 pred:NG
count:66 pred:NG
count:67 pred:NG
count:68 pred:NG
count:69 pred:NG
count:70 pred:NG
count:71 pred:NG
count:72 pred:NG
count:73 pred:NG
count:74 pred:NG
count:75 pred:NG
count:76 pred:NG
count:77 pred:NG
count:78 pred:NG
count:79 pred:NG
count:80 pred:NG
count:81 pred:NG
count:82 pred:NG
count:83 pred:NG
count:84 pred:NG
count:85 pred:NG
count:86 pred:NG
count:87 pred:NG
count:88 pred:NG
count:89 pred:NG
count:90 pred:NG
count:91 pred:NG
count:92 pred:NG
count:93 pred:NG
count:94 pred:NG
count:95 pred:NG
count:96 pred:NG
count:97 pred:NG
count:98 pred:NG
count:99 pred:NG
elapsed_time:0.07502055168151855[sec]

Topic		Replies	Views
Batch Inference Wrong in Python API TensorRT	15	3692	October 12, 2021
Tensorrt inference with batch > 1 TensorRT	4	1477	October 13, 2022
Work with batch in TensorRT TensorRT tensorrt , opencv , cuda , tensorflow	20	4100	July 20, 2021
TensorRT runtime batch processing in C++ TensorRT tensorrt	5	1688	September 8, 2021
Question about Python tutorial TensorRT	3	596	October 12, 2021
A problem of batchsize when convert from onnx to engine file General Topics and Other SDKs tensorrt	1	424	December 6, 2021
Creating a TensorRT Engine with different batch sizes TensorRT python , onnx	12	2970	August 18, 2020
TensorRT Batch Inference: different results TensorRT	4	4414	December 1, 2021
TenorRT with python: execution return zeros if batch_size > 1 TensorRT	1	865	November 20, 2020
Dynamic batch Tensor-RT inference output is incorrect TensorRT tensorrt , python	2	1404	May 25, 2023

Unable to batch TensorRT

This sample uses an ONNX ResNet50 Model to create a TensorRT Inference Engine

This import causes pycuda to automatically manage CUDA context creation and cleanup.

You can set the logger severity higher to suppress messages (or lower to display more messages).

The Onnx path is used for Onnx models.

Related topics