Description
This is a very basic question. When I run the attached source code, it takes time to convert the ONNX model to a TensorRT model every time. How can I eliminate the time it takes to convert to TensorRT? I’m implementing it while looking at the sample source code below, but I don’t understand.
For example, can I use the model.trt generated by running trtexec instead of ONNX?
Environment
TensorRT Version: 10.3.0.30
GPU Type: NVIDIA Jetson Orin NX 8GB(VIA AMOS-9100)
Nvidia Driver Version: JetPack 6.1?
CUDA Version: 12.6.68
CUDNN Version: 9.3.0.75
Operating System + Version: JetPack 6.1 [L4T 36.4.0]
Python Version (if applicable): 3.10.12
TensorFlow Version (if applicable): None
PyTorch Version (if applicable): None
Baremetal or Container (if container which image + tag): Baremetal
Relevant Files
import tensorrt as trt
import cv2
import numpy as np
import common
ENGINE_FILE_PATH = "/home/via/sandbox/python/segmentation/model.trt"
ONNX_FILE_PATH = "/home/via/sandbox/python/segmentation/model.onnx"
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
class ModelData(object):
MODEL_PATH = ONNX_FILE_PATH
INPUT_SHAPE = (3, 288, 288)
# We can convert TensorRT data types to numpy types with trt.nptype()
DTYPE = trt.float32
# The Onnx path is used for Onnx models.
def build_engine_onnx(model_file):
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network(0)
config = builder.create_builder_config()
parser = trt.OnnxParser(network, TRT_LOGGER)
config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, common.GiB(1))
# Load the Onnx model and parse it in order to populate the TensorRT network.
with open(model_file, "rb") as model:
if not parser.parse(model.read()):
print("ERROR: Failed to parse the ONNX file.")
for error in range(parser.num_errors):
print(parser.get_error(error))
return None
engine_bytes = builder.build_serialized_network(network, config)
runtime = trt.Runtime(TRT_LOGGER)
return runtime.deserialize_cuda_engine(engine_bytes)
def get_input_image_tensor():
# PreProcess
bgr_image = cv2.imread("./dog.jpg")
rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
height, width, channel = rgb_image.shape
size = min(height, width)
top = int((height - size) / 2)
left = int((width - size) / 2)
bottom = top + size
right = left + size
crop_img = rgb_image[top:bottom, left:right]
rgb_ds = cv2.resize(crop_img,(288, 288))
rgb_nchw = np.transpose(rgb_ds, (2, 0, 1))
rgb_nchw = (rgb_nchw / 128.0) - 1.0
rgb_batch = rgb_nchw[np.newaxis,:]
return rgb_batch
def main():
onnx_model_file = ONNX_FILE_PATH
engine = build_engine_onnx(onnx_model_file)
inputs, outputs, bindings, stream = common.allocate_buffers(engine)
context = engine.create_execution_context()
input_tensor = get_input_image_tensor()
inputs[0].host = np.array(input_tensor, dtype='<f4')
trt_outputs = common.do_inference(
context,
engine=engine,
bindings=bindings,
inputs=inputs,
outputs=outputs,
stream=stream,
)
print(trt_outputs)
quit()
if __name__ == "__main__":
main()
Steps To Reproduce
The problem occurs when you unpack the above tar.gz file, go to “sandbox/python/segmentation”, and run “python trt_resnet.py”. common.py and common_runtime.py were copied from the sample source code below.