I started implementing this file, however without the engine creation as I already created one with ./tlt-converter:
I’ve converted it to look like this atm:
import os
import cv2
import sys
import time
import tensorrt as trt
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
initialize
TRT_LOGGER = trt.Logger(trt.Logger.INFO)
trt.init_libnvinfer_plugins(TRT_LOGGER, ‘’)
runtime = trt.Runtime(TRT_LOGGER)
create engine
with open(‘resnet10_fp16_0_5.engine’, ‘rb’) as f:
buf = f.read()
engine = runtime.deserialize_cuda_engine(buf)
create buffer
host_inputs =
cuda_inputs =
host_outputs =
cuda_outputs =
bindings =
stream = cuda.Stream()
for binding in engine:
size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
host_mem = cuda.pagelocked_empty(size, np.float16)
cuda_mem = cuda.mem_alloc(host_mem.nbytes)
bindings.append(int(cuda_mem))
if engine.binding_is_input(binding):
host_inputs.append(host_mem)
cuda_inputs.append(cuda_mem)
else:
host_outputs.append(host_mem)
cuda_outputs.append(cuda_mem)
context = engine.create_execution_context()
inference
ori = cv2.imread(sys.argv[1])
image = cv2.cvtColor(ori, cv2.COLOR_BGR2RGB)
image = cv2.resize(image, (720,1280))
image = (2.0/255.0) * image - 1.0
image = image.transpose((2, 0, 1))
np.copyto(host_inputs[0], image.ravel())
and when i run: python3 thisFile.py image.jpg
i get this:
[TensorRT] INFO: Glob Size is 2190768 bytes.
[TensorRT] INFO: Added linear block of size 176947200
[TensorRT] INFO: Added linear block of size 117964800
[TensorRT] INFO: Added linear block of size 44236800
[TensorRT] INFO: Deserialize required 2796280 microseconds.
Traceback (most recent call last):
File “testTensorRT.py”, line 49, in
np.copyto(host_inputs[0], image.ravel())
File “<array_function internals>”, line 6, in copyto
ValueError: could not broadcast input array from shape (2764800) into shape (44236800)