Hello!
In my project, I use a ResNet-18 network trained with the Transfer Learning Toolkit.
However, I need to run it without DeepStream.
How do I convert the output tensors (host_outputs) into boxes in the format [x_min, x_max, y_min, y_max] together with a class ID?
My code:
import cv2
import time
import numpy as np
import tensorrt as trt
import pycuda.autoinit
import pycuda.driver as cuda
import sys
# Shared TensorRT logger; INFO level also prints engine-loading diagnostics.
TRT_LOGGER = trt.Logger(trt.Logger.INFO)
# Register TensorRT's bundled plugins (default namespace) before the engine is
# deserialized, so plugin layers referenced by the engine can be resolved.
trt.init_libnvinfer_plugins(TRT_LOGGER, '')
runtime = trt.Runtime(TRT_LOGGER)

# Buffers are collected per binding, in engine binding order:
#   host_*   - page-locked host arrays (required for async copies)
#   cuda_*   - matching device allocations
#   bindings - raw device pointers, in the order the engine expects them
host_inputs = []
cuda_inputs = []
host_outputs = []
cuda_outputs = []
bindings = []

# create engine
with open('/opt/nvidia/deepstream/deepstream-5.0/samples/models/detectnet_v2/resnet18.engine', 'rb') as f:
    buf = f.read()
engine = runtime.deserialize_cuda_engine(buf)
if engine is None:
    # deserialize_cuda_engine reports failure by returning None (details go
    # to TRT_LOGGER); fail here instead of with a TypeError in the loop below.
    raise RuntimeError('failed to deserialize the TensorRT engine')

stream = cuda.Stream()
for binding in engine:
    # One buffer per binding, large enough for the engine's maximum batch.
    size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
    # Use the binding's own element type rather than assuming float32.
    dtype = trt.nptype(engine.get_binding_dtype(binding))
    host_mem = cuda.pagelocked_empty(size, dtype)
    cuda_mem = cuda.mem_alloc(host_mem.nbytes)
    bindings.append(int(cuda_mem))
    if engine.binding_is_input(binding):
        host_inputs.append(host_mem)
        cuda_inputs.append(cuda_mem)
    else:
        host_outputs.append(host_mem)
        cuda_outputs.append(cuda_mem)
context = engine.create_execution_context()
# inference
if len(sys.argv) < 2:
    raise SystemExit('usage: python <script> <image_path>')
ori = cv2.imread(sys.argv[1])
if ori is None:
    # cv2.imread does not raise on a missing/unreadable file; it returns None.
    raise SystemExit('could not read image: ' + sys.argv[1])

# Take the network input resolution from the engine instead of hard-coding it.
# Assumes the input binding is laid out as (..., H, W) - TODO confirm for
# engines that are not CHW.
input_name = next(b for b in engine if engine.binding_is_input(b))
input_h, input_w = tuple(engine.get_binding_shape(input_name))[-2:]

image = cv2.cvtColor(ori, cv2.COLOR_BGR2RGB)
image = cv2.resize(image, (input_w, input_h))
# Scale pixel values from [0, 255] to [-1, 1].
# NOTE(review): TLT DetectNet_v2 models are usually fed pixels scaled by 1/255
# (range [0, 1]); confirm this matches the preprocessing used in training.
image = (2.0/255.0) * image - 1.0
# HWC -> CHW
image = image.transpose((2, 0, 1))

# The host buffer holds max_batch_size samples; only the first slot is used.
sample_size = host_inputs[0].size // engine.max_batch_size
if image.size != sample_size:
    raise ValueError(
        'preprocessed image has %d elements but the engine input expects %d'
        % (image.size, sample_size)
    )
np.copyto(host_inputs[0][:sample_size], image.ravel())

# The timed span covers the host->device copy, the inference itself and the
# device->host copies, because the clock is read only after the stream syncs.
start_time = time.time()
cuda.memcpy_htod_async(cuda_inputs[0], host_inputs[0], stream)
context.execute_async(bindings=bindings, stream_handle=stream.handle)
# Fetch every output binding, however many the engine defines.
# NOTE(review): for DetectNet_v2 these are presumably a per-class coverage
# grid and a per-class bbox grid - check the binding names/shapes before
# decoding them into boxes.
for host_out, cuda_out in zip(host_outputs, cuda_outputs):
    cuda.memcpy_dtoh_async(host_out, cuda_out, stream)
stream.synchronize()
print("execute times "+str(time.time()-start_time))