Hello,
I am trying to run object detection with TensorRT using the resnet10.caffemodel_b1_int8.engine file, but I always get the same value of h_output[0], even with different input images.
Below I am attaching the code.
import tensorrt as trt
import os
import sys
import cv2
import time
import ctypes
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
# Class names; post-processing looks up labels[class_id] when drawing boxes.
labels = ['Car', 'Person', 'Bicycle', 'Roadsign']

# initialize
TRT_LOGGER = trt.Logger(trt.Logger.INFO)
# Register the TensorRT plugin library before the engine is deserialized.
trt.init_libnvinfer_plugins(TRT_LOGGER, '')
runtime = trt.Runtime(TRT_LOGGER)
with open("resnet10.caffemodel_b1_int8.engine", 'rb') as f:
    buf = f.read()
    engine = runtime.deserialize_cuda_engine(buf)
# Fail early with a clear message instead of crashing later on a None engine
# (e.g. when the file was built by a different TensorRT version or for a
# different GPU).
if engine is None:
    raise RuntimeError(
        "failed to deserialize 'resnet10.caffemodel_b1_int8.engine'")
# create buffer
# One page-locked host buffer and one device buffer per engine binding.
# `bindings` holds the device pointers in binding order, as required by
# the execution context.
host_inputs = []
cuda_inputs = []
host_outputs = []
cuda_outputs = []
bindings = []
stream = cuda.Stream()
for binding in engine:
    shape = engine.get_binding_shape(binding)
    size = trt.volume(shape) * engine.max_batch_size
    print("Binding shape is : ", shape)
    # Use the dtype the engine declares for this binding rather than
    # hard-coding float32, so the host buffer always matches the device
    # tensor byte-for-byte.
    dtype = trt.nptype(engine.get_binding_dtype(binding))
    host_mem = cuda.pagelocked_empty(size, dtype)
    cuda_mem = cuda.mem_alloc(host_mem.nbytes)
    bindings.append(int(cuda_mem))
    if engine.binding_is_input(binding):
        host_inputs.append(host_mem)
        cuda_inputs.append(cuda_mem)
    else:
        host_outputs.append(host_mem)
        cuda_outputs.append(cuda_mem)
context = engine.create_execution_context()
#TODO enable video pipeline
#TODO using pyCUDA for preprocess
ori = cv2.imread("sample_720p.jpg")
# cv2.imread returns None (it does not raise) when the file is missing or
# unreadable; fail here with a clear message.
if ori is None:
    raise FileNotFoundError("could not read image 'sample_720p.jpg'")
image = cv2.cvtColor(ori, cv2.COLOR_BGR2RGB)
# cv2.resize takes the target size as (width, height).
image = cv2.resize(image, (640, 368))
# Convert to float32, NOT int8: pixel values are 0..255 and an int8 cast
# wraps everything above 127 to negative numbers, corrupting the input
# before it is normalised.
image = image.astype(np.float32)
# Scale pixel values from [0, 255] to [-1, 1].
# NOTE(review): confirm this matches the preprocessing the engine was
# trained/calibrated with; the DeepStream sample config for this model is
# believed to use a plain 1/255 scale with no offset -- verify.
image = (2.0 / 255.0) * image - 1.0
# HWC -> CHW.
image = image.transpose((2, 0, 1))
print("image_ravel_valus is : ", image.ravel())
print("value of host_inputs is : ", host_inputs[0])
# Copy the flattened image into the page-locked input buffer; raises if the
# element count does not match the input binding size.
np.copyto(host_inputs[0], image.ravel())
# Run one inference: upload the input, execute, download every output.
# All operations are queued on the same stream, so they run in order and a
# single synchronize() at the end is sufficient.
start_time = time.perf_counter()
cuda.memcpy_htod_async(cuda_inputs[0], host_inputs[0], stream)
context.execute_async(bindings=bindings, stream_handle=stream.handle)
# Copy back every output binding instead of hard-coding indices 0 and 1,
# so engines with a different number of outputs also work.
for host_out, cuda_out in zip(host_outputs, cuda_outputs):
    cuda.memcpy_dtoh_async(host_out, cuda_out, stream)
stream.synchronize()
print("execute times " + str(time.perf_counter() - start_time))
# Decode detections from the first output binding and draw them on the
# original (un-resized) image.
output = host_outputs[0]
height, width = ori.shape[:2]
print("output is : ", len(output))
# NOTE(review): this loop assumes the output is a flat list of 7-value
# records [index, label, conf, xmin, ymin, xmax, ymax] with box coordinates
# normalised to [0, 1]. Confirm that host_outputs[0] really has this
# layout: DetectNet-style detectors (such as the DeepStream resnet10 sample
# model) typically emit coverage and bbox grids instead, which need
# different decoding (grid-cell offsets plus clustering/NMS).
model_layout = 7
for i in range(len(output) // model_layout):
    prefix = i * model_layout
    index = int(output[prefix + 0])
    label = int(output[prefix + 1])
    conf = output[prefix + 2]
    # Scale the normalised coordinates to pixels of the original image.
    xmin = int(output[prefix + 3] * width)
    ymin = int(output[prefix + 4] * height)
    xmax = int(output[prefix + 5] * width)
    ymax = int(output[prefix + 6] * height)
    print("index : ", index)
    print("label : ", label)
    print("conf : ", conf)
    print("xmin , ymin, xmax, ymax", xmin, ymin, xmax, ymax)
    if conf > 0.7:
        # Skip records whose class id is not a valid index into `labels`;
        # a negative id would otherwise silently index from the end and a
        # large one would raise IndexError.
        if not 0 <= label < len(labels):
            print("skipping detection with unknown label id : ", label)
            continue
        print("Detected {} with confidence {:.0%}".format(labels[label], conf))
        cv2.rectangle(ori, (xmin, ymin), (xmax, ymax), (0, 0, 255), 3)
        cv2.putText(ori, labels[label], (xmin + 10, ymin + 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2,
                    cv2.LINE_AA)
cv2.imwrite("result.jpg", ori)
cv2.imshow("result", ori)
cv2.waitKey(0)
Please help me find where I am going wrong.
Also, if there is another TensorRT sample with which I can run object detection using the .engine file used in DeepStream (DS) and the .trt file generated by the Transfer Learning Toolkit, please let me know.
Thanks.