Description
Hello all,
I trained a YOLOv3-tiny model on a custom class. When I run it with deepstream-app (objectDetector_Yolo), it works very well.
- Weights: yolov3-tiny.weights (33.1 MB)
- Cfg: yolov3-tiny.cfg (1.9 KB)
- Labels: labels.txt (42 Bytes)
- Engine: model_b1_gpu0_fp32.engine (90.9 MB)
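For reference, the nvinfer config used with deepstream-app is along these lines (a sketch based on the objectDetector_Yolo sample config; the paths and the num-detected-classes value are assumptions, not copied from my setup):
[property]
gpu-id=0
net-scale-factor=0.0039215697906911373
custom-network-config=yolov3-tiny.cfg
model-file=yolov3-tiny.weights
model-engine-file=model_b1_gpu0_fp32.engine
labelfile-path=labels.txt
network-mode=0
# assumption: 5 classes (the 10-column output rows in the Python code below imply 4 box values + 1 objectness + 5 class scores)
num-detected-classes=5
parse-bbox-func-name=NvDsInferParseCustomYoloV3Tiny
custom-lib-path=nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so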
Prediction results with cv2.dnn (using the same .cfg and .weights) are good:
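The cv2.dnn baseline is along these lines (a minimal sketch; the exact script and parameters may differ):
import cv2

net = cv2.dnn.readNetFromDarknet('yolov3-tiny.cfg', 'yolov3-tiny.weights')
img = cv2.imread('testimg/test.jpg')
# Same preprocessing as the TensorRT path below: RGB, 416x416, scaled to [0, 1]
blob = cv2.dnn.blobFromImage(img, 1 / 255.0, (416, 416), swapRB=True, crop=False)
net.setInput(blob)
layer_outputs = net.forward(net.getUnconnectedOutLayersNames())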
But when I use the engine built by deepstream-app directly through the TensorRT Python API, the results are not good.
Please help me.
Image used for testing: (attached)
Python code:
import ctypes
import pycuda.autoinit
import pycuda.driver as cuda
import tensorrt as trt
import numpy as np
import cv2
# Load the DeepStream YOLO plugin library so TensorRT can deserialize the custom YoloLayer plugin in the engine
ctypes.CDLL("/opt/nvidia/deepstream/deepstream-5.0/sources/objectDetector_Yolo_test/nvdsinfer_custom_impl_Yolo/libnvdsinfer_custom_impl_Yolo.so")
TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE)
class HostDeviceMem(object):
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)

    def __repr__(self):
        return self.__str__()
def allocate_buffers(engine):
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream
def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    # Transfer input data to the GPU.
    [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
    # Run inference.
    context.execute_async(batch_size=batch_size, bindings=bindings, stream_handle=stream.handle)
    # Transfer predictions back from the GPU.
    [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
    # Synchronize the stream.
    stream.synchronize()
    # Return only the host outputs.
    return [out.host for out in outputs]
def extract_boxes_confidences_classids(outputs, confidence, width, height):
    boxes = []
    confidences = []
    classIDs = []
    for output in outputs:
        for detection in output:
            # Extract the scores, classID, and the confidence of the prediction
            scores = detection[5:]
            classID = np.argmax(scores)
            conf = scores[classID]
            # Consider only the predictions that are above the confidence threshold
            if conf > confidence:
                # Scale the bounding box back to the size of the image
                box = detection[0:4] * np.array([width, height, width, height])
                centerX, centerY, w, h = box.astype('int')
                # Use the center coordinates, width and height to get the top-left corner
                x = int(centerX - (w / 2))
                y = int(centerY - (h / 2))
                boxes.append([x, y, int(w), int(h)])
                confidences.append(float(conf))
                classIDs.append(classID)
    return boxes, confidences, classIDs
with open("model_b1_gpu0_fp32.engine", "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())

context = engine.create_execution_context()
inputs, outputs, bindings, stream = allocate_buffers(engine)

# Preprocess: BGR -> RGB, resize to the network input size, scale to [0, 1], HWC -> CHW
imgRaw = cv2.imread('testimg/test.jpg')
imgResize = cv2.cvtColor(imgRaw, cv2.COLOR_BGR2RGB)
imgResize = cv2.resize(imgResize, (416, 416), interpolation=cv2.INTER_LINEAR)
imgResize = imgResize.astype(np.float32)
imgResize *= (1 / 255.0)
imgResize = np.transpose(imgResize, [2, 0, 1])
imgResize = np.expand_dims(imgResize, axis=0)
imgResize = np.array(imgResize, dtype=np.float32, order='C')

# NumPy shape is (height, width, channels), not (width, height, channels)
height, width, _ = imgRaw.shape
print(width, height, _)

# Copy into the pagelocked host buffer instead of replacing it
np.copyto(inputs[0].host, imgResize.ravel())
trt_outputs = do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
# print(trt_outputs[0])
output_reshape = list()
for output in trt_outputs:
    # Reshape each flat output into rows of 10 values (4 box coords, 1 objectness, then class scores)
    n, = output.shape
    output = np.reshape(output, (n // 10, 10))
    output_reshape.append(output)
# Extract bounding boxes, confidences and classIDs
confidence = 0.5
boxes, confidences, classIDs = extract_boxes_confidences_classids(output_reshape, confidence, width, height)
print(boxes, confidences, classIDs)
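The printed boxes are raw detections; to visualize them like the cv2.dnn result, NMS and drawing can be applied afterwards (a minimal sketch; the 0.4 NMS threshold and the output path are assumptions):
# Non-maximum suppression over the extracted [x, y, w, h] boxes
idxs = cv2.dnn.NMSBoxes(boxes, confidences, confidence, 0.4)
for i in np.array(idxs).flatten():
    x, y, w, h = boxes[i]
    cv2.rectangle(imgRaw, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.putText(imgRaw, str(classIDs[i]), (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
cv2.imwrite('testimg/result.jpg', imgRaw)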
Environment
TensorRT: 7.1.3
GPU Type: Jetson Xavier NX
Nvidia Driver Version:
CUDA Version: 10.2.89
CUDNN Version: 8.0.0.180
Operating System + Version: JetPack 4.4.1
DeepStream: 5.0
Python Version (if applicable): 3.6.9