Hi,
I have been working with the RepVGG classification network recently, and I want to integrate the classification model as a secondary GIE (SGIE) in DeepStream. My pipeline has a primary GIE for person detection and a secondary GIE that classifies whether the detected person is smoking.
I converted the original RepVGG PyTorch model to ONNX and then to a TensorRT engine. I checked the result using the TensorRT Python API and it worked fine, producing good results. But when I integrated the same engine into DeepStream, the results were bad. I checked the image preprocessing: it only consists of a resize and a normalization that divides by 255.
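For reference, the ONNX export went roughly like this (the paths, opset version, and the deploy_model variable below are illustrative, not my exact script; the model was first switched to its inference-time structure using the conversion utility from the official RepVGG repo):

import torch

# deploy_model: RepVGG-A2 already re-parameterized into its deploy
# structure (done with the conversion utility from the official
# RepVGG repo); stands in for the actual loaded model here.
dummy = torch.randn(1, 3, 448, 224)  # NCHW; my network input is W=224, H=448
torch.onnx.export(
    deploy_model, dummy, "RepVGG-A2.onnx",
    input_names=["input"], output_names=["prob"],
    opset_version=11,
)

The engine was then built with trtexec, along the lines of --onnx=RepVGG-A2.onnx --fp16 --saveEngine=RepVGG-A2.engine (FP16 to match network-mode=2 in the SGIE config below).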
I cannot figure out what else might be causing this inaccuracy.
I hope someone can help. Thanks!
Here is my full system configuration:
• Hardware Platform (Jetson / GPU): Xavier NX
• DeepStream Version: 5.0
• JetPack Version (valid for Jetson only): 4.4
• TensorRT Version: 7.1.3
Here is the Python API code I used to test the classification model:
import os

import cv2
import numpy as np
import pycuda.autoinit  # noqa: F401 -- initializes the CUDA driver
import pycuda.driver as cuda
import tensorrt as trt

INPUT_W = 224
INPUT_H = 448
CONF_THRESH = 0.5
class RepVGGTRT(object):
    def __init__(self, engine_file_path):
        # Create a CUDA context on device 0 and push it onto the context stack.
        self.cfx = cuda.Device(0).make_context()
        stream = cuda.Stream()

        # Deserialize the engine from file.
        TRT_LOGGER = trt.Logger(trt.Logger.INFO)
        runtime = trt.Runtime(TRT_LOGGER)
        with open(engine_file_path, "rb") as f:
            engine = runtime.deserialize_cuda_engine(f.read())
        context = engine.create_execution_context()

        host_inputs = []
        cuda_inputs = []
        host_outputs = []
        cuda_outputs = []
        bindings = []
        for binding in engine:
            size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            # Allocate page-locked host memory and device memory for this binding.
            host_mem = cuda.pagelocked_empty(size, dtype)
            cuda_mem = cuda.mem_alloc(host_mem.nbytes)
            # Append the device buffer address to the bindings list.
            bindings.append(int(cuda_mem))
            # Append to the appropriate input/output list.
            if engine.binding_is_input(binding):
                host_inputs.append(host_mem)
                cuda_inputs.append(cuda_mem)
            else:
                host_outputs.append(host_mem)
                cuda_outputs.append(cuda_mem)

        # Store everything needed for inference.
        self.stream = stream
        self.context = context
        self.engine = engine
        self.host_inputs = host_inputs
        self.cuda_inputs = cuda_inputs
        self.host_outputs = host_outputs
        self.cuda_outputs = cuda_outputs
        self.bindings = bindings
    def infer(self, frame):
        # Make self.cfx the active context, pushing it on top of the context stack.
        self.cfx.push()
        # Restore the stored objects.
        stream = self.stream
        context = self.context
        host_inputs = self.host_inputs
        cuda_inputs = self.cuda_inputs
        host_outputs = self.host_outputs
        cuda_outputs = self.cuda_outputs
        bindings = self.bindings

        # Preprocess the image and copy it into the page-locked input buffer.
        input_image, image_raw, origin_h, origin_w = self.preprocess_image(frame)
        np.copyto(host_inputs[0], input_image.ravel())

        # Host -> device copy, inference, device -> host copy, then synchronize.
        cuda.memcpy_htod_async(cuda_inputs[0], host_inputs[0], stream)
        context.execute_async(bindings=bindings, stream_handle=stream.handle)
        cuda.memcpy_dtoh_async(host_outputs[0], cuda_outputs[0], stream)
        stream.synchronize()

        self.cfx.pop()
        return host_outputs[0]

    def destroy(self):
        # Remove this context from the top of the context stack, deactivating it.
        self.cfx.pop()
    def preprocess_image(self, image_raw):
        """
        description: Convert a BGR image to RGB, letterbox-resize and pad it
                     to the network input size, normalize to [0, 1], and
                     transform it to NCHW format.
        param:
            image_raw: image array (BGR, HWC)
        return:
            image: the processed image, shape (1, 3, INPUT_H, INPUT_W)
            image_raw: the original image
            h: original height
            w: original width
        """
        h, w, c = image_raw.shape
        image = cv2.cvtColor(image_raw, cv2.COLOR_BGR2RGB)
        # Calculate the target width/height and the paddings.
        r_w = INPUT_W / w
        r_h = INPUT_H / h
        if r_h > r_w:
            tw = INPUT_W
            th = int(r_w * h)
            tx1 = tx2 = 0
            ty1 = int((INPUT_H - th) / 2)
            ty2 = INPUT_H - th - ty1
        else:
            tw = int(r_h * w)
            th = INPUT_H
            tx1 = int((INPUT_W - tw) / 2)
            tx2 = INPUT_W - tw - tx1
            ty1 = ty2 = 0
        # Resize along the long side while maintaining the aspect ratio.
        image = cv2.resize(image, (tw, th))
        # Pad the short side with gray (128, 128, 128).
        image = cv2.copyMakeBorder(
            image, ty1, ty2, tx1, tx2, cv2.BORDER_CONSTANT, value=(128, 128, 128)
        )
        image = image.astype(np.float32)
        # Normalize to [0, 1].
        image /= 255.0
        # HWC -> CHW, then add the batch dimension.
        image = np.transpose(image, [2, 0, 1])
        image = np.expand_dims(image, axis=0)
        image = np.ascontiguousarray(image)
        return image, image_raw, h, w
cls_trt = RepVGGTRT('RepVGG-A2.engine')


def main(input_dir):
    names = []
    labels = []
    topk_ids = []
    probs_all = []
    for root, dirs, files in os.walk(input_dir):
        for image_name in files:
            # Images in the 'normal' folder are label 0, everything else is 1 (smoking).
            if os.path.basename(root) == 'normal':
                labels.append(0)
            else:
                labels.append(1)
            names.append(image_name)
            frame = cv2.imread(os.path.join(root, image_name))
            probs = cls_trt.infer(frame)
            probs_all.append(probs)
            # Predict class 1 (smoking) when its probability exceeds the threshold.
            topk = np.where(probs[1] > CONF_THRESH, 1, 0)
            topk_ids.append([topk])
    topk_ids = np.concatenate(topk_ids, axis=0)
    probs_all = np.stack(probs_all, axis=0)
    with open('./topk_ids.csv', 'w') as out_file:
        for name, label, cls, prob in zip(names, labels, topk_ids, probs_all):
            # image name, ground-truth label, predicted class, smoking probability
            out_file.write('{0},{1},{2},{3}\n'.format(name, label, cls, prob[1]))


if __name__ == '__main__':
    main('/opt/mot_test_videos/smoke_person')
    cls_trt.destroy()
Here is the secondary GIE configuration I used:
[property]
gpu-id=0
net-scale-factor=0.0039215697906911373
labelfile-path=./labels_smoking.txt
force-implicit-batch-dim=1
batch-size=4
model-color-format=0
network-mode=2
process-mode=2
is-classifier=1
output-blob-names=prob
classifier-async-mode=0
input-object-min-width=64
input-object-min-height=64
operate-on-gie-id=1
operate-on-class-ids=0
classifier-threshold=0.5
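For comparison, here is my understanding of the preprocessing nvinfer performs with the settings above (this is an assumption on my part, not verified against the nvinfer source): model-color-format=0 selects RGB, net-scale-factor = 1/255 scales the pixels with no mean offsets, and, since maintain-aspect-ratio is not set, the object crop is stretch-resized to the network input size. In NumPy terms:

import cv2
import numpy as np

def nvinfer_like_preprocess(bgr_crop):
    # Assumed equivalent of the SGIE config above: RGB order
    # (model-color-format=0), y = net-scale-factor * x with no offsets,
    # and a direct resize to 224x448 -- no letterbox padding.
    rgb = cv2.cvtColor(bgr_crop, cv2.COLOR_BGR2RGB)
    resized = cv2.resize(rgb, (224, 448))  # (W, H); stretches the crop
    x = resized.astype(np.float32) * 0.0039215697906911373
    return np.ascontiguousarray(np.transpose(x, (2, 0, 1))[None])

# My TensorRT test script instead letterboxes the image with gray
# (128, 128, 128) padding before dividing by 255, so for crops whose
# aspect ratio is not 224:448 the two paths feed the network
# different tensors.

If that reading of the config is right, the letterbox padding in my test script versus the stretch resize in nvinfer is one difference between the two paths that I have not ruled out.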