Environment for DeepStream
• Hardware Platform: GPU
• DeepStream Version: 6.3
• TensorRT Version: 8.5.3-1+cuda11.8
• NVIDIA GPU Driver Version: 525.89.02
Environment for Triton Server
• Hardware Platform: GPU
• TensorRT Version: 8.5.1-1+cuda11.8
• NVIDIA GPU Driver Version: 525.89.02
- ONNX file to convert
This is the ONNX file I used for inference with both Triton Server and DeepStream: for Triton Server I converted it to a .trt engine with trtexec, while DeepStream converts the ONNX file to an .engine file automatically the first time it runs.
- Command to convert ONNX → TRT
/usr/src/tensorrt/bin/trtexec --onnx=convnext_base_w_checkpoint_448001.onnx \
--saveEngine=convnext_base_w_checkpoint_448001_fp16.trt \
--explicitBatch \
--minShapes=input:1x3x256x256 \
--optShapes=input:128x3x256x256 \
--maxShapes=input:128x3x256x256 \
--verbose \
--device=2 \
--fp16 \
--inputIOFormats=fp16:chw \
--outputIOFormats=fp16:chw
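As a sanity check on the generated engine, the binding names, dtypes, and shapes can be printed with the TensorRT Python API (a minimal sketch, assuming TensorRT 8.5 and the engine file produced by the command above):
import tensorrt as trt

# Print each binding's direction, name, dtype, and shape so the FP16 IO
# formats and the dynamic batch dimension can be verified before serving.
logger = trt.Logger(trt.Logger.WARNING)
with open("convnext_base_w_checkpoint_448001_fp16.trt", "rb") as f:
    engine = trt.Runtime(logger).deserialize_cuda_engine(f.read())

for i in range(engine.num_bindings):
    kind = "input" if engine.binding_is_input(i) else "output"
    print(kind, engine.get_binding_name(i),
          engine.get_binding_dtype(i), engine.get_binding_shape(i))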
- Triton client code
import numpy as np
import cv2
import os
import requests
import tritonclient.grpc as grpcclient
from PIL import Image
from io import BytesIO
from base_triton_client import TritonBaseClient
class TritonCarBrandClient(TritonBaseClient):
def __init__(self,
triton_host="localhost:1000",
triton_model_name="car_brand_convnext_v9",
connection="GRPC",
preprocess_input_size=(256, 256),
max_batch_size=128,
subtraction=[0.4815, 0.458, 0.408],
std=[0.269, 0.261, 0.276],
classes_name="",
**kwargs):
super().__init__(triton_host, connection)
self.triton_model_name = triton_model_name
self.preprocess_input_size = preprocess_input_size
self.max_batch_size = max_batch_size
self.classes_name = classes_name
self.sub = subtraction
self.std = std
self.kwargs = kwargs
self.labels = self.mapping_label()
print(self.labels)
self.model_is_ready()
    def model_is_ready(self):
        """
        Check that the model is ready to serve requests.
        """
        if not self.model.is_model_ready(self.triton_model_name):
            raise ConnectionAbortedError(f"Model {self.triton_model_name} is not ready")
    def mapping_label(self):
        # Map class index -> class name. The classes file is a single line
        # of semicolon-separated names with a trailing separator.
        MAPPING_LABEL = {}
        try:
            with open(self.classes_name, "r") as f:
                str_car_brand = f.readlines()[0]
                list_car_brand = str_car_brand.split(";")[:-1]
                for idx, car_brand in enumerate(list_car_brand):
                    MAPPING_LABEL[idx] = car_brand
        except Exception:
            MAPPING_LABEL = {}
        return MAPPING_LABEL
    def preprocess_image(self, images):
        """
        Preprocess a batch of car-brand images (file paths, URLs, or arrays).
        """
        batch_images = []
        for image in images:
if isinstance(image, str):
if os.path.exists(image):
image = cv2.imread(image)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
elif image.startswith("http"):
response = requests.get(image)
if response.status_code != 200:
continue
else:
image_data = BytesIO(response.content)
image = Image.open(image_data)
image = np.array(image)
            elif isinstance(image, np.ndarray):
                pass  # already a decoded image array
            else:
                raise ValueError(f"Unsupported image type: {type(image)}")
            image = cv2.resize(image, self.preprocess_input_size)
            image = image.astype(np.float32)
            image /= 255.0  # scale pixels to [0, 1]
            image = (image - np.array(self.sub)) / np.array(self.std)  # per-channel mean/std normalization
            batch_images.append(image)
        batch_images = np.array(batch_images, dtype=np.float16)  # FP16 to match the engine IO format
        batch_images = batch_images.transpose((0, 3, 1, 2))  # NHWC -> NCHW
return batch_images
    def postprocess_image(self, output):
        """
        Postprocess model output: argmax over class scores, then map to label names.
        """
        print(np.max(output, axis=1))  # debug: max score per image
output = list(np.argmax(output, axis=1))
labels = [self.labels.get(item, "Unknown") for item in output]
return labels
def inference(self,
images,
meta_inputs = [('input', 'FP16')],
meta_outputs = [('output', 'FP16')]):
"""
Predict batch image
"""
total_images = len(images)
total_batch = int(total_images/self.max_batch_size) if total_images % self.max_batch_size == 0 else int(total_images/self.max_batch_size) + 1
predict = []
for iter_batch in range(total_batch):
inputs = []
outputs = []
lower = iter_batch * self.max_batch_size
higher = min((iter_batch + 1) * self.max_batch_size, total_images)
batch_preprocess = self.preprocess_image(images[lower:higher])
if self.connection == "GRPC":
for ix, input_tuple in enumerate(meta_inputs):
inputs.append(grpcclient.InferInput(input_tuple[0], batch_preprocess.shape, input_tuple[1])) # <name, shape, dtype>
inputs[ix].set_data_from_numpy(batch_preprocess)
for ix, output_tuple in enumerate(meta_outputs):
outputs.append(grpcclient.InferRequestedOutput(output_tuple[0]))
results = self.model.infer(
model_name=self.triton_model_name,
inputs=inputs,
outputs=outputs,
client_timeout=None
)
            results = results.as_numpy(meta_outputs[0][0])  # fetch the single requested output
results = self.postprocess_image(results)
predict.append(results)
return predict
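A usage sketch of the client (file names here are hypothetical; classes.txt is a single line of semicolon-separated brand names, as mapping_label expects):
# Hypothetical usage; host and model name match the defaults above.
client = TritonCarBrandClient(
    triton_host="localhost:1000",
    triton_model_name="car_brand_convnext_v9",
    classes_name="classes.txt",  # e.g. "toyota;honda;ford;"
)
labels = client.inference(["car_crop_001.jpg", "car_crop_002.jpg"])
print(labels)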
- config.txt file I use when running DeepStream
[property]
gpu-id=0
net-scale-factor=0.00392156862745098
offsets=0.0;0.0;0.0
input-dims=3;256;256;0
onnx-file=/deepstream/weights/secondary/car_brand_convnext_v9/convnext_base_w_checkpoint_448001.onnx
model-engine-file=/deepstream/weights/secondary/car_brand_convnext_v9/convnext_base_w_checkpoint_448001.onnx_b8_gpu0_fp16.engine
labelfile-path=/deepstream/weights/secondary/car_brand_convnext_v9/classes.txt
batch-size=8
model-color-format=0
## 0=FP32, 1=INT8, 2=FP16 mode
network-mode=2
process-mode=2
is-classifier=1
uff-input-blob-name=input
output-blob-names=output
#classifier-async-mode=0
#classifier-threshold=0.7
#input-object-min-width=32
#input-object-min-height=32
gie-unique-id=4
operate-on-gie-id=1
operate-on-class-ids=3
maintain-aspect-ratio=0
symmetric-padding=0
classifier-async-mode=1
classifier-threshold=0.7
secondary-reinfer-interval=10
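For comparison, nvinfer preprocesses as y = net-scale-factor * (x - offsets), while the Triton client above computes (x / 255 - mean) / std. A minimal sketch of translating the client's constants into nvinfer terms (approximate, since net-scale-factor is a single scalar while the std here is per-channel):
# Translate the client's mean/std normalization into nvinfer-style constants.
# Exact equivalence would need a per-channel scale, so the single
# net-scale-factor below is an approximation using the average std.
mean = [0.4815, 0.458, 0.408]
std = [0.269, 0.261, 0.276]

offsets = [255.0 * m for m in mean]        # pixel-domain offsets
avg_std = sum(std) / len(std)
net_scale_factor = 1.0 / (255.0 * avg_std)

print("offsets =", ";".join(f"{o:.4f}" for o in offsets))
print("net-scale-factor =", net_scale_factor)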
Can someone tell me which step I am getting wrong?