Why do I get different classification results from DeepStream and Triton Server even though both use the same weight file?

Environment for DeepStream
• Hardware Platform: GPU
• DeepStream Version: 6.3
• TensorRT Version: 8.5.3-1+cuda11.8
• NVIDIA GPU Driver Version: 525.89.02

Environment for Triton Server
• Hardware Platform: GPU
• TensorRT Version: 8.5.1-1+cuda11.8
• NVIDIA GPU Driver Version: 525.89.02

  1. ONNX file to convert
    This is the ONNX file I used for inference on both Triton Server and DeepStream. For Triton Server I converted it to a .trt engine with trtexec; DeepStream converts the ONNX file to an .engine file automatically the first time it runs.

  2. Command to convert ONNX → TRT

/usr/src/tensorrt/bin/trtexec --onnx=convnext_base_w_checkpoint_448001.onnx \
                                --saveEngine=convnext_base_w_checkpoint_448001_fp16.trt \
                                --explicitBatch \
                                --minShapes=input:1x3x256x256 \
                                --optShapes=input:128x3x256x256 \
                                --maxShapes=input:128x3x256x256 \
                                --verbose \
                                --device=2 \
                                --fp16 \
                                --inputIOFormats=fp16:chw \
                                --outputIOFormats=fp16:chw
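
Before comparing results, it may help to confirm what trtexec actually produced. A minimal sketch (assuming the TensorRT 8.5 Python bindings and the engine file above) that prints each binding's name, dtype, and shape:

import tensorrt as trt

logger = trt.Logger(trt.Logger.WARNING)
runtime = trt.Runtime(logger)
with open("convnext_base_w_checkpoint_448001_fp16.trt", "rb") as f:
    engine = runtime.deserialize_cuda_engine(f.read())

for i in range(engine.num_bindings):
    # Expect DataType.HALF here, because the engine was built with
    # --inputIOFormats/--outputIOFormats=fp16:chw
    print(engine.get_binding_name(i),
          engine.get_binding_dtype(i),
          engine.get_binding_shape(i))

The engine that DeepStream builds on first run (the .engine file referenced in the config below) can be inspected the same way to see whether its bindings match.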
  3. Triton client code
import numpy as np
import cv2
import os
import requests
import tritonclient.grpc as grpcclient
from PIL import Image
from io import BytesIO
from base_triton_client import TritonBaseClient


class TritonCarBrandClient(TritonBaseClient):

    def __init__(self,
                triton_host="localhost:1000",
                triton_model_name="car_brand_convnext_v9",
                connection="GRPC",
                preprocess_input_size=(256, 256),
                max_batch_size=128,
                subtraction=[0.4815, 0.458, 0.408],
                std=[0.269, 0.261, 0.276],
                classes_name="",
                **kwargs):
        
        super().__init__(triton_host, connection)
        self.triton_model_name = triton_model_name
        self.preprocess_input_size = preprocess_input_size
        self.max_batch_size = max_batch_size
        self.classes_name = classes_name
        self.sub = subtraction
        self.std = std
        self.kwargs = kwargs
        self.labels = self.mapping_label()
        print(self.labels)
        self.model_is_ready()

    def model_is_ready(self):
        """
            check model is ready to run 
        """
        if not self.model.is_model_ready(self.triton_model_name):
            raise ConnectionAbortedError
        
    def mapping_label(self):
        # Map class index -> brand name; classes.txt is one line of
        # ';'-separated names with a trailing separator.
        MAPPING_LABEL = {}
        try:
            with open(self.classes_name, "r") as f:
                str_car_brand = f.readlines()[0]
                list_car_brand = str_car_brand.split(";")[:-1]
                for idx, car_brand in enumerate(list_car_brand):
                    MAPPING_LABEL[idx] = car_brand

        except Exception:
            MAPPING_LABEL = {}
        return MAPPING_LABEL

    def preprocess_image(self, images):
        """
            Preprocess a batch of car-brand crops: resize, scale to [0, 1],
            normalize with mean/std, then NHWC -> NCHW in FP16.
        """
        batch_images = []
        for image in images:
            if isinstance(image, str):
                if os.path.exists(image):
                    image = cv2.imread(image)
                    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                elif image.startswith("http"):
                    response = requests.get(image)
                    if response.status_code != 200:
                        continue
                    image_data = BytesIO(response.content)
                    image = Image.open(image_data)
                    image = np.array(image)
                else:
                    raise ValueError(f"Not an existing path or a URL: {image}")
            elif not isinstance(image, np.ndarray):
                raise ValueError(f"Unsupported image type: {type(image)}")
            # cv2.resize takes (width, height); the model input is square
            image = cv2.resize(image, self.preprocess_input_size)
            image = image.astype(np.float32)
            image /= 255.0
            image = (image - np.array(self.sub)) / np.array(self.std)
            batch_images.append(image)
        # FP16 to match the engine's fp16:chw input binding
        batch_images = np.array(batch_images, dtype=np.float16)
        batch_images = batch_images.transpose((0, 3, 1, 2))
        return batch_images
    
    def postprocess_image(self, output):
        """
            Postprocess model output: argmax over the scores, then map index -> label.
        """
        print(np.max(output, axis=1))  # debug: top score per image
        output = list(np.argmax(output, axis=1))
        labels = [self.labels.get(item, "Unknown") for item in output]
        return labels
    
    def inference(self, 
            images,
            meta_inputs = [('input', 'FP16')],
            meta_outputs = [('output', 'FP16')]):
        """
            Predict batch image
        """
        total_images = len(images)
        # ceil(total_images / max_batch_size)
        total_batch = (total_images + self.max_batch_size - 1) // self.max_batch_size
        predict = []
        for iter_batch in range(total_batch):
            inputs = []
            outputs = []
            lower = iter_batch * self.max_batch_size
            higher = min((iter_batch + 1) * self.max_batch_size, total_images)
            batch_preprocess = self.preprocess_image(images[lower:higher])
            if self.connection == "GRPC":
                for ix, input_tuple in enumerate(meta_inputs):
                    inputs.append(grpcclient.InferInput(input_tuple[0], batch_preprocess.shape, input_tuple[1])) # <name, shape, dtype>
                    inputs[ix].set_data_from_numpy(batch_preprocess)
                for ix, output_tuple in enumerate(meta_outputs):
                    outputs.append(grpcclient.InferRequestedOutput(output_tuple[0]))
            results = self.model.infer(
                model_name=self.triton_model_name,
                inputs=inputs,
                outputs=outputs,
                client_timeout=None
            )

            results = results.as_numpy(output_tuple[0])  # single output tensor assumed
            results = self.postprocess_image(results)
            predict.append(results)
        return predict
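
For completeness, the client above is invoked roughly like this (host and model name are the __init__ defaults; the classes file and image path are placeholders):

client = TritonCarBrandClient(
    triton_host="localhost:1000",
    triton_model_name="car_brand_convnext_v9",
    classes_name="classes.txt",
)
print(client.inference(["car_crop.jpg"]))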
  4. config.txt file used when running DeepStream

# Following properties are mandatory when engine files are not specified:
#   int8-calib-file(Only in INT8)
#   Caffemodel mandatory properties: model-file, proto-file, output-blob-names
#   UFF: uff-file, input-dims, uff-input-blob-name, output-blob-names
#   ONNX: onnx-file
#
# Mandatory properties for detectors:
#   num-detected-classes
#
# Optional properties for detectors:
#   cluster-mode(Default=Group Rectangles), interval(Primary mode only, Default=0)
#   custom-lib-path,
#   parse-bbox-func-name
#
# Mandatory properties for classifiers:
#   classifier-threshold, is-classifier, classifier-type
#
# Optional properties for classifiers:
#   classifier-async-mode(Secondary mode only, Default=false)
#
# Optional properties in secondary mode:
#   operate-on-gie-id(Default=0), operate-on-class-ids(Defaults to all classes),
#   input-object-min-width, input-object-min-height, input-object-max-width,
#   input-object-max-height
#
# Following properties are always recommended:
#   batch-size(Default=1)
#
# Other optional properties:
#   net-scale-factor(Default=1), network-mode(Default=0 i.e FP32),
#   model-color-format(Default=0 i.e. RGB) model-engine-file, labelfile-path,
#   mean-file, gie-unique-id(Default=0), offsets, process-mode (Default=1 i.e. primary),
#   custom-lib-path, network-mode(Default=0 i.e FP32)
#
# The values in the config file are overridden by values set through GObject
# properties.

[property]
gpu-id=0
net-scale-factor=0.00392156862745098
offsets=0.0;0.0;0.0
input-dims=3;256;256;0
onnx-file=/deepstream/weights/secondary/car_brand_convnext_v9/convnext_base_w_checkpoint_448001.onnx
model-engine-file=/deepstream/weights/secondary/car_brand_convnext_v9/convnext_base_w_checkpoint_448001.onnx_b8_gpu0_fp16.engine
labelfile-path=/deepstream/weights/secondary/car_brand_convnext_v9/classes.txt
batch-size=8
model-color-format=0
## 0=FP32, 1=INT8, 2=FP16 mode
network-mode=2
process-mode=2
is-classifier=1
uff-input-blob-name=input
output-blob-names=output
#classifier-async-mode=0
#classifier-threshold=0.7
#input-object-min-width=32
#input-object-min-height=32
gie-unique-id=4
operate-on-gie-id=1
operate-on-class-ids=3
maintain-aspect-ratio=0
symmetric-padding=0
classifier-async-mode=1
classifier-threshold=0.7
secondary-reinfer-interval=10
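
As a sanity check on these values: nvinfer's documented preprocessing is y = net-scale-factor * (x - offsets) per channel, while the Triton client above computes (x/255 - mean) / std. A minimal sketch comparing the two on a single pixel (note that the per-channel std used in the Python client cannot be expressed with nvinfer's single scalar net-scale-factor, so exact equality is not expected):

import numpy as np

x = np.array([128.0, 128.0, 128.0])      # one raw RGB pixel

# Triton client: (x/255 - mean) / std, values from the code above
mean = np.array([0.4815, 0.458, 0.408])
std = np.array([0.269, 0.261, 0.276])
y_triton = (x / 255.0 - mean) / std

# Config above: net-scale-factor=1/255, offsets=0;0;0
y_deepstream = 0.00392156862745098 * (x - 0.0)

print(y_triton)
print(y_deepstream)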

Can someone tell me which step I am getting wrong?

  1. What is the model used for? How did you test it in DeepStream? What is the whole media pipeline?
  2. Please refer to this topic for debugging the accuracy issue.
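
A baseline that can help with that comparison (a minimal sketch, assuming onnxruntime is installed and the ONNX graph exposes the FP32 'input' tensor used in the trtexec shapes above): run the same preprocessed tensor through ONNX Runtime as a reference and compare its argmax against the Triton and DeepStream labels.

import numpy as np
import onnxruntime as ort

# Reference run of the original ONNX model on one preprocessed crop
sess = ort.InferenceSession("convnext_base_w_checkpoint_448001.onnx",
                            providers=["CPUExecutionProvider"])
x = np.random.rand(1, 3, 256, 256).astype(np.float32)  # replace with a real preprocessed crop
outputs = sess.run(None, {"input": x})
print(outputs[0].argmax(axis=1))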