@Morganh
I have followed the TLT multi-task classification tutorial and trained a model on an open-source dataset. I am able to load the model, but when I run inference I do not get any output from it. In addition, I am getting the TensorRT error below:
[TensorRT] ERROR: Parameter check failed at: engine.cpp::enqueue::451, condition: bindings != nullptr
root@ahamad:/workspace/analytics/jothi# python class_clf_trtmodel.py
Using TensorFlow backend.
2022-04-05 17:15:02.328958: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
WARNING:tensorflow:Deprecation warnings have been disabled. Set TF_ENABLE_DEPRECATION_WARNINGS=1 to re-enable them.
model dimensions: (14400,)
[TensorRT] ERROR: Parameter check failed at: engine.cpp::enqueue::451, condition: bindings[x] != nullptr
h_output, h_input [0. 0. 0. 0.] [151.061 151.061 151.061 ... 131.32 131.32 131.32 ]
Traceback (most recent call last):
  File "class_clf_trtmodel.py", line 151, in <module>
    output = classifier.predict(image)
  File "class_clf_trtmodel.py", line 97, in predict
    pred = self.class_mapping[np.argmax(h_output)]
KeyError: 0
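Since the error message refers to the engine bindings, the bindings of the generated engine can be inspected to see how many inputs/outputs it actually expects. Below is a minimal sketch of that check, assuming the standard TensorRT 7/8 Python bindings API and the same engine path used in the script:

import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

# Deserialize the engine and list every binding with its shape, so it is
# clear how many inputs/outputs the multi-task engine exposes.
with open("./assets/multitask_classifier/mcls_export.etlt.engine", "rb") as f:
    engine = trt.Runtime(TRT_LOGGER).deserialize_cuda_engine(f.read())

for i in range(engine.num_bindings):
    kind = "input" if engine.binding_is_input(i) else "output"
    print(i, engine.get_binding_name(i), engine.get_binding_shape(i), kind)

If this lists more than two bindings (for example one softmax output per task), that could be why execute_async complains that bindings[x] is nullptr when only one input and one output pointer are passed.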
Here I am sharing the script and the model file for your reference.
import os
import time
import cv2
# import matplotlib.pyplot as plt
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
import tensorrt as trt
from PIL import Image
from keras.applications.imagenet_utils import preprocess_input
import logging

lg = logging.getLogger(__name__)

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)


class HostDeviceMem(object):
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)

    def __repr__(self):
        return self.__str__()


class TLTClfModel(object):
    def __init__(self,
                 engine_path,
                 model_w=80,
                 model_h=60,
                 classid_map=None
                 ):
        self.trt_engine_path = engine_path
        if "132" in engine_path:
            model_w = 132
            model_h = 132
        self.model_w = model_w
        self.model_h = model_h
        self.labels = classid_map
        self.output_name_0 = "base_color/Softmax"
        self.output_name_1 = "category/Softmax"
        self.output_name_2 = "season/Softmax"
        self.task_name = ["base_color", "category", "season"]
        self.class_mapping = {"base_color": {"0": "Black", "1": "Blue", "2": "Brown", "3": "Green",
                                             "4": "Grey", "5": "Navy Blue", "6": "Pink", "7": "Purple", "8": "Red",
                                             "9": "Silver", "10": "White"},
                              "category": {"0": "Bags", "1": "Bottomwear", "2": "Eyewear", "3": "Fragrance",
                                           "4": "Innerwear", "5": "Jewellery", "6": "Sandal", "7": "Shoes", "8": "Topwear",
                                           "9": "Watches"},
                              "season": {"0": "Fall", "1": "Spring", "2": "Summer", "3": "Winter"}}
        self.trt_runtime = trt.Runtime(TRT_LOGGER)
        self.trt_engine = self.load_engine(self.trt_runtime, self.trt_engine_path)
        self.inputs, self.dinputs, self.outputs, self.doutputs, self.stream = self.allocate_buffers(self.trt_engine)
        self.context = self.trt_engine.create_execution_context()
        self.cuda_ctx = cuda.Context.attach()

    @staticmethod
    def load_engine(trt_runtime, engine_path):
        with open(engine_path, "rb") as f:
            engine_data = f.read()
        engine = trt_runtime.deserialize_cuda_engine(engine_data)
        return engine

    @staticmethod
    def allocate_buffers(engine):
        # Determine dimensions and create page-locked memory buffers (i.e. won't be swapped to disk) to hold host inputs/outputs.
        h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=trt.nptype(trt.float32))
        h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=trt.nptype(trt.float32))
        # Allocate device memory for inputs and outputs.
        d_input = cuda.mem_alloc(h_input.nbytes)
        d_output = cuda.mem_alloc(h_output.nbytes)
        # Create a stream in which to copy inputs/outputs and run inference.
        stream = cuda.Stream()
        return h_input, d_input, h_output, d_output, stream

    def predict(self, image, confidence_thresh=0.5):
        self.load_image_to_pagelocked_memory(image,
                                             self.inputs,
                                             self.model_w,
                                             self.model_h)
        h_output, h_input = self.do_inference(self.context,
                                              self.inputs,
                                              self.dinputs,
                                              self.outputs,
                                              self.doutputs,
                                              self.stream)
        print("h_output, h_input", h_output, h_input)
        confidence = np.max(h_output)
        pred = self.class_mapping[np.argmax(h_output)]
        lg.info("prediction result: %s, %0.2f" % (h_output, confidence))
        # if confidence > confidence_thresh:
        return pred, confidence
        # else:
        #     return "unclear", confidence

    @staticmethod
    def load_image_to_pagelocked_memory(test_image, pagelocked_buffer, w, h):
        # Converts the input image to a CHW Numpy array
        def normalize_image(image, w, h):
            # Resize, antialias and transpose the image to CHW.
            # return np.asarray(image.resize((w, h), Image.ANTIALIAS)).transpose([2, 0, 1]).astype(trt.nptype(trt.float32)).ravel()
            lg.info("model dimensions: %d, %d" % (w, h))
            normalized_image = preprocess_input(
                cv2.resize(image, (w, h))
                .transpose([2, 0, 1])
                .astype(trt.nptype(trt.float32)),
                mode='caffe', data_format='channels_first').ravel()
            print("model dimensions:", normalized_image.shape)
            return normalized_image

        # Normalize the image and copy to pagelocked memory.
        np.copyto(pagelocked_buffer, normalize_image(test_image[:, :, ::-1], w, h))
        return test_image

    @staticmethod
    def do_inference(context, h_input, d_input, h_output, d_output, stream):
        # Transfer input data to the GPU.
        cuda.memcpy_htod_async(d_input, h_input, stream)
        # Run inference.
        context.execute_async(bindings=[int(d_input), int(d_output)], stream_handle=stream.handle)
        # Transfer predictions back from the GPU.
        cuda.memcpy_dtoh_async(h_output, d_output, stream)
        # Synchronize the stream
        stream.synchronize()
        return h_output, h_input

    def __del__(self):
        self.cuda_ctx.pop()
        # self.cuda_ctx.detach()


if __name__ == '__main__':
    classifier = TLTClfModel(
        engine_path="./assets/multitask_classifier/mcls_export.etlt.engine",
        model_w=60,
        model_h=80
    )
    image = cv2.imread('10000.jpg')
    # rects = pp.start(image)
    image = cv2.resize(image, (80, 60))
    output = classifier.predict(image)
class_clf_trtmodel.py (6.0 KB)
mcls_export.etlt (1.8 MB)
etlt_to_engine_mltsk.sh (535 Bytes)