How can I run inference in Python, without DeepStream, on a TensorRT (trt) engine trained using TLTK?
Let’s say I have trained DetectNet_v2 + ResNet50 using TLTK.
Thanks
How can I run inference in Python, without DeepStream, on a TensorRT (trt) engine trained using TLTK?
Let’s say I have trained DetectNet_v2 + ResNet50 using TLTK.
Thanks
Hi rohit167,
What do you mean by “TLTK”?
TLTK (Transfer Learning Toolkit)
Is it possible to convert the trained model to a TensorRT engine with the TLT toolkit, or is the DeepStream SDK needed for this step?
For this step, TLT provides the tlt-converter tool, which converts the .etlt model to a TensorRT engine.
DeepStream (DS) is not needed.
Is it possible to run the engine file generated by TLT on a Jetson Nano using the code below?
class Engine(object):
    """Run detection with a serialized TensorRT engine.

    The engine is read from ``ssd/TRT_<model>.bin``. For every engine binding
    a page-locked host buffer and a matching device buffer are allocated once
    at construction time and reused by each ``detect`` call.

    NOTE(review): ``trt``, ``cuda``, ``np``, ``ctypes``, ``_preprocess_trt``
    and ``_postprocess_trt`` must exist at module scope; presumably ``trt`` is
    ``tensorrt`` and ``cuda`` is ``pycuda.driver`` -- confirm against the
    file's imports.
    NOTE(review): the binding calls used here (``max_batch_size``,
    ``get_binding_shape``, ``execute_async(batch_size=...)``) belong to the
    implicit-batch TensorRT API -- confirm they exist in the target release.
    """

    @staticmethod
    def _needs_flattenconcat(version):
        """Return True when *version* (e.g. ``"6.0.1.5"``) is older than 7.

        The major component is compared as an integer. Comparing only the
        first character as a string would treat "10.x" as older than "7".
        """
        return int(version.split('.')[0]) < 7

    def _load_plugins(self):
        """Load the FlattenConcat plugin when required and register plugins."""
        if self._needs_flattenconcat(trt.__version__):
            ctypes.CDLL("ssd/libflattenconcat.so")
        trt.init_libnvinfer_plugins(self.trt_logger, '')

    def _load_engine(self):
        """Deserialize and return the engine stored in ``ssd/TRT_<model>.bin``."""
        TRTbin = 'ssd/TRT_%s.bin' % self.model
        with open(TRTbin, 'rb') as f, trt.Runtime(self.trt_logger) as runtime:
            return runtime.deserialize_cuda_engine(f.read())

    def _create_context(self):
        """Allocate buffers for every binding and return an execution context.

        Fills ``self.bindings`` plus the host/device input and output lists
        as a side effect, so those lists must exist before this is called.
        """
        for binding in self.engine:
            # Element count for one binding at the engine's maximum batch size.
            size = (trt.volume(self.engine.get_binding_shape(binding))
                    * self.engine.max_batch_size)
            # Host buffers are always allocated as float32.
            host_mem = cuda.pagelocked_empty(size, np.float32)
            cuda_mem = cuda.mem_alloc(host_mem.nbytes)
            self.bindings.append(int(cuda_mem))
            if self.engine.binding_is_input(binding):
                self.host_inputs.append(host_mem)
                self.cuda_inputs.append(cuda_mem)
            else:
                self.host_outputs.append(host_mem)
                self.cuda_outputs.append(cuda_mem)
        return self.engine.create_execution_context()

    def __init__(self, model, input_shape, output_layout=7):
        """Initialize TensorRT plugins, engine and context.

        Args:
            model: name substituted into the engine path ``ssd/TRT_<model>.bin``.
            input_shape: passed to ``_preprocess_trt`` for every image.
            output_layout: passed through to ``_postprocess_trt``.
        """
        self.model = model
        self.input_shape = input_shape
        self.output_layout = output_layout
        self.trt_logger = trt.Logger(trt.Logger.INFO)
        self._load_plugins()
        self.engine = self._load_engine()
        self.host_inputs = []
        self.cuda_inputs = []
        self.host_outputs = []
        self.cuda_outputs = []
        self.bindings = []
        self.stream = cuda.Stream()
        self.context = self._create_context()

    def __del__(self):
        """Drop the references to the CUDA stream and device buffers.

        Attributes may be missing if ``__init__`` failed part-way (for example
        the engine file could not be opened), so each one is removed only if
        it is present instead of raising ``AttributeError`` during finalization.
        """
        for attr in ('stream', 'cuda_outputs', 'cuda_inputs'):
            self.__dict__.pop(attr, None)

    def detect(self, img, conf_th=0.3):
        """Detect objects in the input image.

        Args:
            img: input image, handed to ``_preprocess_trt`` and
                ``_postprocess_trt``.
            conf_th: confidence threshold forwarded to ``_postprocess_trt``.

        Returns:
            Whatever ``_postprocess_trt`` returns for the first output buffer.
        """
        img_resized = _preprocess_trt(img, self.input_shape)
        np.copyto(self.host_inputs[0], img_resized.ravel())
        cuda.memcpy_htod_async(
            self.cuda_inputs[0], self.host_inputs[0], self.stream)
        self.context.execute_async(
            batch_size=1,
            bindings=self.bindings,
            stream_handle=self.stream.handle)
        # Copy back every output buffer the engine exposes instead of assuming
        # exactly two; all copies are queued on the same stream.
        for host_out, cuda_out in zip(self.host_outputs, self.cuda_outputs):
            cuda.memcpy_dtoh_async(host_out, cuda_out, self.stream)
        self.stream.synchronize()

        # Only the first output buffer is passed on to post-processing.
        output = self.host_outputs[0]
        return _postprocess_trt(img, output, conf_th, self.output_layout)
def _preprocess_tf(img, shape=(300, 300)):
    """Convert a BGR image to RGB and resize it for TensorFlow SSD inference.

    Args:
        img: image handed to ``cv2.cvtColor`` with ``COLOR_BGR2RGB``.
        shape: target size passed straight to ``cv2.resize``.

    Returns:
        The colour-converted, resized image.
    """
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return cv2.resize(rgb, shape)
def _postprocess_tf(img, boxes, scores, classes, conf_th):
"""Postprocess TensorFlow SSD output."""
h, w, _ = img.shape
out_boxes = boxes[0] * np.array([h, w, h, w])
out_boxes = out_boxes.astype(np.int32)
out_boxes = out_boxes[:, [1, 0, 3, 2]]
out_confs = scores[0]
out_clss = classes[0].astype(np.int32)
mask = np.where(out_confs >= conf_th)
return out_boxes[mask], out_confs[mask], out_clss[mask]
Sorry, please check it by yourself.
Hello @LoveNvidia, can you share your final code, or point to an example? I am struggling with Python-based post-processing of my faster_rcnn inference using the engine file on a Jetson Xavier.
I trained the model using TLT, then converted the .etlt file using tlt-converter. Thanks.