TRT engine inference in Python without DeepStream

How can I run inference with a TRT engine trained using TLTK in Python, without DeepStream?

Let’s say I have trained DetectNet_v2 + ResNet50 using TLTK.

Thanks

Hi rohit167,
What do you mean by “TLTK”?

TLTK (Transfer Learning Toolkit)

Reference: https://devtalk.nvidia.com/default/topic/1068207/transfer-learning-toolkit/deploy-engine-file-in-python/post/5410489/#5410489

Is it possible to convert the trained model to a TRT engine with the TLT toolkit, or is the DeepStream SDK needed for this step?

For this step, TLT provides the tlt-converter tool, which can convert the .etlt model into a TRT engine.
DeepStream is not needed.
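For reference, a typical tlt-converter invocation for a DetectNet_v2 model might look like the sketch below. The key, input dimensions, and file paths are placeholders that depend on your own training run; the output node names shown are the usual DetectNet_v2 heads, but check your model's documentation:

# Hypothetical example; substitute your own key, dims, and paths.
# -k : encryption key used during TLT training
# -d : input dimensions as C,H,W
# -o : comma-separated output node names
# -e : path for the generated TRT engine
./tlt-converter model.etlt \
    -k $YOUR_NGC_KEY \
    -d 3,544,960 \
    -o output_bbox/BiasAdd,output_cov/Sigmoid \
    -e detectnet_v2.engine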

Is it possible to run the engine file generated by TLT on a Jetson Nano using the code below?

import ctypes

import cv2
import numpy as np
import pycuda.autoinit  # noqa: F401 (initializes the CUDA context)
import pycuda.driver as cuda
import tensorrt as trt


class Engine(object):
    def _load_plugins(self):
        # TensorRT versions before 7 need the FlattenConcat plugin
        # loaded explicitly for SSD models.
        if trt.__version__[0] < '7':
            ctypes.CDLL("ssd/libflattenconcat.so")
        trt.init_libnvinfer_plugins(self.trt_logger, '')

    def _load_engine(self):
        TRTbin = 'ssd/TRT_%s.bin' % self.model
        with open(TRTbin, 'rb') as f, trt.Runtime(self.trt_logger) as runtime:
            return runtime.deserialize_cuda_engine(f.read())

    def _create_context(self):
        # Allocate page-locked host buffers and device buffers for
        # every input/output binding of the engine.
        for binding in self.engine:
            size = trt.volume(self.engine.get_binding_shape(binding)) * \
                   self.engine.max_batch_size
            host_mem = cuda.pagelocked_empty(size, np.float32)
            cuda_mem = cuda.mem_alloc(host_mem.nbytes)
            self.bindings.append(int(cuda_mem))
            if self.engine.binding_is_input(binding):
                self.host_inputs.append(host_mem)
                self.cuda_inputs.append(cuda_mem)
            else:
                self.host_outputs.append(host_mem)
                self.cuda_outputs.append(cuda_mem)
        return self.engine.create_execution_context()

    def __init__(self, model, input_shape, output_layout=7):
        """Initialize TensorRT plugins, engine and context."""
        self.model = model
        self.input_shape = input_shape
        self.output_layout = output_layout
        self.trt_logger = trt.Logger(trt.Logger.INFO)
        self._load_plugins()
        self.engine = self._load_engine()
        self.host_inputs = []
        self.cuda_inputs = []
        self.host_outputs = []
        self.cuda_outputs = []
        self.bindings = []
        self.stream = cuda.Stream()
        self.context = self._create_context()

    def __del__(self):
        """Free CUDA memories."""
        del self.stream
        del self.cuda_outputs
        del self.cuda_inputs

    def detect(self, img, conf_th=0.3):
        """Detect objects in the input image."""
        img_resized = _preprocess_trt(img, self.input_shape)
        np.copyto(self.host_inputs[0], img_resized.ravel())
        # Copy the input to the device, run inference asynchronously,
        # then copy the outputs back to the host.
        cuda.memcpy_htod_async(
            self.cuda_inputs[0], self.host_inputs[0], self.stream)
        self.context.execute_async(
            batch_size=1,
            bindings=self.bindings,
            stream_handle=self.stream.handle)
        cuda.memcpy_dtoh_async(
            self.host_outputs[1], self.cuda_outputs[1], self.stream)
        cuda.memcpy_dtoh_async(
            self.host_outputs[0], self.cuda_outputs[0], self.stream)
        self.stream.synchronize()
        output = self.host_outputs[0]
        return _postprocess_trt(img, output, conf_th, self.output_layout)


def _preprocess_tf(img, shape=(300, 300)):
    """Preprocess an image before TensorFlow SSD inferencing."""
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, shape)
    return img


def _postprocess_tf(img, boxes, scores, classes, conf_th):
    """Postprocess TensorFlow SSD output."""
    h, w, _ = img.shape
    # Boxes come out normalized as [y1, x1, y2, x2]; scale them to
    # pixels and reorder to [x1, y1, x2, y2].
    out_boxes = boxes[0] * np.array([h, w, h, w])
    out_boxes = out_boxes.astype(np.int32)
    out_boxes = out_boxes[:, [1, 0, 3, 2]]
    out_confs = scores[0]
    out_clss = classes[0].astype(np.int32)
    mask = np.where(out_confs >= conf_th)
    return out_boxes[mask], out_confs[mask], out_clss[mask]
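Note that detect() calls _preprocess_trt and _postprocess_trt, which are not included in the snippet above (only the TensorFlow variants _preprocess_tf and _postprocess_tf are shown). A minimal sketch of what they could look like, assuming the SSD NMS plugin's output layout of 7 floats per detection (image index, class, confidence, then normalized x1, y1, x2, y2); a DetectNet_v2 or FasterRCNN engine produces different output tensors and needs its own post-processing:

def _preprocess_trt(img, shape=(300, 300)):
    """Preprocess an image before TRT SSD inferencing."""
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, shape)
    img = img.transpose((2, 0, 1)).astype(np.float32)  # HWC -> CHW
    img = img * (2.0 / 255.0) - 1.0                    # scale to [-1, 1]
    return img


def _postprocess_trt(img, output, conf_th, output_layout):
    """Parse TRT SSD output: one detection per `output_layout` floats."""
    img_h, img_w, _ = img.shape
    boxes, confs, clss = [], [], []
    for prefix in range(0, len(output), output_layout):
        conf = float(output[prefix + 2])
        if conf < conf_th:
            continue
        x1 = int(output[prefix + 3] * img_w)
        y1 = int(output[prefix + 4] * img_h)
        x2 = int(output[prefix + 5] * img_w)
        y2 = int(output[prefix + 6] * img_h)
        cls = int(output[prefix + 1])
        boxes.append((x1, y1, x2, y2))
        confs.append(conf)
        clss.append(cls)
    return boxes, confs, clss

With those in place, usage would be roughly: engine = Engine('my_model', (300, 300)) followed by boxes, confs, clss = engine.detect(frame), where 'my_model' fills in the 'ssd/TRT_%s.bin' engine path.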

Sorry, please check it yourself.

Hello @LoveNvidia, can you share your final code, or point to an example? I am struggling to do Python-based post-processing of my faster_rcnn inference using the engine file on a Jetson Xavier.
I trained the model using TLT, then converted the .etlt file using tlt-converter. Thanks.