"""Run a TensorRT detection engine on a single image and draw the detections."""

import argparse

import cv2
import numpy as np
import pycuda.autoinit  # noqa: F401  (imported for its side effect: creates a CUDA context)
import pycuda.driver as cuda
import tensorrt as trt

##
## tlt-converter -k tlt_encode -o output_bbox/BiasAdd,output_cov/Sigmoid
##               -d 3,554,960 -c resnet34_peoplenet_pruned.etlt
##               -e peoplenet.engine
##


class TrtDetectNet:
    """Thin wrapper around a serialized TensorRT engine.

    The constructor deserializes the engine file, creates an execution
    context and a CUDA stream, and allocates one page-locked host buffer and
    one device buffer per engine binding.
    """

    def __init__(self, model, input_shape, cuda_ctx=None):
        """Load the engine and allocate its I/O buffers.

        Args:
            model: Path to the serialized TensorRT engine file.
            input_shape: Network input size as (height, width); a
                (channels, height, width) tuple is accepted as well, only the
                last two entries are used.
            cuda_ctx: Optional PyCUDA context. When given it is pushed for
                the duration of the setup and of every inference call.

        Errors raised while creating the execution context or allocating
        buffers are printed, not re-raised (as before); the instance is then
        left without buffers and `runInference` raises `RuntimeError`.
        """
        self.model = model
        self.input_shape = input_shape
        self.cuda_ctx = cuda_ctx

        # Safe defaults so that __del__ and runInference behave sensibly on
        # an instance whose setup failed part-way through.
        self.engine = None
        self.context = None
        self.stream = None
        self.cpu_in, self.cpu_out = [], []
        self.gpu_in, self.gpu_out = [], []
        self.bindings = []

        if self.cuda_ctx:
            self.cuda_ctx.push()
        try:
            self.logger = trt.Logger(trt.Logger.INFO)
            self._load_plugins()
            self.engine = self._load_engine()
            if not self.engine:
                print("No engine")
                return
            print(self.engine.name)
            try:
                self.context = self.engine.create_execution_context()
                self.stream = cuda.Stream()
                (self.cpu_in, self.cpu_out,
                 self.gpu_in, self.gpu_out,
                 self.bindings) = self._allocate_buffers()
            except Exception as e:
                print('Error occured ', e)
        finally:
            # Always undo the push above, including on the "no engine" and
            # exception paths.
            if self.cuda_ctx:
                self.cuda_ctx.pop()

    def __del__(self):
        """Drop the references to the device buffers and the stream."""
        # pop() instead of `del self.x` so a partially initialised instance
        # does not raise AttributeError during garbage collection.
        for name in ('gpu_in', 'stream', 'gpu_out'):
            self.__dict__.pop(name, None)

    def _load_plugins(self):
        """Register the TensorRT plugin library on TensorRT older than 7."""
        # Compare the major version as a number: comparing the first
        # character as a string treats "10.x" as older than "7".
        major = int(trt.__version__.split('.')[0])
        if major < 7:
            # XXX ctypes.CDLL("libflattenconcat.so")
            # NOTE(review): plugin registration is only done for TensorRT < 7
            # here; confirm that skipping it on newer versions is intended.
            trt.init_libnvinfer_plugins(self.logger, '')

    def _load_engine(self):
        """Deserialize the engine stored at `self.model` and return it."""
        TRT_engine = self.model
        with open(TRT_engine, 'rb') as f, trt.Runtime(self.logger) as runtime:
            print("loading engine from []", TRT_engine)
            return runtime.deserialize_cuda_engine(f.read())

    def _allocate_buffers(self):
        """Allocate host and device buffers for every engine binding.

        Returns:
            Tuple `(cpu_in, cpu_out, gpu_in, gpu_out, bindings)`: lists of
            page-locked host arrays and device allocations split into inputs
            and outputs, plus the device addresses of all bindings in engine
            order.
        """
        cpu_in, cpu_out, gpu_in, gpu_out, bindings = [], [], [], [], []
        for binding in self.engine:
            size = trt.volume(self.engine.get_binding_shape(binding)) * \
                self.engine.max_batch_size
            # Every binding is allocated as float32.
            # NOTE(review): assumes all engine bindings are float32 - confirm.
            cpu_mem = cuda.pagelocked_empty(size, np.float32)
            gpu_mem = cuda.mem_alloc(cpu_mem.nbytes)
            bindings.append(int(gpu_mem))
            if self.engine.binding_is_input(binding):
                cpu_in.append(cpu_mem)
                gpu_in.append(gpu_mem)
            else:
                cpu_out.append(cpu_mem)
                gpu_out.append(gpu_mem)
        return cpu_in, cpu_out, gpu_in, gpu_out, bindings

    def _preprocess(self, image, shape=(554, 960)):
        """Convert a BGR image into a float32 CHW tensor scaled to [-1, 1].

        Args:
            image: BGR image as returned by `cv2.imread`.
            shape: Target size as (height, width), matching the `-d 3,554,960`
                engine dimensions; for longer tuples the last two entries are
                used.

        Returns:
            float32 array of shape (3, height, width) in RGB channel order.
        """
        height, width = shape[-2:]
        # cv2.resize expects dsize as (width, height), the reverse of `shape`.
        image = cv2.resize(image, (width, height))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = image.transpose((2, 0, 1)).astype(np.float32)
        image *= (2.0/255.0)
        image -= 1.0
        return image

    def _postprocess(self, image, output, conf_thresh=0.5, output_layout=7):
        """Decode a flat detection buffer into boxes, scores and class ids.

        The buffer is read as consecutive records of `output_layout` values
        where index 1 is the class id, index 2 the confidence and indices
        3..6 are x1, y1, x2, y2, which are multiplied by the image width and
        height.

        NOTE(review): the engine described in the file header exposes
        output_bbox/BiasAdd and output_cov/Sigmoid; confirm that the buffer
        passed here really holds 7-value records.

        Args:
            image: The original image; only its height and width are used.
            output: Flat sequence of detection values.
            conf_thresh: Records with a lower confidence are skipped.
            output_layout: Number of values per record.

        Returns:
            Tuple `(bboxes, confs, classes)` of equally long lists.
        """
        height, width = image.shape[:2]
        bboxes, confs, classes = [], [], []
        # Only walk complete records; a trailing partial record would index
        # past the end of the buffer.
        usable = len(output) - len(output) % output_layout
        for idx in range(0, usable, output_layout):
            conf = float(output[idx+2])
            if conf < conf_thresh:
                continue
            # get bounding box in pixel coordinates
            x1 = int(output[idx+3] * width)
            y1 = int(output[idx+4] * height)
            x2 = int(output[idx+5] * width)
            y2 = int(output[idx+6] * height)
            # get detection class
            objClass = int(output[idx+1])
            bboxes.append((x1, y1, x2, y2))
            classes.append(objClass)
            confs.append(conf)
            print("Class: ", objClass)
            print("Conf: ", conf)
            print("BBox: ", x1, y1, x2, y2)
        return bboxes, confs, classes

    def runInference(self, image, conf_thresh=0.5):
        """Run the engine on one BGR image and return the decoded detections.

        Args:
            image: BGR image as returned by `cv2.imread`.
            conf_thresh: Minimum confidence for a detection to be returned.

        Returns:
            Tuple `(bboxes, confs, classes)` as produced by `_postprocess`.

        Raises:
            RuntimeError: If the engine or its buffers were not set up.
        """
        if self.context is None or not self.cpu_in or not self.cpu_out:
            raise RuntimeError("TensorRT engine is not initialised")

        data = self._preprocess(image, self.input_shape)
        np.copyto(self.cpu_in[0], data.ravel())

        if self.cuda_ctx:
            self.cuda_ctx.push()
        try:
            cuda.memcpy_htod_async(self.gpu_in[0], self.cpu_in[0], self.stream)
            self.context.execute_async(batch_size=1,
                                       bindings=self.bindings,
                                       stream_handle=self.stream.handle)
            # Copy back every output binding instead of assuming exactly two.
            for host_mem, device_mem in zip(self.cpu_out, self.gpu_out):
                cuda.memcpy_dtoh_async(host_mem, device_mem, self.stream)
            self.stream.synchronize()
        finally:
            # Keep the context stack balanced even when a CUDA call fails.
            if self.cuda_ctx:
                self.cuda_ctx.pop()

        output = self.cpu_out[0]
        return self._postprocess(image, output, conf_thresh)


def parse_args():
    """Parse the command line and return the resulting namespace."""
    desc = ("Run peoplenet on the given image")
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('-i', '--input', type=str, required=True,
                        help='path of the image to run detection on')
    parser.add_argument('-o', '--output', type=str, required=True,
                        help='path the annotated image is written to')
    parser.add_argument('-m', '--model', type=str, default='peoplenet.engine',
                        help='path of the serialized TensorRT engine')
    args = parser.parse_args()
    return args


def draw_boundingbox(image, boxes, confs, classes):
    """Draw one rectangle per detection onto `image` and return it.

    Args:
        image: Image to draw on; it is modified in place.
        boxes: Sequence of (x_min, y_min, x_max, y_max) pixel coordinates.
        confs: Confidences, one per box (not drawn).
        classes: Class ids, one per box (printed only).
    """
    for bb, _cf, cl in zip(boxes, confs, classes):
        cl = int(cl)
        x_min, y_min, x_max, y_max = bb[0], bb[1], bb[2], bb[3]
        print(cl, x_min, y_min, x_max, y_max)
        cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)
    return image


def main():
    """Detect objects in the input image and write the annotated result."""
    args = parse_args()
    in_file = args.input
    out_file = args.output

    # cv2.imread signals failure by returning None instead of raising.
    image = cv2.imread(in_file)
    if image is None:
        raise SystemExit("Could not read input image: {}".format(in_file))

    trt_engine = TrtDetectNet(args.model, (554, 960))
    boxes, confs, classes = trt_engine.runInference(image)
    out = draw_boundingbox(image, boxes, confs, classes)

    # cv2.imwrite reports failure through its return value.
    if not cv2.imwrite(out_file, out):
        raise SystemExit("Could not write output image: {}".format(out_file))


if __name__ == '__main__':
    main()