import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
import tensorrt as trt
from PIL import Image

TRT_ENGINE_PATH = "peoplenet.engine"

###########################
### AUXILIARY FUNCTIONS ###
###########################

class HostDeviceMem(object):
    """Pairs a pagelocked host buffer with its device allocation for one binding."""

    def __init__(self, host_mem, device_mem, binding_name, shape):
        self.host = host_mem
        self.device = device_mem
        self.name = binding_name
        self.shape = shape

    def __str__(self):
        return 'Name:\n' + str(self.name) + '\nShape:\n' + str(self.shape) \
            + '\nHost:\n' + str(self.host) + '\nDevice:\n' + str(self.device)

    def __repr__(self):
        return self.__str__()


def allocate_buffers(engine, context):
    """Allocates pagelocked host and device buffers for every engine binding."""
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        idx = engine.get_binding_index(binding)
        shape = context.get_binding_shape(idx)
        size = trt.volume(shape)
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate pagelocked host memory and a device buffer of the same size.
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        bindings.append(int(device_mem))
        if engine.binding_is_input(idx):
            inputs.append(HostDeviceMem(host_mem, device_mem, binding, shape))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem, binding, shape))
    return inputs, outputs, bindings, stream

# Older implicit-batch variant, kept for reference but unused:
# def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
#     # Transfer input data to the GPU.
#     [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
#     # Run inference.
#     context.execute_async(batch_size=batch_size, bindings=bindings, stream_handle=stream.handle)
#     # Transfer predictions back from the GPU.
#     [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
#     # Synchronize the stream.
#     stream.synchronize()
#     # Return only the host outputs.
#     return [out.host for out in outputs]
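# NOTE: `analyze_prediction` below calls a `fetch_prediction_field` helper that
# this file never defines. A minimal sketch is given here, assuming each
# detection is a flat record whose field offsets match TRT_PREDICTION_LAYOUT
# (defined in MAIN; Python resolves the name at call time, so the later
# definition is fine):
def fetch_prediction_field(field_name, detection_out, pred_start_idx):
    # Look up one named field inside a single flattened prediction record.
    return detection_out[pred_start_idx + TRT_PREDICTION_LAYOUT[field_name]]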
def do_inference_v2(context, bindings, inputs, outputs, stream):
    """Runs inference on an explicit-batch engine and returns the host outputs."""
    # Transfer input data to the GPU.
    [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
    # Run inference; explicit-batch engines use execute_async_v2, which takes
    # no batch_size argument.
    context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
    # Transfer predictions back from the GPU and wait for all work to finish.
    [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
    stream.synchronize()
    return [out.host for out in outputs]


def analyze_prediction(detection_out, pred_start_idx, img_pil):
    """Decodes one prediction record and draws it on the image when its
    confidence clears VISUALIZATION_THRESHOLD. VISUALIZATION_THRESHOLD,
    COCO_LABELS, boxes_utils and coco_utils must be provided elsewhere."""
    image_id = int(fetch_prediction_field("image_id", detection_out, pred_start_idx))
    label = int(fetch_prediction_field("label", detection_out, pred_start_idx))
    confidence = fetch_prediction_field("confidence", detection_out, pred_start_idx)
    xmin = fetch_prediction_field("xmin", detection_out, pred_start_idx)
    ymin = fetch_prediction_field("ymin", detection_out, pred_start_idx)
    xmax = fetch_prediction_field("xmax", detection_out, pred_start_idx)
    ymax = fetch_prediction_field("ymax", detection_out, pred_start_idx)
    if confidence > VISUALIZATION_THRESHOLD:
        class_name = COCO_LABELS[label]
        confidence_percentage = "{0:.0%}".format(confidence)
        print("Detected {} with confidence {}".format(class_name, confidence_percentage))
        boxes_utils.draw_bounding_boxes_on_image(
            img_pil,
            np.array([[ymin, xmin, ymax, xmax]]),
            display_str_list=["{}: {}".format(class_name, confidence_percentage)],
            color=coco_utils.COCO_COLORS[label])

############
### MAIN ###
############

TRT_LOGGER = trt.Logger(trt.Logger.INFO)

# Offsets of the individual fields within one flattened prediction record.
TRT_PREDICTION_LAYOUT = {
    "image_id": 0,
    "label": 1,
    "confidence": 2,
    "xmin": 3,
    "ymin": 4,
    "xmax": 5,
    "ymax": 6
}

# Register the TensorRT plugins, then create the runtime needed for loading
# the serialized TensorRT engine from file.
trt.init_libnvinfer_plugins(TRT_LOGGER, '')
trt_runtime = trt.Runtime(TRT_LOGGER)

assert os.path.exists(TRT_ENGINE_PATH)
with open(TRT_ENGINE_PATH, 'rb') as f:
    engine_data = f.read()
trt_engine = trt_runtime.deserialize_cuda_engine(engine_data)

context = trt_engine.create_execution_context()
inputs, outputs, bindings, stream = allocate_buffers(trt_engine, context)

# Load the test image, convert BGR -> RGB, and resize to the resolution the
# engine was built for (cv2.resize takes (width, height)).
image = cv2.imread("image.jpg")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = cv2.resize(image, (640, 480))
img = image  # keep the HWC copy for visualization
# Scale pixels to [-1, 1] and reorder HWC -> CHW before filling the
# pagelocked input buffer.
image = ((image / 127.5) - 1.0).transpose([2, 0, 1])
np.copyto(inputs[0].host, image.ravel())
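# The script above fills the input buffer but never launches inference. A
# hedged sketch of the remaining steps follows, assuming the first host output
# holds flattened 7-float detection records matching TRT_PREDICTION_LAYOUT and
# that VISUALIZATION_THRESHOLD, COCO_LABELS, boxes_utils and coco_utils are
# available:
detection_out = do_inference_v2(context, bindings, inputs, outputs, stream)[0]
img_pil = Image.fromarray(img)
fields_per_pred = len(TRT_PREDICTION_LAYOUT)  # 7 values per detection record
for pred_idx in range(len(detection_out) // fields_per_pred):
    analyze_prediction(detection_out, pred_idx * fields_per_pred, img_pil)
plt.imshow(img_pil)
plt.axis('off')
plt.show()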