import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
import tensorrt as trt
from PIL import Image

TRT_ENGINE_PATH = "peoplenet.engine"

###########################
### AUXILIARY FUNCTIONS ###
###########################

class HostDeviceMem(object):
    """Pairs a pagelocked host buffer with its device allocation for one binding."""

    def __init__(self, host_mem, device_mem, binding_name, shape):
        self.host = host_mem
        self.device = device_mem
        self.name = binding_name
        self.shape = shape

    def __str__(self):
        return 'Name:\n' + str(self.name) + '\nShape:\n' + str(self.shape) \
            + '\nHost:\n' + str(self.host) + '\nDevice:\n' + str(self.device)

    def __repr__(self):
        return self.__str__()


def allocate_buffers(engine, context):
    """Allocates pagelocked host and device buffers for every engine binding."""
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        idx = engine.get_binding_index(binding)
        shape = context.get_binding_shape(idx)
        size = trt.volume(shape)
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate pagelocked host memory and a device buffer of the same size.
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        bindings.append(int(device_mem))
        if engine.binding_is_input(idx):
            inputs.append(HostDeviceMem(host_mem, device_mem, binding, shape))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem, binding, shape))
    return inputs, outputs, bindings, stream

# Older implicit-batch variant, kept for reference but unused:
# def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
#     # Transfer input data to the GPU.
#     [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
#     # Run inference.
#     context.execute_async(batch_size=batch_size, bindings=bindings, stream_handle=stream.handle)
#     # Transfer predictions back from the GPU.
#     [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
#     # Synchronize the stream.
#     stream.synchronize()
#     # Return only the host outputs.
#     return [out.host for out in outputs]
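# NOTE: `analyze_prediction` below calls a `fetch_prediction_field` helper that
# this file never defines. A minimal sketch is given here, assuming each
# detection is a flat record whose field offsets match TRT_PREDICTION_LAYOUT
# (defined in MAIN; Python resolves the name at call time, so the later
# definition is fine):
def fetch_prediction_field(field_name, detection_out, pred_start_idx):
    # Look up one named field inside a single flattened prediction record.
    return detection_out[pred_start_idx + TRT_PREDICTION_LAYOUT[field_name]]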
def do_inference_v2(context, bindings, inputs, outputs, stream):
    """Runs inference on an explicit-batch engine and returns the host outputs."""
    # Transfer input data to the GPU.
    [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
    # Run inference; explicit-batch engines use execute_async_v2, which takes
    # no batch_size argument.
    context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
    # Transfer predictions back from the GPU and wait for all work to finish.
    [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
    stream.synchronize()
    return [out.host for out in outputs]


def analyze_prediction(detection_out, pred_start_idx, img_pil):
    """Decodes one prediction record and draws it on the image when its
    confidence clears VISUALIZATION_THRESHOLD. VISUALIZATION_THRESHOLD,
    COCO_LABELS, boxes_utils and coco_utils must be provided elsewhere."""
    image_id = int(fetch_prediction_field("image_id", detection_out, pred_start_idx))
    label = int(fetch_prediction_field("label", detection_out, pred_start_idx))
    confidence = fetch_prediction_field("confidence", detection_out, pred_start_idx)
    xmin = fetch_prediction_field("xmin", detection_out, pred_start_idx)
    ymin = fetch_prediction_field("ymin", detection_out, pred_start_idx)
    xmax = fetch_prediction_field("xmax", detection_out, pred_start_idx)
    ymax = fetch_prediction_field("ymax", detection_out, pred_start_idx)
    if confidence > VISUALIZATION_THRESHOLD:
        class_name = COCO_LABELS[label]
        confidence_percentage = "{0:.0%}".format(confidence)
        print("Detected {} with confidence {}".format(class_name, confidence_percentage))
        boxes_utils.draw_bounding_boxes_on_image(
            img_pil,
            np.array([[ymin, xmin, ymax, xmax]]),
            display_str_list=["{}: {}".format(class_name, confidence_percentage)],
            color=coco_utils.COCO_COLORS[label])

############
### MAIN ###
############

TRT_LOGGER = trt.Logger(trt.Logger.INFO)

# Offsets of the individual fields within one flattened prediction record.
TRT_PREDICTION_LAYOUT = {
    "image_id": 0,
    "label": 1,
    "confidence": 2,
    "xmin": 3,
    "ymin": 4,
    "xmax": 5,
    "ymax": 6
}

# Register the TensorRT plugins, then create the runtime needed for loading
# the serialized TensorRT engine from file.
trt.init_libnvinfer_plugins(TRT_LOGGER, '')
trt_runtime = trt.Runtime(TRT_LOGGER)

assert os.path.exists(TRT_ENGINE_PATH)
with open(TRT_ENGINE_PATH, 'rb') as f:
    engine_data = f.read()
trt_engine = trt_runtime.deserialize_cuda_engine(engine_data)

context = trt_engine.create_execution_context()
inputs, outputs, bindings, stream = allocate_buffers(trt_engine, context)

# Load the test image, convert BGR -> RGB, and resize to the resolution the
# engine was built for (cv2.resize takes (width, height)).
image = cv2.imread("image.jpg")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = cv2.resize(image, (640, 480))
img = image  # keep the HWC copy for visualization
# Scale pixels to [-1, 1] and reorder HWC -> CHW before filling the
# pagelocked input buffer.
image = ((image / 127.5) - 1.0).transpose([2, 0, 1])
np.copyto(inputs[0].host, image.ravel())
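# The script above fills the input buffer but never launches inference. A
# hedged sketch of the remaining steps follows, assuming the first host output
# holds flattened 7-float detection records matching TRT_PREDICTION_LAYOUT and
# that VISUALIZATION_THRESHOLD, COCO_LABELS, boxes_utils and coco_utils are
# available:
detection_out = do_inference_v2(context, bindings, inputs, outputs, stream)[0]
img_pil = Image.fromarray(img)
fields_per_pred = len(TRT_PREDICTION_LAYOUT)  # 7 values per detection record
for pred_idx in range(len(detection_out) // fields_per_pred):
    analyze_prediction(detection_out, pred_idx * fields_per_pred, img_pil)
plt.imshow(img_pil)
plt.axis('off')
plt.show()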