TensorRT output tensor parser

In my project, I use a ResNet-18 network that was trained with the Transfer Learning Toolkit.
However, I need to run it without DeepStream.

How do I convert the output tensors (host_outputs) into bounding boxes in the format [x_min, x_max, y_min, y_max] together with a class ID?

My code:

import cv2
import time
import numpy as np
import tensorrt as trt
import pycuda.autoinit
import pycuda.driver as cuda
import sys

# TensorRT logger shared by the plugin registry and the runtime below.
TRT_LOGGER = trt.Logger(trt.Logger.INFO)
# Register the bundled TensorRT plugins before the engine is deserialized
# (presumably the engine uses plugin layers -- confirm against the model).
trt.init_libnvinfer_plugins(TRT_LOGGER, '')
runtime = trt.Runtime(TRT_LOGGER)

# Per-binding buffers: the host_* / cuda_* lists are indexed in the inference
# code, and `bindings` is the list handed to the execution context.
host_inputs  = []
cuda_inputs  = []
host_outputs = []
cuda_outputs = []
bindings = []

# create engine
engine_path = '/opt/nvidia/deepstream/deepstream-5.0/samples/models/detectnet_v2/resnet18.engine'
with open(engine_path, 'rb') as f:
    buf = f.read()
    engine = runtime.deserialize_cuda_engine(buf)

# deserialize_cuda_engine reports failure by returning None; stop here with a
# clear message instead of failing later when the engine is iterated.
if engine is None:
    raise RuntimeError('Failed to deserialize TensorRT engine: ' + engine_path)

# CUDA stream on which the copies and the inference are enqueued.
stream = cuda.Stream()

# Allocate one page-locked host buffer and one device buffer per engine
# binding, and file each pair under inputs or outputs.
for binding in engine:
    # Element count of the binding for the largest batch the engine supports.
    size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
    # NOTE(review): assumes every binding is float32 -- confirm for this engine.
    host_mem = cuda.pagelocked_empty(size, np.float32)
    cuda_mem = cuda.mem_alloc(host_mem.nbytes)

    # The execution context is given the device address of every binding,
    # in the same order the engine enumerates them.
    bindings.append(int(cuda_mem))
    if engine.binding_is_input(binding):
        host_inputs.append(host_mem)
        cuda_inputs.append(cuda_mem)
    else:
        host_outputs.append(host_mem)
        cuda_outputs.append(cuda_mem)

context = engine.create_execution_context()

# inference
# Fail early with a readable message when the image argument is missing or
# the file cannot be decoded (cv2.imread returns None in that case).
if len(sys.argv) < 2:
    raise SystemExit('usage: {} <image>'.format(sys.argv[0]))
ori = cv2.imread(sys.argv[1])
if ori is None:
    raise SystemExit('could not read image: {}'.format(sys.argv[1]))

# Preprocess: BGR -> RGB, resize to 512x512, rescale pixel values with
# 2/255 * x - 1, then move the channel axis first.
# NOTE(review): the 512x512 size must match the engine's input binding -- confirm.
image = cv2.cvtColor(ori, cv2.COLOR_BGR2RGB)
image = cv2.resize(image, (512, 512))
image = (2.0/255.0) * image - 1.0
image = image.transpose((2, 0, 1))
np.copyto(host_inputs[0], image.ravel())

start_time = time.time()
cuda.memcpy_htod_async(cuda_inputs[0], host_inputs[0], stream)
context.execute_async(bindings=bindings, stream_handle=stream.handle)
# Copy every output binding back to its host buffer.
for host_out, cuda_out in zip(host_outputs, cuda_outputs):
    cuda.memcpy_dtoh_async(host_out, cuda_out, stream)
# Everything above was only enqueued on the stream; wait for it to finish so
# that the timing is meaningful and host_outputs actually holds the results.
stream.synchronize()
print("execute times "+str(time.time()-start_time))

