I'm trying to run network inference using TensorRT on a Jetson Nano.
However, the outputs of the TensorFlow model and of the TensorRT model are very different.
How can I get the correct output?
TensorFlow code:
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.python.platform import gfile
# Reference run: load the frozen TensorFlow graph, push one image through it,
# and dump both the input tensor and the network output to text files.
with tf.Graph().as_default():
    # GFile is the non-deprecated equivalent of FastGFile.
    with gfile.GFile("landmarks_5points.pb", 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    tf.import_graph_def(graph_def, name='')

    # Preprocess: 128x128, RGB channel order, float32, no scaling.
    image_path = "627.jpg"
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising when the file is unreadable.
        raise IOError("Could not read image: " + image_path)
    image = cv2.resize(image, (128, 128))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = image.astype(np.float32)

    # Batch of one image, laid out as (batch, height, width, channel).
    input_data = np.empty((1, 128, 128, 3), dtype=np.float32)
    input_data[0] = image

    # Save input data as space-separated values (trailing space kept).
    with open("input_pb.txt", "w") as dump:
        dump.write("".join(str(value) + " " for value in input_data.ravel()))

    # The session is created inside the graph context so that it sees the
    # imported nodes; the context manager closes it when inference is done.
    with tf.Session() as sess:
        input_x = sess.graph.get_tensor_by_name("input_image_tensor:0")
        output = sess.graph.get_tensor_by_name("logits/BiasAdd:0")
        arr_output = sess.run(output, feed_dict={input_x: input_data})

    # Save output data for the single image in the batch.
    arr_output = arr_output[0]
    with open("output_pb.txt", "w") as dump:
        dump.write("".join(str(value) + " " for value in arr_output.ravel()))
TensorRT code:
import time
import numpy as np
import os
import cv2
import graphsurgeon as gs
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit
class HostDeviceMem(object):
    """Pair a host-side buffer with its matching device-side allocation."""

    def __init__(self, host_mem, device_mem):
        """Store the two buffers.

        Args:
            host_mem: Buffer on the host side; exposed as ``self.host``.
            device_mem: Allocation on the device side; exposed as
                ``self.device``.
        """
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        """Return a two-section description of the host and device members."""
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)

    def __repr__(self):
        """Return the same text as ``str(self)``."""
        return self.__str__()
class ModelTRT:
    """Run inference with a serialized TensorRT engine (a ``.plan`` file).

    One pair of host/device buffers is allocated per engine binding at
    construction time and reused by every ``predict`` call.
    """

    def __init__(self, plan_filename):
        """Deserialize the engine and allocate buffers for all bindings.

        Args:
            plan_filename: Path to the serialized TensorRT engine.

        Raises:
            RuntimeError: If the engine cannot be deserialized.
        """
        TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
        self.runtime = trt.Runtime(TRT_LOGGER)
        with open(plan_filename, "rb") as f:
            self.engine = self.runtime.deserialize_cuda_engine(f.read())
        if self.engine is None:
            # Fail with a clear message instead of a TypeError in the loop below.
            raise RuntimeError(
                "Failed to deserialize TensorRT engine: " + str(plan_filename))

        self.inputs = []
        self.outputs = []
        self.bindings = []
        for binding in self.engine:
            size = (trt.volume(self.engine.get_binding_shape(binding))
                    * self.engine.max_batch_size)
            # Take the element type from the binding itself rather than
            # assuming float32, so the buffer size matches what the engine
            # reads and writes.
            dtype = trt.nptype(self.engine.get_binding_dtype(binding))
            # Allocate host and device buffers.
            host_mem = cuda.pagelocked_empty(size, dtype)
            device_mem = cuda.mem_alloc(host_mem.nbytes)
            # Append the device buffer to device bindings.
            self.bindings.append(int(device_mem))
            # Append to the appropriate list.
            if self.engine.binding_is_input(binding):
                self.inputs.append(HostDeviceMem(host_mem, device_mem))
            else:
                self.outputs.append(HostDeviceMem(host_mem, device_mem))

        self.stream = cuda.Stream()
        self.context = self.engine.create_execution_context()

    def predict(self, img):
        """Run one inference pass and return the output buffers.

        Args:
            img: Array whose first dimension is the batch size. It is
                flattened in C order and copied into the first input buffer,
                so the caller is responsible for supplying the element
                order the engine expects.

        Returns:
            The list of output ``HostDeviceMem`` objects. Their ``host``
            arrays are the same buffers on every call, so copy the data
            before calling ``predict`` again if it must be kept.
        """
        np.copyto(self.inputs[0].host, img.ravel())
        for inp in self.inputs:
            cuda.memcpy_htod(inp.device, inp.host)
        self.context.execute(batch_size=img.shape[0],
                             bindings=self.bindings)
        for out in self.outputs:
            cuda.memcpy_dtoh(out.host, out.device)
        return self.outputs
# Run the same image through the TensorRT engine once per candidate axis
# order, dumping every input/output pair to its own text file.
path = "627.jpg"
image = cv2.imread(path)
if image is None:
    # cv2.imread returns None instead of raising when the file is unreadable.
    raise IOError("Could not read image: " + path)
# The third positional parameter of cv2.cvtColor is the destination array,
# not the channel count, so the stray `3` is not passed.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = cv2.resize(image, (128, 128))
models = ModelTRT('landmarks_5points.plan')

# Batch of one image, laid out as (batch, height, width, channel).
input_data = np.empty((1, 128, 128, 3), dtype=np.float32)
input_data[0] = image
back_input_data = input_data

# Axis orders tried in turn; None keeps the array exactly as built above.
axis_orders = [None, (0, 2, 1, 3), (0, 3, 1, 2), (0, 3, 2, 1)]
for j, axes in enumerate(axis_orders):
    if axes is not None:
        input_data = np.transpose(back_input_data, axes)

    # NOTE(review): indentation was lost in the paste; this assumes the line
    # separator is written once after all values, not after each value.
    with open("input_trt.txt" + "__" + str(j), "w") as dump:
        dump.write("".join(str(value) + " " for value in input_data.ravel()))
        dump.write(os.linesep)

    output = models.predict(input_data)
    arr_output = output[0].host
    with open("output_trt.txt" + "__" + str(j), "w") as dump:
        dump.write("".join(str(value) + " " for value in arr_output.ravel()))
TensorFlow version: 1.13.0-rc0
TensorRT version: 5.1.6.1
Tensorflow pb file: https://drive.google.com/file/d/1Xmfs4Klgbg-IItamTRMqcoZIy4pXRb-c/view?usp=sharing
Tensorrt plan file: https://drive.google.com/file/d/14xlFH3MZJeGizbI5KCG9VJk7QYAfw_l8/view?usp=sharing
Image file: https://drive.google.com/file/d/1qGx6qNAlJ38dqAUWxyZgWIz6FH2-L4Ql/view?usp=sharing
The examples at https://github.com/NVIDIA-AI-IOT/tf_to_trt_image_classification work correctly.