Hello,

Thanks for the response. I managed to find out what was causing the #15 error. Apologies, it was not clear to me that I should have read the .uff file as a binary before passing it into the TensorRT engine. I did that and the standard TensorRT started working. I was trying to compare the layers as you asked and I got a bit confused. I originally thought that you wanted me to compare the weights between TensorRT and TensorFlow at each layer until I noticed a difference between them. However, when I do the following:

```
...
# Register an intermediate layer as an extra engine output before building.
parser.register_output("layer1")
...
# Run inference, then copy that layer's device buffer back to the host.
context.enqueue(1, bindings, stream.handle, None)
cuda.memcpy_dtoh_async(h_layer1, d_layer1, stream)
# Compare result between TensorRT and TensorFlow here
```

No matter what I change the layer name to, the weights are the same in TensorRT. Is that the expected result? I wrote the comparison code for the provided Lenet model in TensorRT3 and this is what happens as well. My code is below:

```
import tensorrt as trt
import pycuda.driver as cuda
from tensorrt.parsers import uffparser
from PIL import Image
import numpy as np
import tensorflow as tf
FLAGS = tf.flags.FLAGS
def isclose(a, b, rel_tol=1e-05, abs_tol=0.00003):
    """Return True when a and b agree within a relative OR absolute tolerance."""
    # Tolerance is the larger of: rel_tol scaled by the bigger magnitude,
    # or the absolute floor abs_tol (handles values near zero).
    threshold = max(rel_tol * max(abs(a), abs(b)), abs_tol)
    return abs(a - b) <= threshold
def compare_arrays(array1, array2):
    """Element-wise approximate comparison of two 1-D sequences.

    Returns False on a length mismatch or if ANY element pair differs beyond
    tolerance; True otherwise (including two empty sequences).

    Bug fixed: the original overwrote `status` on every iteration, so only the
    LAST element pair decided the result, and empty equal-length inputs raised
    UnboundLocalError because `status` was never assigned.
    """
    import math  # local import keeps this snippet self-contained
    if len(array1) != len(array2):
        return False
    # rel_tol/abs_tol mirror the module-level isclose() defaults above;
    # math.isclose implements the exact same max(rel, abs) formula.
    return all(
        math.isclose(x, y, rel_tol=1e-05, abs_tol=0.00003)
        for x, y in zip(array1, array2)
    )
def load_graph(model_file):
    """Deserialize a frozen GraphDef from *model_file* into a new tf.Graph."""
    # Read the serialized protobuf first, then parse it.
    with open(model_file, "rb") as handle:
        serialized = handle.read()
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(serialized)
    # Import the parsed GraphDef into a fresh graph (nodes get "import/" prefix).
    loaded = tf.Graph()
    with loaded.as_default():
        tf.import_graph_def(graph_def)
    return loaded
def normalize(data):
    """Scale 8-bit pixel values into [0, 1] and invert them.

    Each image is provided as a 3-D numpy array (as passed to the inference
    function). The transform is x -> 1.0 - x/255.0, identical to the original
    per-image loop, but vectorized over the whole batch — the original
    pre-allocated a fixed (1, 28, 28) scratch buffer (seeded with np.arange,
    an odd choice since every slot was overwritten) and broke for batch != 1.

    Returns a float32 array with the same shape as *data*.
    """
    data = np.asarray(data, dtype=np.float32)
    return 1.0 - data / 255.0
# frozen_graph=open("/usr/local/TensorRT-3.0.1/data/mnist/lenet5_mnist_frozen.pb",'rb').read()
# Read the UFF model as raw binary — passing text contents causes error #15.
uff_model=open("/raid/nri/Classification_task/Exported_uff_files/new_lenet.uff",'rb').read()
# frozen_graph = tf.graph_util.remove_training_nodes(frozen_graph)
# uff_model=uff.from_tensorflow(frozen_graph,output_filename="/raid/nri/Classification_task/TensorRt_text_files/lenet_uff",output_nodes=["out"],text=True)
G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR)
# Declare the network I/O for the UFF parser: input "in" is CHW (1, 28, 28);
# "wc2" is the intermediate layer whose activations we want to inspect.
# NOTE(review): layer name taken from the exported .pbtxt — confirm it names
# a layer output (not a weight tensor) in the UFF graph.
parser = uffparser.create_uff_parser()
parser.register_input("in", (1,28,28),0)
parser.register_output("wc2")
# Build the engine: max batch size 1, 1 MiB workspace.
engine = trt.utils.uff_to_trt_engine(G_LOGGER,uff_model,parser,1,1 << 20)
#host_mem = parser.hidden_plugin_memory()
parser.destroy()
output_layer = 'wc2'
# Load frozen model (TF)
graph = load_graph("/usr/local/TensorRT-3.0.1/data/mnist/lenet5_mnist_frozen.pb")
# import_graph_def prefixes every node with "import/".
input_name = "import/" + "in"
output_name = "import/" + output_layer
input_operation = graph.get_operation_by_name(input_name)
output_operation = graph.get_operation_by_name(output_name)
def main(_):
    """Run each MNIST digit (0-9) through TensorFlow and TensorRT and compare.

    Fix: the original created the TensorRT runtime, execution context, device
    buffers and CUDA stream INSIDE the per-digit loop but called destroy()
    only once after the loop — leaking GPU resources for nine of the ten
    iterations. All of those are loop-invariant, so they are created once here.
    """
    runtime = trt.infer.create_infer_runtime(G_LOGGER)
    context = engine.create_execution_context()
    # Host/device buffers: input is a fixed 1 x 28 x 28 float32 image,
    # output is the 10-way result vector copied back from the engine.
    output = np.empty(10, dtype=np.float32)
    d_input = cuda.mem_alloc(1 * 28 * 28 * np.dtype(np.float32).itemsize)
    d_output = cuda.mem_alloc(output.size * output.dtype.itemsize)
    bindings = [int(d_input), int(d_output)]
    stream = cuda.Stream()
    with tf.Session(graph=graph) as sess:
        for c in range(10):
            im = Image.open("/usr/local/TensorRT-3.0.1/data/mnist/" + str(c) + ".pgm")
            im_n = normalize(np.array(im).reshape(1, 28, 28))
            # TensorFlow reference: reshape to HWC and add a batch dimension.
            normalized_im_o = im_n.reshape(28, 28, 1)
            arr = tf.expand_dims(normalized_im_o, [0])
            arr_final = arr.eval()
            results_tf = sess.run(output_operation.outputs[0],
                                  {input_operation.outputs[0]: arr_final})
            results = np.squeeze(results_tf)
            # TensorRT: copy input up, enqueue batch of 1, copy output back.
            cuda.memcpy_htod_async(d_input, im_n, stream)
            context.enqueue(1, bindings, stream.handle, None)
            cuda.memcpy_dtoh_async(output, d_output, stream)
            # Wait for the async copies/kernel before reading `output`.
            stream.synchronize()
            print("Tensorflow " + str(results))
            print("TensorRT " + str(output))
            print(str(compare_arrays(results, output)))
    context.destroy()
    engine.destroy()
    runtime.destroy()
if __name__ == '__main__':
    tf.app.run()
```

“output” is always the same in TensorRT, no matter what layer name is used, and always corresponds to the result at the final “output_node” of the TensorFlow model. I am not sure if that is correct or not. I chose the layer names based on the .pbtxt file I get from:

```
# Export the frozen TensorFlow graph to UFF; text=True also writes the
# human-readable .pbtxt used to look up layer names.
uff_model=uff.from_tensorflow(frozen_graph,output_filename=uff_model,output_nodes=["out"],text=True)
```

Please let me know if the weights in the .uff files are meant to be the same for most layers. If not, can you tell me how to get the layer names that will give me the appropriate weights?

Thanks as always