Hi, I have an object detection problem and want to optimise the model using TensorRT 4.0, i.e. convert the frozen graph into a TensorRT engine.
Code
import keras
import keras.backend as K
import tensorflow as tf
import uff
# Script: freeze the current Keras/TensorFlow session graph, convert it to UFF,
# build a TensorRT engine from it, run one inference and serialize the engine.
#
# NOTE(review): `trt`, `uffparser`, `cuda`, `np`, `img` and `label` are used
# below but are not defined anywhere in this snippet -- presumably they are
# imported/defined elsewhere; confirm before running.

# Names of the graph nodes to keep as outputs when freezing and when
# converting to UFF.
# NOTE(review): convert_variables_to_constants asserts that every name listed
# here is a node of `sess.graph_def`. The parser further down registers
# "features" as the output instead -- confirm which node name the graph in
# this session actually exposes.
output_names = ["detection_classes"]
frozen_graph_filename = 'mars-small128.pb'

# NOTE(review): no model is built or loaded in this snippet before the session
# graph is frozen -- verify that the session graph is populated at this point.
sess = K.get_session()

# freeze graph and remove training nodes
graph_def = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, output_names)
graph_def = tf.graph_util.remove_training_nodes(graph_def)

# write frozen graph to file
with open(frozen_graph_filename, 'wb') as f:
    # The with-statement closes the file on exit; no explicit close() needed.
    f.write(graph_def.SerializeToString())

# convert frozen graph to uff
uff_model = uff.from_tensorflow_frozen_model(frozen_graph_filename, output_names)

# Build the TensorRT engine from the UFF model.
G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR)
parser = uffparser.create_uff_parser()
# NOTE(review): "imagess" may be a typo for the graph's input node name --
# confirm against the actual graph.
parser.register_input("imagess", (128, 64, 3), 0)
parser.register_output("features")
# Arguments 1 and 1 << 20 are presumably max batch size and max workspace
# size in bytes (1 MiB) -- TODO confirm against the TensorRT 4 API.
engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, parser, 1, 1 << 20)
parser.destroy()

runtime = trt.infer.create_infer_runtime(G_LOGGER)
context = engine.create_execution_context()

# Host buffer that receives the network output (10 float32 values).
output = np.empty(10, dtype=np.float32)

# Allocate device memory
d_input = cuda.mem_alloc(1 * img.nbytes)
d_output = cuda.mem_alloc(1 * output.nbytes)
bindings = [int(d_input), int(d_output)]
stream = cuda.Stream()

# Transfer input data to device
cuda.memcpy_htod_async(d_input, img, stream)
# Execute model
context.enqueue(1, bindings, stream.handle, None)
# Transfer predictions back
cuda.memcpy_dtoh_async(output, d_output, stream)
# Synchronize threads: wait for the queued copies and the inference to finish
# before reading `output` on the host.
stream.synchronize()

print("Test Case: " + str(label))
print("Prediction: " + str(np.argmax(output)))

# Serialize the built engine to disk.
trt.utils.write_engine_to_file("./tf_mnist.engine", engine.serialize())
Error
AssertionError Traceback (most recent call last)
in ()
9
10 # freeze graph and remove training nodes
—> 11 graph_def = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, output_names)
12 graph_def = tf.graph_util.remove_training_nodes(graph_def)
13
/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/graph_util_impl.py in convert_variables_to_constants(sess, input_graph_def, output_node_names, variable_names_whitelist, variable_names_blacklist)
230 # This graph only includes the nodes needed to evaluate the output nodes, and
231 # removes unneeded nodes like those involved in saving and assignment.
→ 232 inference_graph = extract_sub_graph(input_graph_def, output_node_names)
233
234 found_variables = {}
/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/graph_util_impl.py in extract_sub_graph(graph_def, dest_nodes)
172 name_to_input_name, name_to_node, name_to_seq_num = _extract_graph_summary(
173 graph_def)
→ 174 _assert_nodes_are_present(name_to_node, dest_nodes)
175
176 nodes_to_keep = _bfs_for_reachable_nodes(dest_nodes, name_to_input_name)
/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/graph_util_impl.py in _assert_nodes_are_present(name_to_node, nodes)
131 """Assert that nodes are present in the graph."""
132 for d in nodes:
--> 133 assert d in name_to_node, "%s is not in graph" % d
134
135
AssertionError: detection_classes is not in graph
However, these nodes do exist in my graph (they are listed below), and they are also present in Google's pretrained object detection models. I tried each of them as the output node, and every one generates the same error:
" 'num_detections', 'detection_boxes', 'detection_scores',\n",
" 'detection_classes', 'detection_masks'\n",
You can see my frozen graph,
and the file from which I created the frozen graph.
System information
Linux: 16
CUDA: 9.0
Driver: 390.87
cuDNN: 7.1
TensorRT: 4
Python: 3.5
GPU: GTX 1080
TensorFlow: 1.7.1
Thanks