Provide details on the platforms you are using:
Linux distro and version: Ubuntu 16.04
GPU type: GeForce GTX 1080 Ti
nvidia driver version:
CUDA version: 9.0 (Cuda compilation tools, release 9.0, V9.0.176)
CUDNN version:
Python version [if using python]: Python 3.5
Tensorflow version: TF 1.12
TensorRT version: TRT 5.0.2.6
If Jetson, OS, hw versions: N/A (running on a desktop for now)
Describe the problem
I've succeeded in creating an LSTM using TensorFlow (see the RNN method below).
Then I used the UFF converter on the frozen model to generate a .uff file.
Finally, I tried to create an inference engine so I can run the model through TensorRT.
(This was done on my desktop, but the goal is to prove that the same workflow is feasible on a Jetson Xavier.)
I don't understand where the ExpandDims error comes from, since I never use that op when building my network. Is there maybe an option to set in tf.nn.rnn_cell.LSTMCell?
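My suspicion is that the op is introduced by static_rnn itself when it builds the default zero state (the failing node is named rnn/LSTMCellZeroState/ExpandDims_2), not by anything I call explicitly. Here is a minimal sketch of what I could try, assuming that is the cause: passing an explicit initial state so the zero-state nodes are never created. BATCH_SIZE below is a placeholder constant known at graph-construction time.

# Sketch only: build the initial LSTM state from fixed-shape zero tensors
# instead of letting static_rnn call cell.zero_state(), which (I assume)
# generates the LSTMCellZeroState/ExpandDims nodes the UFF parser rejects.
c0 = tf.zeros([BATCH_SIZE, N_HIDDEN_UNITS], dtype=tf.float32)
h0 = tf.zeros([BATCH_SIZE, N_HIDDEN_UNITS], dtype=tf.float32)
init_state = tf.nn.rnn_cell.LSTMStateTuple(c0, h0)
outputs, states = tf.contrib.rnn.static_rnn(rnn_cell, x, initial_state=init_state)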
Thanks in advance for your help :)
The engine creation fails with the following error:
python3 test.py
[TensorRT] ERROR: UFFParser: Validator error: rnn/LSTMCellZeroState/ExpandDims_2: Unsupported operation _ExpandDims
[TensorRT] ERROR: Network must have at least one output
Traceback (most recent call last):
File "test.py", line 28, in <module>
with build_engine_uff(uff_file) as engine:
AttributeError: __exit__
zsh: exit 1 python3 test.py
python3 test.py 2,73s user 3,03s system 250% cpu 2,300 total
LSTM Creation code:
import tensorflow as tf

def RNN(x):
    w = {
        'hidden': tf.Variable(tf.random_normal([N_FEATURES, N_HIDDEN_UNITS])),
        'output': tf.Variable(tf.random_normal([N_HIDDEN_UNITS, N_CLASSES]))
    }
    biases = {
        'hidden': tf.Variable(tf.random_normal([N_HIDDEN_UNITS], mean=1.0)),
        'output': tf.Variable(tf.random_normal([N_CLASSES]))
    }

    # (batch, time, features) -> (time, batch, features), then flatten to (time*batch, features)
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, N_FEATURES])

    # Hidden projection, then split back into a list of N_TIME_STEPS tensors
    x = tf.nn.relu(tf.matmul(x, w['hidden']) + biases['hidden'])
    x = tf.split(x, N_TIME_STEPS, 0)

    # 1-layer LSTM with N_HIDDEN_UNITS units.
    rnn_cell = tf.nn.rnn_cell.LSTMCell(N_HIDDEN_UNITS, name='basic_lstm_cell',
                                       forget_bias=0, state_is_tuple=True)

    # Generate predictions; only the last output of the sequence is kept.
    outputs, states = tf.contrib.rnn.static_rnn(rnn_cell, x, dtype=tf.float32)
    return tf.matmul(outputs[-1], w['output']) + biases['output']
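For context, here is a minimal sketch of how I assume the surrounding graph is wired (N_TIME_STEPS=200 and N_FEATURES=9, matching the (200, 9) shape I register with the parser below; the node names 'input' and 'prediction' are the ones I reference later, this is an illustration rather than my exact training script):

# Sketch of the assumed graph wiring (illustrative; names match what is
# registered with the UFF converter/parser below).
N_TIME_STEPS = 200   # sequence length (assumption, matches parser input shape)
N_FEATURES = 9       # features per time step (assumption, matches parser input shape)
X = tf.placeholder(tf.float32, [None, N_TIME_STEPS, N_FEATURES], name='input')
prediction = tf.identity(RNN(X), name='prediction')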
pb to uff file conversion:
python3 /usr/lib/python3.5/dist-packages/uff/bin/convert_to_uff.py tensorflow -o /path/to/graph.uff --input-file /path/to/frozen_graph.pb -O prediction
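(I believe the same conversion can also be done from Python with the uff package; a minimal sketch with placeholder paths, using the same output node name:)

import uff

# Sketch: convert the frozen graph to a UFF buffer via the Python API.
uff_buffer = uff.from_tensorflow_frozen_model(
    "/path/to/frozen_graph.pb", output_nodes=["prediction"])
with open("/path/to/graph.uff", "wb") as f:
    f.write(uff_buffer)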
Inference engine creation code:
import tensorrt as trt
import common  # helper module from the TensorRT Python samples (provides GiB)

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

def build_engine_uff(model_file):
    # You can set the logger severity higher to suppress messages (or lower to display more messages).
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
        # Workspace size is the maximum amount of memory available to the builder while building an engine.
        # It should generally be set as high as possible.
        builder.max_workspace_size = common.GiB(1)
        # We need to manually register the input and output nodes for UFF.
        parser.register_input("input", (200, 9))
        parser.register_output("y_")
        # Load the UFF model and parse it in order to populate the TensorRT network.
        parser.parse(model_file, network)
        # Build and return an engine.
        return builder.build_cuda_engine(network)

uff_file = "/media/leonard/Data1/Elter_Projects/Malin/dev/Exported_Model/graph.uff"
#uff_file = uff.from_tensorflow_frozen_model("/media/leonard/Data1/Elter_Projects/Malin/frozen_graph.pb", output_nodes=['prediction'])

with build_engine_uff(uff_file) as engine:
    # Inference is the same regardless of which parser is used to build the engine, since the model architecture is the same.
    with open("sample.engine", "wb") as f:
        f.write(engine.serialize())
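For debugging, I might also check the return value of parser.parse inside build_engine_uff (I assume it returns False when an unsupported op such as ExpandDims is encountered, which would explain the follow-up "Network must have at least one output" error):

# Sketch: fail fast if the UFF parse step rejected the graph
# (replaces the bare parser.parse(model_file, network) call above).
if not parser.parse(model_file, network):
    raise RuntimeError("UFF parsing failed; see TensorRT errors above")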
Just in case it has an impact, here is my freezing method:
from tensorflow.python.framework import graph_util

def freeze_graph(meta_model_dir, output_node_name, pb_model_dir):
    # We retrieve our checkpoint fullpath
    try:
        checkpoint = tf.train.get_checkpoint_state(meta_model_dir)
        input_checkpoint = checkpoint.model_checkpoint_path
        print("[INFO] input_checkpoint:", input_checkpoint)
    except:
        input_checkpoint = meta_model_dir
        print("[INFO] Model folder", meta_model_dir)

    # We clear devices to allow TensorFlow to control on which device it will load operations
    clear_devices = True

    # We import the meta graph and retrieve a Saver
    saver = tf.train.import_meta_graph(input_checkpoint + '.meta', clear_devices=clear_devices)

    # We retrieve the protobuf graph definition
    graph = tf.get_default_graph()
    input_graph_def = graph.as_graph_def()

    # We start a session and restore the graph weights
    with tf.Session() as sess:
        saver.restore(sess, input_checkpoint)

        # We use a built-in TF helper to export variables to constants
        output_graph_def = graph_util.convert_variables_to_constants(
            sess,                         # The session is used to retrieve the weights
            input_graph_def,              # The graph_def is used to retrieve the nodes
            output_node_name.split(",")   # The output node names are used to select the useful nodes
        )

        # Finally we serialize and dump the output graph to the filesystem
        with tf.gfile.GFile(pb_model_dir, "wb") as f:
            f.write(output_graph_def.SerializeToString())
        print("%d ops in the final graph." % len(output_graph_def.node))

    print("[INFO] output_graph:", pb_model_dir)
    print("[INFO] all done")