I am trying to use TensorRT-4.0.1.6 to perform inference on a fast-rcnn model that I have trained with tensorpack.
I am able to freeze the TensorFlow graph, but when I convert it with trt.create_inference_graph and use it for inference, I get the following error:
2019-01-31 11:57:08.651514: E tensorflow/contrib/tensorrt/log/trt_logger.cc:38] DefaultLogger Parameter check failed at: ../builder/Network.cpp::addInput::364, condition: isValidDims(dims)
2019-01-31 11:57:08.651541: W tensorflow/contrib/tensorrt/convert/convert_graph.cc:418] subgraph conversion error for subgraph_index:13 due to: "Invalid argument: Failed to create Input layer" SKIPPING......( 4 nodes)
Here is the program I am trying to run:
#coding:utf-8
import cv2
import argparse
import tensorflow as tf
import time
import numpy as np
import tensorflow.contrib.tensorrt as trt
from graph_utils import force_nms_cpu as f_force_nms_cpu
from graph_utils import replace_relu6 as f_replace_relu6
from graph_utils import remove_assert as f_remove_assert
def resize_image(img, size=600, max_size=1024):
    """Resize ``img`` so its shorter side is ``size``, keeping the aspect ratio.

    If the longer side would then exceed ``max_size``, both sides are scaled
    down again so that the longer side equals ``max_size``.

    Args:
        img: image array; only ``img.shape[:2]`` (height, width) is inspected.
        size: target length of the shorter side.
        max_size: upper bound for the longer side.

    Returns:
        The result of ``cv2.resize`` with bilinear interpolation.
    """
    height, width = img.shape[:2]
    ratio = size * 1.0 / min(height, width)

    # Pin the shorter side to `size`; the other side follows the same ratio.
    target_h, target_w = (
        (size, ratio * width) if height < width else (ratio * height, size)
    )

    # Shrink both sides uniformly when the longer one overshoots the cap.
    longest = max(target_h, target_w)
    if longest > max_size:
        shrink = max_size * 1.0 / longest
        target_h = target_h * shrink
        target_w = target_w * shrink

    # Round to the nearest pixel; cv2.resize takes the size as (width, height).
    dsize = (int(target_w + 0.5), int(target_h + 0.5))
    return cv2.resize(img, dsize, interpolation=cv2.INTER_LINEAR)
def inference(graph_path, mode,
              input_file="./samples/201804191100300605810503418731_18_1524103754819.jpg",
              num_samples=10):
    """Run a frozen inference graph on one image and print the average runtime.

    The frozen ``GraphDef`` at ``graph_path`` is loaded and, when ``mode`` is
    ``'FP32'`` or ``'FP16'``, passed through ``trt.create_inference_graph``
    before being imported. Any other ``mode`` imports the frozen graph as is.
    The image is resized with ``resize_image`` and fed to ``image:0``; the
    tensors ``output/scores:0``, ``output/boxes:0``, ``output/labels:0`` and
    ``output/all_probs:0`` are fetched. One untimed run is performed first,
    then ``num_samples`` timed runs.

    Args:
        graph_path: path to the serialized frozen ``GraphDef``.
        mode: ``'FP32'``/``'FP16'`` to convert with TensorRT; anything else
            runs the graph unconverted.
        input_file: path of the image used for every run.
        num_samples: number of timed ``sess.run`` calls; must be positive.

    Raises:
        ValueError: if ``num_samples`` is not positive.
        IOError: if ``input_file`` cannot be read as an image.
    """
    if num_samples <= 0:
        raise ValueError('num_samples must be positive, got %r' % (num_samples,))

    # Load the image first so a bad path fails before the costly conversion.
    # cv2.imread signals failure by returning None instead of raising.
    img = cv2.imread(input_file, cv2.IMREAD_COLOR)
    if img is None:
        raise IOError('Could not read image: %s' % input_file)
    resized_img = resize_image(img)

    tf_config = tf.ConfigProto(allow_soft_placement=True)
    tf_config.gpu_options.allow_growth = True
    output_nodes = ['output/boxes', 'output/scores', 'output/labels', 'output/all_probs']

    frozen_graph = tf.GraphDef()
    with tf.gfile.GFile(graph_path, "rb") as f:
        frozen_graph.ParseFromString(f.read())

    if mode in ('FP32', 'FP16'):
        print('----------tensorRT-----------')
        graph_def = trt.create_inference_graph(
            input_graph_def=frozen_graph,
            outputs=output_nodes,
            max_batch_size=1,
            max_workspace_size_bytes=1 << 30,
            precision_mode=mode
        )
    else:
        graph_def = frozen_graph

    graph = tf.Graph()
    with graph.as_default():
        tf.import_graph_def(graph_def, name='')

    tf_input = graph.get_tensor_by_name('image:0')
    fetches = [
        graph.get_tensor_by_name('output/scores:0'),
        graph.get_tensor_by_name('output/boxes:0'),
        graph.get_tensor_by_name('output/labels:0'),
        graph.get_tensor_by_name('output/all_probs:0'),
    ]
    feed_dict = {tf_input: resized_img}

    with tf.Session(config=tf_config, graph=graph) as sess:
        # Untimed first run, kept out of the measurement below.
        sess.run(fetches, feed_dict=feed_dict)

        start = time.time()
        for _ in range(num_samples):
            sess.run(fetches, feed_dict=feed_dict)
        end = time.time()

    print('Average runtime: %f seconds' % (float(end - start) / num_samples))
if __name__ == '__main__':
    # Command-line entry point: parse the options and run the benchmark.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--precision_mode', type=str, default='native',
        help="'FP32' or 'FP16' to convert the graph with TensorRT; "
             "any other value runs the frozen graph unconverted.")
    # Required: without it the path used to become the literal string 'None'
    # and only failed later when the graph file was opened.
    parser.add_argument(
        '--model_path', type=str, required=True,
        help='Path to the frozen graph (.pb) file.')
    # parse_known_args is kept so unrecognized extra arguments are tolerated.
    pars, _unparsed = parser.parse_known_args()
    inference(pars.model_path, pars.precision_mode)
The whole output of this program is:
----------tensorRT-----------
2019-01-31 11:57:04.945507: I tensorflow/core/grappler/devices.cc:51] Number of eligible GPUs (core count >= 8): 2
2019-01-31 11:57:08.259189: I tensorflow/contrib/tensorrt/convert/convert_graph.cc:383] MULTIPLE tensorrt candidate conversion: 264
2019-01-31 11:57:08.608475: W tensorflow/contrib/tensorrt/convert/convert_graph.cc:418] subgraph conversion error for subgraph_index:0 due to: "Unimplemented: Require 4 dimensional input. Got 2 fastrcnn/outputs/box/MatMul" SKIPPING......( 3 nodes)
2019-01-31 11:57:08.611419: W tensorflow/contrib/tensorrt/convert/convert_graph.cc:418] subgraph conversion error for subgraph_index:1 due to: "Unimplemented: Require 4 dimensional input. Got 2 fastrcnn/outputs/class/MatMul" SKIPPING......( 3 nodes)
2019-01-31 11:57:08.614149: W tensorflow/contrib/tensorrt/convert/convert_graph.cc:418] subgraph conversion error for subgraph_index:2 due to: "Unimplemented: Require 4 dimensional input. Got 5 fastrcnn/gn2/moments/SquaredDifference" SKIPPING......( 12 nodes)
2019-01-31 11:57:08.617048: W tensorflow/contrib/tensorrt/convert/convert_graph.cc:418] subgraph conversion error for subgraph_index:3 due to: "Unimplemented: Require 4 dimensional input. Got 5 fastrcnn/gn0/moments/SquaredDifference" SKIPPING......( 12 nodes)
2019-01-31 11:57:08.626805: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:2660] Max batch size= 184326 max workspace size= 2951191
2019-01-31 11:57:08.626828: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:2664] Using FP16 precision mode
2019-01-31 11:57:08.626836: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:2666] starting build engine
2019-01-31 11:57:08.626858: E tensorflow/contrib/tensorrt/log/trt_logger.cc:38] DefaultLogger Tensor: multilevel_roi_align/GatherV2 at max batch size of 184326 exceeds the maximum element count of 2147483647
2019-01-31 11:57:08.626869: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:2671] Built network
2019-01-31 11:57:08.626890: W tensorflow/contrib/tensorrt/convert/convert_graph.cc:418] subgraph conversion error for subgraph_index:4 due to: "Internal: Engine building failure" SKIPPING......( 6 nodes)
2019-01-31 11:57:08.629621: W tensorflow/contrib/tensorrt/convert/convert_graph.cc:418] subgraph conversion error for subgraph_index:5 due to: "Unimplemented: Require 4 dimensional input. Got 2 multilevel_roi_align/roi_level5/roi_align/crop_and_resize/transform_fpcoor_for_tf/div_3" SKIPPING......( 4 nodes)
2019-01-31 11:57:08.632334: W tensorflow/contrib/tensorrt/convert/convert_graph.cc:418] subgraph conversion error for subgraph_index:6 due to: "Unimplemented: Require 4 dimensional input. Got 2 multilevel_roi_align/roi_level3/roi_align/crop_and_resize/transform_fpcoor_for_tf/div_3" SKIPPING......( 4 nodes)
2019-01-31 11:57:08.635073: W tensorflow/contrib/tensorrt/convert/convert_graph.cc:418] subgraph conversion error for subgraph_index:7 due to: "Unimplemented: Require 4 dimensional input. Got 2 multilevel_roi_align/roi_level2/roi_align/crop_and_resize/transform_fpcoor_for_tf/div_3" SKIPPING......( 4 nodes)
2019-01-31 11:57:08.637887: W tensorflow/contrib/tensorrt/convert/convert_graph.cc:418] subgraph conversion error for subgraph_index:8 due to: "Unimplemented: Require 4 dimensional input. Got 1 multilevel_roi_align/fpn_map_rois_to_levels/Log" SKIPPING......( 4 nodes)
2019-01-31 11:57:08.640590: W tensorflow/contrib/tensorrt/convert/convert_graph.cc:418] subgraph conversion error for subgraph_index:9 due to: "Unimplemented: Require 4 dimensional input. Got 1 multilevel_roi_align/fpn_map_rois_to_levels/Sqrt" SKIPPING......( 4 nodes)
2019-01-31 11:57:08.643387: W tensorflow/contrib/tensorrt/convert/convert_graph.cc:418] subgraph conversion error for subgraph_index:10 due to: "Unimplemented: Require 4 dimensional input. Got 5 group2/block11/conv3/gn/moments/SquaredDifference" SKIPPING......( 12 nodes)
2019-01-31 11:57:08.646105: W tensorflow/contrib/tensorrt/convert/convert_graph.cc:418] subgraph conversion error for subgraph_index:11 due to: "Unimplemented: Require 4 dimensional input. Got 5 group2/block13/conv1/gn/moments/SquaredDifference" SKIPPING......( 12 nodes)
2019-01-31 11:57:08.648803: E tensorflow/contrib/tensorrt/log/trt_logger.cc:38] DefaultLogger Parameter check failed at: ../builder/Network.cpp::addInput::364, condition: isValidDims(dims)
2019-01-31 11:57:08.648831: W tensorflow/contrib/tensorrt/convert/convert_graph.cc:418] subgraph conversion error for subgraph_index:12 due to: "Invalid argument: Failed to create Input layer" SKIPPING......( 4 nodes)
2019-01-31 11:57:08.651514: E tensorflow/contrib/tensorrt/log/trt_logger.cc:38] DefaultLogger Parameter check failed at: ../builder/Network.cpp::addInput::364, condition: isValidDims(dims)
2019-01-31 11:57:08.651541: W tensorflow/contrib/tensorrt/convert/convert_graph.cc:418] subgraph conversion error for subgraph_index:13 due to: "Invalid argument: Failed to create Input layer" SKIPPING......( 4 nodes)
2019-01-31 11:57:08.654259: W tensorflow/contrib/tensorrt/convert/convert_graph.cc:418] subgraph conversion error for subgraph_index:14 due to: "Unimplemented: Require 4 dimensional input. Got 5 group2/block11/conv1/gn/moments/SquaredDifference" SKIPPING......( 12 nodes)
2019-01-31 11:57:08.657160: W tensorflow/contrib/tensorrt/convert/convert_graph.cc:418] subgraph conversion error for subgraph_index:15 due to: "Unimplemented: Require 4 dimensional input. Got 5 group2/block10/conv1/gn/moments/SquaredDifference" SKIPPING......( 12 nodes)
2019-01-31 11:57:08.659879: W tensorflow/contrib/tensorrt/convert/convert_graph.cc:418] subgraph conversion error for subgraph_index:16 due to: "Unimplemented: Require 4 dimensional input. Got 5 group3/block0/convshortcut/gn/moments/SquaredDifference" SKIPPING......( 12 nodes)
2019-01-31 11:57:08.662639: W tensorflow/contrib/tensorrt/convert/convert_graph.cc:418] subgraph conversion error for subgraph_index:17 due to: "Unimplemented: Require 4 dimensional input. Got 5 group2/block9/conv3/gn/moments/SquaredDifference" SKIPPING......( 12 nodes)
2019-01-31 11:57:08.665370: W tensorflow/contrib/tensorrt/convert/convert_graph.cc:418] subgraph conversion error for subgraph_index:18 due to: "Unimplemented: Require 4 dimensional input. Got 5 group2/block3/conv1/gn/moments/SquaredDifference" SKIPPING......( 12 nodes)
2019-01-31 11:57:08.668084: W tensorflow/contrib/tensorrt/convert/convert_graph.cc:418] subgraph conversion error for subgraph_index:19 due to: "Unimplemented: Require 4 dimensional input. Got 5 group2/block9/conv2/gn/moments/SquaredDifference" SKIPPING......( 12 nodes)
2019-01-31 11:57:08.670779: E tensorflow/contrib/tensorrt/log/trt_logger.cc:38] DefaultLogger Parameter check failed at: ../builder/Network.cpp::addInput::364, condition: isValidDims(dims)
2019-01-31 11:57:08.670807: W tensorflow/contrib/tensorrt/convert/convert_graph.cc:418] subgraph conversion error for
...
2019-01-31 11:57:15.566272: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1435] Adding visible gpu devices: 0, 1
2019-01-31 11:57:15.947373: I tensorflow/core/common_runtime/gpu/gpu_device.cc:923] Device interconnect StreamExecutor with strength 1 edge matrix:
2019-01-31 11:57:15.947430: I tensorflow/core/common_runtime/gpu/gpu_device.cc:929] 0 1
2019-01-31 11:57:15.947442: I tensorflow/core/common_runtime/gpu/gpu_device.cc:942] 0: N N
2019-01-31 11:57:15.947449: I tensorflow/core/common_runtime/gpu/gpu_device.cc:942] 1: N N
2019-01-31 11:57:15.948129: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1053] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 14266 MB memory) -> physical GPU (device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:04:00.0, compute capability: 6.0)
2019-01-31 11:57:15.949269: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1053] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 14758 MB memory) -> physical GPU (device: 1, name: Tesla P100-PCIE--16GB, pci bus id: 0000:84:00.0, compute capability: 6.0)
Average runtime: 0.120971 seconds