Ubuntu 16.04
GPU: Nvidia 1080ti
Nvidia driver version: 384.130
Cuda: 9.0
Cudnn: 7
Python: 3.5
TensorFlow version: 1.9.0
TensorRT version: 4.0.1
My Deeplabv3+ frozen graph and calibration dataset have been uploaded to Google Drive: model - Google Drive
problem description:
I want to quantize the Deeplabv3+ model to INT8. I have generated the calibration graph (the deeplabv3_plus_TRTINT8Calib.pb file), but when I calibrate the model using the dataset, it fails with a dimension mismatch error. What happened?
My whole repo is:
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# !/bin/env python -tt
r""" TF-TensorRT integration sample script """
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.python.ops import data_flow_ops
import tensorflow.contrib.tensorrt as trt
import numpy as np
import time
from tensorflow.python.platform import gfile
from tensorflow.python.client import timeline
import argparse, sys, itertools, datetime
import json
import utils.preprocessing as preprocessing
from utils.segmentation_metric import Evaluator
tf.logging.set_verbosity(tf.logging.INFO)
import scipy.misc as misc
#import cv2
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0" # selects a specific device
# Number of image channels (not referenced by the code visible in this file).
IMAGE_CHN = 3
# Scale range passed to preprocessing.random_rescale_image_and_label in parse_fn.
_MIN_SCALE = 0.5
_MAX_SCALE = 2.0
# Spatial size every image/label is cropped, padded or resized to in parse_fn.
_HEIGHT = 513
_WIDTH = 513
# Value passed to preprocessing.random_crop_or_pad_image_and_label in parse_fn;
# presumably the label used for padded pixels -- TODO confirm against utils.preprocessing.
_IGNORE_LABEL = 255
def read_tensor_from_image_file(file_name, input_height=224, input_width=224,
                                input_mean=0, input_std=255):
    """Read an image file and return it as a normalized batch of one.

    The file is decoded with ``tf.image.decode_png`` (3 channels), cast to
    float32, resized bilinearly to ``input_height`` x ``input_width`` and
    normalized as ``(pixel - input_mean) / input_std``.

    NOTE(review): the original docstring and the op name say "jpg" while the
    decoder is ``decode_png`` -- confirm which format callers actually pass.

    Args:
      file_name: path of the image file to read.
      input_height: target height after resizing.
      input_width: target width after resizing.
      input_mean: value subtracted from every pixel.
      input_std: value every pixel is divided by after mean subtraction.

    Returns:
      A two-element list ``[nhwc, nchw]``: the normalized image with a leading
      batch dimension, and the same data transposed with ``perm=(0, 3, 1, 2)``.
    """
    input_name = "file_reader"
    file_reader = tf.read_file(file_name, input_name)
    image_reader = tf.image.decode_png(file_reader, channels=3,
                                       name='jpg_reader')
    float_caster = tf.cast(image_reader, tf.float32)
    dims_expander = tf.expand_dims(float_caster, 0)
    resized = tf.image.resize_bilinear(dims_expander, [input_height, input_width])
    normalized = tf.divide(tf.subtract(resized, [input_mean]), [input_std])
    session_config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.50))
    # The context manager closes the session even when run() raises.
    with tf.Session(config=session_config) as sess:
        result = sess.run([normalized, tf.transpose(normalized, perm=(0, 3, 1, 2))])
    return result
def parse_example_proto(example_serialized):
    """Decode one serialized tf.Example into a dict of feature tensors.

    Args:
    * example_serialized: scalar string tensor holding a serialized example

    Returns:
    * features: dict mapping each feature key below to its parsed tensor
    """
    def bytes_feature():
        # Scalar string feature that falls back to '' when the key is absent.
        return tf.FixedLenFeature([], dtype=tf.string, default_value='')

    def int64_feature(length):
        # Fixed-length int64 feature without a default value.
        return tf.FixedLenFeature([length], dtype=tf.int64)

    # 'image/format' and 'image/filename' are deliberately not parsed here
    # (their entries were disabled in the original schema).
    schema = {
        'image/encoded': bytes_feature(),
        'image/shape': int64_feature(3),
        'image/height': int64_feature(1),
        'image/width': int64_feature(1),
        'label/shape': int64_feature(3),
        'image/segmentation/label': bytes_feature(),
    }
    return tf.parse_single_example(example_serialized, schema)
def parse_fn(example_serialized, is_train=False):
    """Parse one serialized example into a preprocessed (image, label) pair.

    Args:
    * example_serialized: serialized example data
    * is_train: when True apply the random training augmentations (rescale,
      crop/pad, horizontal flip); otherwise only resize to the fixed size

    Returns:
    * image: float image tensor of size _HEIGHT x _WIDTH with 3 channels,
      after mean subtraction
    * label: int32 segmentation label tensor of size _HEIGHT x _WIDTH with
      1 channel
    """
    features = parse_example_proto(example_serialized)
    image_shape = tf.cast(features['image/shape'], tf.int32)
    label_shape = tf.cast(features['label/shape'], tf.int32)

    # Both image and label are stored as raw uint8 bytes; their shapes are
    # stored next to them and used to restore the tensors.
    image = tf.to_float(tf.reshape(
        tf.decode_raw(features['image/encoded'], tf.uint8), shape=image_shape))
    label = tf.to_int32(tf.reshape(
        tf.decode_raw(features['image/segmentation/label'], tf.uint8),
        shape=[label_shape[0], label_shape[1], label_shape[2]]))

    if is_train:
        # Random rescale, then random crop/pad to [_HEIGHT, _WIDTH], then
        # random horizontal flip -- each applied jointly to image and label.
        image, label = preprocessing.random_rescale_image_and_label(
            image, label, _MIN_SCALE, _MAX_SCALE)
        image, label = preprocessing.random_crop_or_pad_image_and_label(
            image, label, _HEIGHT, _WIDTH, _IGNORE_LABEL)
        image, label = preprocessing.random_flip_left_right_image_and_label(
            image, label)
        image = preprocessing.mean_image_subtraction(image)
        image.set_shape([_HEIGHT, _WIDTH, 3])
        label.set_shape([_HEIGHT, _WIDTH, 1])
    else:
        image = tf.image.resize_images(image, [_HEIGHT, _WIDTH],
                                       method=tf.image.ResizeMethod.BILINEAR)
        # Nearest-neighbour keeps label values intact (no interpolated classes).
        label = tf.image.resize_images(label, [_HEIGHT, _WIDTH],
                                       method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
        # The channel dimension is pinned before mean subtraction; per the
        # original author's note, mean_image_subtraction raised an error when
        # the shape could not be determined.
        image.set_shape([None, None, 3])
        label.set_shape([None, None, 1])
        image = preprocessing.mean_image_subtraction(image)
    return image, label
def updateGraphDef(fileName):
    """Re-serialize a GraphDef file in place.

    The file is parsed, imported into a fresh graph with an empty name scope,
    and the resulting graph definition is written back over the same path.

    Args:
      fileName: path of the binary GraphDef (.pb) file to rewrite.
    """
    with gfile.FastGFile(fileName, 'rb') as source:
        parsed = tf.GraphDef()
        parsed.ParseFromString(source.read())

    tf.reset_default_graph()
    scratch_graph = tf.Graph()
    with scratch_graph.as_default():
        tf.import_graph_def(parsed, name="")

    with gfile.FastGFile(fileName, 'wb') as target:
        target.write(scratch_graph.as_graph_def().SerializeToString())
def getResnet_v2_101():
    """Load the frozen graph stored in ``resnetV1_50_frozen.pb``.

    NOTE(review): the function name says ResNet v2-101 while the file name
    says ResNet v1-50 -- confirm which model is intended.

    Returns:
      The parsed ``tf.GraphDef``.
    """
    with gfile.FastGFile("resnetV1_50_frozen.pb", 'rb') as pb_file:
        serialized = pb_file.read()
        frozen = tf.GraphDef()
        frozen.ParseFromString(serialized)
    return frozen
def getInceptionV4():
    """Load the frozen graph stored in ``inception_v4.pb``.

    Per the original author's note: input node ``input``, output node ``scores``.

    Returns:
      The parsed ``tf.GraphDef``.
    """
    with gfile.FastGFile("inception_v4.pb", 'rb') as pb_file:
        serialized = pb_file.read()
        frozen = tf.GraphDef()
        frozen.ParseFromString(serialized)
    return frozen
def getDeeplabv3_plus():
    """Load the frozen Deeplabv3+ graph from ``/workspace``.

    Per the original author's note: input node ``inputs_placeholder1``,
    output node ``predictions``.

    Returns:
      The parsed ``tf.GraphDef``.
    """
    with gfile.FastGFile("/workspace/deeplabv3_plus_frozen_model_513.pb", 'rb') as pb_file:
        serialized = pb_file.read()
        frozen = tf.GraphDef()
        frozen.ParseFromString(serialized)
    return frozen
def printStats(graphName, timings, batch_size):
    """Print throughput and latency statistics for a list of batch timings.

    Args:
      graphName: label written into the machine-readable ``RES`` line.
      timings: sequence of seconds-per-batch measurements, or None.
      batch_size: number of images processed per batch.

    Returns:
      None. Nothing is printed when ``timings`` is None or empty, since the
      mean and standard deviation of no samples are undefined (NaN).
    """
    if timings is None or len(timings) == 0:
        return
    times = np.array(timings)
    speeds = batch_size / times
    avgTime = np.mean(times)
    # Average speed is derived from the average time, not from the mean of the
    # per-sample speeds, so slow outliers weigh in proportionally.
    avgSpeed = batch_size / avgTime
    stdTime = np.std(times)
    stdSpeed = np.std(speeds)
    print("images/s : %.1f +/- %.1f, s/batch: %.5f +/- %.5f" % (avgSpeed, stdSpeed, avgTime, stdTime))
    print("RES, %s, %s, %.2f, %.2f, %.5f, %.5f" % (graphName, batch_size, avgSpeed, stdSpeed, avgTime, stdTime))
def _convertDeeplabWithTrt(precision_mode, output_path, batch_size, workspace_size):
    """Convert the frozen Deeplabv3+ graph with TF-TRT and save the result.

    Args:
      precision_mode: "FP32", "FP16" or "INT8", forwarded to
        ``trt.create_inference_graph``.
      output_path: file the serialized converted graph is written to.
      batch_size: forwarded as ``max_batch_size``.
      workspace_size: forwarded as ``max_workspace_size_bytes``.

    Returns:
      The converted ``GraphDef``.
    """
    trt_graph = trt.create_inference_graph(getDeeplabv3_plus(), ["predictions"],
                                           max_batch_size=batch_size,
                                           max_workspace_size_bytes=workspace_size,
                                           precision_mode=precision_mode)
    with gfile.FastGFile(output_path, 'wb') as f:
        f.write(trt_graph.SerializeToString())
    return trt_graph


def getFP32(batch_size=3, workspace_size=1 << 30):
    """Build the FP32 TensorRT graph and save it as deeplabv3_plus_TRTFP32.pb.

    Args:
      batch_size: maximum batch size the TensorRT engines are built for.
      workspace_size: maximum TensorRT workspace size in bytes.

    Returns:
      The optimized ``GraphDef``.
    """
    return _convertDeeplabWithTrt("FP32", "deeplabv3_plus_TRTFP32.pb",
                                  batch_size, workspace_size)


def getFP16(batch_size=3, workspace_size=1 << 30):
    """Build the FP16 TensorRT graph and save it as deeplabv3_plus_TRTFP16.pb.

    Args:
      batch_size: maximum batch size the TensorRT engines are built for.
      workspace_size: maximum TensorRT workspace size in bytes.

    Returns:
      The optimized ``GraphDef``.
    """
    return _convertDeeplabWithTrt("FP16", "deeplabv3_plus_TRTFP16.pb",
                                  batch_size, workspace_size)


def getINT8CalibGraph(batch_size=3, workspace_size=1 << 30):
    """Build the INT8 calibration graph and save it as deeplabv3_plus_TRTINT8Calib.pb.

    The returned graph still has to be run on calibration data and then
    passed to ``getINT8InferenceGraph``.

    Args:
      batch_size: maximum batch size the TensorRT engines are built for.
      workspace_size: maximum TensorRT workspace size in bytes.

    Returns:
      The calibration ``GraphDef``.
    """
    return _convertDeeplabWithTrt("INT8", "deeplabv3_plus_TRTINT8Calib.pb",
                                  batch_size, workspace_size)
def getINT8InferenceGraph(calibGraph):
    """Turn a calibrated INT8 graph into an inference graph and save it.

    Args:
      calibGraph: calibration graph, as returned by ``getINT8CalibGraph``.

    Returns:
      The INT8 inference ``GraphDef``; it is also written to
      ``deeplabv3_plus_TRTINT8.pb``.
    """
    inference_graph = trt.calib_graph_to_infer_graph(calibGraph)
    serialized = inference_graph.SerializeToString()
    with gfile.FastGFile("deeplabv3_plus_TRTINT8.pb", 'wb') as pb_file:
        pb_file.write(serialized)
    return inference_graph
def timeGraph(gdef, batch_size=1, num_loops=100, dummy_input=None, timelineName=None):
    """Run a graph on the validation TFRecords, report mIoU and time inference.

    The graph is fed from ``/workspace/tensorrt_tf_tmp/val_pascalAug.tfrecords``
    through ``parse_fn``. When ``timelineName`` is set, 20 traced warm-up runs
    are written to that file as a Chrome trace; otherwise one pass over the
    validation set is evaluated and its mean IoU printed (this pass is also
    what feeds data through an INT8 calibration graph). Afterwards
    ``num_loops`` timing loops of 50 runs each are measured.

    Args:
      gdef: GraphDef to execute; must expose a ``predictions`` node.
      batch_size: batch size of the input pipeline.
      num_loops: number of timing loops (each loop averages 50 runs).
      dummy_input: unused; kept so existing callers keep working.
      timelineName: optional path for a Chrome trace of the warm-up runs.

    Returns:
      ``(timings, None, None, None)`` where ``timings`` lists the average
      seconds per batch of every timing loop.
    """
    data_files = '/workspace/tensorrt_tf_tmp/val_pascalAug.tfrecords'
    # Step count the evaluation loop was hard-coded to; assumed to be the
    # number of examples in the validation TFRecords -- TODO confirm.
    num_eval_images = 1449
    num_classes = 21
    warmup_runs = 20
    num_iters = 50

    tf.logging.info("Starting execution")
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.50)
    tf.reset_default_graph()
    g = tf.Graph()
    with g.as_default():
        dataset = tf.data.TFRecordDataset(data_files)
        dataset = dataset.apply(tf.contrib.data.map_and_batch(
            map_func=parse_fn, batch_size=batch_size, num_parallel_calls=3))
        # Repeat indefinitely: a fixed repeat count ran out of data (and
        # raised OutOfRangeError) for larger batch sizes or loop counts.
        dataset = dataset.repeat()
        dataset = dataset.prefetch(tf.contrib.data.AUTOTUNE)
        iterator = dataset.make_one_shot_iterator()
        next_element, labels = iterator.get_next()
        # NOTE(review): the author's note on getDeeplabv3_plus names the input
        # node "inputs_placeholder1" while "input" is mapped here -- confirm
        # the placeholder name of the frozen graph.
        imported = tf.import_graph_def(
            graph_def=gdef,
            input_map={"input": next_element},
            return_elements=["predictions"]
        )
        out = imported[0].outputs[0]

    def mergeTraceStr(run_metadata_list):
        """Merge the step stats of several runs into one Chrome-trace JSON string."""
        merged = json.loads(
            timeline.Timeline(run_metadata_list[0].step_stats).generate_chrome_trace_format())
        for metadata in run_metadata_list[1:]:
            trace = json.loads(
                timeline.Timeline(metadata.step_stats).generate_chrome_trace_format())
            merged["traceEvents"].extend(trace["traceEvents"])
        return json.dumps(merged, indent=2)

    timings = []
    with tf.Session(graph=g, config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        sess.run(tf.local_variables_initializer())
        tf.logging.info("Starting Warmup cycle")
        if timelineName:
            if gfile.Exists(timelineName):
                gfile.Remove(timelineName)
            # FULL_TRACE is requested explicitly so that step stats are
            # collected into the RunMetadata of each run.
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            traces = [tf.RunMetadata() for _ in range(warmup_runs)]
            for metadata in traces:
                sess.run(out, options=run_options, run_metadata=metadata)
            with gfile.FastGFile(timelineName, "a") as tlf:
                tlf.write(mergeTraceStr(traces))
        else:
            evaluator = Evaluator(num_classes)
            evaluator.reset()
            # Exactly one pass over the validation set, whatever the batch size.
            eval_steps = (num_eval_images + batch_size - 1) // batch_size
            for _ in range(eval_steps):
                predictions, ground_truth = sess.run([out, labels])
                evaluator.add_batch(ground_truth, predictions)
            mIoU = evaluator.Mean_Intersection_over_Union()
            print(" the mIoU of val set is:{}".format(mIoU))
        tf.logging.info("Warmup done. Starting real timing")
        for i in range(num_loops):
            tstart = time.time()
            for _ in range(num_iters):
                sess.run(out)
            timings.append((time.time() - tstart) / float(num_iters))
            print("iter ", i, " ", timings[-1])
        tf.logging.info("Timing loop done!")
    return timings, None, None, None
def score(nat, trt, topN=5):
    """Compare the top-N predictions of two score matrices.

    Args:
      nat: 2-D array of scores from the reference (native) run.
      trt: 2-D array of scores from the run under test.
      topN: number of highest-scoring entries per row to compare.

    Returns:
      A pair ``(identical, closeness)``: whether the ascending top-N index
      arrays match exactly, and the ``howClose`` similarity of the two inputs.
    """
    native_top = np.argsort(nat)[:, -topN:]
    tested_top = np.argsort(trt)[:, -topN:]
    identical = np.array_equal(native_top, tested_top)
    return identical, howClose(nat, trt, topN)
def topX(arr, X):
    """Return the X largest values of every row and their column indices.

    Args:
      arr: 2-D array of scores.
      X: number of entries to keep per row.

    Returns:
      A pair ``(values, indices)``, both ordered from largest to smallest
      within each row.
    """
    # argsort is ascending: keep the last X columns, then flip them.
    descending = np.fliplr(np.argsort(arr)[:, -X:])
    row_selector = np.arange(np.shape(arr)[0])[:, np.newaxis]
    return arr[row_selector, descending], descending
def howClose(arr1, arr2, X):
    """Score how similar the top-X rankings of two score matrices are.

    Only the first row of each input is compared. Every rank position earns
    1 point when both rankings hold the same index there, and 0.5 points when
    the index appears in the second ranking less than two positions away.

    Args:
      arr1: 2-D array of reference scores.
      arr2: 2-D array of scores to compare against ``arr1``.
      X: number of top entries to compare.

    Returns:
      The earned points divided by ``X``, a float between 0 and 1.
    """
    _, ind1 = topX(arr1, X)
    _, ind2 = topX(arr2, X)
    # Loop-invariant: the first-row rankings are extracted once.
    in1 = ind1[0]
    in2 = ind2[0]
    ssum = 0.
    for i in range(X):
        if in1[i] == in2[i]:
            ssum += 1
            continue
        # Positions at which the second ranking holds the index expected here.
        pos = np.where(in2 == in1[i])[0]
        if pos.shape[0] and np.abs(pos[0] - i) < 2:
            ssum += 0.5
    return ssum / X
def getLabels(labels, ids):
    """Look up the label of every zero-based class id.

    Args:
      labels: mapping keyed by the one-based class id as a string.
      ids: iterable of zero-based class ids.

    Returns:
      List of the looked-up labels, in the order of ``ids``.
    """
    names = []
    for class_id in ids:
        # Keys are one-based strings, ids are zero-based numbers.
        names.append(labels[str(class_id + 1)])
    return names
# Script entry point: benchmark the Deeplabv3+ graph natively and/or through
# TensorRT in the precision modes selected on the command line.
if __name__ == "__main__":
    P = argparse.ArgumentParser(prog="test")
    P.add_argument('--FP32', action='store_true')
    P.add_argument('--FP16', action='store_true')
    P.add_argument('--INT8', action='store_true')
    P.add_argument('--native', action='store_true')
    P.add_argument('--num_loops', type=int, default=20)
    # --topN and --dump_diff are accepted but not used by the code below.
    P.add_argument('--topN', type=int, default=10)
    P.add_argument('--batch_size', type=int, default=1)
    P.add_argument('--dump_diff', action='store_true')
    P.add_argument('--with_timeline', action='store_true')
    P.add_argument('--workspace_size', type=int, default=1 << 10, help="workspace size in MB")
    P.add_argument('--update_graphdef', action='store_true')
    f, unparsed = P.parse_known_args()
    print(f)
    print("Starting at", datetime.datetime.now())
    if f.update_graphdef:
        updateGraphDef("/workspace/deeplabv3_plus_frozen_model_513.pb")
    # timeGraph reads its input from the validation TFRecords, so no dummy
    # tensor is supplied.
    dummy_input = None
    # The command-line value is in MB; TensorRT expects bytes.
    wsize = f.workspace_size << 20
    if f.native:
        timelineName = "NativeTimeline.json" if f.with_timeline else None
        timings, comp, _, mdstats = timeGraph(getDeeplabv3_plus(), f.batch_size,
                                              f.num_loops, dummy_input, timelineName)
        printStats("Native", timings, f.batch_size)
        printStats("NativeRS", mdstats, f.batch_size)
    if f.FP32:
        timelineName = "FP32Timeline.json" if f.with_timeline else None
        timings, comp, _, mdstats = timeGraph(getFP32(f.batch_size, wsize), f.batch_size,
                                              f.num_loops, dummy_input, timelineName)
        printStats("TRT-FP32", timings, f.batch_size)
        printStats("TRT-FP32RS", mdstats, f.batch_size)
    if f.FP16:
        timelineName = "FP16Timeline.json" if f.with_timeline else None
        timings, comp, _, mdstats = timeGraph(getFP16(f.batch_size, wsize), f.batch_size,
                                              f.num_loops, dummy_input, timelineName)
        printStats("TRT-FP16", timings, f.batch_size)
        printStats("TRT-FP16RS", mdstats, f.batch_size)
    if f.INT8:
        calibGraph = getINT8CalibGraph(f.batch_size, wsize)
        print("Running Calibration")
        # A single timing loop is enough here: the purpose of this run is to
        # push calibration data through the graph, not to measure speed.
        timings, comp, _, mdstats = timeGraph(calibGraph, f.batch_size, 1, dummy_input)
        print("Creating inference graph")
        int8Graph = getINT8InferenceGraph(calibGraph)
        del calibGraph
        timelineName = "INT8Timeline.json" if f.with_timeline else None
        timings, comp, _, mdstats = timeGraph(int8Graph, f.batch_size,
                                              f.num_loops, dummy_input, timelineName)
        printStats("TRT-INT8", timings, f.batch_size)
        printStats("TRT-INT8RS", mdstats, f.batch_size)
    print("Done timing", datetime.datetime.now())
    sys.exit(0)
Below are the runtime logs:
2019-05-06 02:02:55.396641: I tensorflow/core/grappler/devices.cc:51] Number of eligible GPUs (core count >= 8): 1
2019-05-06 02:02:58.289008: I tensorflow/contrib/tensorrt/convert/convert_graph.cc:438] MULTIPLE tensorrt candidate conversion: 12
2019-05-06 02:02:58.291838: E tensorflow/contrib/tensorrt/log/trt_logger.cc:38] DefaultLogger Parameter check failed at: ../builder/Network.cpp::addInput::364, condition: isValidDims(dims)
2019-05-06 02:02:58.291866: W tensorflow/contrib/tensorrt/convert/convert_graph.cc:507] subgraph conversion error for subgraph_index:0 due to: "Invalid argument: Failed to create Input layer" SKIPPING......( 26 nodes)
2019-05-06 02:02:58.295488: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3198] Max batch size= 1 max workspace size= 6114703
2019-05-06 02:02:58.295517: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3212] finished op preparation
2019-05-06 02:02:58.295538: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3220] OK
2019-05-06 02:02:58.295548: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3221] finished op building
2019-05-06 02:02:58.297883: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3198] Max batch size= 1 max workspace size= 6114703
2019-05-06 02:02:58.297906: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3212] finished op preparation
2019-05-06 02:02:58.297925: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3220] OK
2019-05-06 02:02:58.297932: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3221] finished op building
2019-05-06 02:02:58.300170: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3198] Max batch size= 1 max workspace size= 6114703
2019-05-06 02:02:58.300193: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3212] finished op preparation
2019-05-06 02:02:58.300211: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3220] OK
2019-05-06 02:02:58.300219: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3221] finished op building
2019-05-06 02:02:58.302386: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3198] Max batch size= 1 max workspace size= 6114703
2019-05-06 02:02:58.302409: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3212] finished op preparation
2019-05-06 02:02:58.302427: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3220] OK
2019-05-06 02:02:58.302434: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3221] finished op building
2019-05-06 02:02:58.310410: W tensorflow/contrib/tensorrt/convert/convert_graph.cc:507] subgraph conversion error for subgraph_index:5 due to: "Invalid argument: Output node 'aspp/concat-5-LayoutOptimizer' is weights not tensor" SKIPPING......( 16 nodes)
2019-05-06 02:02:58.351016: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3198] Max batch size= 1 max workspace size= 25681752
2019-05-06 02:02:58.351078: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3212] finished op preparation
2019-05-06 02:02:58.351129: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3220] OK
2019-05-06 02:02:58.351138: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3221] finished op building
2019-05-06 02:02:58.366046: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3198] Max batch size= 1 max workspace size= 17121168
2019-05-06 02:02:58.366088: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3212] finished op preparation
2019-05-06 02:02:58.366126: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3220] OK
2019-05-06 02:02:58.366135: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3221] finished op building
2019-05-06 02:02:58.386840: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3198] Max batch size= 1 max workspace size= 20789990
2019-05-06 02:02:58.386890: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3212] finished op preparation
2019-05-06 02:02:58.386921: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3220] OK
2019-05-06 02:02:58.386929: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3221] finished op building
2019-05-06 02:02:58.389213: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3198] Max batch size= 1 max workspace size= 6114703
2019-05-06 02:02:58.389243: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3212] finished op preparation
2019-05-06 02:02:58.389265: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3220] OK
2019-05-06 02:02:58.389274: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3221] finished op building
2019-05-06 02:02:58.598352: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3198] Max batch size= 1 max workspace size= 922097216
2019-05-06 02:02:58.598671: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3212] finished op preparation
2019-05-06 02:02:58.598929: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3220] OK
2019-05-06 02:02:58.598942: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3221] finished op building
2019-05-06 02:02:58.601519: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3198] Max batch size= 1 max workspace size= 6114703
2019-05-06 02:02:58.601543: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3212] finished op preparation
2019-05-06 02:02:58.601561: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3220] OK
2019-05-06 02:02:58.601568: I tensorflow/contrib/tensorrt/convert/convert_nodes.cc:3221] finished op building
Running Calibration
INFO:tensorflow:Starting execution
Traceback (most recent call last):
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/importer.py", line 418, in import_graph_def
graph._c_graph, serialized, options) # pylint: disable=protected-access
tensorflow.python.framework.errors_impl.InvalidArgumentError: Dimensions must be equal, but are 1024 and 512 for 'import/resnet_v2_101/block4/unit_1/bottleneck_v2/conv3/Conv2D' (op: 'Conv2D') with input shapes: [?,1024,33,33], [1,1,512,2048].
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "optimize_compress.py", line 447, in <module>
timings, comp, _, mdstats = timeGraph(calibGraph, f.batch_size, 1, dummy_input)
File "optimize_compress.py", line 273, in timeGraph
return_elements=["predictions"]
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/util/deprecation.py", line 432, in new_func
return func(*args, **kwargs)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/importer.py", line 422, in import_graph_def
raise ValueError(str(e))
ValueError: Dimensions must be equal, but are 1024 and 512 for 'import/resnet_v2_101/block4/unit_1/bottleneck_v2/conv3/Conv2D' (op: 'Conv2D') with input shapes: [?,1024,33,33], [1,1,512,2048].
2019-05-06 02:03:01.003784: I ./tensorflow/contrib/tensorrt/resources/trt_resources.h:48] Destroying Calibration Resource