I believe that after you run the initial conversion, it should save the TensorRT engine and not take as long to load on subsequent runs. That particular repo demonstrates the TF-TRT interoperability workflow, which loads the TensorFlow runtime. There are also similar repos that show converting the models to UFF, which depends only on TensorRT at runtime:
At first I tried my own code (stitched together from other examples) so that I could use only TensorFlow models.
Image Classification:
import tensorflow as tf
#if type(tf.contrib) != type(tf): tf.contrib._warning = None
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
import tensorflow.contrib.tensorrt as trt
import argparse
from PIL import Image
#import cv2
import numpy as np
import time
from tf_trt_models.classification import download_classification_checkpoint, build_classification_graph
def argParse(argv=None):
    """Parse the command-line options of the image classification script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the process command line, which
            is what the original zero-argument call did.

    Returns:
        argparse.Namespace with the attributes ``model`` (str),
        ``num_classes`` (int) and ``image`` (str). All three options are
        required; argparse exits with a usage message if one is missing.
    """
    desc = ('Run optimized TensorRT image classification models on Jetson devices.')
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('--model', help='Name of the pretrained model.', required=True)
    parser.add_argument('--num_classes', help='Number of classes in the label file.', required=True, type=int)
    parser.add_argument('--image', help='File path of the input image.', required=True)
    return parser.parse_args(argv)
def load_graph(graph_path, model, num_classes, checkpoint_path):
    """Return a TensorRT-optimized classification graph, building it if needed.

    A serialized GraphDef is first looked for at ``graph_path``. If it cannot
    be read or parsed, the frozen graph is built from the checkpoint,
    optimized with TF-TRT and written to ``graph_path`` for later runs.

    Args:
        graph_path: File path of the cached, optimized GraphDef (.pb).
        model: Model name forwarded to ``build_classification_graph``.
        num_classes: Number of classes forwarded to
            ``build_classification_graph``.
        checkpoint_path: Checkpoint forwarded to
            ``build_classification_graph``.

    Returns:
        Tuple ``(trt_graph, input_names, output_names)``.
    """
    # Try the cache first. Only ``Exception`` is caught (not a bare except),
    # so Ctrl-C / SystemExit are no longer mistaken for "cache missing" and
    # do not trigger a lengthy rebuild.
    trt_graph = None
    try:
        with tf.gfile.GFile(graph_path, "rb") as f:
            print("------------------------------------")
            print("loading existing TensorRT graph")
            print("------------------------------------")
            cached_graph = tf.GraphDef()
            cached_graph.ParseFromString(f.read())
        trt_graph = cached_graph
    except Exception as err:
        # Best effort: any load/parse failure falls back to a rebuild, but
        # the reason is reported instead of being silently swallowed.
        print("could not load cached graph from %s (%s)" % (graph_path, err))

    if trt_graph is not None:
        # NOTE(review): these names are hard-coded for the cached path, while
        # a fresh build uses whatever build_classification_graph returns.
        # 'image_tensor' looks like a detection-model input name -- confirm
        # it matches the classification graph's input.
        input_names = ['image_tensor']
        output_names = ['scores']
        print("loaded optimized graph ... ready to predict")
    else:
        # Build frozen graph (outside the except handler so that a build
        # failure is reported on its own, not chained to the cache miss).
        print("------------------------------------")
        print("building graph")
        print("------------------------------------")
        frozen_graph, input_names, output_names = build_classification_graph(
            model=model,
            checkpoint=checkpoint_path,
            num_classes=num_classes
        )
        # Optimize graph with TensorRT
        print("------------------------------------")
        print("optimizing graph with TensorRT")
        print("------------------------------------")
        trt_graph = trt.create_inference_graph(
            input_graph_def=frozen_graph,
            outputs=output_names,
            max_batch_size=1,
            max_workspace_size_bytes=1 << 25,
            precision_mode='FP16',
            minimum_segment_size=50
        )
        # Save TensorRT graph so the next run can skip the build
        with tf.gfile.GFile(graph_path, 'wb') as f:
            f.write(trt_graph.SerializeToString())
        print("optimized graph ... ready to predict")

    print(input_names)
    print(output_names)
    return (trt_graph, input_names, output_names)
def main():
    """Classify one image with a TF-TRT optimized model and benchmark it.

    Reads the command line via ``argParse``, obtains the optimized graph via
    ``load_graph``, runs one warm-up inference followed by 1000 timed
    inferences on the same image, then prints the top-5 labels together with
    the mean latency and FPS.
    """
    args = argParse()
    model = args.model
    data_dir = './data/'
    num_classes = args.num_classes
    labels_path = './data/imagenet_labels_%d.txt' % num_classes
    image_path = args.image
    graph_path = data_dir + model + '_trt.pb'

    # Download model checkpoint and sample image
    checkpoint_path = download_classification_checkpoint(model, 'data')

    # load TensorRT optimized graph for inference
    trt_graph, input_names, output_names = load_graph(
        graph_path, model, num_classes, checkpoint_path)

    # Create session and load graph
    print("------------------------------------")
    print("creating session for inference")
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    tf_sess = tf.Session(config=tf_config)
    try:
        tf.import_graph_def(trt_graph, name='')
        tf_input = tf_sess.graph.get_tensor_by_name(input_names[0] + ':0')
        tf_output = tf_sess.graph.get_tensor_by_name(output_names[0] + ':0')

        # Load and preprocess image
        print("------------------------------------")
        print("loading image")
        input_shape = tf_input.shape.as_list()
        # Assumes the placeholder is laid out NHWC (batch, height, width,
        # channels) -- TODO confirm. PIL's resize() takes (width, height), so
        # height comes from axis 1 and width from axis 2.
        height = int(input_shape[1])
        width = int(input_shape[2])
        # Force three channels so grayscale / RGBA files do not produce an
        # array whose channel count differs from an RGB image.
        image = Image.open(image_path).convert('RGB')
        image = np.array(image.resize((width, height)))
        feed = {tf_input: image[None, ...]}  # add the batch dimension once

        print("------------------------------------")
        print("warmup prediction ...")
        start_time = time.time()
        output = tf_sess.run(tf_output, feed_dict=feed)
        delta = (time.time() - start_time)
        print('warmup run time is %.4f ms' % (delta*1000))

        # Execute model
        print("------------------------------------")
        print("starting image classification on Nvidida Jetson TX2 ...")
        runs = 1000
        print("running inferencing for ", runs, " times.")
        times = []
        for _ in range(runs):
            start_time = time.time()
            output = tf_sess.run(tf_output, feed_dict=feed)
            times.append(time.time() - start_time)
        mean_delta = np.array(times).mean()
        fps = 1/mean_delta

        print("------------------------------------")
        print("Prediction from image classification model: " + model)
        scores = output[0]  # drop the batch dimension

        # Get top 5 labels
        with open(labels_path, 'r') as f:
            labels = f.readlines()
        top5_idx = scores.argsort()[::-1][0:5]
        for num, i in enumerate(top5_idx, start=1):
            print(" Object " + str(num))
            print(" Accuracy: %.4f %%" % (scores[i]*100))
            # readlines() keeps the line terminator; strip it so that no
            # stray blank line is printed after each label.
            print(" Label : %s" % labels[i].strip())
            print(" **********************")

        print("------------------------------------")
        print('Time[ms] : %.4f' % (mean_delta*1000))
        print('FPS : %.4f' % fps)
        print("------------------------------------")
    finally:
        # Close session to release resources, even if inference failed
        tf_sess.close()


if __name__ == '__main__':
    main()
Object Detection:
import tensorflow as tf
#if type(tf.contrib) != type(tf): tf.contrib._warning = None
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
import tensorflow.contrib.tensorrt as trt
import argparse
from PIL import Image
import cv2
import numpy as np
import time
from tf_trt_models.detection import download_detection_model, build_detection_graph
def argParser(argv=None):
    """Parse the command-line options of the object detection script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the process command line, which
            is what the original zero-argument call did.

    Returns:
        argparse.Namespace with the attributes ``model`` (str), ``image``
        (str), ``image_out`` (str) and ``size`` (int, default 300). The
        first three options are required.
    """
    desc = ('Run optimized TensorRT object detection models on Jetson devices.')
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('--model', help='Name of the pretrained model.', required=True)
    parser.add_argument('--image', help='File path of the input image.', required=True)
    parser.add_argument('--image_out', help='File path to save result image.', required=True)
    parser.add_argument('--size', help='Size of the models tensor.', type=int, default=300)
    return parser.parse_args(argv)
def load_graph(graph_path, config_path, checkpoint_path):
    """Return a TensorRT-optimized detection graph, building it if needed.

    A serialized GraphDef is first looked for at ``graph_path``. If it cannot
    be read or parsed, the frozen graph is built from the config/checkpoint,
    optimized with TF-TRT and written to ``graph_path`` for later runs.

    Args:
        graph_path: File path of the cached, optimized GraphDef (.pb).
        config_path: Pipeline config forwarded to ``build_detection_graph``.
        checkpoint_path: Checkpoint forwarded to ``build_detection_graph``.

    Returns:
        Tuple ``(trt_graph, input_names)``.
    """
    # Try the cache first. Only ``Exception`` is caught (not a bare except),
    # so Ctrl-C / SystemExit are no longer mistaken for "cache missing" and
    # do not trigger a lengthy rebuild.
    trt_graph = None
    try:
        with tf.gfile.GFile(graph_path, "rb") as f:
            print("------------------------------------")
            print("loading existing TensorRT graph")
            print("------------------------------------")
            cached_graph = tf.GraphDef()
            cached_graph.ParseFromString(f.read())
        trt_graph = cached_graph
    except Exception as err:
        # Best effort: any load/parse failure falls back to a rebuild, but
        # the reason is reported instead of being silently swallowed.
        print("could not load cached graph from %s (%s)" % (graph_path, err))

    if trt_graph is not None:
        input_names = ['image_tensor']
        print("loaded optimized graph ... ready to predict")
        return (trt_graph, input_names)

    # Build frozen graph (outside the except handler so that a build failure
    # is reported on its own, not chained to the cache miss).
    print("------------------------------------")
    print("building graph")
    print("------------------------------------")
    frozen_graph, input_names, output_names = build_detection_graph(
        config=config_path,
        checkpoint=checkpoint_path,
        score_threshold=0.3,
        batch_size=1,
        force_nms_cpu=False
    )
    # Optimize graph with TensorRT
    print("------------------------------------")
    print("optimizing graph with TensorRT")
    print("------------------------------------")
    trt_graph = trt.create_inference_graph(
        input_graph_def=frozen_graph,
        outputs=output_names,
        max_batch_size=1,
        max_workspace_size_bytes=1 << 25,
        precision_mode='FP16',
        minimum_segment_size=50
    )
    # Save TensorRT graph so the next run can skip the build
    with tf.gfile.GFile(graph_path, 'wb') as f:
        f.write(trt_graph.SerializeToString())
    print("optimized graph ... ready to predict")
    # output_names only exists on this build path; printing it here avoids
    # referencing an unbound name after a cache hit.
    print(output_names)
    return (trt_graph, input_names)
# Function to read labels from text files.
def ReadLabelFile(file_path):
    """Read an ``<id> <label>`` text file into a dictionary.

    Each non-blank line is split on the first run of whitespace: the first
    token is converted to ``int`` and used as the key, the remainder
    (stripped) is the label, so labels may themselves contain spaces.
    Blank or whitespace-only lines are skipped.

    Args:
        file_path: Path of the label file.

    Returns:
        dict mapping integer id to label string.

    Raises:
        ValueError: If a line has no label after the id, or the id is not
            an integer.
    """
    ret = {}
    with open(file_path, 'r', encoding='utf-8') as f:
        # Iterate the file lazily instead of materializing all lines first.
        for line_no, line in enumerate(f, start=1):
            pair = line.strip().split(maxsplit=1)
            if not pair:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            if len(pair) < 2:
                raise ValueError(
                    "%s:%d: expected '<id> <label>', got %r"
                    % (file_path, line_no, line.strip()))
            ret[int(pair[0])] = pair[1].strip()
    return ret
# Function to save given image
def save_image(data, fname, swap_channel=True):
    """Write ``data`` to ``fname`` with ``cv2.imwrite``.

    Args:
        data: Image array to write.
        fname: Destination file path.
        swap_channel: When true (default) the last axis of ``data`` is
            reversed before writing; when false ``data`` is written as is.
    """
    to_write = data[..., ::-1] if swap_channel else data
    cv2.imwrite(fname, to_write)
# Function to draw
def draw_label(image, point, label, font=cv2.FONT_HERSHEY_SIMPLEX,
               font_scale=0.5, thickness=2):
    """Draw ``label`` on ``image`` over a filled background rectangle.

    The rectangle is sized from ``cv2.getTextSize`` and spans from
    ``(x, y - text_height)`` to ``(x + text_width, y)``, where ``(x, y)`` is
    ``point``; the text itself is then drawn at ``point``.

    Args:
        image: Image array drawn on in place.
        point: ``(x, y)`` anchor passed to ``cv2.putText``.
        label: Text to render.
        font: OpenCV font face.
        font_scale: Font scale factor.
        thickness: Text stroke thickness.
    """
    text_size = cv2.getTextSize(label, font, font_scale, thickness)[0]
    text_width = text_size[0]
    text_height = text_size[1]
    left, bottom = point
    top_left = (left, bottom - text_height)
    bottom_right = (left + text_width, bottom)
    # Background box in colour (0, 0, 255), then the text in (255, 255, 255).
    cv2.rectangle(image, top_left, bottom_right, (0, 0, 255), cv2.FILLED)
    cv2.putText(image, label, point, font, font_scale,
                (255, 255, 255), thickness)
# Function to remove overlapping boxes
def non_max_suppression(boxes, probs=None, nms_threshold=0.3):
    """Greedy non-maximum suppression over axis-aligned boxes.

    Boxes are visited from the highest sort key to the lowest. Each visited
    box is kept, and every remaining box whose overlap with it exceeds
    ``nms_threshold`` is discarded. The overlap is the intersection area
    divided by the area of the *remaining* box (not intersection-over-union).
    Widths and heights are computed with a ``+ 1``.

    Args:
        boxes: Array-like of shape (N, 4); columns 0..3 are read as
            x1, y1, x2, y2. Lists and tuples are accepted as well as
            NumPy arrays.
        probs: Optional array-like of N scores used as the sort key. When
            ``None``, boxes are ordered by their fourth column instead.
        nms_threshold: Overlap ratio above which a box is suppressed.

    Returns:
        List of the indexes (into ``boxes``) that were kept, in the order
        they were picked. Empty list when ``boxes`` is empty.
    """
    # Accept plain Python sequences too; the column slicing below needs an
    # ndarray.
    boxes = np.asarray(boxes)

    # if there are no boxes, return an empty list
    if len(boxes) == 0:
        return []

    # Integer boxes (signed or unsigned) are converted to float: the overlap
    # is a ratio, and unsigned subtraction could wrap around.
    if boxes.dtype.kind in "iu":
        boxes = boxes.astype("float")

    # grab the coordinates of the bounding boxes
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    area = (x2 - x1 + 1) * (y2 - y1 + 1)

    # Ascending sort, so the best candidate is always the last element.
    order = np.argsort(y2 if probs is None else probs)

    pick = []
    while len(order) > 0:
        i = order[-1]
        rest = order[:-1]
        pick.append(i)

        # Intersection of box i with every remaining box.
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)

        # compute the ratio of overlap
        overlap = (w * h) / area[rest]

        # Drop box i and everything overlapping it by more than the
        # threshold. Written as a negation so that NaN ratios are kept,
        # exactly as with deleting only the `overlap > threshold` entries.
        order = rest[~(overlap > nms_threshold)]

    # return only the bounding boxes indexes
    return pick
def main():
    """Detect objects in one image with a TF-TRT optimized model and benchmark it.

    Reads the command line via ``argParser``, obtains the optimized graph via
    ``load_graph``, runs one warm-up inference followed by 1000 timed
    inferences on the same image, applies non-maximum suppression, prints
    and draws every kept detection scoring at least 50 %, writes the
    annotated image to ``--image_out`` and prints the mean latency and FPS.

    Raises:
        IOError: If the input image cannot be read.
    """
    args = argParser()
    model = args.model
    data_dir = './data/'
    image_path = args.image
    graph_path = data_dir + model + '_trt.pb'
    labels_path = './data/coco_labels.txt'

    # Download model checkpoint and sample image
    config_path, checkpoint_path = download_detection_model(model, 'data')

    # load TensorRT optimized graph for inference
    trt_graph, input_names = load_graph(graph_path, config_path, checkpoint_path)

    # Create session and load graph
    print("------------------------------------")
    print("creating session for inference")
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    tf_sess = tf.Session(config=tf_config)
    try:
        tf.import_graph_def(trt_graph, name='')
        tf_input = tf_sess.graph.get_tensor_by_name(input_names[0] + ':0')
        fetches = [
            tf_sess.graph.get_tensor_by_name('detection_scores:0'),
            tf_sess.graph.get_tensor_by_name('detection_boxes:0'),
            tf_sess.graph.get_tensor_by_name('detection_classes:0'),
            tf_sess.graph.get_tensor_by_name('num_detections:0'),
        ]

        # Load and preprocess image
        print("------------------------------------")
        print("loading image")
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise IOError("could not read image: %s" % image_path)
        image_resized = cv2.resize(image, (args.size, args.size))
        # cv2.imread returns channels in BGR order. The detection model is
        # presumably trained on RGB input (NOTE(review): confirm), so the
        # network input is converted; `image` itself stays BGR for drawing
        # and for cv2.imwrite.
        image_rgb = cv2.cvtColor(image_resized, cv2.COLOR_BGR2RGB)
        feed = {tf_input: image_rgb[None, ...]}  # add the batch dimension once

        print("------------------------------------")
        print("warmup prediction ...")
        start_time = time.time()
        scores, boxes, classes, num_detections = tf_sess.run(fetches, feed_dict=feed)
        delta = (time.time() - start_time)
        print('warmup run time is %.4f ms' % (delta*1000))

        # Execute model
        print("------------------------------------")
        print("starting object detection on Nvidida Jetson TX2 ...")
        runs = 1000
        print("running inferencing for ", runs, " times.")
        times = []
        for _ in range(runs):
            start_time = time.time()
            scores, boxes, classes, num_detections = tf_sess.run(fetches, feed_dict=feed)
            times.append(time.time() - start_time)
        mean_delta = np.array(times).mean()
        fps = 1/mean_delta

        boxes = boxes[0]  # Index by 0 to remove batch dimension
        scores = scores[0]
        classes = classes[0]
        num_detections = int(num_detections[0])

        # Boxes unit in pixels (image coordinates). Each box is multiplied by
        # (rows, cols, rows, cols) of the original image; assumes the model
        # emits normalized [ymin, xmin, ymax, xmax] -- TODO confirm.
        rows, cols = image.shape[0], image.shape[1]
        scale = np.array([rows, cols, rows, cols])
        boxes_pixels = np.round(boxes[:num_detections] * scale).astype(int)

        # Remove overlapping boxes with non-max suppression, return picked indexes.
        pick = non_max_suppression(boxes_pixels, scores[:num_detections], 0.5)

        print("------------------------------------")
        print("Prediction from object detection model: " + model)

        # Read all labels
        labels = ReadLabelFile(labels_path)
        min_score = 0.5
        print("%d objects predicted" % num_detections)
        print("objects with a minimum score of %.1f %%" % (min_score*100))
        print(" **********************")

        # Run through non overlapping boxes and objects
        for i in pick:
            score = scores[i]
            if score < min_score:
                continue
            # Cast to a plain int so the label dictionary is keyed
            # consistently; fall back to a placeholder for ids that are
            # missing from the label file instead of raising KeyError.
            class_id = int(classes[i])
            name = labels.get(class_id, 'unknown (%d)' % class_id)
            print(" Object " + str(i+1))
            print(" Accuracy: %.4f %%" % (score*100))
            print(" Label : %s" % name)
            print(" **********************")
            # Plain Python ints for the OpenCV drawing calls.
            top, left, bottom, right = (int(v) for v in boxes_pixels[i])
            # Draw bounding box.
            image = cv2.rectangle(image, (left, top), (right, bottom),
                                  (0, 0, 255), 2)
            label = name + ": {:.4f}%".format(score*100)
            draw_label(image, (left, top), label)

        # `image` is still in the order cv2.imread produced, so it is written
        # without a channel swap (the original swapped twice, which cancels).
        save_image(image, args.image_out, swap_channel=False)

        print("------------------------------------")
        print('Time[ms] : %.4f' % (mean_delta*1000))
        print('FPS : %.4f' % fps)
        print("------------------------------------")
    finally:
        # Close session to release resources, even if inference failed
        tf_sess.close()


if __name__ == '__main__':
    main()
This takes about 15 min to build and load the model.
It doesn’t save an engine file, just a frozen graph file. How could I save an engine instead? As I understand it, the engine file has to match the specific model, for example MobileNet SSD V2. Am I right?
Yes I can see it on the Jetson Zoo page: https://elinux.org/Jetson_Zoo
I just want to use TensorFlow models; that’s why I couldn’t really make use of the Jetson-Inference repo for image classification.