Model inferencing with TensorRT on Jetson (TX2)

Hello there!

I would like to know if it's possible to start inferencing with TensorFlow models only.
Do they need to be in the .uff format?

For example, in this repo only caffemodels are available for image classification:
https://github.com/dusty-nv/jetson-inference/blob/master/docs/imagenet-console-2.md#using-the-console-program-on-jetson

I would like to use only TensorFlow models, like in this repo:
https://github.com/NVIDIA-AI-IOT/tf_trt_models

When I try it out with the normal TensorFlow imports, it takes really long (15 min) to create a frozen graph and then optimize it with TensorRT.

In the first repo, loading the model and running inference took very little time.

Moved to Jetson TX2 forum.

I believe after you run the initial conversion, it should save the TensorRT engine and not take as long to load on subsequent runs. That particular repo is showing the TF-TRT interoperability workflow, which loads the TensorFlow runtime. There are also these similar repos that show converting the models to UFF, which only depends on TensorRT at runtime (see the sketch after these links):

https://github.com/NVIDIA-AI-IOT/tf_to_trt_image_classification
https://github.com/AastaNV/TRT_object_detection
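
If you go the standalone-UFF route, the conversion looks roughly like the sketch below (TensorRT 5/6 Python API as shipped with JetPack). The frozen-graph path, input/output node names, and input shape are placeholders that depend on your model, so treat it as an outline rather than a drop-in implementation:

import uff
import tensorrt as trt

FROZEN_GRAPH = 'data/frozen_inference_graph.pb'  # placeholder path
INPUT_NODE = 'input'                             # placeholder input node name
OUTPUT_NODE = 'scores'                           # placeholder output node name
ENGINE_PATH = 'data/model.engine'

# Convert the frozen TensorFlow graph to a serialized UFF buffer
uff_buffer = uff.from_tensorflow_frozen_model(FROZEN_GRAPH, [OUTPUT_NODE])

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
with trt.Builder(TRT_LOGGER) as builder, \
     builder.create_network() as network, \
     trt.UffParser() as parser:
    # Register the graph input/output with the parser (CHW shape, model dependent)
    parser.register_input(INPUT_NODE, (3, 224, 224))
    parser.register_output(OUTPUT_NODE)
    parser.parse_buffer(uff_buffer, network)

    builder.max_batch_size = 1
    builder.max_workspace_size = 1 << 25
    builder.fp16_mode = True
    engine = builder.build_cuda_engine(network)

    # Serialize the engine so later runs can deserialize it instead of rebuilding
    with open(ENGINE_PATH, 'wb') as f:
        f.write(engine.serialize())

The two repos above wrap this up for you (the SSD models additionally need TensorRT plugins for the NMS step), so they are the better reference for a complete pipeline.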

For image recognition in that repo, both caffemodels and ONNX models are used, and UFF is used by its object detection for SSD-Mobilenet and SSD-Inception.

Thanks for the answer and links :)

At first I tried my own code (stitched together from other examples) to use only TensorFlow models.

Image Classification:

import tensorflow as tf
#if type(tf.contrib) != type(tf): tf.contrib._warning = None
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
import tensorflow.contrib.tensorrt as trt
import argparse
from PIL import Image
#import cv2
import numpy as np
import time
from tf_trt_models.classification import download_classification_checkpoint, build_classification_graph

def argParse():
  desc = ('Run optimized TensorRT image classification models on Jetson devices.')
  parser = argparse.ArgumentParser(description=desc)
  parser.add_argument('--model', help='Name of the pretrained model.', required=True)
  parser.add_argument('--num_classes', help='Number of classes in the label file.', required=True, type=int)
  parser.add_argument('--image', help='File path of the input image.', required=True)
  args = parser.parse_args()
  return args

def load_graph(graph_path, model, num_classes, checkpoint_path):
  try:
    with tf.gfile.GFile(graph_path, "rb") as f:
      # Build frozen graph
      print("------------------------------------")
      print("loading existing TensorRT graph")
      print("------------------------------------")
      trt_graph = tf.GraphDef()
      trt_graph.ParseFromString(f.read())
  
    # NOTE: these hard-coded names must match the names printed by
    # build_classification_graph on the first run ('image_tensor' is typically
    # the detection input, so double-check it for classification models)
    input_names = ['image_tensor']
    output_names = ['scores']
    print("loaded optimized graph ... ready to predict")

  except Exception:
    # Build frozen graph
    print("------------------------------------")
    print("building graph")
    print("------------------------------------")
    frozen_graph, input_names, output_names = build_classification_graph(
        model=model,
        checkpoint=checkpoint_path,
        num_classes=num_classes
    )

    # Optimize graph with TensorRT
    print("------------------------------------")
    print("optimizing graph with TensorRT")
    print("------------------------------------")
    trt_graph = trt.create_inference_graph(
        input_graph_def=frozen_graph,
        outputs=output_names,
        max_batch_size=1,
        max_workspace_size_bytes=1 << 25,
        precision_mode='FP16',
        minimum_segment_size=50
    )

    # Save TensorRT graph
    with tf.gfile.GFile(graph_path, 'wb') as f:
        f.write(trt_graph.SerializeToString())

    print("optimized graph ... ready to predict")
    print(input_names)
    print(output_names)
  
  return (trt_graph, input_names, output_names)

def main():
  args = argParse()

  MODEL = args.model
  DATA_DIR = './data/'
  CHECKPOINT_PATH = args.model + '.ckpt'
  NUM_CLASSES = args.num_classes
  LABELS_PATH = './data/imagenet_labels_%d.txt' % NUM_CLASSES
  IMAGE_PATH = args.image
  GRAPH_PATH = DATA_DIR + MODEL + '_trt.pb'

  # Download model checkpoint and sample image
  checkpoint_path = download_classification_checkpoint(MODEL, 'data')

  # load TensorRT optimized graph for inference
  trt_graph, input_names, output_names = load_graph(GRAPH_PATH, 
    MODEL, NUM_CLASSES, checkpoint_path)

  # Create session and load graph
  print("------------------------------------")
  print("creating session for inference")
  tf_config = tf.ConfigProto()
  tf_config.gpu_options.allow_growth = True

  tf_sess = tf.Session(config=tf_config)

  tf.import_graph_def(trt_graph, name='')

  tf_input = tf_sess.graph.get_tensor_by_name(input_names[0] + ':0')
  tf_output = tf_sess.graph.get_tensor_by_name(output_names[0] + ':0')

  # Load and preprocess image
  print("------------------------------------")
  print("loading image")
  image = Image.open(IMAGE_PATH)

  # NHWC input: shape[1] is height, shape[2] is width
  height = int(tf_input.shape.as_list()[1])
  width = int(tf_input.shape.as_list()[2])

  # PIL's resize() expects (width, height)
  image = np.array(image.resize((width, height)))

  print("------------------------------------")
  print("warmup prediction ...")
  start_time = time.time()
  output = tf_sess.run(tf_output, feed_dict={tf_input: image[None, ...]})
  delta = (time.time() - start_time)
  print('warmup run time is %.4f ms' % (delta*1000))

  # Execute model
  print("------------------------------------")
  print("starting image classification on Nvidida Jetson TX2 ...")
  runs = 1000
  print("running inferencing for ", runs, " times.")

  times = []
  for i in range(runs):
    start_time = time.time()
    output = tf_sess.run(tf_output, feed_dict={tf_input: image[None, ...]})
    delta = (time.time() - start_time)
    times.append(delta)

  mean_delta = np.array(times).mean()
  fps = 1/mean_delta

  print("------------------------------------")
  print("Prediction from image classification model: " + MODEL)
  scores = output[0]
  # Get top 5 labels
  with open(LABELS_PATH, 'r') as f:
      labels = f.readlines()

  top5_idx = scores.argsort()[::-1][0:5]

  num = 1
  for i in top5_idx:
    print("  Object " + str(num))
    print("  Accuracy: %.4f %%" % (scores[i]*100))
    print("  Label   : %s" % labels[i])
    print("  **********************")
    num += 1

  print("------------------------------------")
  print('Time[ms] : %.4f' % (mean_delta*1000))
  print('FPS      : %.4f' % fps)
  print("------------------------------------")

  # Close session to release resources
  tf_sess.close()

if __name__ == '__main__':
  main()
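
For reference, I call it roughly like this (script name, model name, and image path are just examples):

python3 tf_trt_classification.py --model inception_v1 --num_classes 1001 --image data/dog.jpg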

Object Detection:

import tensorflow as tf
#if type(tf.contrib) != type(tf): tf.contrib._warning = None
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
import tensorflow.contrib.tensorrt as trt
import argparse
from PIL import Image
import cv2
import numpy as np
import time
from tf_trt_models.detection import download_detection_model, build_detection_graph

def argParser():
  desc = ('Run optimized TensorRT object detection models on Jetson devices.')
  parser = argparse.ArgumentParser(description=desc)
  parser.add_argument('--model', help='Name of the pretrained model.', required=True)
  parser.add_argument('--image', help='File path of the input image.', required=True)
  parser.add_argument('--image_out', help='File path to save result image.', required=True)
  parser.add_argument('--size', help='Size of the models tensor.', type=int, default=300)
  args = parser.parse_args()
  return args

def load_graph(graph_path, config_path, checkpoint_path):
  try:
    with tf.gfile.GFile(graph_path, "rb") as f:
      # Build frozen graph
      print("------------------------------------")
      print("loading existing TensorRT graph")
      print("------------------------------------")
      trt_graph = tf.GraphDef()
      trt_graph.ParseFromString(f.read())

    input_names = ['image_tensor']
    print("loaded optimized graph ... ready to predict")

  except Exception:
    # Build frozen graph
    print("------------------------------------")
    print("building graph")
    print("------------------------------------")
    frozen_graph, input_names, output_names = build_detection_graph(
        config=config_path,
        checkpoint=checkpoint_path,
        score_threshold=0.3,
        batch_size=1,
        force_nms_cpu=False
    )

    # Optimize graph with TensorRT
    print("------------------------------------")
    print("optimizing graph with TensorRT")
    print("------------------------------------")
    trt_graph = trt.create_inference_graph(
        input_graph_def=frozen_graph,
        outputs=output_names,
        max_batch_size=1,
        max_workspace_size_bytes=1 << 25,
        precision_mode='FP16',
        minimum_segment_size=50
    )
    with tf.gfile.GFile(graph_path, 'wb') as f:
        f.write(trt_graph.SerializeToString())
    print("optimized graph ... ready to predict")
    print(output_names)

  return (trt_graph, input_names)

# Function to read labels from text files.
def ReadLabelFile(file_path):
  with open(file_path, 'r') as f:
    lines = f.readlines()
  ret = {}
  for line in lines:
    pair = line.strip().split(maxsplit=1)
    ret[int(pair[0])] = pair[1].strip()
  return ret

# Function to save given image
def save_image(data, fname, swap_channel=True):
    if swap_channel:
        data = data[..., ::-1]
    cv2.imwrite(fname, data)

# Function to draw
def draw_label(image, point, label, font=cv2.FONT_HERSHEY_SIMPLEX,
               font_scale=0.5, thickness=2):
    size = cv2.getTextSize(label, font, font_scale, thickness)[0]
    x, y = point
    cv2.rectangle(image, (x, y - size[1]),
                  (x + size[0], y), (0, 0, 255), cv2.FILLED)
    cv2.putText(image, label, point, font, font_scale,
                (255, 255, 255), thickness)

# Function to remove overlapping boxes
def non_max_suppression(boxes, probs=None, nms_threshold=0.3):
    # if there are no boxes, return an empty list
    if len(boxes) == 0:
        return []

    # if the bounding boxes are integers, convert them to floats -- this
    # is important since we'll be doing a bunch of divisions
    if boxes.dtype.kind == "i":
        boxes = boxes.astype("float")

    # initialize the list of picked indexes
    pick = []

    # grab the coordinates of the bounding boxes
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]

    # compute the area of the bounding boxes and grab the indexes to sort
    # (in the case that no probabilities are provided, simply sort on the
    # bottom-left y-coordinate)
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = y2

    # if probabilities are provided, sort on them instead
    if probs is not None:
        idxs = probs

    # sort the indexes
    idxs = np.argsort(idxs)

    # keep looping while some indexes still remain in the indexes list
    while len(idxs) > 0:
        # grab the last index in the indexes list and add the index value
        # to the list of picked indexes
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)

        # find the largest (x, y) coordinates for the start of the bounding
        # box and the smallest (x, y) coordinates for the end of the bounding
        # box
        xx1 = np.maximum(x1[i], x1[idxs[:last]])
        yy1 = np.maximum(y1[i], y1[idxs[:last]])
        xx2 = np.minimum(x2[i], x2[idxs[:last]])
        yy2 = np.minimum(y2[i], y2[idxs[:last]])

        # compute the width and height of the bounding box
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)

        # compute the ratio of overlap
        overlap = (w * h) / area[idxs[:last]]

        # delete all indexes from the index list that have overlap greater
        # than the provided overlap threshold
        idxs = np.delete(idxs, np.concatenate(([last],
                                               np.where(overlap > nms_threshold)[0])))
    # return only the bounding boxes indexes
    return pick

def main():
  args = argParser()  

  MODEL = args.model
  DATA_DIR = './data/'
  CONFIG_FILE = MODEL + '.config'
  CHECKPOINT_PATH = 'model.ckpt'
  IMAGE_PATH = args.image
  GRAPH_PATH = DATA_DIR + MODEL + '_trt.pb'
  LABELS_PATH = './data/coco_labels.txt'

  # Download model checkpoint and sample image
  config_path, checkpoint_path = download_detection_model(MODEL, 'data')

  # load TensorRT optimized graph for inference
  trt_graph, input_names = load_graph(GRAPH_PATH, config_path, checkpoint_path)

  # Create session and load graph
  print("------------------------------------")
  print("creating session for inference")
  tf_config = tf.ConfigProto()
  tf_config.gpu_options.allow_growth = True

  tf_sess = tf.Session(config=tf_config)

  tf.import_graph_def(trt_graph, name='')

  tf_input = tf_sess.graph.get_tensor_by_name(input_names[0] + ':0')
  tf_scores = tf_sess.graph.get_tensor_by_name('detection_scores:0')
  tf_boxes = tf_sess.graph.get_tensor_by_name('detection_boxes:0')
  tf_classes = tf_sess.graph.get_tensor_by_name('detection_classes:0')
  tf_num_detections = tf_sess.graph.get_tensor_by_name('num_detections:0')

  # Load and preprocess image
  print("------------------------------------")
  print("loading image")
  image = cv2.imread(IMAGE_PATH)
  image_resized = cv2.resize(image, (args.size, args.size))

  print("------------------------------------")
  print("warmup prediction ...")
  start_time = time.time()
  scores, boxes, classes, num_detections = tf_sess.run([tf_scores, tf_boxes, tf_classes, tf_num_detections], 
    feed_dict={tf_input: image_resized[None, ...]})
  delta = (time.time() - start_time)
  print('warmup run time is %.4f ms' % (delta*1000))

  # Execute model
  print("------------------------------------")
  print("starting object detection on Nvidida Jetson TX2 ...")
  runs = 1000
  print("running inferencing for ", runs, " times.")

  times = []
  for i in range(runs):
    start_time = time.time()
    scores, boxes, classes, num_detections = tf_sess.run([tf_scores, tf_boxes, tf_classes, tf_num_detections], 
      feed_dict={tf_input: image_resized[None, ...]})
    delta = (time.time() - start_time)
    times.append(delta)

  mean_delta = np.array(times).mean()
  fps = 1/mean_delta

  boxes = boxes[0] # Index by 0 to remove batch dimension
  scores = scores[0]
  classes = classes[0]
  num_detections = int(num_detections[0])

  # Boxes unit in pixels (image coordinates).
  boxes_pixels = []
  for i in range(num_detections):
      # scale box to image coordinates
      box = boxes[i] * np.array([image.shape[0],
                                 image.shape[1], image.shape[0], image.shape[1]])
      box = np.round(box).astype(int)
      boxes_pixels.append(box)

  boxes_pixels = np.array(boxes_pixels)

  # Remove overlapping boxes with non-max suppression, return picked indexes.
  pick = non_max_suppression(boxes_pixels, scores[:num_detections], 0.5)

  print("------------------------------------")
  print("Prediction from object detection model: " + MODEL)

  # Read all labels
  labels = ReadLabelFile(LABELS_PATH)
  
  min_score = 0.5
  print("%d objects predicted" % num_detections)
  print("objects with a minimum score of %.1f %%" % (min_score*100))
  print("  **********************")

  # Run through the non-overlapping boxes kept by NMS
  for i in pick:
    score = scores[i]
    if score >= min_score:
      classID = classes[i]
      print("  Object " + str(i+1))
      print("  Accuracy: %.4f %%" % (score*100))
      print("  Label   : %s" % labels[classID])
      print("  **********************")

      box = boxes_pixels[i]
      box = np.round(box).astype(int)
      # Draw bounding box.
      image = cv2.rectangle(image, (box[1], box[0]), (box[3], box[2]), 
        (0, 0, 255), 2)

      label = labels[classID] + ": {:.4f}%".format(score*100)
      draw_label(image, (box[1], box[0]), label)
    
  save_image(image[:, :, ::-1], args.image_out)
	  
  print("------------------------------------")
  print('Time[ms] : %.4f' % (mean_delta*1000))
  print('FPS      : %.4f' % fps)
  print("------------------------------------")    

  # Close session to release resources
  tf_sess.close()

if __name__ == '__main__':
  main()
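
I call it with something like this (script name, model name, and file paths are just examples):

python3 tf_trt_detection.py --model ssd_mobilenet_v1_coco --image data/dogs.jpg --image_out data/dogs_detected.jpg --size 300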

This takes about 15 min to build and load the model.

It doesn’t save an engine file, just a frozen graph file. How could I do that? The engine file has to match the specific model it was built from, for example MobileNet SSD V2. Am I right?

Yes, I can see it on the Jetson Zoo page: https://elinux.org/Jetson_Zoo
I just want to use TensorFlow models, which is why I couldn’t really make use of the jetson-inference repo for image classification.