I finally managed to get everything installed correctly by reinstalling Cython, though I wonder whether I’ll have to reinstall it each time…
Anyway, I tried to create my own python script from this link (https://github.com/NVIDIA-AI-IOT/tf_trt_models/blob/master/examples/detection/detection.ipynb)
I basically copied all the commands from the notebook into a single script and executed it.
#!/usr/bin/env python
from PIL import Image
import sys
import os
import urllib
import tensorflow.contrib.tensorrt as trt
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import tensorflow as tf
import numpy as np
import time
from tf_trt_models.detection import download_detection_model, build_detection_graph
# End-to-end TF-TRT detection example as a flat script: download a pretrained
# detection model, build its frozen graph, optimize it with TensorRT, run it
# on one image, draw the detections and time repeated inference.

MODEL = 'ssd_mobilenet_v1_coco'
DATA_DIR = './data/'
# NOTE: CONFIG_FILE and CHECKPOINT_FILE are not referenced anywhere below;
# download_detection_model() returns the paths that are actually used.
CONFIG_FILE = MODEL + '.config'  # 'ssd_mobilenet_v1_coco.config'
CHECKPOINT_FILE = 'model.ckpt'
IMAGE_PATH = './data/huskies.jpg'
# Name the optimized graph after the model really in use (the original wrote
# to 'ssd_inception_v2_coco_trt.pb' regardless of MODEL).
TRT_GRAPH_PATH = os.path.join(DATA_DIR, MODEL + '_trt.pb')

# Download the pretrained model #
config_path, checkpoint_path = download_detection_model(MODEL, 'data')

# Build the frozen graph #
frozen_graph, input_names, output_names = build_detection_graph(
    config=config_path,
    checkpoint=checkpoint_path,
    score_threshold=0.3,
    batch_size=1
)

# Optimize the model with TensorRT #
print(output_names)
trt_graph = trt.create_inference_graph(
    input_graph_def=frozen_graph,
    outputs=output_names,
    max_batch_size=1,
    max_workspace_size_bytes=1 << 25,
    precision_mode='FP16',
    minimum_segment_size=20
)
with open(TRT_GRAPH_PATH, 'wb') as f:
    f.write(trt_graph.SerializeToString())

# Create session and load graph #
tf_config = tf.ConfigProto()
tf_config.gpu_options.allow_growth = True
tf_sess = tf.Session(config=tf_config)
try:
    tf.import_graph_def(trt_graph, name='')
    graph = tf_sess.graph
    tf_input = graph.get_tensor_by_name(input_names[0] + ':0')
    tf_scores = graph.get_tensor_by_name('detection_scores:0')
    tf_boxes = graph.get_tensor_by_name('detection_boxes:0')
    tf_classes = graph.get_tensor_by_name('detection_classes:0')
    tf_num_detections = graph.get_tensor_by_name('num_detections:0')

    # Load and Preprocess Image #
    image = Image.open(IMAGE_PATH)
    # Preview of the raw input; this draws into pyplot's implicit current
    # figure, separate from the results figure created further down.
    plt.imshow(image)
    image_resized = np.array(image.resize((300, 300)))
    image = np.array(image)

    # The same fetches and feed are used for the single run and the benchmark.
    fetches = [tf_scores, tf_boxes, tf_classes, tf_num_detections]
    feed = {tf_input: image_resized[None, ...]}  # [None, ...] adds the batch axis

    # Run network on Image #
    scores, boxes, classes, num_detections = tf_sess.run(fetches, feed_dict=feed)
    boxes = boxes[0]  # index by 0 to remove batch dimension
    scores = scores[0]
    classes = classes[0]
    # The detection graph reports the count as a float; range() needs an int,
    # so convert explicitly (the original range(num_detections) raised TypeError).
    num_detections = int(num_detections[0])

    # Display Results #
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.imshow(image)

    # Boxes are scaled by (height, width, height, width) of the full-size
    # image; computed once instead of on every loop iteration.
    height, width = image.shape[0], image.shape[1]
    box_scale = np.array([height, width, height, width])

    # plot boxes exceeding score threshold
    for j in range(num_detections):
        # scale box to image coordinates
        box = boxes[j] * box_scale
        # display rectangle
        patch = patches.Rectangle(
            (box[1], box[0]), box[3] - box[1], box[2] - box[0],
            color='g', alpha=0.3
        )
        ax.add_patch(patch)
        # display class index and score
        ax.text(x=box[1] + 10, y=box[2] - 10,
                s='%d (%0.2f) ' % (classes[j], scores[j]), color='w')
    # NOTE(review): the 'Agg' backend selected at import time is
    # non-interactive, so show() will not open a window; call fig.savefig(...)
    # if the rendered figure should be kept.
    plt.show()

    # Benchmark #
    num_samples = 50
    t0 = time.time()
    for _ in range(num_samples):
        scores, boxes, classes, num_detections = tf_sess.run(fetches, feed_dict=feed)
    t1 = time.time()
    print('Average runtime: %f seconds' % (float(t1 - t0) / num_samples))
finally:
    # Close session to release resources, even if a step above failed #
    tf_sess.close()
Here is the error I get each time (always at the trt.create_inference_graph function):
python3 detection_tf_trt.py
2018-11-28 14:06:23.109193: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:864] ARM64 does not support NUMA - returning NUMA node zero
2018-11-28 14:06:23.109418: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1392] Found device 0 with properties:
name: NVIDIA Tegra X2 major: 6 minor: 2 memoryClockRate(GHz): 1.3005
pciBusID: 0000:00:00.0
totalMemory: 7.66GiB freeMemory: 2.28GiB
2018-11-28 14:06:23.109482: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1471] Adding visible gpu devices: 0
2018-11-28 14:06:24.274927: I tensorflow/core/common_runtime/gpu/gpu_device.cc:952] Device interconnect StreamExecutor with strength 1 edge matrix:
2018-11-28 14:06:24.275049: I tensorflow/core/common_runtime/gpu/gpu_device.cc:958] 0
2018-11-28 14:06:24.275077: I tensorflow/core/common_runtime/gpu/gpu_device.cc:971] 0: N
2018-11-28 14:06:24.275254: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1084] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 1774 MB memory) -> physical GPU (device: 0, name: NVIDIA Tegra X2, pci bus id: 0000:00:00.0, compute capability: 6.2)
WARNING:tensorflow:From /home/nvidia/.local/lib/python3.5/site-packages/object_detection-0.1-py3.5.egg/object_detection/exporter.py:356: get_or_create_global_step (from tensorflow.contrib.framework.python.ops.variables) is deprecated and will be removed in a future version.
Instructions for updating:
Please switch to tf.train.get_or_create_global_step
2018-11-28 14:07:40.012185: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1471] Adding visible gpu devices: 0
2018-11-28 14:07:40.012332: I tensorflow/core/common_runtime/gpu/gpu_device.cc:952] Device interconnect StreamExecutor with strength 1 edge matrix:
2018-11-28 14:07:40.012377: I tensorflow/core/common_runtime/gpu/gpu_device.cc:958] 0
2018-11-28 14:07:40.012432: I tensorflow/core/common_runtime/gpu/gpu_device.cc:971] 0: N
2018-11-28 14:07:40.012569: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1084] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 1774 MB memory) -> physical GPU (device: 0, name: NVIDIA Tegra X2, pci bus id: 0000:00:00.0, compute capability: 6.2)
2018-11-28 14:08:33.326804: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1471] Adding visible gpu devices: 0
2018-11-28 14:08:33.326981: I tensorflow/core/common_runtime/gpu/gpu_device.cc:952] Device interconnect StreamExecutor with strength 1 edge matrix:
2018-11-28 14:08:33.327012: I tensorflow/core/common_runtime/gpu/gpu_device.cc:958] 0
2018-11-28 14:08:33.327044: I tensorflow/core/common_runtime/gpu/gpu_device.cc:971] 0: N
2018-11-28 14:08:33.327152: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1084] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 1774 MB memory) -> physical GPU (device: 0, name: NVIDIA Tegra X2, pci bus id: 0000:00:00.0, compute capability: 6.2)
2018-11-28 14:08:46.371327: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1471] Adding visible gpu devices: 0
2018-11-28 14:08:46.371478: I tensorflow/core/common_runtime/gpu/gpu_device.cc:952] Device interconnect StreamExecutor with strength 1 edge matrix:
2018-11-28 14:08:46.371510: I tensorflow/core/common_runtime/gpu/gpu_device.cc:958] 0
2018-11-28 14:08:46.371535: I tensorflow/core/common_runtime/gpu/gpu_device.cc:971] 0: N
2018-11-28 14:08:46.371636: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1084] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 1774 MB memory) -> physical GPU (device: 0, name: NVIDIA Tegra X2, pci bus id: 0000:00:00.0, compute capability: 6.2)
['detection_boxes', 'detection_classes', 'detection_scores', 'num_detections']
2018-11-28 14:09:41.088915: I tensorflow/core/grappler/devices.cc:51] Number of eligible GPUs (core count >= 8): 0
2018-11-28 14:09:53.230506: I tensorflow/contrib/tensorrt/convert/convert_graph.cc:438] MULTIPLE tensorrt candidate conversion: 7
Segmentation fault (core dumped)
I should mention that I installed TensorFlow 1.9.
I don’t understand why it says:
I tensorflow/core/grappler/devices.cc:51] Number of eligible GPUs (core count >= 8): 0
Also, how do I free up some RAM? The log reports:
totalMemory: 7.66GiB freeMemory: 2.28GiB
Thank you in advance