Hi there
I trained a graph with ssd_inception and converted it to a TensorRT graph to get faster results.
But the code is still way too slow, almost stalling.
I could use some help; here is my implementation:
The program detects a drone and calculates the distance to it.
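For context, I created the TensorRT graph with tf.contrib.tensorrt roughly like the sketch below (the frozen-graph path and the exact parameter values here are placeholders, not necessarily what I used):

# Rough sketch of the TF-TRT conversion step (paths and parameters are
# placeholders, not necessarily the exact values used).
import tensorflow as tf
import tensorflow.contrib.tensorrt as trt

frozen_graph = tf.GraphDef()
with tf.gfile.GFile('drone_graph/frozen_inference_graph.pb', 'rb') as f:
    frozen_graph.ParseFromString(f.read())

trt_graph = trt.create_inference_graph(
    input_graph_def=frozen_graph,
    outputs=['detection_boxes', 'detection_scores',
             'detection_classes', 'num_detections'],
    max_batch_size=1,
    max_workspace_size_bytes=1 << 25,
    precision_mode='FP16',
    minimum_segment_size=50)

with tf.gfile.GFile('drone_graph/TensorRT_model.pb', 'wb') as f:
    f.write(trt_graph.SerializeToString())

The detection script itself: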
import cv2
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import tensorflow.contrib.tensorrt as trt
#tf.contrib.resampler
import zipfile
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
#from win32api import GetSystemMetrics
import glob
import subprocess
# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("...")
from utils import label_map_util
from utils import visualization_utils as vis_util
cap1 = cv2.VideoCapture(1)
cap2 = cv2.VideoCapture(0)
CAMERA_WIDTH = 1920
CAMERA_HEIGHT = 1080
IMAGE_WIDTH = 640
IMAGE_HEIGHT = 480
#User set camera
cap1.set(3, CAMERA_WIDTH) # Set resolution width
cap1.set(4, CAMERA_HEIGHT) # Set resolution height
#retl_e = img_l.set(15, exposure) # Set exposure.
cap2.set(3, CAMERA_WIDTH) # Set resolution width
cap2.set(4, CAMERA_HEIGHT) # Set resolution height
#drone image:
#droneIMG = cv2.resize(cv2.imread('backgroundIMG/drone_image.jpg'), (50,50))
output = subprocess.Popen('xrandr | grep "*" | cut -d" " -f4', shell=True, stdout=subprocess.PIPE).communicate()[0]
screenX, screenY = [int(i) for i in output.decode("utf-8").replace("\n", "").split('x')]
# for SIFT
MIN_MATCH_COUNT = 1
# What model to use.
MODEL_NAME = 'drone_graph'
# Path to the frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = MODEL_NAME + '/TensorRT_model.pb'
# List of the strings that are used to add the correct label for each box.
PATH_TO_LABELS = os.path.join('training', 'object-detection.pbtxt')
NUM_CLASSES = 1
print("Preparing TensorFlow...")
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')
tf_config = tf.ConfigProto()  # Hannes: take GPU memory incrementally as needed
tf_config.gpu_options.allow_growth = True
'''
for op in detection_graph.get_operations():
    print(op.name)
#exit()
'''
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)#Hannes
#calibrate cameras
REMAP_INTERPOLATION = cv2.INTER_LINEAR
#read parameters
rootDir = './'
outputFile = rootDir + 'output/calibration.npz'
calibration = np.load(outputFile, allow_pickle=False)
imageSize = tuple(calibration["imageSize"])
leftMapX = calibration["leftMapX"]
leftMapY = calibration["leftMapY"]
leftROI = tuple(calibration["leftROI"])
rightMapX = calibration["rightMapX"]
rightMapY = calibration["rightMapY"]
rightROI = tuple(calibration["rightROI"])
R = calibration["rotationMatrix"]
T = calibration["translationVector"]
P1 = calibration["leftProjection"]
P2 = calibration["rightProjection"]
def load_image_into_numpy_array(image):
    (im_width, im_height) = image.size
    return np.array(image.getdata()).reshape(
        (im_height, im_width, 3)).astype(np.uint8)

def compare_SIFT(box1, img1, box2, img2):
    """code is in SIFTry, right now not working with live feed, so returning center of both boxes"""
    pass

print("Ready!")
with detection_graph.as_default():
    with tf.Session(graph=detection_graph, config=tf_config) as sess:  # pass the config so allow_growth actually takes effect
        while True:
            retL, imageL = cap1.read()
            retR, imageR = cap2.read()
            imageL = cv2.resize(imageL, (IMAGE_WIDTH, IMAGE_HEIGHT))
            imageR = cv2.resize(imageR, (IMAGE_WIDTH, IMAGE_HEIGHT))
            #whiteIMG = cv2.resize(cv2.imread('backgroundIMG/white.jpg'), (int(screenX/2),screenY))
            # for the left camera:
            # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
            image_np_expandedL = np.expand_dims(imageL, axis=0)
            image_tensorL = detection_graph.get_tensor_by_name('image_tensor:0')
            # Each box represents a part of the image where a particular object was detected.
            boxesL = detection_graph.get_tensor_by_name('detection_boxes:0')
            # Each score represents the level of confidence for each of the objects.
            # The score is shown on the result image, together with the class label.
            scoresL = detection_graph.get_tensor_by_name('detection_scores:0')
            classesL = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detectionsL = detection_graph.get_tensor_by_name('num_detections:0')
            # Actual detection.
            (boxesL, scoresL, classesL, num_detectionsL) = sess.run(
                [boxesL, scoresL, classesL, num_detectionsL],
                feed_dict={image_tensorL: image_np_expandedL})
            # Visualization of the results of a detection.
            boxL = vis_util.visualize_boxes_and_labels_on_image_array(
                imageL, np.squeeze(boxesL), np.squeeze(classesL).astype(np.int32),
                np.squeeze(scoresL),
                category_index,
                use_normalized_coordinates=True,
                line_thickness=1)
            # for the right camera:
            # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
            image_np_expandedR = np.expand_dims(imageR, axis=0)
            image_tensorR = detection_graph.get_tensor_by_name('image_tensor:0')
            # Each box represents a part of the image where a particular object was detected.
            boxesR = detection_graph.get_tensor_by_name('detection_boxes:0')
            # Each score represents the level of confidence for each of the objects.
            # The score is shown on the result image, together with the class label.
            scoresR = detection_graph.get_tensor_by_name('detection_scores:0')
            classesR = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detectionsR = detection_graph.get_tensor_by_name('num_detections:0')
            # Actual detection.
            (boxesR, scoresR, classesR, num_detectionsR) = sess.run(
                [boxesR, scoresR, classesR, num_detectionsR],
                feed_dict={image_tensorR: image_np_expandedR})
            # Visualization of the results of a detection.
            boxR = vis_util.visualize_boxes_and_labels_on_image_array(
                imageR, np.squeeze(boxesR), np.squeeze(classesR).astype(np.int32),
                np.squeeze(scoresR),
                category_index,
                use_normalized_coordinates=True,
                line_thickness=1)
            # (boxL/boxR are expected to be a single box here; the stock vis_util
            #  function returns the annotated image, so this relies on a modified version)
            if boxL is not None and boxR is not None:
                # un-normalize the boxes (pixel coordinates):
                im_widthL, im_heightL = Image.fromarray(np.uint8(imageL)).convert('RGB').size
                im_widthR, im_heightR = Image.fromarray(np.uint8(imageR)).convert('RGB').size
                yminL, xminL, ymaxL, xmaxL = boxL
                leftL, rightL = xminL * im_widthL, xmaxL * im_widthL
                topL, bottomL = yminL * im_heightL, ymaxL * im_heightL
                yminR, xminR, ymaxR, xmaxR = boxR
                leftR, rightR = xminR * im_widthR, xmaxR * im_widthR
                topR, bottomR = yminR * im_heightR, ymaxR * im_heightR
                # find the same point in both cameras:
                # just the center of the two boxes (problem with SIFT)
                #compare_SIFT(box1, image_np, box2, image_np2)
                centerL = ((leftL + rightL) / 2, (topL + bottomL) / 2)
                centerR = ((leftR + rightR) / 2, (topR + bottomR) / 2)
                # triangulate:
                arr = np.array([])
                arr = cv2.triangulatePoints(P1, P2, centerL, centerR, arr)
                arrCar = arr / arr[3]  # homogeneous -> Cartesian
                arrCar = np.delete(arrCar, (3), axis=0)
                print(centerL)
                print(centerR)
                print(arrCar)
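                # (rough sketch of how I plan to get the distance from the
                #  triangulated point; units depend on the calibration)
                # distance = float(np.linalg.norm(arrCar))
                # print("distance:", distance)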
                cv2.circle(imageR, (int(centerR[0]), int(centerR[1])), 5, (0, 0, 255), 5)
                cv2.circle(imageL, (int(centerL[0]), int(centerL[1])), 5, (0, 0, 255), 5)
                cv2.imshow('right', imageL)
                cv2.imshow('left', imageR)
                #tranX = int(centerR[0] + Pr[0]) - 25
                #tranY = int(centerR[1] + Pr[1]) - 25 + 120
                #whiteIMG[tranY:50+tranY, tranX:50+tranX] = droneIMG
                #cv2.circle(whiteIMG, (320, 360), 5, (0, 0, 0), 10)
                #cv2.line(whiteIMG, (320, 360), (tranX+25, tranY+25), (0, 0, 255), 5)
                #whiteIMG = cv2.flip(whiteIMG, 1)
                #distirng = "distance: " + "{}".format(round(Pr[2]/300, 2))
                #cv2.putText(whiteIMG, distirng, (20, 80), cv2.FONT_HERSHEY_SIMPLEX, 1, 255)
                #cv2.imshow('world', whiteIMG)
            if cv2.waitKey(25) & 0xFF == ord('q'):
                cv2.destroyAllWindows()
                print("Done!")
                break
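One thing I am not sure about: would it help to fetch the tensor handles once before the loop and feed both camera frames as a single batch, roughly like the sketch below? (This assumes the TensorRT graph was built with max_batch_size >= 2 and the standard image_tensor / detection_* tensor names; I have not tested it yet.)

# Untested sketch: look the tensors up once, then run one batched inference
# per loop instead of two separate sess.run calls.
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
det_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
det_scores = detection_graph.get_tensor_by_name('detection_scores:0')
det_classes = detection_graph.get_tensor_by_name('detection_classes:0')
num_det = detection_graph.get_tensor_by_name('num_detections:0')

while True:
    retL, imageL = cap1.read()
    retR, imageR = cap2.read()
    imageL = cv2.resize(imageL, (IMAGE_WIDTH, IMAGE_HEIGHT))
    imageR = cv2.resize(imageR, (IMAGE_WIDTH, IMAGE_HEIGHT))
    batch = np.stack([imageL, imageR], axis=0)  # shape (2, H, W, 3)
    boxes, scores, classes, num = sess.run(
        [det_boxes, det_scores, det_classes, num_det],
        feed_dict={image_tensor: batch})
    # boxes[0]/scores[0] belong to the left frame, boxes[1]/scores[1] to the right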
Thanks.