Hey guys, I'm a graduate student in Industrial Design and I'm currently using the Jetson TX2 to integrate face detection and facial landmark detection into one of my prototypes. I have it working right now with the developer kit camera that comes with the TX2. My problem is that I can only reach a frame rate of about 4 fps, while on my laptop (32 GB RAM, 4 GB GPU, I know, not a fair comparison) I get what feels like real-time detection. I'm using Python, OpenCV, dlib and a few more packages to get it all working. However, I'm not a diehard programmer, so I honestly have no idea whether I'm using the Jetson TX2 to its fullest potential. So my question is: how can I check this? Are there ways to make this code run faster? Any advice in general? You can find my code below:
# the needed libraries/packages
from imutils import face_utils
from imutils.video import FPS
import numpy as np
import argparse
import imutils
import time
import dlib
import cv2
# Put arguments that you want to parse here
ap = argparse.ArgumentParser()
ap.add_argument("-s","--shape-predictor",default="./shape_predictor_5_face_landmarks.dat",help="path to facial landmark predictor")
ap.add_argument("-p", "--prototxt", default="./deploy.prototxt.txt",help="path to Caffe 'deploy' prototxt file")
ap.add_argument("-m", "--model", default="./res10_300x300_ssd_iter_140000.caffemodel", help="path to Caffe pre-trained model")
ap.add_argument("-c", "--confidence", type=float, default=0.2, help="minimum probability to filter weak detections")
ap.add_argument("-b","--blurtreshold",type=float,default=100.0,help="The sharpnes treshold")
ap.add_argument("-t", "--sizetreshold",type=float,default=0.4,help="The amount of pixels the face takes in percentage of total video resolution")
ap.add_argument("-w","--width",type=int,default=1920,help="width of video input")
ap.add_argument("-l","--height",type=int,default=1080,help="height of video input")
args = vars(ap.parse_args())
# load AI models here
print("[INFO] loading models...")
net = cv2.dnn.readNetFromCaffe(args["prototxt"], args["model"]) #find faces using Deep Neural Net
predictor = dlib.shape_predictor(args["shape_predictor"]) #Find face features using dlib
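# I'm honestly not sure cv2.dnn touches the GPU at all here. From what I've read
# it runs on the CPU unless OpenCV itself was built with CUDA support (4.2+, I
# believe). If your build has it, these two calls should push inference to the
# GPU (untested on mine, so commented out):
# net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
# net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)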
# start the camera through a GStreamer pipeline
gst_str = ("nvcamerasrc ! "
           "video/x-raw(memory:NVMM), width=(int)2592, height=(int)1458, format=(string)I420, framerate=(fraction)30/1 ! "
           "nvvidconv ! video/x-raw, width=(int){}, height=(int){}, format=(string)BGRx ! "
           "videoconvert ! appsink").format(args["width"], args["height"])
vs = cv2.VideoCapture(gst_str, cv2.CAP_GSTREAMER)
fps = FPS().start()
time.sleep(0.5)
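# Note to self: this pipeline converts full-size BGRx frames with videoconvert
# on the CPU, and the loop below then immediately shrinks them to width 400.
# Requesting something closer to that size from nvvidconv instead of 1920x1080
# might save a lot of CPU time; I haven't measured this yet.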
# Using the variance of the Laplacian, the sharpness of the image is checked.
def variance_of_laplacian(image):
    return cv2.Laplacian(image, cv2.CV_64F).var()
def create_eye_rect(x1, y1, x2, y2, name):
    # startY/endY come from the detection loop below (globals at call time)
    faceHeight = endY - startY
    eyeHeight = faceHeight / 8
    cv2.rectangle(frame, (x1, y1 - int(eyeHeight / 2)), (x2, y2 + int(eyeHeight / 2)), (0, 255, 0), 2)
    roi = frame[y1 - int(eyeHeight / 2):y2 + int(eyeHeight / 2), x1:x2]
    sharpnessFactor = variance_of_laplacian(roi)
    print(name, "has variance of Laplacian of:", sharpnessFactor)
def find_center_of_eye(eye, name):
    x1, y1 = eye[0]
    x2, y2 = eye[1]
    x3 = int(x1 + ((x2 - x1) / 2))
    y3 = int(y1 + ((y2 - y1) / 2))
    create_eye_rect(int(x1), int(y1), int(x2), int(y2), name)
    return cv2.circle(frame, (x3, y3), 4, (255, 0, 255), -1)
# loop over the frames from the video stream
while True:
    ret_val, frame = vs.read()  # grab a frame from the GStreamer pipeline
    frame = imutils.resize(frame, width=400)
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    (h, w) = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)), 1.0, (300, 300), (104.0, 177.0, 123.0))
    net.setInput(blob)
    detections = net.forward()
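    # (I suspect this forward pass is where most of the ~4 fps goes, but I
    # haven't profiled it properly yet.)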
    for i in range(0, detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        # skip weak detections
        if confidence < args["confidence"]:
            continue
        box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
        (startX, startY, endX, endY) = box.astype("int")
        dlib_rect = dlib.rectangle(int(startX), int(startY), int(endX), int(endY))
        text1 = "DNN confidence = {:.2f}%".format(confidence * 100)
        text2 = "5-point facial landmark model enabled"
        text3 = "No frontal face recognized"
        cv2.rectangle(frame, (startX, startY), (endX, endY), (0, 0, 255), 2)
        cv2.rectangle(frame, (0, 0), (680, 30), (255, 255, 255), -1)
        cv2.putText(frame, text1, (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 0), 1)
        # only run the landmark predictor on faces that are big enough
        if ((endY - startY) / h) > args["sizetreshold"]:
            shape = predictor(gray, dlib_rect)
            shape = face_utils.shape_to_np(shape)
            for (x, y) in shape:
                cv2.circle(frame, (x, y), 2, (255, 255, 255), 1)
            eye_one = shape[0:2]
            eye_two = shape[2:4]
            nose = shape[4:]
            find_center_of_eye(eye_one, "sharpness eye one")
            find_center_of_eye(eye_two, "sharpness eye two")
cv2.imshow("Frame", frame)
key = cv2.waitKey(1) & 0xFF
fps.update()
# if the `q` key was pressed, break from the loop
if key == ord("q"):
break
fps.stop()
print("[INFO] approx.FPS: {:.2f}".format(fps.fps()))
cv2.destroyAllWindows()
vs.stop()
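P.S. Two things I've read about but haven't verified myself: the TX2 apparently doesn't run at full clocks by default, and `sudo nvpmodel -m 0` followed by `sudo ./jetson_clocks.sh` (it sits in the home directory on my JetPack version, I think) should switch it to max performance. Also, running `sudo ./tegrastats` alongside the script supposedly shows live CPU/GPU load, which should tell me whether the GPU is even being used. If anyone can confirm these are the right tools for checking utilization, that would already help a lot.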