Issue while feeding data to the eye gaze model

Hi @Morganh

I am using an RTX 2080 Ti GPU.

I am trying to write an inference script for the eye gaze (GazeNet) model.

I was able to get results from the facial landmarks model (facial landmarks, left-eye image, right-eye image), but I do not understand how to pass this information to the eye gaze model. Please help me with the scripting: how should the images and facial landmarks be preprocessed to get output from the eye gaze model?

FacialLandmark script:

import os

import cv2
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
import tensorrt as trt

trt_engine_path = "../models/faciallandmark/faciallandmarks.etlt_b4_gpu0_int8.engine"

class HostDeviceMem(object):
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)

    def __repr__(self):
        return self.__str__()


def load_engine(trt_runtime, engine_path):
    with open(engine_path, "rb") as f:
        engine_data = f.read()
    engine = trt_runtime.deserialize_cuda_engine(engine_data)
    return engine

# Allocates all buffers required for an engine, i.e. host/device inputs/outputs.
def allocate_buffers(engine, batch_size=1):
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
            # print(f"input: shape:{engine.get_binding_shape(binding)} dtype:{engine.get_binding_dtype(binding)}")
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
            # print(f"output: shape:{engine.get_binding_shape(binding)} dtype:{engine.get_binding_dtype(binding)}")
    return inputs, outputs, bindings, stream



def do_model_2_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    # Transfer input data to the GPU.
    [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
    # Run inference.
    context.execute_async(
        batch_size=batch_size, bindings=bindings, stream_handle=stream.handle
    )
    # Transfer predictions back from the GPU.
    [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
    # Synchronize the stream
    stream.synchronize()
    # Return only the host outputs.
    return [out.host for out in outputs]

def model_loading(trt_engine_path):
    # Note: CUDA_VISIBLE_DEVICES must be set before `import pycuda.autoinit`
    # for it to take effect; setting it here is too late.
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    # TensorRT logger singleton
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    trt_runtime = trt.Runtime(TRT_LOGGER)
    trt_engine = load_engine(trt_runtime, trt_engine_path)
    # Execution context is needed for inference
    context = trt_engine.create_execution_context()
    # Facial landmarks model input: 1x1x80x80 grayscale face crop
    input_shape = (1, 1, 80, 80)
    context.set_binding_shape(0, input_shape)
    # This allocates memory for network inputs/outputs on both CPU and GPU
    inputs, outputs, bindings, stream = allocate_buffers(trt_engine)
    return inputs, outputs, bindings, stream, context


inputs_model_2, outputs_model_2, bindings_model_2, stream_model_2, context_model_2 = model_loading(trt_engine_path)



def _landmark_minimax_boxes(landmarks, r):
    # Axis-aligned bounding box [x0, y0, x1, y1] of the landmarks in range r.
    landmarks = landmarks[r[0]:r[1]]
    x0, y0 = np.min(landmarks, axis=0)
    x1, y1 = np.max(landmarks, axis=0)
    return [x0, y0, x1, y1]


def leye_boxes(landmarks):
    # Points 42-47 in the 68-point landmark convention.
    return _landmark_minimax_boxes(landmarks, (42, 48))


def reye_boxes(landmarks):
    # Points 36-41 in the 68-point landmark convention.
    return _landmark_minimax_boxes(landmarks, (36, 42))





orig_img = cv2.imread("./images/crop_face1.jpg")
orig_img_resize = cv2.resize(orig_img, (80, 80))

image = cv2.cvtColor(orig_img_resize, cv2.COLOR_BGR2GRAY)


# Wrap in a batch dimension and flatten into the 1x1x80x80 input binding.
# Note: depending on how the model was exported, input scaling
# (e.g. dividing by 255.0) may also be needed.
image = np.array([image], dtype=np.float32)
np.copyto(inputs_model_2[0].host, image.ravel())
outputs = do_model_2_inference(context_model_2, bindings=bindings_model_2, inputs=inputs_model_2, outputs=outputs_model_2, stream=stream_model_2)

# outputs[1] is a flat array of landmark coordinates [x0, y0, x1, y1, ...];
# pair them up into (x, y) points.
face_landmark_orig = outputs[1].astype(int).tolist()
face_landmark = [face_landmark_orig[x:x + 2] for x in range(0, len(face_landmark_orig), 2)]

print("face_landmark:",face_landmark)

print("outputs : ",len(outputs[1]))
for i in range(0,len(outputs[1]),2):
    cv2.circle(orig_img_resize, (int(outputs[1][i]), int(outputs[1][i+1])), 1, (0, 0, 255 ), 1 )


leye_cord = leye_boxes(face_landmark)
reye_cord = reye_boxes(face_landmark)

cv2.rectangle(orig_img_resize, (leye_cord[0], leye_cord[1]), (leye_cord[2], leye_cord[3]), (255,0,0), 1)
cv2.rectangle(orig_img_resize, (reye_cord[0], reye_cord[1]), (reye_cord[2], reye_cord[3]), (255,0,0), 1)

leye_image = orig_img_resize[leye_cord[1]:leye_cord[3],leye_cord[0]:leye_cord[2]]
reye_image = orig_img_resize[reye_cord[1]:reye_cord[3],reye_cord[0]:reye_cord[2]]


cv2.imwrite('face_landmark.jpg',orig_img_resize)

Output image: [attached face_landmark.jpg, showing the 80x80 face crop with the drawn landmarks and eye boxes]

EyeGazing Script:

I would like to implement the eye_gazing function below, feeding it the face image, eye crops, and landmarks from the FacialLandmark script, and get back the gaze points returned by the eye gaze model; a rough sketch of what I have in mind follows the script below.

import os

import cv2
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
import tensorrt as trt

trt_engine_path = "../models/gazenet/gazenet_facegrid.etlt_b8_gpu0_fp16.engine"

class HostDeviceMem(object):
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)

    def __repr__(self):
        return self.__str__()


def load_engine(trt_runtime, engine_path):
    with open(engine_path, "rb") as f:
        engine_data = f.read()
    engine = trt_runtime.deserialize_cuda_engine(engine_data)
    return engine

# Allocates all buffers required for an engine, i.e. host/device inputs/outputs.
def allocate_buffers(engine, batch_size=1):
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
            # print(f"input: shape:{engine.get_binding_shape(binding)} dtype:{engine.get_binding_dtype(binding)}")
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
            # print(f"output: shape:{engine.get_binding_shape(binding)} dtype:{engine.get_binding_dtype(binding)}")
    return inputs, outputs, bindings, stream



def do_model_2_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    # Transfer input data to the GPU.
    [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
    # Run inference.
    context.execute_async(
        batch_size=batch_size, bindings=bindings, stream_handle=stream.handle
    )
    # Transfer predictions back from the GPU.
    [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
    # Synchronize the stream
    stream.synchronize()
    # Return only the host outputs.
    return [out.host for out in outputs]

def model_loading(trt_engine_path):
    # Note: CUDA_VISIBLE_DEVICES must be set before `import pycuda.autoinit`
    # for it to take effect; setting it here is too late.
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    # TensorRT logger singleton
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    trt_runtime = trt.Runtime(TRT_LOGGER)
    trt_engine = load_engine(trt_runtime, trt_engine_path)
    # Execution context is needed for inference
    context = trt_engine.create_execution_context()
    # Note: GazeNet has multiple input bindings (face, left eye, right eye,
    # facegrid), so setting a single shape on binding 0 is probably not
    # sufficient; this shape is a guess I am unsure about.
    input_shape = (1, 3, 48, 96)
    context.set_binding_shape(0, input_shape)
    # This allocates memory for network inputs/outputs on both CPU and GPU
    inputs, outputs, bindings, stream = allocate_buffers(trt_engine)
    return inputs, outputs, bindings, stream, context


inputs_model_2, outputs_model_2, bindings_model_2, stream_model_2, context_model_2 = model_loading(trt_engine_path)

print("inputs_model_2 : ",inputs_model_2)

def eye_gazing(face_image, leye_image, reye_image, face_landmark):
    # TODO: preprocess the inputs, copy them into the input buffers,
    # and run inference -- this is the part I need help with.
    raise NotImplementedError
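
For reference, below is the rough direction I have in mind. This is only a sketch: the 224x224 crop size, the 25x25 facegrid, the grayscale /255 normalization, and the binding order are all my assumptions (loosely based on the DeepStream gaze sample) and are not confirmed against the actual engine bindings:

def eye_gazing_sketch(face_image, leye_image, reye_image, face_landmark):
    # ASSUMPTION: eye/face inputs are single-channel crops scaled to [0, 1].
    def to_tensor(img, height, width):
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        resized = cv2.resize(gray, (width, height))
        # Add batch and channel dims -> (1, 1, H, W) float32.
        return (resized.astype(np.float32) / 255.0)[None, None, :, :]

    # ASSUMPTION: 224x224 crops for both eyes and the face.
    leye_t = to_tensor(leye_image, 224, 224)
    reye_t = to_tensor(reye_image, 224, 224)
    face_t = to_tensor(face_image, 224, 224)

    # ASSUMPTION: a 25x25 binary facegrid (625 values) marking where the
    # face box falls in the full frame; it would be derived from
    # face_landmark / the face box, but is all zeros here as a placeholder.
    facegrid = np.zeros((1, 625, 1, 1), dtype=np.float32)

    # ASSUMPTION: input binding order [left eye, right eye, face, facegrid].
    # The binding listing above should be used to confirm the real order
    # and shapes before trusting any of this.
    for buf, tensor in zip(inputs_model_2, [leye_t, reye_t, face_t, facegrid]):
        np.copyto(buf.host, tensor.ravel())

    return do_model_2_inference(context_model_2, bindings=bindings_model_2,
                                inputs=inputs_model_2, outputs=outputs_model_2,
                                stream=stream_model_2)

Even if the general shape of this is right, I still need to know the correct preprocessing and facegrid construction, which is why I am asking.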



Please help me out. What changes do I need to make to get the result?

Please run GazeNet via deepstream-gaze-app: https://github.com/NVIDIA-AI-IOT/deepstream_tao_apps/tree/master/apps/tao_others/deepstream-gaze-app

Hi @Morganh

I have also run the C code implemented in DeepStream and am able to get the gaze vector, but I want to make some customizations, so I want it in Python.

Please help me with this.

Currently, a Python version is not supported.

So we cannot perform inference on the model with a custom Python script using TensorRT?

Currently, it can support running the .etlt model or TensorRT engine via deepstream-gaze-app: https://github.com/NVIDIA-AI-IOT/deepstream_tao_apps/tree/master/apps/tao_others/deepstream-gaze-app