Parallel inference

I put this script together from others I found on the internet. With one inference process it runs at 10 fps with the GPU at 20%; with two inference processes it drops to 5 fps, with the GPU still at 20%. How can I get more performance out of the GPU, the way multiprocessing does for the CPU?
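Worth noting: as posted, the script pins OpenCV's DNN module to the CPU (net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV) with cv2.dnn.DNN_TARGET_CPU), so the network never actually runs on the GPU, and two inference processes simply split the same CPU cores, which matches the 10 fps to 5 fps halving. If your OpenCV build was compiled with CUDA support (the standard pip wheels are not), switching the net to the CUDA backend is the usual first step. A minimal sketch, reusing modelConfiguration and modelWeights from the script below and falling back to the CPU path when no CUDA device is available:

net = cv2.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
if cv2.cuda.getCudaEnabledDeviceCount() > 0:
    # CUDA-enabled OpenCV build: run the forward pass on the GPU
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
else:
    # no usable CUDA device: keep the original CPU path
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)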

import multiprocessing
import time
import cv2
import numpy as np

# on-screen FPS counter
timeStamp = time.time()
fpsFilt = 0

# code constants
th1 = 5
th2 = 100
frame_counter = 0
font = cv2.FONT_HERSHEY_SIMPLEX
reverse = 2560
whT = 320
confThreshold = 0.5
nmsThreshold = 0.2

imgerror = cv2.imread('/home/kc/Downloads/darknet/C.jpg')  # placeholder frame

def Cam1(to_AI1):  # camera 1 capture loop
    timeStamp = time.time()
    fpsFilt = 0

    dispW = 1280
    dispH = 720

    cap5 = cv2.VideoCapture('/dev/v4l/by-path/pci-0000:05:00.0-usb-0:2.3.3:1.0-video-index0')
    cap5.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG'))
    cap5.set(cv2.CAP_PROP_FRAME_WIDTH, dispW)
    cap5.set(cv2.CAP_PROP_FRAME_HEIGHT, dispH)
    while True:
        success, img = cap5.read()
        if not success:  # skip dropped frames instead of queuing None
            continue

        dt = time.time() - timeStamp
        timeStamp = time.time()
        fps = 1 / dt
        fpsFilt = .9 * fpsFilt + .1 * fps
        cv2.putText(img, str(round(fpsFilt, 1)) + ' fps cam', (0, 70), font, 1, (0, 100, 255), 2)

        if to_AI1.empty():  # only queue a frame once the previous one is consumed
            to_AI1.put(img)

        if cv2.waitKey(1) == ord('q'):
            break
    cap5.release()

def Cam2(to_AI2):  # camera 2 capture loop
    timeStamp = time.time()
    fpsFilt = 0

    dispW = 1280
    dispH = 720

    cap5 = cv2.VideoCapture('/dev/v4l/by-path/pci-0000:05:00.0-usb-0:2.4:1.0-video-index0')
    cap5.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG'))
    cap5.set(cv2.CAP_PROP_FRAME_WIDTH, dispW)
    cap5.set(cv2.CAP_PROP_FRAME_HEIGHT, dispH)
    while True:
        success, img = cap5.read()
        if not success:  # skip dropped frames instead of queuing None
            continue

        dt = time.time() - timeStamp
        timeStamp = time.time()
        fps = 1 / dt
        fpsFilt = .9 * fpsFilt + .1 * fps
        cv2.putText(img, str(round(fpsFilt, 1)) + ' fps cam', (0, 70), font, 1, (0, 100, 255), 2)

        if to_AI2.empty():  # only queue a frame once the previous one is consumed
            to_AI2.put(img)

        if cv2.waitKey(1) == ord('q'):
            break
    cap5.release()

def inference1_AI(to_AI1):  # inference from cam1
    global fpsFilt
    global timeStamp
    global frame_counter

    classesFile = "/home/kc/Downloads/darknet/data/coco.names"
    with open(classesFile, 'rt') as f:
        classNames = f.read().rstrip('\n').split('\n')
    print(classNames)

    modelConfiguration = "/home/kc/Downloads/darknet/yolov4.cfg"
    modelWeights = "/home/kc/Downloads/darknet/yolov4.weights"

    net = cv2.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

    # the output layer names never change, so compute them once outside the loop
    layersNames = net.getLayerNames()
    outputNames = [layersNames[i - 1] for i in net.getUnconnectedOutLayers()]

    while True:
        img = to_AI1.get()  # blocks until the camera process queues a frame
        if img is None or img.size == 0:
            img = imgerror  # fall back to the placeholder image on a bad frame

        blob = cv2.dnn.blobFromImage(img, 1 / 255, (whT, whT), [0, 0, 0], 1, crop=False)
        net.setInput(blob)
        outputs = net.forward(outputNames)

        hT, wT, cT = img.shape
        bbox = []
        classIds = []
        confs = []
        for output in outputs:
            for det in output:
                scores = det[5:]
                classId = np.argmax(scores)
                confidence = scores[classId]
                if confidence > confThreshold:
                    w, h = int(det[2] * wT), int(det[3] * hT)
                    x, y = int((det[0] * wT) - w / 2), int((det[1] * hT) - h / 2)
                    bbox.append([x, y, w, h])
                    classIds.append(classId)
                    confs.append(float(confidence))

        indices = cv2.dnn.NMSBoxes(bbox, confs, confThreshold, nmsThreshold)

        for i in indices:
            x, y, w, h = bbox[i]
            cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 255), 2)
            cv2.putText(img, f'{classNames[classIds[i]].upper()} {int(confs[i] * 100)}%',
                        (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 2)

        dt = time.time() - timeStamp
        timeStamp = time.time()
        fps = 1 / dt
        fpsFilt = .9 * fpsFilt + .1 * fps
        cv2.putText(img, str(round(fpsFilt, 1)) + ' fps AI', (0, 30), font, 1, (0, 0, 255), 2)
        cv2.imshow('inference1', img)

        if cv2.waitKey(1) == ord('q'):
            break
    cv2.destroyAllWindows()
    print('after break')

def inference2_AI(to_AI2):  # inference from cam2
    global fpsFilt
    global timeStamp
    global frame_counter

    classesFile = "/home/kc/Downloads/darknet/data/coco.names"
    with open(classesFile, 'rt') as f:
        classNames = f.read().rstrip('\n').split('\n')
    print(classNames)

    modelConfiguration = "/home/kc/Downloads/darknet/yolov4.cfg"
    modelWeights = "/home/kc/Downloads/darknet/yolov4.weights"

    net = cv2.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

    # the output layer names never change, so compute them once outside the loop
    layersNames = net.getLayerNames()
    outputNames = [layersNames[i - 1] for i in net.getUnconnectedOutLayers()]

    while True:
        img = to_AI2.get()  # blocks until the camera process queues a frame
        if img is None or img.size == 0:
            img = imgerror  # fall back to the placeholder image on a bad frame

        blob = cv2.dnn.blobFromImage(img, 1 / 255, (whT, whT), [0, 0, 0], 1, crop=False)
        net.setInput(blob)
        outputs = net.forward(outputNames)

        hT, wT, cT = img.shape
        bbox = []
        classIds = []
        confs = []
        for output in outputs:
            for det in output:
                scores = det[5:]
                classId = np.argmax(scores)
                confidence = scores[classId]
                if confidence > confThreshold:
                    w, h = int(det[2] * wT), int(det[3] * hT)
                    x, y = int((det[0] * wT) - w / 2), int((det[1] * hT) - h / 2)
                    bbox.append([x, y, w, h])
                    classIds.append(classId)
                    confs.append(float(confidence))

        indices = cv2.dnn.NMSBoxes(bbox, confs, confThreshold, nmsThreshold)

        for i in indices:
            x, y, w, h = bbox[i]
            cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 255), 2)
            cv2.putText(img, f'{classNames[classIds[i]].upper()} {int(confs[i] * 100)}%',
                        (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 2)

        dt = time.time() - timeStamp
        timeStamp = time.time()
        fps = 1 / dt
        fpsFilt = .9 * fpsFilt + .1 * fps
        cv2.putText(img, str(round(fpsFilt, 1)) + ' fps AI', (0, 30), font, 1, (0, 0, 255), 2)
        cv2.imshow('inference2', img)

        if cv2.waitKey(1) == ord('q'):
            break
    cv2.destroyAllWindows()
    print('after break')

if __name__ == '__main__':  # entry-point guard for multiprocessing
    to_AI1 = multiprocessing.Queue()
    to_AI2 = multiprocessing.Queue()

    # camera capture processes
    t1Cam = multiprocessing.Process(target=Cam1, args=(to_AI1,))
    t2Cam = multiprocessing.Process(target=Cam2, args=(to_AI2,))

    # turret control AI processes
    t1AI = multiprocessing.Process(target=inference1_AI, args=(to_AI1,))
    t2AI = multiprocessing.Process(target=inference2_AI, args=(to_AI2,))

    t1Cam.start()  # cam start
    time.sleep(.3)

    t2Cam.start()  # cam start
    time.sleep(.3)

    t1AI.start()  # AI start
    t2AI.start()  # AI start
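If everything has to stay on the CPU, one alternative is to load yolov4 once and let a single inference process service both camera queues, instead of two processes each holding their own copy of the network and splitting the same cores. The sketch below is an assumption, not the original design: the name inference_AI, the queue list, and the round-robin polling are mine, and it reuses whT and the weight/config paths from the script above.

def inference_AI(queues):  # hypothetical single consumer for every camera queue
    net = cv2.dnn.readNetFromDarknet("/home/kc/Downloads/darknet/yolov4.cfg",
                                     "/home/kc/Downloads/darknet/yolov4.weights")
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
    layersNames = net.getLayerNames()
    outputNames = [layersNames[i - 1] for i in net.getUnconnectedOutLayers()]
    while True:
        for n, q in enumerate(queues):  # round-robin over the camera queues
            if q.empty():
                continue
            img = q.get()
            if img is None or img.size == 0:
                continue
            blob = cv2.dnn.blobFromImage(img, 1 / 255, (whT, whT), [0, 0, 0], 1, crop=False)
            net.setInput(blob)
            outputs = net.forward(outputNames)
            # ...same NMS and drawing code as in inference1_AI...
            cv2.imshow(f'inference{n + 1}', img)
        if cv2.waitKey(1) == ord('q'):
            break
    cv2.destroyAllWindows()

# would replace t1AI/t2AI above:
# tAI = multiprocessing.Process(target=inference_AI, args=([to_AI1, to_AI2],))
# tAI.start()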

Hi,
Please refer to the links below for the custom plugin implementation and samples:

While the IPluginV2 and IPluginV2Ext interfaces are still supported for backward compatibility with TensorRT 5.1 and 6.0.x respectively, we recommend that you write new plugins or refactor existing ones to target the IPluginV2DynamicExt or IPluginV2IOExt interfaces instead.

Thanks!