Parallel inference

I put this script together from others I found on the internet. With one inference process it runs at 10 fps with the GPU at 20%; with two inference processes it drops to 5 fps, with the GPU still at 20%. How can I get more performance out of the GPU, the way multiprocessing does for the CPU?
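Worth noting: as posted, the script pins OpenCV's DNN module to the CPU (net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV) with cv2.dnn.DNN_TARGET_CPU), so the network never actually runs on the GPU, and two inference processes simply split the same CPU cores, which matches the 10 fps to 5 fps halving. If your OpenCV build was compiled with CUDA support (the standard pip wheels are not), switching the net to the CUDA backend is the usual first step. A minimal sketch, reusing modelConfiguration and modelWeights from the script below and falling back to the CPU path when no CUDA device is available:

net = cv2.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
if cv2.cuda.getCudaEnabledDeviceCount() > 0:
    # CUDA-enabled OpenCV build: run the forward pass on the GPU
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
else:
    # no usable CUDA device: keep the original CPU path
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)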

import multiprocessing
import time
import cv2
import numpy as np

# on-screen FPS counter
timeStamp = time.time()
fpsFilt = 0

# code constants
th1 = 5
th2 = 100
frame_counter = 0
font = cv2.FONT_HERSHEY_SIMPLEX
reverse = 2560
whT = 320
confThreshold = 0.5
nmsThreshold = 0.2

imgerror = cv2.imread('/home/kc/Downloads/darknet/C.jpg')  # placeholder frame

def Cam1(to_AI1):  # camera 1 capture loop
    timeStamp = time.time()
    fpsFilt = 0

    dispW = 1280
    dispH = 720

    cap5 = cv2.VideoCapture('/dev/v4l/by-path/pci-0000:05:00.0-usb-0:2.3.3:1.0-video-index0')
    cap5.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG'))
    cap5.set(cv2.CAP_PROP_FRAME_WIDTH, dispW)
    cap5.set(cv2.CAP_PROP_FRAME_HEIGHT, dispH)
    while True:
        success, img = cap5.read()
        if not success:  # skip dropped frames instead of queuing None
            continue

        dt = time.time() - timeStamp
        timeStamp = time.time()
        fps = 1 / dt
        fpsFilt = .9 * fpsFilt + .1 * fps
        cv2.putText(img, str(round(fpsFilt, 1)) + ' fps cam', (0, 70), font, 1, (0, 100, 255), 2)

        if to_AI1.empty():  # only queue a frame once the previous one is consumed
            to_AI1.put(img)

        if cv2.waitKey(1) == ord('q'):
            break
    cap5.release()

def Cam2(to_AI2):  # camera 2 capture loop
    timeStamp = time.time()
    fpsFilt = 0

    dispW = 1280
    dispH = 720

    cap5 = cv2.VideoCapture('/dev/v4l/by-path/pci-0000:05:00.0-usb-0:2.4:1.0-video-index0')
    cap5.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG'))
    cap5.set(cv2.CAP_PROP_FRAME_WIDTH, dispW)
    cap5.set(cv2.CAP_PROP_FRAME_HEIGHT, dispH)
    while True:
        success, img = cap5.read()
        if not success:  # skip dropped frames instead of queuing None
            continue

        dt = time.time() - timeStamp
        timeStamp = time.time()
        fps = 1 / dt
        fpsFilt = .9 * fpsFilt + .1 * fps
        cv2.putText(img, str(round(fpsFilt, 1)) + ' fps cam', (0, 70), font, 1, (0, 100, 255), 2)

        if to_AI2.empty():  # only queue a frame once the previous one is consumed
            to_AI2.put(img)

        if cv2.waitKey(1) == ord('q'):
            break
    cap5.release()

def inference1_AI(to_AI1):  # inference from cam1
    global fpsFilt
    global timeStamp
    global frame_counter

    classesFile = "/home/kc/Downloads/darknet/data/coco.names"
    with open(classesFile, 'rt') as f:
        classNames = f.read().rstrip('\n').split('\n')
    print(classNames)

    modelConfiguration = "/home/kc/Downloads/darknet/yolov4.cfg"
    modelWeights = "/home/kc/Downloads/darknet/yolov4.weights"

    net = cv2.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

    # the output layer names never change, so compute them once outside the loop
    layersNames = net.getLayerNames()
    outputNames = [layersNames[i - 1] for i in net.getUnconnectedOutLayers()]

    while True:
        img = to_AI1.get()  # blocks until the camera process queues a frame
        if img is None or img.size == 0:
            img = imgerror  # fall back to the placeholder image on a bad frame

        blob = cv2.dnn.blobFromImage(img, 1 / 255, (whT, whT), [0, 0, 0], 1, crop=False)
        net.setInput(blob)
        outputs = net.forward(outputNames)

        hT, wT, cT = img.shape
        bbox = []
        classIds = []
        confs = []
        for output in outputs:
            for det in output:
                scores = det[5:]
                classId = np.argmax(scores)
                confidence = scores[classId]
                if confidence > confThreshold:
                    w, h = int(det[2] * wT), int(det[3] * hT)
                    x, y = int((det[0] * wT) - w / 2), int((det[1] * hT) - h / 2)
                    bbox.append([x, y, w, h])
                    classIds.append(classId)
                    confs.append(float(confidence))

        indices = cv2.dnn.NMSBoxes(bbox, confs, confThreshold, nmsThreshold)

        for i in indices:
            x, y, w, h = bbox[i]
            cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 255), 2)
            cv2.putText(img, f'{classNames[classIds[i]].upper()} {int(confs[i] * 100)}%',
                        (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 2)

        dt = time.time() - timeStamp
        timeStamp = time.time()
        fps = 1 / dt
        fpsFilt = .9 * fpsFilt + .1 * fps
        cv2.putText(img, str(round(fpsFilt, 1)) + ' fps AI', (0, 30), font, 1, (0, 0, 255), 2)
        cv2.imshow('inference1', img)

        if cv2.waitKey(1) == ord('q'):
            break
    cv2.destroyAllWindows()
    print('after break')

def inference2_AI(to_AI2):  # inference from cam2
    global fpsFilt
    global timeStamp
    global frame_counter

    classesFile = "/home/kc/Downloads/darknet/data/coco.names"
    with open(classesFile, 'rt') as f:
        classNames = f.read().rstrip('\n').split('\n')
    print(classNames)

    modelConfiguration = "/home/kc/Downloads/darknet/yolov4.cfg"
    modelWeights = "/home/kc/Downloads/darknet/yolov4.weights"

    net = cv2.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

    # the output layer names never change, so compute them once outside the loop
    layersNames = net.getLayerNames()
    outputNames = [layersNames[i - 1] for i in net.getUnconnectedOutLayers()]

    while True:
        img = to_AI2.get()  # blocks until the camera process queues a frame
        if img is None or img.size == 0:
            img = imgerror  # fall back to the placeholder image on a bad frame

        blob = cv2.dnn.blobFromImage(img, 1 / 255, (whT, whT), [0, 0, 0], 1, crop=False)
        net.setInput(blob)
        outputs = net.forward(outputNames)

        hT, wT, cT = img.shape
        bbox = []
        classIds = []
        confs = []
        for output in outputs:
            for det in output:
                scores = det[5:]
                classId = np.argmax(scores)
                confidence = scores[classId]
                if confidence > confThreshold:
                    w, h = int(det[2] * wT), int(det[3] * hT)
                    x, y = int((det[0] * wT) - w / 2), int((det[1] * hT) - h / 2)
                    bbox.append([x, y, w, h])
                    classIds.append(classId)
                    confs.append(float(confidence))

        indices = cv2.dnn.NMSBoxes(bbox, confs, confThreshold, nmsThreshold)

        for i in indices:
            x, y, w, h = bbox[i]
            cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 255), 2)
            cv2.putText(img, f'{classNames[classIds[i]].upper()} {int(confs[i] * 100)}%',
                        (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 2)

        dt = time.time() - timeStamp
        timeStamp = time.time()
        fps = 1 / dt
        fpsFilt = .9 * fpsFilt + .1 * fps
        cv2.putText(img, str(round(fpsFilt, 1)) + ' fps AI', (0, 30), font, 1, (0, 0, 255), 2)
        cv2.imshow('inference2', img)

        if cv2.waitKey(1) == ord('q'):
            break
    cv2.destroyAllWindows()
    print('after break')

if __name__ == '__main__':  # entry-point guard for multiprocessing
    to_AI1 = multiprocessing.Queue()
    to_AI2 = multiprocessing.Queue()

    # camera capture processes
    t1Cam = multiprocessing.Process(target=Cam1, args=(to_AI1,))
    t2Cam = multiprocessing.Process(target=Cam2, args=(to_AI2,))

    # turret control AI processes
    t1AI = multiprocessing.Process(target=inference1_AI, args=(to_AI1,))
    t2AI = multiprocessing.Process(target=inference2_AI, args=(to_AI2,))

    t1Cam.start()  # cam start
    time.sleep(.3)

    t2Cam.start()  # cam start
    time.sleep(.3)

    t1AI.start()  # AI start
    t2AI.start()  # AI start
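If everything has to stay on the CPU, one alternative is to load yolov4 once and let a single inference process service both camera queues, instead of two processes each holding their own copy of the network and splitting the same cores. The sketch below is an assumption, not the original design: the name inference_AI, the queue list, and the round-robin polling are mine, and it reuses whT and the weight/config paths from the script above.

def inference_AI(queues):  # hypothetical single consumer for every camera queue
    net = cv2.dnn.readNetFromDarknet("/home/kc/Downloads/darknet/yolov4.cfg",
                                     "/home/kc/Downloads/darknet/yolov4.weights")
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
    layersNames = net.getLayerNames()
    outputNames = [layersNames[i - 1] for i in net.getUnconnectedOutLayers()]
    while True:
        for n, q in enumerate(queues):  # round-robin over the camera queues
            if q.empty():
                continue
            img = q.get()
            if img is None or img.size == 0:
                continue
            blob = cv2.dnn.blobFromImage(img, 1 / 255, (whT, whT), [0, 0, 0], 1, crop=False)
            net.setInput(blob)
            outputs = net.forward(outputNames)
            # ...same NMS and drawing code as in inference1_AI...
            cv2.imshow(f'inference{n + 1}', img)
        if cv2.waitKey(1) == ord('q'):
            break
    cv2.destroyAllWindows()

# would replace t1AI/t2AI above:
# tAI = multiprocessing.Process(target=inference_AI, args=([to_AI1, to_AI2],))
# tAI.start()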

Hi,
Please refer to the links below for the custom plugin implementation and samples:

While the IPluginV2 and IPluginV2Ext interfaces are still supported for backward compatibility with TensorRT 5.1 and 6.0.x respectively, we recommend that you write new plugins or refactor existing ones to target the IPluginV2DynamicExt or IPluginV2IOExt interfaces instead.

Thanks!