Need suggestions for a multi-threaded camera pipeline using threading and Event

Dear,

Please give me some advice on multiprocessing with a GPU.
I am using Python threads and events to read frames from a camera.
However, performance is poor when I call GPU-related operations in RunSession.

  • 3 events (one per thread)
  • 3 threads
    • Run session
    • Read camera
    • Write to display

Please let me know why I have poor performance.

from threading import Thread
from threading import Event

# Maximum time, in seconds, that each worker blocks on its event before it
# counts the iteration as skipped and loops again.
TIMEOUT_VGET = 0.400
TIMEOUT_SRUN = 0.400
TIMEOUT_VSHOW = 0.400

# Hand-off events. Each worker waits on one event and sets the next one:
#   VideoGet   waits on ev_vg, sets ev_sr
#   RunSession waits on ev_sr, sets ev_vs
#   VideoShow2 waits on ev_vs, sets ev_vg
ev_vg = Event()
ev_sr = Event()
ev_vs = Event()

# Most recent frame shared between the workers. It is defined here so that
# code reading g_frame as a module global does not raise NameError before the
# first frame has been published.
g_frame = None


class RunSession:
    """
    Worker that runs the processing (GPU) step on a dedicated thread.

    The loop in show() blocks on the module-level ``ev_sr`` event and sets
    ``ev_vs`` after every iteration, whether or not ``ev_sr`` was signalled.

    Attributes:
        index: Value supplied by the caller; stored but not used by this class.
        stopped: When True, the worker loop exits at its next check.
        rs2_index: Number of iterations that ran because ``ev_sr`` was set.
        rs2_skipindex: Number of iterations in which the wait timed out.
        t_start: time.time() taken just before the last processing step.
        t_end: time.time() taken just after the last processing step.
        t_run: Duration of the last processing step, in milliseconds.
    """

    def __init__(self, index=None):
        self.index = index
        self.stopped = False
        self.rs2_index = 0
        self.rs2_skipindex = 0
        # Timing fields exist from construction so they can be read before
        # the first processing step has run.
        self.t_start = 0.0
        self.t_end = 0.0
        self.t_run = 0.0

    def start(self):
        """Start the worker thread and return self so calls can be chained."""
        Thread(target=self.show, args=()).start()
        return self

    def show(self):
        """Run the worker loop until ``stopped`` becomes True."""
        while not self.stopped:
            # Event.wait() returns True only when the flag is set, so its
            # result is used directly instead of a second, racy is_set() call.
            if not ev_sr.wait(timeout=TIMEOUT_SRUN):
                self.rs2_skipindex += 1
            else:
                ev_sr.clear()
                # some operations
                self.t_start = time.time()
                # some GPU operation here
                self.t_end = time.time()
                self.t_run = (self.t_end - self.t_start) * 1000
                self.rs2_index += 1
                # g_frame is deliberately not rebound here: assigning to it
                # in this method without a ``global`` declaration would make
                # it a local name and raise UnboundLocalError.

            # common
            cv2.waitKey(1)  # check
            ev_vs.set()

    def stop(self):
        """Ask the worker loop to exit; takes effect at its next iteration."""
        self.stopped = True

    def SetEvent(self):
        """Set ``ev_sr``, releasing the wait in the worker loop."""
        ev_sr.set()

    def ClearEvent(self):
        """Clear ``ev_sr``."""
        ev_sr.clear()
        
        
class VideoGet:
    """
    Class that continuously gets frames from a VideoCapture object
    with a dedicated thread.

    The loop in get() blocks on the module-level ``ev_vg`` event and sets
    ``ev_sr`` after every iteration, whether or not ``ev_vg`` was signalled.

    Attributes:
        stream: The cv2.VideoCapture opened on ``src``.
        grabbed: Success flag returned by the most recent stream.read().
        frame: Frame returned by the most recent stream.read().
        stopped: When True, the worker loop exits at its next check.
        vg2_index: Number of iterations that ran because ``ev_vg`` was set.
        vg2_skipindex: Number of iterations in which the wait timed out.
        t_start: time.time() taken right after the wait in the last iteration.
        t_end: time.time() taken at the end of the last iteration.
        t_run: Duration of the last iteration after the wait, in milliseconds.
    """

    def __init__(self, src=0):
        self.stream = cv2.VideoCapture(src)
        (self.grabbed, self.frame) = self.stream.read()
        self.stopped = False
        self.vg2_index = 0
        self.vg2_skipindex = 0
        # Timing fields exist from construction so they can be read before
        # the first loop iteration has completed.
        self.t_start = 0.0
        self.t_end = 0.0
        self.t_run = 0.0

    def start(self):
        """Start the worker thread and return self so calls can be chained."""
        Thread(target=self.get, args=()).start()
        return self

    def get(self):
        """Run the capture loop until ``stopped`` becomes True."""
        # Without this declaration the assignment below would bind a local
        # name and the frame would never reach the module-level g_frame.
        global g_frame

        try:
            while not self.stopped:
                # Event.wait() returns True only when the flag is set, so its
                # result is used instead of a second, racy is_set() call.
                signaled = ev_vg.wait(timeout=TIMEOUT_VGET)
                self.t_start = time.time()
                if not signaled:
                    self.vg2_skipindex += 1
                else:
                    ev_vg.clear()
                    if not self.grabbed:
                        self.stop()
                    else:
                        (self.grabbed, self.frame) = self.stream.read()
                        if self.grabbed:
                            g_frame = self.frame
                        else:
                            # Stop as soon as a read fails instead of one
                            # signalled iteration later, and do not publish
                            # the failed read's frame.
                            self.stop()
                    self.vg2_index += 1

                # common
                cv2.waitKey(1)  # some sleep
                ev_sr.set()
                self.t_end = time.time()
                self.t_run = (self.t_end - self.t_start) * 1000
        finally:
            # Release the capture device once the loop is done with it.
            self.stream.release()

    def stop(self):
        """Ask the capture loop to exit; takes effect at its next iteration."""
        self.stopped = True

    def startGet(self):
        """Set ``ev_vg``; same effect as SetEvent()."""
        ev_vg.set()

    def SetEvent(self):
        """Set ``ev_vg``, releasing the wait in the capture loop."""
        ev_vg.set()

    def ClearEvent(self):
        """Clear ``ev_vg``."""
        ev_vg.clear()


class VideoShow2:
    """
    Class that continuously shows a frame using a dedicated thread.

    The loop in show() blocks on the module-level ``ev_vs`` event and sets
    ``ev_vg`` after every iteration, whether or not ``ev_vs`` was signalled.

    Attributes:
        frame: The frame to display; refreshed from the module-level
            ``g_frame`` whenever that holds a frame.
        stopped: When True, the display loop exits at its next check.
        vs2_index: Number of iterations that ran because ``ev_vs`` was set.
        vs2_skipindex: Number of iterations in which the wait timed out.
        t_start: time.time() taken after the wait in the last iteration.
        t_end: time.time() taken at the end of the last iteration.
        t_run: Duration of the last iteration after the wait, in milliseconds.
    """

    def __init__(self, frame=None):
        # Use the frame handed in by the caller. Reading the module-level
        # g_frame here would raise NameError when nothing has defined it yet.
        self.frame = frame
        self.stopped = False
        self.vs2_index = 0
        self.vs2_skipindex = 0
        # Timing fields exist from construction so they can be read before
        # the first loop iteration has completed.
        self.t_start = 0.0
        self.t_end = 0.0
        self.t_run = 0.0

    def start(self):
        """Start the display thread and return self so calls can be chained."""
        Thread(target=self.show, args=()).start()
        return self

    def show(self):
        """Run the display loop until ``stopped`` becomes True or "q" is pressed."""
        while not self.stopped:
            # Event.wait() returns True only when the flag is set, so its
            # result is used instead of a second, racy is_set() call.
            signaled = ev_vs.wait(timeout=TIMEOUT_VSHOW)

            # Pick up the shared frame if one has been published; otherwise
            # keep the frame we already have. The lookup tolerates g_frame
            # not being defined at module level.
            latest = globals().get("g_frame")
            if latest is not None:
                self.frame = latest

            self.t_start = time.time()
            quit_requested = False
            if not signaled:
                self.vs2_skipindex += 1
            else:
                ev_vs.clear()
                # Only call imshow when there is a frame to display.
                if self.frame is not None:
                    cv2.imshow("Video", self.frame)
                quit_requested = cv2.waitKey(1) == ord("q")
                self.vs2_index += 1
                logging.info(">>>> VS2INDEX= %s", self.vs2_index)

            # common
            # The key returned by this call is checked as well, so a "q"
            # that arrives here is not discarded.
            if cv2.waitKey(1) == ord("q"):  # some sleep
                quit_requested = True
            if quit_requested:
                self.stopped = True
            ev_vg.set()
            self.t_end = time.time()
            self.t_run = (self.t_end - self.t_start) * 1000
            # NOTE(review): log_DBG_VSHOW_time is not defined in this snippet;
            # presumably a module-level list declared elsewhere - confirm.
            log_DBG_VSHOW_time.append(self.t_run)

    def stop(self):
        """Ask the display loop to exit; takes effect at its next iteration."""
        self.stopped = True

    def SetEvent(self):
        """Set ``ev_vs``, releasing the wait in the display loop."""
        ev_vs.set()

    def ClearEvent(self):
        """Clear ``ev_vs``."""
        ev_vs.clear()


def threadBoth(source=0):
    """
    Run the capture, processing and display workers until one of them stops.

    Starts a VideoGet thread (frame grabbing), a RunSession thread
    (processing) and a VideoShow2 thread (display), triggers the first
    capture, and then polls in the calling thread until the getter or the
    shower reports ``stopped``. All started workers are asked to stop on the
    way out, including when an exception is raised.

    Args:
        source: Passed unchanged to VideoGet (and from there to
            cv2.VideoCapture).
    """
    # Without this declaration the assignment below would bind a local name
    # and the initial frame would never reach the module-level g_frame.
    global g_frame

    # Start from a known state: no event signalled.
    for event in (ev_vg, ev_sr, ev_vs):
        event.clear()

    workers = []
    try:
        # The snippet defines VideoGet; no VideoGet2 is defined in it.
        video_getter = VideoGet(source).start()
        workers.append(video_getter)
        g_frame = video_getter.frame

        # The processing worker has to exist both to take part in the event
        # hand-off and to be stopped during shutdown.
        session_runner = RunSession().start()
        workers.append(session_runner)

        video_shower = VideoShow2(video_getter.frame).start()
        workers.append(video_shower)

        # NOTE(review): CountsPerSec is not defined in this snippet;
        # presumably provided elsewhere - confirm.
        cps = CountsPerSec().start()

        # Kick off the first capture.
        video_getter.SetEvent()

        while not (video_getter.stopped or video_shower.stopped):
            cv2.waitKey(1)  # some sleep
    finally:
        # Runs on normal exit and on exceptions, so started worker threads
        # are always asked to stop.
        for worker in workers:
            worker.stop()

Hi,
We suggest using tegra_multimedia_api together with OpenCV GpuMat. Please refer to:
https://devtalk.nvidia.com/default/topic/1047563/jetson-tx2/libargus-eglstream-to-nvivafilter/post/5319890/#5319890

The 09_camera_jpeg_capture sample is for Bayer sensors such as the default OV5693 camera. If your source is a V4L2 source, refer to the 12_camera_v4l2_cuda sample and integrate the patch.