Multi-object tracker tx1

Update:
I took some time to test your code and adapt it to OpenCV (main.py below).
I’ve used opencv4.2 (pre 4.3.0). You would need an opencv build with gstreamer support installed and available from python (uncomment #cv2.getBuildInformation() to be sure).

As input, it uses a gstreamer pipeline in order to read the AVI file TownCentreXVID.avi and provide frames in BGR format, with decodebin that should use nvv4l2decoder for HW(NVDEC) H264 decoding and nvvidconv for NV12 to BGRx HW(ISP) conversion.

For saving, it also uses videoWriter with a gstreamer pipeline for HW(NVENC) H264 encoding and saves the mp4 file into output directory.

It works fine @30 fps on my Xavier R32.3.1 with Kalman detectors tracking about 50 objects in a few ms (0-20), but using dlib is terribly slow… 70 tracked objects lead to about 10 s per frame… Maybe my python2 install is broken, or you have something to dig into from your side.

main.py
import numpy as np
import cv2
import sys
import os.path
import time
import argparse

from sort import Sort
from detector import GroundTruthDetections

def main():
    """Run the SORT multi-object tracker over TownCentreXVID.avi.

    Frames are read through a GStreamer pipeline (HW H264 decode), the
    ground-truth detections are fed to the SORT tracker, and per-frame track
    results are written to output/townCentreOut.top. Optionally displays the
    annotated frames and/or saves them to an H264-encoded mp4 (--save).
    """
    #print(cv2.getBuildInformation())
    args = parse_args()
    display = args.display
    use_dlibTracker = args.use_dlibTracker
    saver = args.saver

    total_time = 0.0
    total_frames = 0

    # Prepare one random BGR colour per track id (indexed modulo 32 below).
    if display or saver:
        # np.int was removed in NumPy >= 1.24; the builtin int dtype is equivalent.
        colours = np.zeros((32, 3), dtype=int)
        # Fill ALL 32 slots — range(0, 31) left index 31 permanently black.
        for colIdx in range(32):
            # np.random.random_integers is deprecated; randint's high bound is
            # exclusive, so 256 yields values in [0, 255].
            colours[colIdx] = np.random.randint(0, 256, 3)

    if not os.path.exists('output'):
        os.makedirs('output')
    out_file = 'output/townCentreOut.top'

    # init detector (ground-truth bounding boxes)
    detector = GroundTruthDetections()

    # init tracker
    tracker = Sort(use_dlib=use_dlibTracker)  # create instance of the SORT tracker

    if use_dlibTracker:
        print ("Dlib Correlation tracker activated!")
    else:
        print ("Kalman tracker activated!")

    # GStreamer input pipeline: file -> HW decode -> BGRx -> BGR -> appsink.
    cap = cv2.VideoCapture("filesrc location=TownCentreXVID.avi ! decodebin ! nvvidconv ! video/x-raw, format=BGRx ! videoconvert ! video/x-raw, format=BGR ! appsink", cv2.CAP_GSTREAMER)
    if not cap.isOpened():
        print('Failed to open camera')
        sys.exit(-1)
    cap_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    cap_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    cap_fps = cap.get(cv2.CAP_PROP_FPS)
    print('Input video %d x %d @ %d fps' % (cap_width, cap_height, cap_fps) )

    if saver:
        # GStreamer output pipeline: appsrc -> HW (NVENC) H264 encode -> mp4.
        gst_saver = 'appsrc ! video/x-raw, format=BGR, width=%d, height=%d, framerate=%d/1 ! queue ! videoconvert ! video/x-raw, format=BGRx ! nvvidconv ! video/x-raw(memory:NVMM), format=NV12 ! nvv4l2h264enc ! h264parse ! mp4mux ! filesink location=output/test_out.mp4 ' % (cap_width, cap_height, cap_fps)
        writer = cv2.VideoWriter(gst_saver, cv2.CAP_GSTREAMER, 0, cap_fps/1.0, (int(cap_width), int(cap_height)) )
        if not writer.isOpened():
            print('Failed to open writer')
            sys.exit(-2)

    with open(out_file, 'w') as f_out:
        frames = detector.get_total_frames()
        for frame in range(0, frames):  #frame numbers begin at 0!
            # get detections
            detections = detector.get_detected_items(frame)

            # read new frame; stop cleanly if the stream ends before the
            # detector says it should (cap.read() returns ret=False, img=None)
            ret, img = cap.read()
            if not ret:
                print('Input stream ended at frame %d' % frame)
                break
            total_frames += 1

            start_time = time.time()

            #update tracker
            trackers = tracker.update(detections, img)

            cycle_time = time.time() - start_time
            total_time += cycle_time

            print('frame: %d...took: %3fs' % (frame, cycle_time))
            # len() counts tracks; .size counted every element of the Nx5
            # array and so reported 5x the real number of objects.
            print('tracked %d objects' % (len(trackers)))
            for d in trackers:
                f_out.write('%d,%d,%d,%d,x,x,x,x,%.3f,%.3f,%.3f,%.3f\n' % (d[4], frame, 1, 1, d[0], d[1], d[2], d[3]))

                if display or saver:
                    # add bbox and label onto img
                    d = d.astype(np.int32)
                    # set current tracker color
                    tc = colours[d[4] % 32, :]
                    # set pensize
                    ps = 2
                    # draw bounding box
                    cv2.rectangle(img, (d[0], d[1]), (d[2], d[3]), tc, ps)
                    #label
                    cv2.putText(img, 'id = %d' % (d[4]), (d[0], d[1]), cv2.FONT_HERSHEY_SIMPLEX, 1, tc, ps, cv2.LINE_AA)
                    # len() works for both lists and numpy arrays; comparing an
                    # array against [] with != is ambiguous and deprecated.
                    if len(detections) > 0:  # detector is active in this frame
                        cv2.putText(img, ' DETECTOR', (5, 45), cv2.FONT_HERSHEY_SIMPLEX, 1, tc, ps, cv2.LINE_AA)

            if display:
                # show the frame with tracking boxes and labels
                if use_dlibTracker:
                    cv2.imshow('Dlib Correlation Tracker', img)
                else:
                    cv2.imshow('Kalman Tracker', img)
                cv2.waitKey(1)

            if saver:
                # push the labeled frame
                writer.write(img)

    # Guard against ZeroDivisionError when no frames were processed.
    if total_frames > 0:
        print("Total Tracking took: %.3f for %d frames or %.1f FPS" % (total_time, total_frames, total_frames/total_time))
    else:
        print("No frames were processed.")

def parse_args():
    """Parse input arguments."""
    parser = argparse.ArgumentParser(description='Experimenting Trackers with SORT')
    # (flag, destination attribute, argparse action, help text)
    options = (
        ('--NoDisplay', 'display', 'store_false',
         'Disables online display of tracker output (slow)'),
        ('--dlib', 'use_dlibTracker', 'store_true',
         'Use dlib correlation tracker instead of kalman tracker'),
        ('--save', 'saver', 'store_true',
         'Saves frames with tracking output, not used if --NoDisplay'),
    )
    for flag, dest, action, help_text in options:
        parser.add_argument(flag, dest=dest, help=help_text, action=action)
    return parser.parse_args()

# Entry point: run the tracker only when executed as a script, not on import.
if __name__ == '__main__':
    main()