Multi-object tracker tx1

I’ve succeeded in running it on my TX1 board,

but it’s too slow compared to the demonstration video.
Is the difference in speed due to the board specifications?
Is there any way to speed up?

Or is there another multi-object tracker that is suitable for tx1?

You may add timings in order to find out where the bottleneck is.

I have not tried, but my guess would be that reading jpeg input files with skimage using ffmpeg is not efficient on Jetson.
So you may try a gstreamer pipeline instead with hw decoding: multifilesrc -> jpegparse -> nvv4l2decoder -> nvvidconv to BGRx -> videoconvert to BGR -> appsink. You may use this pipeline as opencv videocapture.

Be also sure to boost your jetson:

sudo nvpmodel -m0
sudo jetson_clocks

Update:
I took some time to test your code and adapt to opencv (main.py below).
I’ve used opencv4.2 (pre 4.3.0). You would need an opencv build with gstreamer support installed and available from python (uncomment #cv2.getBuildInformation() to be sure).

As input, it uses a gstreamer pipeline in order to read the AVI file TownCentreXVID.avi and provide frames in BGR format, with decodebin that should use nvv4l2decoder for HW(NVDEC) H264 decoding and nvvidconv for NV12 to BGRx HW(ISP) conversion.

For saving, it also uses videoWriter with a gstreamer pipeline for HW(NVENC) H264 encoding and saves the mp4 file into output directory.

It works fine @30 fps on my Xavier R32.3.1 with Kalman detectors tracking about 50 objects in a few ms (0-20), but using dlib is terribly slow… 70 tracked objects lead to about 10s… Maybe my Python 2 install is broken, or you have something to dig into from your side.

main.py
import numpy as np
import cv2
import sys
import os.path
import time
import argparse

from sort import Sort
from detector import GroundTruthDetections

def main():
    """Demo entry point: multi-object tracking with SORT over a HW-decoded video.

    Reads frames from TownCentreXVID.avi through a gstreamer pipeline
    (HW H264 decode on Jetson), feeds per-frame ground-truth detections to
    the SORT tracker, writes one line per track to output/townCentreOut.top
    and optionally displays and/or HW-encodes the annotated frames to
    output/test_out.mp4.
    """
    # print(cv2.getBuildInformation())  # uncomment to verify gstreamer support
    args = parse_args()
    display = args.display
    use_dlibTracker = args.use_dlibTracker
    saver = args.saver

    total_time = 0.0
    total_frames = 0

    # Prepare a fixed palette of 32 random colors, indexed by track id % 32.
    if display or saver:
        # np.int was removed in NumPy 1.24; the builtin int is equivalent here.
        colours = np.zeros((32, 3), dtype=int)
        # was range(0, 31), which left colours[31] black (off-by-one)
        for colIdx in range(32):
            # np.random.random_integers is deprecated; randint's upper bound is
            # exclusive, so use 256 to keep the inclusive 0..255 range.
            colours[colIdx] = np.random.randint(0, 256, 3)

    if not os.path.exists('output'):
        os.makedirs('output')
    out_file = 'output/townCentreOut.top'

    # init detector (ground-truth bounding boxes)
    detector = GroundTruthDetections()

    # init tracker -- create instance of the SORT tracker
    tracker = Sort(use_dlib=use_dlibTracker)

    if use_dlibTracker:
        print("Dlib Correlation tracker activated!")
    else:
        print("Kalman tracker activated!")

    # HW-decoded input: decodebin should pick nvv4l2decoder (NVDEC), nvvidconv
    # converts NV12 -> BGRx in HW, videoconvert finishes BGRx -> BGR for OpenCV.
    cap = cv2.VideoCapture("filesrc location=TownCentreXVID.avi ! decodebin ! nvvidconv ! video/x-raw, format=BGRx ! videoconvert ! video/x-raw, format=BGR ! appsink", cv2.CAP_GSTREAMER)
    if not cap.isOpened():
        print('Failed to open camera')
        sys.exit(-1)
    cap_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    cap_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    cap_fps = cap.get(cv2.CAP_PROP_FPS)
    print('Input video %d x %d @ %d fps' % (cap_width, cap_height, cap_fps))

    writer = None
    if saver:
        # HW H264 encode (NVENC) and mux into an mp4 in the output directory.
        gst_saver = 'appsrc ! video/x-raw, format=BGR, width=%d, height=%d, framerate=%d/1 ! queue ! videoconvert ! video/x-raw, format=BGRx ! nvvidconv ! video/x-raw(memory:NVMM), format=NV12 ! nvv4l2h264enc ! h264parse ! mp4mux ! filesink location=output/test_out.mp4 ' % (cap_width, cap_height, cap_fps)
        writer = cv2.VideoWriter(gst_saver, cv2.CAP_GSTREAMER, 0, cap_fps / 1.0, (int(cap_width), int(cap_height)))
        if not writer.isOpened():
            print('Failed to open writer')
            sys.exit(-2)

    with open(out_file, 'w') as f_out:
        frames = detector.get_total_frames()
        for frame in range(0, frames):  # frame numbers begin at 0!
            # get detections
            detections = detector.get_detected_items(frame)

            # read new frame; stop early if the stream ends before the
            # detector's frame count does (original ignored the ret flag)
            ret, img = cap.read()
            if not ret:
                print('Video stream ended at frame %d' % frame)
                break
            total_frames += 1

            start_time = time.time()

            # update tracker
            trackers = tracker.update(detections, img)

            cycle_time = time.time() - start_time
            total_time += cycle_time

            # was '%3f' -- a minimum width of 3, not 3 decimal places
            print('frame: %d...took: %.3fs' % (frame, cycle_time))
            # trackers rows are [x1, y1, x2, y2, id] (see indexing below);
            # .size counted elements (rows * 5), len counts tracked objects
            print('tracked %d objects' % len(trackers))
            for d in trackers:
                f_out.write('%d,%d,%d,%d,x,x,x,x,%.3f,%.3f,%.3f,%.3f\n' % (d[4], frame, 1, 1, d[0], d[1], d[2], d[3]))

                if display or saver:
                    # add bbox and label onto img
                    d = d.astype(np.int32)
                    # set current tracker color
                    tc = colours[d[4] % 32, :]
                    # set pensize
                    ps = 2
                    # draw bounding box
                    cv2.rectangle(img, (d[0], d[1]), (d[2], d[3]), tc, ps)
                    # label
                    cv2.putText(img, 'id = %d' % (d[4]), (d[0], d[1]), cv2.FONT_HERSHEY_SIMPLEX, 1, tc, ps, cv2.LINE_AA)
                    # len() works for both lists and numpy arrays, unlike the
                    # original ambiguous `detections != []`; the original body
                    # line also mixed tabs and spaces (IndentationError in py3)
                    if len(detections) > 0:  # detector is active in this frame
                        cv2.putText(img, ' DETECTOR', (5, 45), cv2.FONT_HERSHEY_SIMPLEX, 1, tc, ps, cv2.LINE_AA)

            if display:
                # show the frame with tracking boxes and labels
                if use_dlibTracker:
                    cv2.imshow('Dlib Correlation Tracker', img)
                else:
                    cv2.imshow('Kalman Tracker', img)
                cv2.waitKey(1)

            if saver:
                # push the labeled frame to the HW encoder
                writer.write(img)

    # release capture and encoder so the mp4 is finalized (mp4mux needs EOS)
    cap.release()
    if writer is not None:
        writer.release()
    if display:
        cv2.destroyAllWindows()

    # guard against division by zero when no frames were processed
    if total_time > 0:
        print("Total Tracking took: %.3f for %d frames or %.1f FPS" % (total_time, total_frames, total_frames / total_time))
    else:
        print("Total Tracking took: %.3f for %d frames" % (total_time, total_frames))

def parse_args():
    """Build and evaluate the command-line options for the tracker demo.

    Returns:
        argparse.Namespace with three flags: `display` (True unless
        --NoDisplay is given), `use_dlibTracker` (--dlib) and `saver` (--save).
    """
    parser = argparse.ArgumentParser(description='Experimenting Trackers with SORT')
    parser.add_argument('--NoDisplay', dest='display', action='store_false',
                        help='Disables online display of tracker output (slow)')
    parser.add_argument('--dlib', dest='use_dlibTracker', action='store_true',
                        help='Use dlib correlation tracker instead of kalman tracker')
    parser.add_argument('--save', dest='saver', action='store_true',
                        help='Saves frames with tracking output, not used if --NoDisplay')
    return parser.parse_args()

# Script entry point: run the demo only when executed directly,
# not when this module is imported.
if __name__ == '__main__':
    main()

I’ve tried your code on my board,
but this error comes out:

Kalman tracker activated!
OpenCV Error: Unspecified error (GStreamer: unable to start pipeline
) in cvCaptureFromCAM_GStreamer, file /home/nvidia/opencv/opencv-3.4.0/modules/videoio/src/cap_gstreamer.cpp, line 890
VIDEOIO(cvCreateCapture_GStreamer (CV_CAP_GSTREAMER_FILE, filename)): raised OpenCV exception:

/home/nvidia/opencv/opencv-3.4.0/modules/videoio/src/cap_gstreamer.cpp:890: error: (-2) GStreamer: unable to start pipeline
in function cvCaptureFromCAM_GStreamer

Failed to open camera

Do you have file TownCentreXVID.avi in current directory ?