Slow FPS on Orin Nano 8 GB - YoloV8

I’m trying to build an application to detect and track people in a region. Below is my code; however, I’m
getting only 10 FPS even after using TensorRT. How do I make it run in real time, with at least 30 FPS?

# NOTE: the YOLO and DeepSort classes are assumed to be defined elsewhere and
# imported before this point.
model = YOLO("yolov8n.engine")
tracker = DeepSort(max_age=10)

# Nominal width/height in pixels -- presumably the network input size; TODO confirm.
height = 640
width = 640

# Per-track bookkeeping, both keyed by track id:
#   person_tracker   -> the most recent entry event written for that track
#   debounce_tracker -> when (and where) that track's region was last changed
person_tracker = dict()
debounce_tracker = dict()

# Minimum time between two region changes of the same track (2 seconds).
DEBOUNCE_PERIOD = timedelta(seconds=2)

def write_data_to_file(data, filename):
    """Append ``data`` to ``filename`` as one JSON document followed by a newline.

    This is best-effort: any exception raised while opening the file,
    serialising ``data`` or writing is caught and reported on stdout, and
    nothing is re-raised to the caller.
    """
    try:
        with open(filename, "a") as sink:
            sink.write(f"{json.dumps(data)}\n")
    except Exception as err:
        print("Error writing data to file:", err)

def determine_region(cx, cy, width, height):
    """Map a point to the name of the region that contains it.

    The top half of the area (``cy`` above ``height // 2``) is the "Aisle".
    The bottom half is split into three columns, each ``width // 3`` wide
    and named "C1", "C2" and "C3" from left to right; "C3" also absorbs any
    remainder on the right-hand side.

    Args:
        cx: x coordinate of the point.
        cy: y coordinate of the point.
        width: total width of the area.
        height: total height of the area.

    Returns:
        One of "Aisle", "C1", "C2" or "C3".
    """
    column_width = width // 3
    aisle_limit = height // 2

    if cy < aisle_limit:
        return "Aisle"
    if cx < column_width:
        return "C1"
    # cx >= column_width is already guaranteed here.
    if cx < 2 * column_width:
        return "C2"
    return "C3"

# Video source; the main loop pulls frames from this capture.
cap = cv2.VideoCapture("/home/sai/jetson-inference/build/aarch64/bin/pedestrians.mp4")

# Class names, one per line; the main loop indexes this list by detected class id.
# The context manager closes the file even if read() raises, and splitlines()
# avoids a trailing empty entry (and stray "\r" characters) in the list.
with open("coco.txt", "r") as my_file:
    data = my_file.read()
class_list = data.splitlines()

# Destination for the JSON-lines entry/exit event log.
output_file = "tracking_output.txt"

# Timestamps used to compute the frame-to-frame FPS figure.
prev_frame_time = 0
new_frame_time = 0

# Main loop: read a frame, detect people, update the tracker, log region
# entry/exit events for confirmed tracks, then draw and display the result.
# The try/finally guarantees the capture and the window are released even if
# inference, tracking or drawing raises part-way through the video.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break  # no more frames could be read

        new_frame_time = time.time()

        # The frame is NOT resized before inference, so box coordinates are in
        # the frame's own pixel space. Regions must therefore be derived from
        # the real frame size rather than from a fixed 640x640.
        frame_height, frame_width = frame.shape[:2]

        results = model.predict(frame, conf=0.8, agnostic_nms=True, iou=0.1)

        # One bulk conversion to Python floats instead of six 0-d tensor
        # conversions per detection; the confidence becomes a plain float too.
        detections = []
        for x1, y1, x2, y2, score, class_id in results[0].boxes.data.cpu().tolist():
            label = class_list[int(class_id)]
            if 'person' not in label:
                continue
            left, top = int(x1), int(y1)
            # Detection format: ([left, top, width, height], confidence, class name)
            detections.append(([left, top, int(x2) - left, int(y2) - top], score, label))

        tracks = tracker.update_tracks(detections, frame=frame)

        for track in tracks:
            if not track.is_confirmed():
                continue

            track_id = track.track_id
            bbox = track.to_tlbr()  # used below as two opposite corners: (x1, y1, x2, y2)
            cx = int((bbox[0] + bbox[2]) / 2)
            cy = int((bbox[1] + bbox[3]) / 2)

            region_name = determine_region(cx, cy, frame_width, frame_height)

            # Single timestamp per track per frame so that the exit and entry
            # records of one region change carry the same time.
            now = datetime.now()

            # Debounce logic. EventType 1 = entry, EventType 0 = exit.
            if track_id not in person_tracker:
                # First sighting of this track: log an entry event.
                event_data = {"PersonID": track_id, "EventType": 1, "EventTime": str(now), "LocationID": region_name}
                person_tracker[track_id] = event_data
                debounce_tracker[track_id] = {"LastUpdated": now, "LocationID": region_name}
                write_data_to_file(event_data, output_file)
            else:
                current_region = person_tracker[track_id]["LocationID"]
                last_updated = debounce_tracker[track_id]["LastUpdated"]
                # Only accept a region change once DEBOUNCE_PERIOD has elapsed
                # since the previous accepted change for this track.
                if region_name != current_region and now - last_updated > DEBOUNCE_PERIOD:
                    # Exit event for the previous region...
                    exit_data = {"PersonID": track_id, "EventType": 0, "EventTime": str(now), "LocationID": current_region}
                    write_data_to_file(exit_data, output_file)
                    # ...followed by an entry event for the new one.
                    entry_data = {"PersonID": track_id, "EventType": 1, "EventTime": str(now), "LocationID": region_name}
                    person_tracker[track_id] = entry_data
                    debounce_tracker[track_id] = {"LastUpdated": now, "LocationID": region_name}
                    write_data_to_file(entry_data, output_file)

            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 2)
            cv2.putText(frame, f'Person ID: {track_id}, LocationID: {region_name}', (int(bbox[0]), int(bbox[1]) - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # FPS from the time between the starts of two consecutive iterations;
        # guard against a zero interval so the division cannot raise.
        elapsed = new_frame_time - prev_frame_time
        fps = 1 / elapsed if elapsed > 0 else 0.0
        prev_frame_time = new_frame_time

        # Display FPS on frame
        cv2.putText(frame, f'FPS: {fps:.2f}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        cv2.imshow('Video', frame)

        # Press 'q' to stop early.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()

I’m also attaching the output video here, along with the last few lines of the tegrastats output:

<pre>01-24-2024 22:08:27 RAM 5525/6481MB (lfb 8x1MB) SWAP 1531/7336MB (cached 2MB) CPU [66%@1510,68%@1510,80%@1510,76%@1510,76%@1510,73%@1510] EMC_FREQ 11%@2133 GR3D_FREQ 15%@[624,0] VIC_FREQ 435 APE 200 CV0@-256C CPU@51.187C SOC2@49.781C SOC0@48.593C CV1@-256C GPU@49.625C tj@51.031C SOC1@48.812C CV2@-256C VDD_IN 7142mW/5787mW VDD_CPU_GPU_CV 2733mW/1684mW VDD_SOC 1785mW/1618mW
01-24-2024 22:08:28 RAM 5514/6481MB (lfb 2x2MB) SWAP 1531/7336MB (cached 2MB) CPU [81%@1510,85%@1510,80%@1510,77%@1510,76%@1510,78%@1510] EMC_FREQ 11%@2133 GR3D_FREQ 64%@[624,0] VIC_FREQ 435 APE 200 CV0@-256C CPU@51.062C SOC2@49.718C SOC0@48.625C CV1@-256C GPU@49.937C tj@51.062C SOC1@48.812C CV2@-256C VDD_IN 7221mW/5791mW VDD_CPU_GPU_CV 2773mW/1687mW VDD_SOC 1785mW/1619mW
01-24-2024 22:08:29 RAM 5515/6481MB (lfb 2x2MB) SWAP 1531/7336MB (cached 2MB) CPU [87%@1510,74%@1510,75%@1510,76%@1510,76%@1510,69%@1510] EMC_FREQ 11%@2133 GR3D_FREQ 48%@[624,0] VIC_FREQ 435 APE 200 CV0@-256C CPU@51.187C SOC2@49.812C SOC0@48.562C CV1@-256C GPU@49.906C tj@51.187C SOC1@48.843C CV2@-256C VDD_IN 7380mW/5795mW VDD_CPU_GPU_CV 2812mW/1690mW VDD_SOC 1785mW/1619mW
01-24-2024 22:08:30 RAM 5517/6481MB (lfb 2x2MB) SWAP 1531/7336MB (cached 2MB) CPU [81%@1510,78%@1510,77%@1510,85%@1510,80%@1510,81%@1510] EMC_FREQ 11%@2133 GR3D_FREQ 33%@[624,0] VIC_FREQ 435 APE 200 CV0@-256C CPU@51.406C SOC2@49.843C SOC0@48.468C CV1@-256C GPU@49.75C tj@51.406C SOC1@49.031C CV2@-256C VDD_IN 7221mW/5799mW VDD_CPU_GPU_CV 2852mW/1693mW VDD_SOC 1785mW/1620mW
01-24-2024 22:08:31 RAM 5521/6481MB (lfb 2x2MB) SWAP 1531/7336MB (cached 2MB) CPU [79%@1510,81%@1510,78%@1510,74%@1510,82%@1510,77%@1510] EMC_FREQ 11%@2133 GR3D_FREQ 5%@[624,0] VIC_FREQ 435 APE 200 CV0@-256C CPU@51.156C SOC2@49.781C SOC0@48.593C CV1@-256C GPU@49.593C tj@51.156C SOC1@49C CV2@-256C VDD_IN 7142mW/5803mW VDD_CPU_GPU_CV 2773mW/1696mW VDD_SOC 1785mW/1620mW
01-24-2024 22:08:32 RAM 5520/6481MB (lfb 2x2MB) SWAP 1531/7336MB (cached 2MB) CPU [71%@1510,64%@1510,81%@1510,66%@1510,74%@1510,74%@1510] EMC_FREQ 11%@2133 GR3D_FREQ 86%@[624,0] VIC_FREQ 435 APE 200 CV0@-256C CPU@51.187C SOC2@49.781C SOC0@48.531C CV1@-256C GPU@49.75C tj@51.187C SOC1@49.031C CV2@-256C VDD_IN 7182mW/5806mW VDD_CPU_GPU_CV 2773mW/1699mW VDD_SOC 1785mW/1621mW
01-24-2024 22:08:33 RAM 5521/6481MB (lfb 2x2MB) SWAP 1531/7336MB (cached 2MB) CPU [82%@1510,73%@1510,76%@1510,75%@1510,76%@1510,72%@1510] EMC_FREQ 11%@2133 GR3D_FREQ 75%@[624,0] VIC_FREQ 435 APE 200 CV0@-256C CPU@51.5C SOC2@49.812C SOC0@48.531C CV1@-256C GPU@49.937C tj@51.5C SOC1@49C CV2@-256C VDD_IN 7261mW/5810mW VDD_CPU_GPU_CV 2812mW/1702mW VDD_SOC 1785mW/1621mW
01-24-2024 22:08:34 RAM 5520/6481MB (lfb 2x2MB) SWAP 1530/7336MB (cached 2MB) CPU [76%@1510,81%@1510,75%@1510,72%@1510,78%@1510,77%@1510] EMC_FREQ 11%@2133 GR3D_FREQ 0%@[624,0] VIC_FREQ 435 APE 200 CV0@-256C CPU@51.187C SOC2@49.906C SOC0@48.781C CV1@-256C GPU@49.75C tj@51.187C SOC1@49C CV2@-256C VDD_IN 7142mW/5814mW VDD_CPU_GPU_CV 2733mW/1705mW VDD_SOC 1785mW/1622mW
01-24-2024 22:08:35 RAM 5521/6481MB (lfb 2x2MB) SWAP 1530/7336MB (cached 2MB) CPU [78%@1510,76%@1510,77%@1510,77%@1510,75%@1510,77%@1510] EMC_FREQ 11%@2133 GR3D_FREQ 0%@[624,0] VIC_FREQ 435 APE 200 CV0@-256C CPU@51.312C SOC2@49.937C SOC0@48.656C CV1@-256C GPU@49.656C tj@51.312C SOC1@49C CV2@-256C VDD_IN 6944mW/5817mW VDD_CPU_GPU_CV 2654mW/1707mW VDD_SOC 1745mW/1622mW
01-24-2024 22:08:36 RAM 5521/6481MB (lfb 2x2MB) SWAP 1530/7336MB (cached 2MB) CPU [74%@1510,73%@1510,65%@1510,75%@1510,72%@1510,80%@1510] EMC_FREQ 10%@2133 GR3D_FREQ 0%@[624,0] VIC_FREQ 435 APE 200 CV0@-256C CPU@51.375C SOC2@49.906C SOC0@48.75C CV1@-256C GPU@49.718C tj@51.375C SOC1@49.031C CV2@-256C VDD_IN 6983mW/5820mW VDD_CPU_GPU_CV 2654mW/1710mW VDD_SOC 1745mW/1622mW
01-24-2024 22:08:37 RAM 5576/6481MB (lfb 2x2MB) SWAP 1438/7336MB (cached 2MB) CPU [41%@1510,28%@729,58%@729,41%@729,40%@1510,42%@1510] EMC_FREQ 7%@2133 GR3D_FREQ 0%@[624,0] VIC_FREQ 435 APE 200 CV0@-256C CPU@51.187C SOC2@49.718C SOC0@48.781C CV1@-256C GPU@49.625C tj@51.187C SOC1@49C CV2@-256C VDD_IN 5713mW/5820mW VDD_CPU_GPU_CV 1624mW/1710mW VDD_SOC 1626mW/1622mW
01-24-2024 22:08:38 RAM 5582/6481MB (lfb 2x2MB) SWAP 1431/7336MB (cached 2MB) CPU [24%@1510,18%@1510,24%@1510,35%@1510,21%@1510,82%@1510] EMC_FREQ 5%@2133 GR3D_FREQ 0%@[624,0] VIC_FREQ 435 APE 200 CV0@-256C CPU@51.093C SOC2@49.687C SOC0@48.593C CV1@-256C GPU@49.468C tj@51.093C SOC1@49.031C CV2@-256C VDD_IN 5634mW/5820mW VDD_CPU_GPU_CV 1545mW/1709mW VDD_SOC 1626mW/1622mW
01-24-2024 22:08:39 RAM 5550/6481MB (lfb 2x2MB) SWAP 1445/7336MB (cached 2MB) CPU [31%@1113,22%@1036,23%@1036,21%@1036,30%@1510,51%@1510] EMC_FREQ 3%@2133 GR3D_FREQ 0%@[624,0] VIC_FREQ 435 APE 200 CV0@-256C CPU@51.156C SOC2@49.656C SOC0@48.75C CV1@-256C GPU@49.437C tj@51.156C SOC1@49C CV2@-256C VDD_IN 5555mW/5819mW VDD_CPU_GPU_CV 1426mW/1708mW VDD_SOC 1587mW/1622mW
01-24-2024 22:08:41 RAM 5486/6481MB (lfb 3x2MB) SWAP 1462/7336MB (cached 2MB) CPU [28%@729,22%@729,23%@729,36%@729,33%@729,27%@729] EMC_FREQ 3%@2133 GR3D_FREQ 0%@[509,0] VIC_FREQ 435 APE 200 CV0@-256C CPU@50.781C SOC2@49.531C SOC0@48.625C CV1@-256C GPU@49.531C tj@50.781C SOC1@48.906C CV2@-256C VDD_IN 5396mW/5818mW VDD_CPU_GPU_CV 1267mW/1707mW VDD_SOC 1584mW/1622mW
01-24-2024 22:08:42 RAM 5493/6481MB (lfb 3x2MB) SWAP 1455/7336MB (cached 2MB) CPU [15%@1510,11%@729,2%@729,8%@729,9%@1510,69%@1510] EMC_FREQ 3%@2133 GR3D_FREQ 0%@[407,0] VIC_FREQ 435 APE 200 CV0@-256C CPU@50.968C SOC2@49.437C SOC0@48.593C CV1@-256C GPU@49.281C tj@50.968C SOC1@48.937C CV2@-256C VDD_IN 5079mW/5816mW VDD_CPU_GPU_CV 1069mW/1706mW VDD_SOC 1587mW/1622mW
01-24-2024 22:08:43 RAM 5511/6481MB (lfb 1x2MB) SWAP 1418/7336MB (cached 2MB) CPU [17%@729,23%@729,17%@729,4%@729,3%@729,18%@729] EMC_FREQ 2%@2133 GR3D_FREQ 0%@[407,0] VIC_FREQ 435 APE 200 CV0@-256C CPU@50.5C SOC2@49.343C SOC0@48.531C CV1@-256C GPU@49.406C tj@50.5C SOC1@48.812C CV2@-256C VDD_IN 4840mW/5813mW VDD_CPU_GPU_CV 871mW/1703mW VDD_SOC 1547mW/1622mW
01-24-2024 22:08:44 RAM 5459/6481MB (lfb 8x2MB) SWAP 1450/7336MB (cached 2MB) CPU [1%@729,1%@729,1%@729,18%@729,2%@1036,50%@1036] EMC_FREQ 2%@2133 GR3D_FREQ 0%@[407,0] VIC_FREQ 435 APE 200 CV0@-256C CPU@50.437C SOC2@49.406C SOC0@48.5C CV1@-256C GPU@49.093C tj@50.437C SOC1@48.812C CV2@-256C VDD_IN 4880mW/5811mW VDD_CPU_GPU_CV 871mW/1701mW VDD_SOC 1547mW/1622mW
01-24-2024 22:08:45 RAM 5414/6481MB (lfb 2x2MB) SWAP 1456/7336MB (cached 2MB) CPU [25%@729,18%@729,1%@729,0%@729,0%@729,23%@729] EMC_FREQ 1%@2133 GR3D_FREQ 0%@[407,0] VIC_FREQ 435 APE 200 CV0@-256C CPU@50.5C SOC2@49.343C SOC0@48.343C CV1@-256C GPU@49.062C tj@50.5C SOC1@48.718C CV2@-256C VDD_IN 4801mW/5808mW VDD_CPU_GPU_CV 752mW/1699mW VDD_SOC 1547mW/1621mW
01-24-2024 22:08:46 RAM 5325/6481MB (lfb 5x1MB) SWAP 1458/7336MB (cached 2MB) CPU [12%@729,3%@729,1%@729,6%@729,2%@729,2%@729] EMC_FREQ 1%@2133 GR3D_FREQ 0%@[407,0] VIC_FREQ 435 APE 200 CV0@-256C CPU@50.375C SOC2@49.187C SOC0@48.375C CV1@-256C GPU@49.125C tj@50.375C SOC1@48.812C CV2@-256C VDD_IN 4761mW/5805mW VDD_CPU_GPU_CV 633mW/1696mW VDD_SOC 1545mW/1621mW
01-24-2024 22:08:47 RAM 5315/6481MB (lfb 6x1MB) SWAP 1457/7336MB (cached 2MB) CPU [1%@1510,90%@1510,0%@1510,14%@1510,0%@729,0%@729] EMC_FREQ 1%@2133 GR3D_FREQ 0%@[407,0] VIC_FREQ 435 APE 200 CV0@-256C CPU@50.625C SOC2@49.25C SOC0@48.375C CV1@-256C GPU@48.937C tj@50.625C SOC1@48.843C CV2@-256C VDD_IN 4952mW/5803mW VDD_CPU_GPU_CV 1188mW/1694mW VDD_SOC 1545mW/1621mW
01-24-2024 22:08:48 RAM 5285/6481MB (lfb 1x2MB) SWAP 1455/7336MB (cached 2MB) CPU [2%@729,46%@729,5%@729,4%@729,1%@729,0%@729] EMC_FREQ 1%@2133 GR3D_FREQ 0%@[407,0] VIC_FREQ 435 APE 200 CV0@-256C CPU@50.343C SOC2@49.218C SOC0@48.312C CV1@-256C GPU@49.343C tj@50.343C SOC1@48.718C CV2@-256C VDD_IN 4801mW/5800mW VDD_CPU_GPU_CV 752mW/1692mW VDD_SOC 1545mW/1621mW
01-24-2024 22:08:49 RAM 3322/6481MB (lfb 58x4MB) SWAP 1323/7336MB (cached 2MB) CPU [42%@1510,0%@1510,4%@1510,3%@1510,6%@729,16%@729] EMC_FREQ 2%@2133 GR3D_FREQ 38%@[407,0] VIC_FREQ 435 APE 200 CV0@-256C CPU@50.406C SOC2@49.312C SOC0@48.312C CV1@-256C GPU@49.062C tj@50.406C SOC1@48.812C CV2@-256C VDD_IN 5079mW/5798mW VDD_CPU_GPU_CV 1069mW/1690mW VDD_SOC 1587mW/1621mW
01-24-2024 22:08:50 RAM 2729/6481MB (lfb 61x4MB) SWAP 1323/7336MB (cached 2MB) CPU [17%@729,3%@729,8%@729,24%@729,8%@729,13%@729] EMC_FREQ 2%@2133 GR3D_FREQ 0%@[407,0] VIC_FREQ 435 APE 200 CV0@-256C CPU@50.343C SOC2@49.187C SOC0@48.343C CV1@-256C GPU@49.187C tj@50.343C SOC1@48.812C CV2@-256C VDD_IN 4560mW/5795mW VDD_CPU_GPU_CV 720mW/1688mW VDD_SOC 1560mW/1620mW
01-24-2024 22:08:51 RAM 2732/6481MB (lfb 61x4MB) SWAP 1323/7336MB (cached 2MB) CPU [1%@1510,2%@1510,2%@1510,14%@1510,2%@729,4%@729] EMC_FREQ 2%@2133 GR3D_FREQ 0%@[407,0] VIC_FREQ 435 APE 200 CV0@-256C CPU@50.218C SOC2@49.125C SOC0@48.281C CV1@-256C GPU@49.156C tj@50.218C SOC1@48.656C CV2@-256C VDD_IN 4407mW/5791mW VDD_CPU_GPU_CV 681mW/1685mW VDD_SOC 1562mW/1620mW
01-24-2024 22:08:52 RAM 2731/6481MB (lfb 61x4MB) SWAP 1322/7336MB (cached 2MB) CPU [1%@729,0%@729,1%@729,15%@729,1%@729,0%@729] EMC_FREQ 1%@2133 GR3D_FREQ 0%@[407,0] VIC_FREQ 435 APE 200 CV0@-256C CPU@50.531C SOC2@49.093C SOC0@48.218C CV1@-256C GPU@48.781C tj@50.531C SOC1@48.656C CV2@-256C VDD_IN 4373mW/5788mW VDD_CPU_GPU_CV 642mW/1682mW VDD_SOC 1564mW/1620mW
01-24-2024 22:08:53 RAM 2731/6481MB (lfb 61x4MB) SWAP 1322/7336MB (cached 2MB) CPU [2%@729,1%@729,0%@729,0%@729,0%@729,0%@729] EMC_FREQ 1%@2133 GR3D_FREQ 0%@[407,0] VIC_FREQ 435 APE 200 CV0@-256C CPU@50.062C SOC2@48.968C SOC0@48.062C CV1@-256C GPU@48.937C tj@50.062C SOC1@48.625C CV2@-256C VDD_IN 4260mW/5784mW VDD_CPU_GPU_CV 562mW/1679mW VDD_SOC 1527mW/1620mW
01-24-2024 22:08:54 RAM 2720/6481MB (lfb 61x4MB) SWAP 1322/7336MB (cached 2MB) CPU [0%@729,1%@729,0%@729,0%@729,1%@729,1%@729] EMC_FREQ 1%@2133 GR3D_FREQ 0%@[407,0] VIC_FREQ 435 APE 200 CV0@-256C CPU@50.312C SOC2@48.968C SOC0@48.156C CV1@-256C GPU@48.781C tj@50C SOC1@48.625C CV2@-256C VDD_IN 4300mW/5780mW VDD_CPU_GPU_CV 562mW/1676mW VDD_SOC 1527mW/1620mW
</pre>

Hi,

The GPU utilization from tegrastats is pretty low.
Please try the commands below and share the QPS (queries per second) of your model with us.

Usually, the bottleneck comes from data read/write rather than inference.

Boost the device

$ sudo nvpmodel -m 0
$ sudo jetson_clocks

Bench inference-only

$ /usr/src/tensorrt/bin/trtexec --loadEngine=yolov8n.engine

Thanks.

Thanks for your response. Here is the video after boosting the device.

Also, here is the output of /usr/src/tensorrt/bin/trtexec --loadEngine=yolov8n.engine:

/usr/src/tensorrt/bin/trtexec --loadEngine=yolov8n.engine
&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --loadEngine=yolov8n.engine
[01/24/2024-23:09:48] [I] === Model Options ===
[01/24/2024-23:09:48] [I] Format: *
[01/24/2024-23:09:48] [I] Model: 
[01/24/2024-23:09:48] [I] Output:
[01/24/2024-23:09:48] [I] === Build Options ===
[01/24/2024-23:09:48] [I] Max batch: 1
[01/24/2024-23:09:48] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default
[01/24/2024-23:09:48] [I] minTiming: 1
[01/24/2024-23:09:48] [I] avgTiming: 8
[01/24/2024-23:09:48] [I] Precision: FP32
[01/24/2024-23:09:48] [I] LayerPrecisions: 
[01/24/2024-23:09:48] [I] Calibration: 
[01/24/2024-23:09:48] [I] Refit: Disabled
[01/24/2024-23:09:48] [I] Sparsity: Disabled
[01/24/2024-23:09:48] [I] Safe mode: Disabled
[01/24/2024-23:09:48] [I] DirectIO mode: Disabled
[01/24/2024-23:09:48] [I] Restricted mode: Disabled
[01/24/2024-23:09:48] [I] Build only: Disabled
[01/24/2024-23:09:48] [I] Save engine: 
[01/24/2024-23:09:48] [I] Load engine: yolov8n.engine
[01/24/2024-23:09:48] [I] Profiling verbosity: 0
[01/24/2024-23:09:48] [I] Tactic sources: Using default tactic sources
[01/24/2024-23:09:48] [I] timingCacheMode: local
[01/24/2024-23:09:48] [I] timingCacheFile: 
[01/24/2024-23:09:48] [I] Heuristic: Disabled
[01/24/2024-23:09:48] [I] Preview Features: Use default preview flags.
[01/24/2024-23:09:48] [I] Input(s)s format: fp32:CHW
[01/24/2024-23:09:48] [I] Output(s)s format: fp32:CHW
[01/24/2024-23:09:48] [I] Input build shapes: model
[01/24/2024-23:09:48] [I] Input calibration shapes: model
[01/24/2024-23:09:48] [I] === System Options ===
[01/24/2024-23:09:48] [I] Device: 0
[01/24/2024-23:09:48] [I] DLACore: 
[01/24/2024-23:09:48] [I] Plugins:
[01/24/2024-23:09:48] [I] === Inference Options ===
[01/24/2024-23:09:48] [I] Batch: 1
[01/24/2024-23:09:48] [I] Input inference shapes: model
[01/24/2024-23:09:48] [I] Iterations: 10
[01/24/2024-23:09:48] [I] Duration: 3s (+ 200ms warm up)
[01/24/2024-23:09:48] [I] Sleep time: 0ms
[01/24/2024-23:09:48] [I] Idle time: 0ms
[01/24/2024-23:09:48] [I] Streams: 1
[01/24/2024-23:09:48] [I] ExposeDMA: Disabled
[01/24/2024-23:09:48] [I] Data transfers: Enabled
[01/24/2024-23:09:48] [I] Spin-wait: Disabled
[01/24/2024-23:09:48] [I] Multithreading: Disabled
[01/24/2024-23:09:48] [I] CUDA Graph: Disabled
[01/24/2024-23:09:48] [I] Separate profiling: Disabled
[01/24/2024-23:09:48] [I] Time Deserialize: Disabled
[01/24/2024-23:09:48] [I] Time Refit: Disabled
[01/24/2024-23:09:48] [I] NVTX verbosity: 0
[01/24/2024-23:09:48] [I] Persistent Cache Ratio: 0
[01/24/2024-23:09:48] [I] Inputs:
[01/24/2024-23:09:48] [I] === Reporting Options ===
[01/24/2024-23:09:48] [I] Verbose: Disabled
[01/24/2024-23:09:48] [I] Averages: 10 inferences
[01/24/2024-23:09:48] [I] Percentiles: 90,95,99
[01/24/2024-23:09:48] [I] Dump refittable layers:Disabled
[01/24/2024-23:09:48] [I] Dump output: Disabled
[01/24/2024-23:09:48] [I] Profile: Disabled
[01/24/2024-23:09:48] [I] Export timing to JSON file: 
[01/24/2024-23:09:48] [I] Export output to JSON file: 
[01/24/2024-23:09:48] [I] Export profile to JSON file: 
[01/24/2024-23:09:48] [I] 
[01/24/2024-23:09:48] [I] === Device Information ===
[01/24/2024-23:09:48] [I] Selected Device: Orin
[01/24/2024-23:09:48] [I] Compute Capability: 8.7
[01/24/2024-23:09:48] [I] SMs: 8
[01/24/2024-23:09:48] [I] Compute Clock Rate: 0.624 GHz
[01/24/2024-23:09:48] [I] Device Global Memory: 6480 MiB
[01/24/2024-23:09:48] [I] Shared Memory per SM: 164 KiB
[01/24/2024-23:09:48] [I] Memory Bus Width: 128 bits (ECC disabled)
[01/24/2024-23:09:48] [I] Memory Clock Rate: 0.624 GHz
[01/24/2024-23:09:48] [I] 
[01/24/2024-23:09:48] [I] TensorRT version: 8.5.2
[01/24/2024-23:09:49] [I] Engine loaded in 0.269194 sec.
[01/24/2024-23:09:53] [I] [TRT] Loaded engine size: 8 MiB
[01/24/2024-23:09:53] [E] Error[1]: [stdArchiveReader.cpp::StdArchiveReader::32] Error Code 1: Serialization (Serialization assertion magicTagRead == kMAGIC_TAG failed.Magic tag does not match)
[01/24/2024-23:09:53] [E] Error[4]: [runtime.cpp::deserializeCudaEngine::65] Error Code 4: Internal Error (Engine deserialization failed.)
[01/24/2024-23:09:53] [E] Engine deserialization failed
[01/24/2024-23:09:53] [E] Got invalid engine!
[01/24/2024-23:09:53] [E] Inference set up failed
&&&& FAILED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --loadEngine=yolov8n.engine

Hi,

It looks like your model was built with a different TensorRT version.
Did you build the engine in the same environment that you use to run the Python sample?

Thanks.

Yes, I don’t have any other environment. I generated the .engine file with the yolo export command. After looking at the error on the final line, I tried generating it with trtexec instead: /usr/src/tensorrt/bin$ ./trtexec --onnx=/home/sai/Documents/traffic/yolov8n.onnx --saveEngine=/home/sai/Documents/traffic/yolov8n2.engine

sai@sai-desktop:/usr/src/tensorrt/bin$ ./trtexec --onnx=/home/sai/Documents/traffic/yolov8n.onnx --saveEngine=/home/sai/Documents/traffic/yolov8n2.engine
&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # ./trtexec --onnx=/home/sai/Documents/traffic/yolov8n.onnx --saveEngine=/home/sai/Documents/traffic/yolov8n2.engine
[01/24/2024-23:47:39] [I] === Model Options ===
[01/24/2024-23:47:39] [I] Format: ONNX
[01/24/2024-23:47:39] [I] Model: /home/sai/Documents/traffic/yolov8n.onnx
[01/24/2024-23:47:39] [I] Output:
[01/24/2024-23:47:39] [I] === Build Options ===
[01/24/2024-23:47:39] [I] Max batch: explicit batch
[01/24/2024-23:47:39] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default
[01/24/2024-23:47:39] [I] minTiming: 1
[01/24/2024-23:47:39] [I] avgTiming: 8
[01/24/2024-23:47:39] [I] Precision: FP32
[01/24/2024-23:47:39] [I] LayerPrecisions: 
[01/24/2024-23:47:39] [I] Calibration: 
[01/24/2024-23:47:39] [I] Refit: Disabled
[01/24/2024-23:47:39] [I] Sparsity: Disabled
[01/24/2024-23:47:39] [I] Safe mode: Disabled
[01/24/2024-23:47:39] [I] DirectIO mode: Disabled
[01/24/2024-23:47:39] [I] Restricted mode: Disabled
[01/24/2024-23:47:39] [I] Build only: Disabled
[01/24/2024-23:47:39] [I] Save engine: /home/sai/Documents/traffic/yolov8n2.engine
[01/24/2024-23:47:39] [I] Load engine: 
[01/24/2024-23:47:39] [I] Profiling verbosity: 0
[01/24/2024-23:47:39] [I] Tactic sources: Using default tactic sources
[01/24/2024-23:47:39] [I] timingCacheMode: local
[01/24/2024-23:47:39] [I] timingCacheFile: 
[01/24/2024-23:47:39] [I] Heuristic: Disabled
[01/24/2024-23:47:39] [I] Preview Features: Use default preview flags.
[01/24/2024-23:47:39] [I] Input(s)s format: fp32:CHW
[01/24/2024-23:47:39] [I] Output(s)s format: fp32:CHW
[01/24/2024-23:47:39] [I] Input build shapes: model
[01/24/2024-23:47:39] [I] Input calibration shapes: model
[01/24/2024-23:47:39] [I] === System Options ===
[01/24/2024-23:47:39] [I] Device: 0
[01/24/2024-23:47:39] [I] DLACore: 
[01/24/2024-23:47:39] [I] Plugins:
[01/24/2024-23:47:39] [I] === Inference Options ===
[01/24/2024-23:47:39] [I] Batch: Explicit
[01/24/2024-23:47:39] [I] Input inference shapes: model
[01/24/2024-23:47:39] [I] Iterations: 10
[01/24/2024-23:47:39] [I] Duration: 3s (+ 200ms warm up)
[01/24/2024-23:47:39] [I] Sleep time: 0ms
[01/24/2024-23:47:39] [I] Idle time: 0ms
[01/24/2024-23:47:39] [I] Streams: 1
[01/24/2024-23:47:39] [I] ExposeDMA: Disabled
[01/24/2024-23:47:39] [I] Data transfers: Enabled
[01/24/2024-23:47:39] [I] Spin-wait: Disabled
[01/24/2024-23:47:39] [I] Multithreading: Disabled
[01/24/2024-23:47:39] [I] CUDA Graph: Disabled
[01/24/2024-23:47:39] [I] Separate profiling: Disabled
[01/24/2024-23:47:39] [I] Time Deserialize: Disabled
[01/24/2024-23:47:39] [I] Time Refit: Disabled
[01/24/2024-23:47:39] [I] NVTX verbosity: 0
[01/24/2024-23:47:39] [I] Persistent Cache Ratio: 0
[01/24/2024-23:47:39] [I] Inputs:
[01/24/2024-23:47:39] [I] === Reporting Options ===
[01/24/2024-23:47:39] [I] Verbose: Disabled
[01/24/2024-23:47:39] [I] Averages: 10 inferences
[01/24/2024-23:47:39] [I] Percentiles: 90,95,99
[01/24/2024-23:47:39] [I] Dump refittable layers:Disabled
[01/24/2024-23:47:39] [I] Dump output: Disabled
[01/24/2024-23:47:39] [I] Profile: Disabled
[01/24/2024-23:47:39] [I] Export timing to JSON file: 
[01/24/2024-23:47:39] [I] Export output to JSON file: 
[01/24/2024-23:47:39] [I] Export profile to JSON file: 
[01/24/2024-23:47:39] [I] 
[01/24/2024-23:47:39] [I] === Device Information ===
[01/24/2024-23:47:39] [I] Selected Device: Orin
[01/24/2024-23:47:39] [I] Compute Capability: 8.7
[01/24/2024-23:47:39] [I] SMs: 8
[01/24/2024-23:47:39] [I] Compute Clock Rate: 0.624 GHz
[01/24/2024-23:47:39] [I] Device Global Memory: 6480 MiB
[01/24/2024-23:47:39] [I] Shared Memory per SM: 164 KiB
[01/24/2024-23:47:39] [I] Memory Bus Width: 128 bits (ECC disabled)
[01/24/2024-23:47:39] [I] Memory Clock Rate: 0.624 GHz
[01/24/2024-23:47:39] [I] 
[01/24/2024-23:47:39] [I] TensorRT version: 8.5.2
[01/24/2024-23:47:40] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 3895 (MiB)
[01/24/2024-23:47:42] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +329, now: CPU 574, GPU 4255 (MiB)
[01/24/2024-23:47:42] [I] Start parsing network model
[01/24/2024-23:47:42] [I] [TRT] ----------------------------------------------------------------
[01/24/2024-23:47:42] [I] [TRT] Input filename:   /home/sai/Documents/traffic/yolov8n.onnx
[01/24/2024-23:47:42] [I] [TRT] ONNX IR version:  0.0.8
[01/24/2024-23:47:42] [I] [TRT] Opset version:    17
[01/24/2024-23:47:42] [I] [TRT] Producer name:    pytorch
[01/24/2024-23:47:42] [I] [TRT] Producer version: 2.1.0
[01/24/2024-23:47:42] [I] [TRT] Domain:           
[01/24/2024-23:47:42] [I] [TRT] Model version:    0
[01/24/2024-23:47:42] [I] [TRT] Doc string:       
[01/24/2024-23:47:42] [I] [TRT] ----------------------------------------------------------------
[01/24/2024-23:47:43] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32.
[01/24/2024-23:47:43] [I] Finish parsing network model
[01/24/2024-23:47:43] [I] [TRT] ---------- Layers Running on DLA ----------
[01/24/2024-23:47:43] [I] [TRT] ---------- Layers Running on GPU ----------
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.0/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.0/act/Sigmoid), /model.0/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.1/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.1/act/Sigmoid), /model.1/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.2/cv1/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.2/cv1/act/Sigmoid), /model.2/cv1/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.2/m.0/cv1/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.2/m.0/cv1/act/Sigmoid), /model.2/m.0/cv1/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.2/m.0/cv2/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(PWN(/model.2/m.0/cv2/act/Sigmoid), /model.2/m.0/cv2/act/Mul), /model.2/m.0/Add)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] COPY: /model.2/Split_output_0 copy
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] COPY: /model.2/Split_output_1 copy
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.2/cv2/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.2/cv2/act/Sigmoid), /model.2/cv2/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.3/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.3/act/Sigmoid), /model.3/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.4/cv1/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.4/cv1/act/Sigmoid), /model.4/cv1/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.4/m.0/cv1/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.4/m.0/cv1/act/Sigmoid), /model.4/m.0/cv1/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.4/m.0/cv2/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(PWN(/model.4/m.0/cv2/act/Sigmoid), /model.4/m.0/cv2/act/Mul), /model.4/m.0/Add)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.4/m.1/cv1/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.4/m.1/cv1/act/Sigmoid), /model.4/m.1/cv1/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.4/m.1/cv2/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(PWN(/model.4/m.1/cv2/act/Sigmoid), /model.4/m.1/cv2/act/Mul), /model.4/m.1/Add)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] COPY: /model.4/Split_output_0 copy
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] COPY: /model.4/Split_output_1 copy
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] COPY: /model.4/m.0/Add_output_0 copy
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.4/cv2/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.4/cv2/act/Sigmoid), /model.4/cv2/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.5/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.5/act/Sigmoid), /model.5/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.6/cv1/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.6/cv1/act/Sigmoid), /model.6/cv1/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.6/m.0/cv1/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.6/m.0/cv1/act/Sigmoid), /model.6/m.0/cv1/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.6/m.0/cv2/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(PWN(/model.6/m.0/cv2/act/Sigmoid), /model.6/m.0/cv2/act/Mul), /model.6/m.0/Add)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.6/m.1/cv1/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.6/m.1/cv1/act/Sigmoid), /model.6/m.1/cv1/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.6/m.1/cv2/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(PWN(/model.6/m.1/cv2/act/Sigmoid), /model.6/m.1/cv2/act/Mul), /model.6/m.1/Add)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] COPY: /model.6/Split_output_0 copy
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] COPY: /model.6/Split_output_1 copy
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] COPY: /model.6/m.0/Add_output_0 copy
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.6/cv2/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.6/cv2/act/Sigmoid), /model.6/cv2/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.7/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.7/act/Sigmoid), /model.7/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.8/cv1/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.8/cv1/act/Sigmoid), /model.8/cv1/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.8/m.0/cv1/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.8/m.0/cv1/act/Sigmoid), /model.8/m.0/cv1/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.8/m.0/cv2/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(PWN(/model.8/m.0/cv2/act/Sigmoid), /model.8/m.0/cv2/act/Mul), /model.8/m.0/Add)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] COPY: /model.8/Split_output_0 copy
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] COPY: /model.8/Split_output_1 copy
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.8/cv2/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.8/cv2/act/Sigmoid), /model.8/cv2/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.9/cv1/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.9/cv1/act/Sigmoid), /model.9/cv1/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POOLING: /model.9/m/MaxPool
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POOLING: /model.9/m_1/MaxPool
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POOLING: /model.9/m_2/MaxPool
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] COPY: /model.9/cv1/act/Mul_output_0 copy
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] COPY: /model.9/m/MaxPool_output_0 copy
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] COPY: /model.9/m_1/MaxPool_output_0 copy
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.9/cv2/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.9/cv2/act/Sigmoid), /model.9/cv2/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] RESIZE: /model.10/Resize
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] COPY: /model.10/Resize_output_0 copy
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.12/cv1/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.12/cv1/act/Sigmoid), /model.12/cv1/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.12/m.0/cv1/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.12/m.0/cv1/act/Sigmoid), /model.12/m.0/cv1/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.12/m.0/cv2/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.12/m.0/cv2/act/Sigmoid), /model.12/m.0/cv2/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] COPY: /model.12/Split_output_0 copy
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] COPY: /model.12/Split_output_1 copy
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.12/cv2/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.12/cv2/act/Sigmoid), /model.12/cv2/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] RESIZE: /model.13/Resize
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] COPY: /model.13/Resize_output_0 copy
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.15/cv1/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.15/cv1/act/Sigmoid), /model.15/cv1/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.15/m.0/cv1/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.15/m.0/cv1/act/Sigmoid), /model.15/m.0/cv1/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.15/m.0/cv2/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.15/m.0/cv2/act/Sigmoid), /model.15/m.0/cv2/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] COPY: /model.15/Split_output_0 copy
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] COPY: /model.15/Split_output_1 copy
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.15/cv2/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.15/cv2/act/Sigmoid), /model.15/cv2/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.16/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.22/cv2.0/cv2.0.0/conv/Conv || /model.22/cv3.0/cv3.0.0/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.16/act/Sigmoid), /model.16/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.22/cv2.0/cv2.0.0/act/Sigmoid), /model.22/cv2.0/cv2.0.0/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.22/cv3.0/cv3.0.0/act/Sigmoid), /model.22/cv3.0/cv3.0.0/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] COPY: /model.12/cv2/act/Mul_output_0 copy
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.22/cv2.0/cv2.0.1/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.22/cv3.0/cv3.0.1/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.18/cv1/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.22/cv2.0/cv2.0.1/act/Sigmoid), /model.22/cv2.0/cv2.0.1/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.22/cv3.0/cv3.0.1/act/Sigmoid), /model.22/cv3.0/cv3.0.1/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.18/cv1/act/Sigmoid), /model.18/cv1/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.22/cv2.0/cv2.0.2/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.22/cv3.0/cv3.0.2/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.18/m.0/cv1/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] SHUFFLE: /model.22/Reshape
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] COPY: /model.22/Reshape_copy_output
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.18/m.0/cv1/act/Sigmoid), /model.18/m.0/cv1/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.18/m.0/cv2/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.18/m.0/cv2/act/Sigmoid), /model.18/m.0/cv2/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] COPY: /model.18/Split_output_0 copy
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] COPY: /model.18/Split_output_1 copy
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.18/cv2/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.18/cv2/act/Sigmoid), /model.18/cv2/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.19/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.22/cv2.1/cv2.1.0/conv/Conv || /model.22/cv3.1/cv3.1.0/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.19/act/Sigmoid), /model.19/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.22/cv2.1/cv2.1.0/act/Sigmoid), /model.22/cv2.1/cv2.1.0/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.22/cv3.1/cv3.1.0/act/Sigmoid), /model.22/cv3.1/cv3.1.0/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] COPY: /model.9/cv2/act/Mul_output_0 copy
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.22/cv2.1/cv2.1.1/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.22/cv3.1/cv3.1.1/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.21/cv1/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.22/cv2.1/cv2.1.1/act/Sigmoid), /model.22/cv2.1/cv2.1.1/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.22/cv3.1/cv3.1.1/act/Sigmoid), /model.22/cv3.1/cv3.1.1/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.21/cv1/act/Sigmoid), /model.21/cv1/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.22/cv2.1/cv2.1.2/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.22/cv3.1/cv3.1.2/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.21/m.0/cv1/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] SHUFFLE: /model.22/Reshape_1
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] COPY: /model.22/Reshape_1_copy_output
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.21/m.0/cv1/act/Sigmoid), /model.21/m.0/cv1/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.21/m.0/cv2/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.21/m.0/cv2/act/Sigmoid), /model.21/m.0/cv2/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] COPY: /model.21/Split_output_0 copy
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] COPY: /model.21/Split_output_1 copy
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.21/cv2/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.21/cv2/act/Sigmoid), /model.21/cv2/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.22/cv2.2/cv2.2.0/conv/Conv || /model.22/cv3.2/cv3.2.0/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.22/cv2.2/cv2.2.0/act/Sigmoid), /model.22/cv2.2/cv2.2.0/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.22/cv3.2/cv3.2.0/act/Sigmoid), /model.22/cv3.2/cv3.2.0/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.22/cv2.2/cv2.2.1/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.22/cv3.2/cv3.2.1/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.22/cv2.2/cv2.2.1/act/Sigmoid), /model.22/cv2.2/cv2.2.1/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(PWN(/model.22/cv3.2/cv3.2.1/act/Sigmoid), /model.22/cv3.2/cv3.2.1/act/Mul)
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.22/cv2.2/cv2.2.2/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.22/cv3.2/cv3.2.2/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] SHUFFLE: /model.22/Reshape_2
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] COPY: /model.22/Reshape_2_copy_output
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] SHUFFLE: /model.22/dfl/Reshape + /model.22/dfl/Transpose
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] SOFTMAX: /model.22/dfl/Softmax
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONVOLUTION: /model.22/dfl/conv/Conv
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] SHUFFLE: /model.22/dfl/Reshape_1
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONSTANT: /model.22/Constant_9_output_0
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] ELEMENTWISE: /model.22/Sub
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONSTANT: /model.22/Constant_10_output_0
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] ELEMENTWISE: /model.22/Add_1
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(/model.22/Constant_11_output_0 + (Unnamed Layer* 294) [Shuffle], PWN(/model.22/Add_2, /model.22/Div_1))
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] ELEMENTWISE: /model.22/Sub_1
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] CONSTANT: /model.22/Constant_12_output_0 + (Unnamed Layer* 299) [Shuffle]
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] ELEMENTWISE: /model.22/Mul_2
[01/24/2024-23:47:43] [I] [TRT] [GpuLayer] POINTWISE: PWN(/model.22/Sigmoid)
[01/24/2024-23:47:54] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +534, GPU +779, now: CPU 1124, GPU 5070 (MiB)
[01/24/2024-23:47:56] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +83, GPU +149, now: CPU 1207, GPU 5219 (MiB)
[01/24/2024-23:47:56] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored.
[01/24/2024-23:49:40] [W] [TRT] Tactic Device request: 538MB Available: 445MB. Device memory is insufficient to use tactic.
[01/24/2024-23:49:40] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 538 detected for tactic 0x000000000000003c.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:49:40] [W] [TRT] Tactic Device request: 538MB Available: 447MB. Device memory is insufficient to use tactic.
[01/24/2024-23:49:40] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 538 detected for tactic 0x0000000000000074.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:50:23] [W] [TRT] Tactic Device request: 806MB Available: 768MB. Device memory is insufficient to use tactic.
[01/24/2024-23:50:23] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 806 detected for tactic 0x0000000000000004.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:50:24] [W] [TRT] Tactic Device request: 806MB Available: 768MB. Device memory is insufficient to use tactic.
[01/24/2024-23:50:24] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 806 detected for tactic 0x000000000000003c.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:50:24] [W] [TRT] Tactic Device request: 806MB Available: 768MB. Device memory is insufficient to use tactic.
[01/24/2024-23:50:24] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 806 detected for tactic 0x0000000000000074.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:50:38] [W] [TRT] Tactic Device request: 1060MB Available: 707MB. Device memory is insufficient to use tactic.
[01/24/2024-23:50:38] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 1060 detected for tactic 0x0000000000000004.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:50:38] [W] [TRT] Tactic Device request: 1060MB Available: 704MB. Device memory is insufficient to use tactic.
[01/24/2024-23:50:38] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 1060 detected for tactic 0x000000000000003c.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:50:39] [W] [TRT] Tactic Device request: 1060MB Available: 702MB. Device memory is insufficient to use tactic.
[01/24/2024-23:50:39] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 1060 detected for tactic 0x0000000000000074.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:51:09] [W] [TRT] Tactic Device request: 1066MB Available: 722MB. Device memory is insufficient to use tactic.
[01/24/2024-23:51:10] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 1066 detected for tactic 0x000000000000003c.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:51:10] [W] [TRT] Tactic Device request: 1066MB Available: 722MB. Device memory is insufficient to use tactic.
[01/24/2024-23:51:10] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 1066 detected for tactic 0x0000000000000074.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:51:18] [W] [TRT] Tactic Device request: 820MB Available: 708MB. Device memory is insufficient to use tactic.
[01/24/2024-23:51:18] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 820 detected for tactic 0x0000000000000004.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:51:18] [W] [TRT] Tactic Device request: 820MB Available: 708MB. Device memory is insufficient to use tactic.
[01/24/2024-23:51:18] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 820 detected for tactic 0x000000000000003c.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:51:18] [W] [TRT] Tactic Device request: 820MB Available: 708MB. Device memory is insufficient to use tactic.
[01/24/2024-23:51:18] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 820 detected for tactic 0x0000000000000074.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:51:20] [W] [TRT] Tactic Device request: 1093MB Available: 717MB. Device memory is insufficient to use tactic.
[01/24/2024-23:51:20] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 1093 detected for tactic 0x0000000000000004.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:51:20] [W] [TRT] Tactic Device request: 1093MB Available: 717MB. Device memory is insufficient to use tactic.
[01/24/2024-23:51:20] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 1093 detected for tactic 0x000000000000003c.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:51:20] [W] [TRT] Tactic Device request: 1093MB Available: 717MB. Device memory is insufficient to use tactic.
[01/24/2024-23:51:20] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 1093 detected for tactic 0x0000000000000074.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:51:22] [W] [TRT] Tactic Device request: 1599MB Available: 718MB. Device memory is insufficient to use tactic.
[01/24/2024-23:51:22] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 1599 detected for tactic 0x0000000000000004.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:51:22] [W] [TRT] Tactic Device request: 1599MB Available: 718MB. Device memory is insufficient to use tactic.
[01/24/2024-23:51:22] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 1599 detected for tactic 0x000000000000003c.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:51:22] [W] [TRT] Tactic Device request: 1599MB Available: 718MB. Device memory is insufficient to use tactic.
[01/24/2024-23:51:22] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 1599 detected for tactic 0x0000000000000074.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:51:23] [W] [TRT] Tactic Device request: 800MB Available: 726MB. Device memory is insufficient to use tactic.
[01/24/2024-23:51:23] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 800 detected for tactic 0x0000000000000004.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:51:23] [W] [TRT] Tactic Device request: 800MB Available: 726MB. Device memory is insufficient to use tactic.
[01/24/2024-23:51:23] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 800 detected for tactic 0x000000000000003c.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:51:23] [W] [TRT] Tactic Device request: 800MB Available: 726MB. Device memory is insufficient to use tactic.
[01/24/2024-23:51:23] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 800 detected for tactic 0x0000000000000074.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:51:24] [W] [TRT] Tactic Device request: 1590MB Available: 724MB. Device memory is insufficient to use tactic.
[01/24/2024-23:51:24] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 1590 detected for tactic 0x0000000000000004.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:51:24] [W] [TRT] Tactic Device request: 1590MB Available: 724MB. Device memory is insufficient to use tactic.
[01/24/2024-23:51:24] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 1590 detected for tactic 0x000000000000003c.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:51:24] [W] [TRT] Tactic Device request: 1590MB Available: 724MB. Device memory is insufficient to use tactic.
[01/24/2024-23:51:24] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 1590 detected for tactic 0x0000000000000074.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:51:26] [W] [TRT] Tactic Device request: 796MB Available: 721MB. Device memory is insufficient to use tactic.
[01/24/2024-23:51:26] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 796 detected for tactic 0x0000000000000004.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:51:26] [W] [TRT] Tactic Device request: 796MB Available: 717MB. Device memory is insufficient to use tactic.
[01/24/2024-23:51:26] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 796 detected for tactic 0x000000000000003c.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:51:26] [W] [TRT] Tactic Device request: 796MB Available: 717MB. Device memory is insufficient to use tactic.
[01/24/2024-23:51:26] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 796 detected for tactic 0x0000000000000074.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:51:27] [W] [TRT] Tactic Device request: 1183MB Available: 717MB. Device memory is insufficient to use tactic.
[01/24/2024-23:51:27] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 1183 detected for tactic 0x0000000000000004.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:51:27] [W] [TRT] Tactic Device request: 1183MB Available: 712MB. Device memory is insufficient to use tactic.
[01/24/2024-23:51:27] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 1183 detected for tactic 0x000000000000003c.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:51:28] [W] [TRT] Tactic Device request: 1183MB Available: 712MB. Device memory is insufficient to use tactic.
[01/24/2024-23:51:28] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 1183 detected for tactic 0x0000000000000074.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:51:33] [W] [TRT] Tactic Device request: 826MB Available: 705MB. Device memory is insufficient to use tactic.
[01/24/2024-23:51:33] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 826 detected for tactic 0x0000000000000004.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:51:33] [W] [TRT] Tactic Device request: 826MB Available: 705MB. Device memory is insufficient to use tactic.
[01/24/2024-23:51:33] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 826 detected for tactic 0x000000000000003c.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:51:33] [W] [TRT] Tactic Device request: 826MB Available: 706MB. Device memory is insufficient to use tactic.
[01/24/2024-23:51:33] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 826 detected for tactic 0x0000000000000074.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:51:38] [W] [TRT] Tactic Device request: 828MB Available: 702MB. Device memory is insufficient to use tactic.
[01/24/2024-23:51:38] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 828 detected for tactic 0x0000000000000004.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:51:38] [W] [TRT] Tactic Device request: 828MB Available: 701MB. Device memory is insufficient to use tactic.
[01/24/2024-23:51:38] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 828 detected for tactic 0x000000000000003c.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:51:38] [W] [TRT] Tactic Device request: 828MB Available: 701MB. Device memory is insufficient to use tactic.
[01/24/2024-23:51:38] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 828 detected for tactic 0x0000000000000074.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:51:38] [W] [TRT] Tactic Device request: 826MB Available: 700MB. Device memory is insufficient to use tactic.
[01/24/2024-23:51:38] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 826 detected for tactic 0x0000000000000004.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:51:39] [W] [TRT] Tactic Device request: 826MB Available: 700MB. Device memory is insufficient to use tactic.
[01/24/2024-23:51:39] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 826 detected for tactic 0x000000000000003c.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:51:39] [W] [TRT] Tactic Device request: 826MB Available: 700MB. Device memory is insufficient to use tactic.
[01/24/2024-23:51:39] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 826 detected for tactic 0x0000000000000074.
Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit().
[01/24/2024-23:52:29] [I] [TRT] Total Activation Memory: 6955204608
[01/24/2024-23:52:29] [I] [TRT] Detected 1 inputs and 3 output network tensors.
[01/24/2024-23:52:30] [I] [TRT] Total Host Persistent Memory: 212544
[01/24/2024-23:52:30] [I] [TRT] Total Device Persistent Memory: 0
[01/24/2024-23:52:30] [I] [TRT] Total Scratch Memory: 0
[01/24/2024-23:52:30] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 3 MiB, GPU 1070 MiB
[01/24/2024-23:52:30] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 232 steps to complete.
[01/24/2024-23:52:30] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 32.548ms to assign 9 blocks to 232 nodes requiring 19354624 bytes.
[01/24/2024-23:52:30] [I] [TRT] Total Activation Memory: 19354624
[01/24/2024-23:52:31] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU -4, now: CPU 1537, GPU 5834 (MiB)
[01/24/2024-23:52:31] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +2, GPU +16, now: CPU 2, GPU 16 (MiB)
[01/24/2024-23:52:31] [I] Engine built in 291.515 sec.
[01/24/2024-23:52:31] [I] [TRT] Loaded engine size: 13 MiB
[01/24/2024-23:52:31] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1241, GPU 5713 (MiB)
[01/24/2024-23:52:31] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +12, now: CPU 0, GPU 12 (MiB)
[01/24/2024-23:52:31] [I] Engine deserialized in 0.0610085 sec.
[01/24/2024-23:52:31] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1241, GPU 5713 (MiB)
[01/24/2024-23:52:31] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +18, now: CPU 0, GPU 30 (MiB)
[01/24/2024-23:52:31] [I] Setting persistentCacheLimit to 0 bytes.
[01/24/2024-23:52:31] [I] Using random values for input images
[01/24/2024-23:52:31] [I] Created input binding for images with dimensions 1x3x640x640
[01/24/2024-23:52:31] [I] Using random values for output output0
[01/24/2024-23:52:31] [I] Created output binding for output0 with dimensions 1x84x8400
[01/24/2024-23:52:31] [I] Starting inference
[01/24/2024-23:52:35] [I] Warmup completed 11 queries over 200 ms
[01/24/2024-23:52:35] [I] Timing trace has 252 queries over 3.04795 s
[01/24/2024-23:52:35] [I] 
[01/24/2024-23:52:35] [I] === Trace details ===
[01/24/2024-23:52:35] [I] Trace averages of 10 runs:
[01/24/2024-23:52:35] [I] Average on 10 runs - GPU latency: 12.2134 ms - Host latency: 13.0494 ms (enqueue 1.80647 ms)
[01/24/2024-23:52:35] [I] Average on 10 runs - GPU latency: 11.9389 ms - Host latency: 12.7767 ms (enqueue 1.78606 ms)
[01/24/2024-23:52:35] [I] Average on 10 runs - GPU latency: 11.9461 ms - Host latency: 12.7821 ms (enqueue 1.70046 ms)
[01/24/2024-23:52:35] [I] Average on 10 runs - GPU latency: 11.9246 ms - Host latency: 12.7574 ms (enqueue 1.68814 ms)
[01/24/2024-23:52:35] [I] Average on 10 runs - GPU latency: 11.9376 ms - Host latency: 12.7733 ms (enqueue 1.67764 ms)
[01/24/2024-23:52:35] [I] Average on 10 runs - GPU latency: 12.0655 ms - Host latency: 12.901 ms (enqueue 1.73138 ms)
[01/24/2024-23:52:35] [I] Average on 10 runs - GPU latency: 12.0493 ms - Host latency: 12.8857 ms (enqueue 2.11086 ms)
[01/24/2024-23:52:35] [I] Average on 10 runs - GPU latency: 11.9427 ms - Host latency: 12.7829 ms (enqueue 1.75826 ms)
[01/24/2024-23:52:35] [I] Average on 10 runs - GPU latency: 11.9469 ms - Host latency: 12.7815 ms (enqueue 1.6875 ms)
[01/24/2024-23:52:35] [I] Average on 10 runs - GPU latency: 11.9427 ms - Host latency: 12.7738 ms (enqueue 1.62587 ms)
[01/24/2024-23:52:35] [I] Average on 10 runs - GPU latency: 11.9301 ms - Host latency: 12.7675 ms (enqueue 1.61262 ms)
[01/24/2024-23:52:35] [I] Average on 10 runs - GPU latency: 11.9585 ms - Host latency: 12.7965 ms (enqueue 1.85985 ms)
[01/24/2024-23:52:35] [I] Average on 10 runs - GPU latency: 12.529 ms - Host latency: 13.3588 ms (enqueue 1.91141 ms)
[01/24/2024-23:52:35] [I] Average on 10 runs - GPU latency: 13.0371 ms - Host latency: 13.8789 ms (enqueue 2.03623 ms)
[01/24/2024-23:52:35] [I] Average on 10 runs - GPU latency: 11.9421 ms - Host latency: 12.7814 ms (enqueue 1.77043 ms)
[01/24/2024-23:52:35] [I] Average on 10 runs - GPU latency: 11.942 ms - Host latency: 12.7751 ms (enqueue 1.61842 ms)
[01/24/2024-23:52:35] [I] Average on 10 runs - GPU latency: 11.9325 ms - Host latency: 12.7642 ms (enqueue 1.59297 ms)
[01/24/2024-23:52:35] [I] Average on 10 runs - GPU latency: 11.9462 ms - Host latency: 12.7803 ms (enqueue 1.5813 ms)
[01/24/2024-23:52:35] [I] Average on 10 runs - GPU latency: 11.9228 ms - Host latency: 12.7574 ms (enqueue 1.58745 ms)
[01/24/2024-23:52:35] [I] Average on 10 runs - GPU latency: 11.9351 ms - Host latency: 12.7693 ms (enqueue 1.6156 ms)
[01/24/2024-23:52:35] [I] Average on 10 runs - GPU latency: 11.9377 ms - Host latency: 12.77 ms (enqueue 1.64961 ms)
[01/24/2024-23:52:35] [I] Average on 10 runs - GPU latency: 11.9284 ms - Host latency: 12.7617 ms (enqueue 1.61633 ms)
[01/24/2024-23:52:35] [I] Average on 10 runs - GPU latency: 11.93 ms - Host latency: 12.763 ms (enqueue 1.58835 ms)
[01/24/2024-23:52:35] [I] Average on 10 runs - GPU latency: 11.9316 ms - Host latency: 12.7665 ms (enqueue 1.57393 ms)
[01/24/2024-23:52:35] [I] Average on 10 runs - GPU latency: 11.9222 ms - Host latency: 12.7552 ms (enqueue 1.60393 ms)
[01/24/2024-23:52:35] [I] 
[01/24/2024-23:52:35] [I] === Performance summary ===
[01/24/2024-23:52:35] [I] Throughput: 82.6785 qps
[01/24/2024-23:52:35] [I] Latency: min = 12.5325 ms, max = 17.2322 ms, mean = 12.8589 ms, median = 12.772 ms, percentile(90%) = 12.8116 ms, percentile(95%) = 12.8294 ms, percentile(99%) = 16.2383 ms
[01/24/2024-23:52:35] [I] Enqueue Time: min = 1.54224 ms, max = 2.84167 ms, mean = 1.71052 ms, median = 1.6449 ms, percentile(90%) = 1.91736 ms, percentile(95%) = 2.07947 ms, percentile(99%) = 2.50354 ms
[01/24/2024-23:52:35] [I] H2D Latency: min = 0.550781 ms, max = 0.609375 ms, mean = 0.577053 ms, median = 0.576294 ms, percentile(90%) = 0.58905 ms, percentile(95%) = 0.594116 ms, percentile(99%) = 0.605713 ms
[01/24/2024-23:52:35] [I] GPU Compute Time: min = 11.7771 ms, max = 16.3903 ms, mean = 12.0241 ms, median = 11.9379 ms, percentile(90%) = 11.9702 ms, percentile(95%) = 11.9902 ms, percentile(99%) = 15.3837 ms
[01/24/2024-23:52:35] [I] D2H Latency: min = 0.167236 ms, max = 0.269531 ms, mean = 0.257708 ms, median = 0.257812 ms, percentile(90%) = 0.260986 ms, percentile(95%) = 0.262451 ms, percentile(99%) = 0.264893 ms
[01/24/2024-23:52:35] [I] Total Host Walltime: 3.04795 s
[01/24/2024-23:52:35] [I] Total GPU Compute Time: 3.03008 s
[01/24/2024-23:52:35] [W] * GPU compute time is unstable, with coefficient of variance = 4.34545%.
[01/24/2024-23:52:35] [W]   If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability.
[01/24/2024-23:52:35] [I] Explanations of the performance metrics are printed in the verbose logs.
[01/24/2024-23:52:35] [I] 
&&&& PASSED TensorRT.trtexec [TensorRT v8502] # ./trtexec --onnx=/home/sai/Documents/traffic/yolov8n.onnx --saveEngine=/home/sai/Documents/traffic/yolov8n2.engine

However, when I ran the same code using this newly generated engine file, I got the error below:

WARNING ⚠️ Unable to automatically guess model task, assuming 'task=detect'. Explicitly define task for your model, i.e. 'task=detect', 'segment', 'classify','pose' or 'obb'.
Loading yolov8n2.engine for TensorRT inference...
Traceback (most recent call last):
  File "/home/sai/Documents/traffic/yolof3.py", line 71, in <module>
    results = model.predict(frame, conf=0.8, agnostic_nms=True, iou=0.1)
  File "/home/sai/.local/lib/python3.8/site-packages/ultralytics/engine/model.py", line 268, in predict
    self.predictor.setup_model(model=self.model, verbose=is_cli)
  File "/home/sai/.local/lib/python3.8/site-packages/ultralytics/engine/predictor.py", line 341, in setup_model
    self.model = AutoBackend(
  File "/home/sai/.local/lib/python3.8/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/home/sai/.local/lib/python3.8/site-packages/ultralytics/nn/autobackend.py", line 214, in __init__
    metadata = json.loads(f.read(meta_len).decode("utf-8"))  # read metadata
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe8 in position 4: invalid continuation byte

Hi,

Thanks for the testing.

The UTF-8 issue might be related to the issue linked below; please take a look:
https://github.com/ultralytics/ultralytics/issues/1225

It looks like TensorRT can reach 82 qps, so it should be possible to reach 30 fps.
The post-processing in your source seems to take time to finish.

    tracks = tracker.update_tracks(detections, frame=frame)
    ...
    cv2.imshow('Video', frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

Please check whether you can use the DeepStream sample to accelerate the post-processing.

Thanks.

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.