Hi everyone,
I hope someone can help me with this issue I’m facing while trying to use TensorRT with my YOLOv5 model on my NVIDIA Jetson Orin Nano.
Initially, I trained a YOLOv5 model, and it worked well, but it was running slowly. To improve performance, I decided to convert the YOLOv5 model to TensorRT. I used the following code to convert it:
import torch
import tensorrt as trt
import onnx
import os
import sys
# Adding YOLOv5 folder to the system path
sys.path.append('/home/onur/Desktop/projects/denemeV2/yolov5')
from yolov5.models.common import DetectMultiBackend
# PyTorch model path
model_path = "/home/onur/Desktop/projects/denemeV2/yolov5/runs/train/exp4/weights/last.pt"
onnx_path = model_path.replace(".pt", ".onnx")
tensorrt_path = model_path.replace(".pt", ".engine")
# Export PyTorch model to ONNX
def export_to_onnx(model_path, onnx_path):
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = DetectMultiBackend(model_path, device=device) # Fully load the model
model.eval()
dummy_input = torch.randn(1, 3, 640, 640, device=device)
torch.onnx.export(
model,
dummy_input,
onnx_path,
opset_version=11,
input_names=['input'],
output_names=['output'],
dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}} # Dynamic input and output sizes
)
# Convert ONNX model to TensorRT
def onnx_to_tensorrt(onnx_path, trt_path):
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
parser = trt.OnnxParser(network, TRT_LOGGER)
with open(onnx_path, 'rb') as model:
if not parser.parse(model.read()):
print('ERROR: Failed to parse ONNX model')
for error in range(parser.num_errors):
print(parser.get_error(error))
return
config = builder.create_builder_config()
config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 30) # Memory setting
# Create optimization profile for dynamic shapes
profile = builder.create_optimization_profile()
input_name = network.get_input(0).name
input_shape = (1, 3, 640, 640)
profile.set_shape(input_name, min=input_shape, opt=input_shape, max=input_shape)
config.add_optimization_profile(profile)
# Use FP32, FP16 is not required.
serialized_engine = builder.build_serialized_network(network, config)
if serialized_engine is None:
print('ERROR: Failed to build the engine')
return
# Save TensorRT engine
with open(trt_path, 'wb') as f:
f.write(serialized_engine)
# Run conversion steps
if __name__ == "__main__":
# Step 1: Export to ONNX
if not os.path.exists(onnx_path):
export_to_onnx(model_path, onnx_path)
print(f"ONNX model saved to {onnx_path}")
# Step 2: Convert to TensorRT
if os.path.exists(onnx_path):
onnx_to_tensorrt(onnx_path, tensorrt_path)
print(f"TensorRT engine saved to {tensorrt_path}")
The output indicated that the .engine
file was created successfully:
onur@ubuntu:~/Desktop/projects$ /bin/python /home/onur/Desktop/projects/denemeV2/convert.py
Fusing layers...
Model summary: 157 layers, 7020913 parameters, 0 gradients, 15.8 GFLOPs
/home/onur/Desktop/projects/denemeV2/yolov5/models/common.py:688: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
y = self.model(im, augment=augment, visualize=visualize) if augment or visualize else self.model(im)
/home/onur/Desktop/projects/denemeV2/yolov5/models/yolo.py:101: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
ONNX model saved to /home/onur/Desktop/projects/denemeV2/yolov5/runs/train/exp4/weights/last.onnx
[11/18/2024-19:14:22] [TRT] [W] onnx2trt_utils.cpp:372: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32.
TensorRT engine saved to /home/onur/Desktop/projects/denemeV2/yolov5/runs/train/exp4/weights/last.engine
Then, I tried to use this TensorRT engine in the following Python code for live video inference:
import cv2
import time
import numpy as np
import pycuda.driver as cuda # CUDA kullanımı için gerekli
import pycuda.autoinit
import tensorrt as trt
# TensorRT Logger
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
# TensorRT motor dosyasının yolu
engine_path = '/home/onur/Desktop/projects/denemeV2/yolov5/runs/train/exp4/weights/last.engine'
# TensorRT motorunu yükleme
with open(engine_path, 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
engine = runtime.deserialize_cuda_engine(f.read())
# TensorRT context oluşturma
context = engine.create_execution_context()
# Girdi ve çıktı için CUDA bellek ayırma
input_shape = (1, 3, 640, 640) # Modelin girdi boyutları
output_shape = (1, 25200, 85) # Modelin çıktı boyutları (YOLOv5 çıktısı için)
# CUDA belleği
d_input = cuda.mem_alloc(trt.volume(input_shape) * np.float32().nbytes)
d_output = cuda.mem_alloc(trt.volume(output_shape) * np.float32().nbytes)
bindings = [int(d_input), int(d_output)]
stream = cuda.Stream()
# GStreamer pipeline tanımlama
def gstreamer_pipeline(
sensor_id=0,
capture_width=1280, # Resolution: 3280 x 2464 FR = 21, 3280 x 1848 FR = 28, 1920 x 1080 FR = 29, 1640 x 1232 FR = 29, 1280 x 720 FR = 59
capture_height=720,
display_width=960,
display_height=540,
framerate=30,
flip_method=6,
):
return (
"nvarguscamerasrc sensor-id=%d ! "
"video/x-raw(memory:NVMM), width=(int)%d, height=(int)%d, framerate=(fraction)%d/1 ! "
"nvvidconv flip-method=%d ! "
"video/x-raw, width=(int)%d, height=(int)%d, format=(string)BGRx ! "
"videoconvert ! "
"video/x-raw, format=(string)BGR ! appsink"
% (
sensor_id,
capture_width,
capture_height,
framerate,
flip_method,
display_width,
display_height,
)
)
# Canlı video yakalama
cap = cv2.VideoCapture(gstreamer_pipeline(flip_method=6), cv2.CAP_GSTREAMER)
prev_frame_time = 0
new_frame_time = 0
prev_infer_time = time.time()
infer_interval = 0.5 # Modelin yarı saniye aralıkla çalışması için
if not cap.isOpened():
print("Error: Unable to open camera")
exit()
while cap.isOpened():
ret, frame = cap.read()
if not ret:
print("Error: Unable to read frame from camera.")
break
current_time = time.time()
# Model tahminini belirli bir aralıkla yap
if current_time - prev_infer_time > infer_interval:
# Girdi verisinin TensorRT formatında hazırlanması
img_resized = cv2.resize(frame, (640, 640))
img_transposed = np.transpose(img_resized, (2, 0, 1)).astype(np.float32) / 255.0
img_input = np.ascontiguousarray(img_transposed[np.newaxis, ...])
# Girdi verisini CUDA'ya kopyalama
cuda.memcpy_htod_async(d_input, img_input, stream)
# Modeli çalıştırma
context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
# Çıktı verisini CUDA'dan alma
output = np.empty(output_shape, dtype=np.float32)
cuda.memcpy_dtoh_async(output, d_output, stream)
stream.synchronize()
# Tahmin yapıldıktan sonra `prev_infer_time`ı güncelle
prev_infer_time = current_time
# FPS hesaplama
new_frame_time = time.time()
fps = 1 / (new_frame_time - prev_frame_time)
prev_frame_time = new_frame_time
# Virgüllü formatta FPS değeri
fps_text = "FPS: {:.2f}".format(fps)
# FPS değerini ekrana yazdırma
frame_with_fps = frame.copy() # Orijinal kareyi koruyarak üstüne yaz
cv2.putText(frame_with_fps, fps_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
# Ekranda görüntüyü göster
cv2.imshow('TensorRT Inference', frame_with_fps)
if cv2.waitKey(10) & 0xFF == ord('q'):
break
cap.release()
cuda.mem_free(d_input)
cuda.mem_free(d_output)
cuda.Context.pop()
cv2.destroyAllWindows()
When I ran the script, the camera opened briefly and then immediately closed, and I received the following warnings and errors:
onur@ubuntu:~/Desktop/projects$ /bin/python /home/onur/Desktop/projects/denemeV2/5.3.3live_video_with_trained_model.py
GST_ARGUS: Creating output stream
...
[11/18/2024-19:30:37] [TRT] [E] 3: [executionContext.cpp::enqueueInternal::816] Error Code 3: API Usage Error (Parameter check failed at: runtime/api/executionContext.cpp::enqueueInternal::816, condition: bindings[x] || nullBindingOK)
[11/18/2024-19:30:37] [TRT] [E] 1: [genericReformat.cuh::copyVectorizedRunKernel::1579] Error Code 1: Cuda Runtime (an illegal memory access was encountered)
Traceback (most recent call last):
File "/home/onur/Desktop/projects/denemeV2/5.3.3live_video_with_trained_model.py", line 91, in <module>
cuda.memcpy_dtoh_async(output, d_output, stream)
pycuda._driver.LogicError: cuMemcpyDtoHAsync failed: an illegal memory access was encountered
...
All Errors:
onur@ubuntu:~/Desktop/projects$ /bin/python /home/onur/Desktop/projects/denemeV2/5.3.3live_video_with_trained_model.py
GST_ARGUS: Creating output stream
CONSUMER: Waiting until producer is connected...
GST_ARGUS: Available Sensor modes :
GST_ARGUS: 3280 x 2464 FR = 21.000000 fps Duration = 47619048 ; Analog Gain range min 1.000000, max 10.625000; Exposure Range min 13000, max 683709000;
GST_ARGUS: 3280 x 1848 FR = 28.000001 fps Duration = 35714284 ; Analog Gain range min 1.000000, max 10.625000; Exposure Range min 13000, max 683709000;
GST_ARGUS: 1920 x 1080 FR = 29.999999 fps Duration = 33333334 ; Analog Gain range min 1.000000, max 10.625000; Exposure Range min 13000, max 683709000;
GST_ARGUS: 1640 x 1232 FR = 29.999999 fps Duration = 33333334 ; Analog Gain range min 1.000000, max 10.625000; Exposure Range min 13000, max 683709000;
GST_ARGUS: 1280 x 720 FR = 59.999999 fps Duration = 16666667 ; Analog Gain range min 1.000000, max 10.625000; Exposure Range min 13000, max 683709000;
GST_ARGUS: Running with following settings:
Camera index = 0
Camera mode = 4
Output Stream W = 1280 H = 720
seconds to Run = 0
Frame Rate = 59.999999
GST_ARGUS: Setup Complete, Starting captures for 0 seconds
GST_ARGUS: Starting repeat capture requests.
CONSUMER: Producer has connected; continuing.
[ WARN:0@1.056] global cap_gstreamer.cpp:1777 open OpenCV | GStreamer warning: Cannot query video position: status=0, value=-1, duration=-1
Gtk-Message: 19:30:36.879: Failed to load module "canberra-gtk-module"
[11/18/2024-19:30:37] [TRT] [E] 3: [executionContext.cpp::enqueueInternal::816] Error Code 3: API Usage Error (Parameter check failed at: runtime/api/executionContext.cpp::enqueueInternal::816, condition: bindings[x] || nullBindingOK
)
[11/18/2024-19:30:37] [TRT] [E] 1: [genericReformat.cuh::copyVectorizedRunKernel::1579] Error Code 1: Cuda Runtime (an illegal memory access was encountered)
Traceback (most recent call last):
File "/home/onur/Desktop/projects/denemeV2/5.3.3live_video_with_trained_model.py", line 91, in <module>
cuda.memcpy_dtoh_async(output, d_output, stream)
pycuda._driver.LogicError: cuMemcpyDtoHAsync failed: an illegal memory access was encountered
[11/18/2024-19:30:38] [TRT] [E] 1: [defaultAllocator.cpp::deallocate::61] Error Code 1: Cuda Runtime (an illegal memory access was encountered)
[11/18/2024-19:30:38] [TRT] [E] 1: [defaultAllocator.cpp::deallocate::61] Error Code 1: Cuda Runtime (an illegal memory access was encountered)
[11/18/2024-19:30:38] [TRT] [E] 1: [cudaResources.cpp::~ScopedCudaStream::47] Error Code 1: Cuda Runtime (an illegal memory access was encountered)
[11/18/2024-19:30:38] [TRT] [E] 1: [cudaResources.cpp::~ScopedCudaEvent::24] Error Code 1: Cuda Runtime (an illegal memory access was encountered)
[11/18/2024-19:30:38] [TRT] [E] 1: [cudaResources.cpp::~ScopedCudaEvent::24] Error Code 1: Cuda Runtime (an illegal memory access was encountered)
[11/18/2024-19:30:38] [TRT] [E] 1: [cudaResources.cpp::~ScopedCudaEvent::24] Error Code 1: Cuda Runtime (an illegal memory access was encountered)
[11/18/2024-19:30:38] [TRT] [E] 1: [cudaResources.cpp::~ScopedCudaEvent::24] Error Code 1: Cuda Runtime (an illegal memory access was encountered)
[11/18/2024-19:30:38] [TRT] [E] 1: [cudaResources.cpp::~ScopedCudaEvent::24] Error Code 1: Cuda Runtime (an illegal memory access was encountered)
[11/18/2024-19:30:38] [TRT] [E] 1: [cudaResources.cpp::~ScopedCudaEvent::24] Error Code 1: Cuda Runtime (an illegal memory access was encountered)
[11/18/2024-19:30:38] [TRT] [E] 1: [cudaResources.cpp::~ScopedCudaEvent::24] Error Code 1: Cuda Runtime (an illegal memory access was encountered)
[11/18/2024-19:30:38] [TRT] [E] 1: [cudaResources.cpp::~ScopedCudaEvent::24] Error Code 1: Cuda Runtime (an illegal memory access was encountered)
[11/18/2024-19:30:38] [TRT] [E] 1: [cudaResources.cpp::~ScopedCudaEvent::24] Error Code 1: Cuda Runtime (an illegal memory access was encountered)
...
untime (an illegal memory access was encountered)
[11/18/2024-19:30:38] [TRT] [E] 1: [cudaResources.cpp::~ScopedCudaEvent::24] Error Code 1: Cuda Runtime (an illegal memory access was encountered)
[11/18/2024-19:30:38] [TRT] [E] 1: [defaultAllocator.cpp::deallocate::61] Error Code 1: Cuda Runtime (an illegal memory access was encountered)
[11/18/2024-19:30:38] [TRT] [E] 1: [cudaDriverHelpers.cpp::operator()::94] Error Code 1: Cuda Driver (an illegal memory access was encountered)
[11/18/2024-19:30:38] [TRT] [E] 1: [cudaDriverHelpers.cpp::operator()::94] Error Code 1: Cuda Driver (an illegal memory access was encountered)
[11/18/2024-19:30:38] [TRT] [E] 1: [cudaDriverHelpers.cpp::operator()::94] Error Code 1: Cuda Driver (an illegal memory access was encountered)
[11/18/2024-19:30:38] [TRT] [E] 1: [cudaDriverHelpers.cpp::operator()::94] Error Code 1: Cuda Driver (an illegal memory access was encountered)
[11/18/2024-19:30:38] [TRT] [E] 1: [cudaDriverHelpers.cpp::operator()::94] Error Code 1: Cuda Driver (an illegal memory access was encountered)
PyCUDA WARNING: a clean-up operation failed (dead context maybe?)
cuMemFree failed: an illegal memory access was encountered
PyCUDA WARNING: a clean-up operation failed (dead context maybe?)
cuMemFree failed: an illegal memory access was encountered
PyCUDA WARNING: a clean-up operation failed (dead context maybe?)
cuStreamDestroy failed: an illegal memory access was encountered
GST_ARGUS: Cleaning up
CONSUMER: Done Success
GST_ARGUS: Done Success
I am struggling to understand what went wrong here. The TensorRT engine was successfully built, but I can’t seem to run the inference properly. The camera works, but it quickly shuts down, and I receive multiple CUDA-related errors about illegal memory access.
Could anyone help me figure out where I might be going wrong and what I can do to resolve this issue?
Any guidance would be greatly appreciated. Thank you! @AastaLLL, @AakankshaS @EduardoSalazar96, @allan.navarro, @proventusnova