Hi,
I’m running a simulation of a drone recording frames of the ground below it, and I want SURF to find matches between consecutive frames on a Jetson Orin NX.
My setup:
Jetpack 6.0
Ubuntu 22.04LTS
L4T 36.3.0
CUDA Arch 8.7
CUDA 12.2.140
OpenCV 4.10.0 with CUDA: YES
Python 3.10.12
I ran the test on my original computer, which has an NVIDIA GeForce MX330 GPU, and it averages 28ms per frame of the video.
Compare that to the Jetson’s performance of 45ms per frame. I tried many things in the code to speed it up. However, the OpenCV function surf.detectWithDescriptors()
always takes ~45ms to process a frame.
Is there a way to get the Jetson to run SURF detect-and-compute on 1280x720 images in under 30ms per frame?
Here is the code that I run as a reference:
import cv2
import time
import numpy as np
import matplotlib.pyplot as plt
from collections import deque
class SURFProcessor:
    """Run CUDA SURF on each frame and match it against the previous frame.

    The processor keeps the descriptors and keypoints of the last processed
    frame so that every call to :meth:`process_frame` can match the new
    frame against the one before it.

    ``min_matches``, ``fps`` and ``prev_frame`` are stored on the instance
    but are not read by any code in this class.
    """

    def __init__(self, min_matches=10, fps=50, frame_shape=(720, 1280)):
        """Create the CUDA SURF detector, the matcher and the reusable buffers.

        Args:
            min_matches: Stored as ``self.min_matches``.
            fps: Stored as ``self.fps``.
            frame_shape: ``(rows, cols)`` of the grayscale frames, used to
                pre-allocate the GPU image and the pinned host buffer.
                Defaults to 720x1280.
        """
        self.surf = cv2.cuda.SURF_CUDA_create(
            _hessianThreshold=500,
            _nOctaves=1,
            _nOctaveLayers=1,
            _extended=False,
            _upright=True,
        )
        self.matcher = cv2.cuda.DescriptorMatcher_createBFMatcher(cv2.NORM_L2)
        self.stream = cv2.cuda.Stream()
        self.min_matches = min_matches

        rows, cols = frame_shape
        self.GPU_image = cv2.cuda_GpuMat(rows, cols, cv2.CV_8UC1)
        self.current_GPU_descriptors = cv2.cuda_GpuMat()
        self.previous_GPU_descriptors = cv2.cuda_GpuMat()

        # Keep a reference to the array that gets page-locked.  Registering a
        # temporary would pin memory that is released as soon as the call
        # returns, and the buffer could never be used for uploads.
        # NOTE(review): the buffer is never unregistered; presumably it should
        # live as long as the processor -- confirm.
        self.prev_gray = np.zeros((rows, cols), dtype=np.uint8)
        cv2.cuda.registerPageLocked(self.prev_gray)

        self.prev_keypoints = None
        self.prev_frame = None
        self.fps = fps

    def process_frame(self, current_frame):
        """Detect SURF features in ``current_frame`` and match to the last frame.

        Args:
            current_frame: BGR image to process, or ``None``.

        Returns:
            ``None`` when ``current_frame`` is ``None``.  Otherwise the matches
            returned by the CUDA brute-force matcher between this frame's
            descriptors and the previous frame's descriptors, or an empty
            tuple when either frame has no keypoints (this includes the very
            first frame).
        """
        if current_frame is None:
            return None

        # Convert straight into the page-locked buffer when the shapes agree;
        # cvtColor hands back a different array otherwise, so always upload
        # the returned array.
        current_gray = cv2.cvtColor(
            current_frame, cv2.COLOR_BGR2GRAY, dst=self.prev_gray
        )
        self.GPU_image.upload(current_gray, stream=self.stream)
        # detectWithDescriptors is called without a stream, so make sure the
        # upload queued on self.stream has finished before detection reads
        # the image (and before the host buffer is reused for the next frame).
        self.stream.waitForCompletion()

        current_gpu_keypoints, self.current_GPU_descriptors = (
            self.surf.detectWithDescriptors(self.GPU_image, None)
        )
        current_keypoints = self.surf.downloadKeypoints(current_gpu_keypoints)

        have_current = current_keypoints is not None and len(current_keypoints) > 0
        have_previous = (
            self.prev_keypoints is not None and len(self.prev_keypoints) > 0
        )

        matches = ()
        if have_current and have_previous:
            matches = self.matcher.match(
                self.current_GPU_descriptors,
                self.previous_GPU_descriptors,
            )

        # Update previous frame data
        self.previous_GPU_descriptors = self.current_GPU_descriptors.clone()
        self.prev_keypoints = current_keypoints
        return matches
def main():
    """Time SURFProcessor.process_frame on every frame of a video file.

    Opens ``some_drone_video.avi``, runs each frame through a
    ``SURFProcessor``, prints the per-frame processing time and finally
    prints summary statistics (mean, standard deviation, max, min, FPS).
    Returns early with a message when no CUDA device is available, when the
    video cannot be opened, or when no frame could be read.
    """
    if cv2.cuda.getCudaEnabledDeviceCount() == 0:
        print("CUDA is not available. Exiting...")
        return

    cap = cv2.VideoCapture('some_drone_video.avi')
    if not cap.isOpened():
        print("Error: Could not open video capture")
        return

    fps = int(cap.get(cv2.CAP_PROP_FPS))
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    # NOTE(review): no frame is written to this writer anywhere in this
    # function; it is kept so the output file is still created, and it is
    # now released below.
    out = cv2.VideoWriter('matched_frames4.avi',
                          fourcc,
                          fps,
                          (2560, 720))

    processing_times = []
    try:
        processor = SURFProcessor(min_matches=10, fps=fps)
        # NOTE(review): the first frame presumably includes one-time CUDA
        # initialisation cost, which would inflate the average/max -- confirm
        # and consider discarding it.
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            # perf_counter is monotonic and meant for measuring intervals.
            t1 = time.perf_counter()
            processor.process_frame(frame)
            t2 = time.perf_counter()
            processing_time = t2 - t1
            processing_times.append(processing_time)
            print(f"Frame {len(processing_times)} processing time: {processing_time:.4f} seconds")
    finally:
        # Release both handles even if processing raised.
        cap.release()
        out.release()

    if not processing_times:
        # np.max / np.min raise ValueError on an empty sequence.
        print("No frames were processed; no statistics to report.")
        return

    mean_seconds = np.mean(processing_times)
    avg_time = mean_seconds * 1000
    std_time = np.std(processing_times) * 1000
    max_time = np.max(processing_times) * 1000
    min_time = np.min(processing_times) * 1000

    print("\nPerformance Statistics:")
    print(f"Average processing time: {avg_time:.1f}ms")
    print(f"Standard deviation: {std_time:.1f}ms")
    print(f"Maximum processing time: {max_time:.1f}ms")
    print(f"Minimum processing time: {min_time:.1f}ms")
    print(f"Frames per second: {1/mean_seconds:.2f}")
# Run the benchmark only when this file is executed as a script.
if __name__ == "__main__":
    main()