Okay, I have now found what I was looking for. Actually, @Honey_Patouceul, the answer was in one of your old posts:
gpu-acceleration-support-for-opencv-gstreamer-pipeline
In case someone else encounters the same problem, I wanted to contribute a simple example as well. @dusty-nv’s jetson-utils library actually provides an easy way to capture images and handle them inside NVMM memory. (Just compile it with -DNVMM_ENABLE=1.)
I also wrote a simple test program to check the actual result; this example is for my CSI camera. Using the gstCamera class of the jetson-utils library allows you to handle the frame in CUDA, or even as a cv::cuda::GpuMat.
void trying_jetsonutils(){
std::cout << "Trying NVMM read" << std::endl;
// create input stream
videoOptions opt;
opt.width = 3264;
opt.height = 1848;
opt.frameRate = 28;
opt.zeroCopy = false; // GPU access only for better speed
opt.resource = "csi://0";
// videoSource * input = videoSource::Create("csi://0", opt);
gstCamera * input = gstCamera::Create(opt);
if (!input) {
std::cerr << "Error: Failed to create input stream" << std::endl;
exit(-1);
}
// Read one frame to get resolution
uchar3* image = NULL;
if( !input->Capture(&image, 1000) )
{
std::cerr << "Error: failed to capture first video frame" << std::endl;
delete input;
exit(3);
}
cv::cuda::GpuMat dummy_frame(input->GetHeight(), input->GetWidth(), CV_8UC3);
int i = 0;
std::chrono::high_resolution_clock::time_point start_time;
std::chrono::high_resolution_clock::time_point end_time;
std::chrono::microseconds duration;
while( 1 ){
// capture next image
if( !input->Capture(&image, 1000) ){
std::cerr << "Error: failed to capture video frame" << std::endl;
continue;
}
// Some OpenCv processing
start_time = std::chrono::high_resolution_clock::now();
cv::cuda::GpuMat frame_in(input->GetHeight(), input->GetWidth(), CV_8UC3, image);
end_time = std::chrono::high_resolution_clock::now();
duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
std::cout << "Upload duration: " << duration.count() << " us "<< std::endl;
start_time = std::chrono::high_resolution_clock::now();
cv::cuda::cvtColor(frame_in, dummy_frame, cv::COLOR_RGB2GRAY);
end_time = std::chrono::high_resolution_clock::now();
duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
std::cout << "CvtColor duration: " << duration.count() << " us "<< std::endl;
cv::Mat cpu_frame;
start_time = std::chrono::high_resolution_clock::now();
dummy_frame.download(cpu_frame);
end_time = std::chrono::high_resolution_clock::now();
duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
std::cout << "Download duration: " << duration.count() << " us "<< std::endl;
cv::imwrite("gpu_frame.png", cpu_frame);
if( !input->IsStreaming() )
break;
if (i > 10)
break;
i++;
}
delete input;
}
The upload time is now around 2 us, compared to around 25000 us in my old test. This means the image data is not copied, because it is already in memory that the GPU can access.