Reading CSI camera input directly to GPU memory

Okay, I have now found what I was looking for. Actually, @Honey_Patouceul, the answer was in one of your old posts:
gpu-acceleration-support-for-opencv-gstreamer-pipeline

In case someone else encounters the same problem, I wanted to contribute a simple example as well. @dusty-nv’s jetson-utils library actually provides an easy way to capture an image and handle it inside NVMM memory. (Just compile it with -DNVMM_ENABLE=1.)

I also wrote a simple test function to check the actual result; this example is for my CSI camera. Using gstCamera from the jetson-utils library allows you to handle the frame in CUDA, or even as a cv::cuda::GpuMat.

void trying_jetsonutils(){

	std::cout << "Trying NVMM read" << std::endl;

	// create input stream
	videoOptions opt;
	opt.width  = 3264;
	opt.height = 1848;
	opt.frameRate = 28;
	opt.zeroCopy = false; // GPU access only for better speed
	opt.resource = "csi://0";
	// videoSource * input = videoSource::Create("csi://0", opt);
	gstCamera * input = gstCamera::Create(opt);
	if (!input) {
		std::cerr << "Error: Failed to create input stream" << std::endl;
		exit(-1);
	}

	// Read one frame to get resolution
	uchar3* image = NULL;
	if( !input->Capture(&image, 1000) )
	{
		std::cerr << "Error: failed to capture first video frame" << std::endl;
		delete input;
		exit(3);
	}

	
	cv::cuda::GpuMat dummy_frame(input->GetHeight(), input->GetWidth(), CV_8UC3);
	
	int i = 0;

	std::chrono::high_resolution_clock::time_point start_time;
	std::chrono::high_resolution_clock::time_point end_time;
	std::chrono::microseconds duration;

	while( 1 ){
		// capture next image
		if( !input->Capture(&image, 1000) ){
			std::cerr << "Error: failed to capture video frame" << std::endl;
			continue;
		}
		

		// Some OpenCv processing
		start_time = std::chrono::high_resolution_clock::now();
		cv::cuda::GpuMat frame_in(input->GetHeight(), input->GetWidth(), CV_8UC3, image);
		end_time = std::chrono::high_resolution_clock::now();
		duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
		std::cout << "Upload duration: " << duration.count() << " us "<< std::endl;
		
		start_time = std::chrono::high_resolution_clock::now();
		cv::cuda::cvtColor(frame_in, dummy_frame, cv::COLOR_RGB2GRAY);
		end_time = std::chrono::high_resolution_clock::now();
		duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
		std::cout << "CvtColor duration: " << duration.count() << " us "<< std::endl;

		cv::Mat cpu_frame;
		start_time = std::chrono::high_resolution_clock::now();
		dummy_frame.download(cpu_frame);
		end_time = std::chrono::high_resolution_clock::now();
		duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
		std::cout << "Download duration: " << duration.count() << " us "<< std::endl;

		cv::imwrite("gpu_frame.png", cpu_frame);
		if( !input->IsStreaming() )
			break;
		if (i > 10)
			break;
		i++;
	}

	delete input;
}

The upload time is now around 2 us, compared with around 25000 us in my old test, which means the image data is no longer copied because it is already in memory that the GPU can access.

2 Likes