GPU Acceleration Support for OpenCV Gstreamer Pipeline

Additional note: The main bottleneck is OpenCV videoio. An alternative is to use @dusty_nv's jetson-utils library, which has a much more efficient implementation.
If you’ve built and installed jetson-inference, it should already be installed on your Jetson. Note that this assumes a recent version with support for various video sources, so be sure you have a version pulled after the end of June 2020.

The following example reads frames from a CSI camera, wraps each received image in an OpenCV GpuMat, converts it from RGB to HSV on the GPU, splits the channels and applies a binary threshold to H, then merges the channels, converts back to RGB and finally displays the transformed frame:

#include <iostream>
#include <vector>

#include <jetson-utils/videoSource.h>
#include <jetson-utils/videoOutput.h>

#include <opencv2/opencv.hpp>
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudaimgproc.hpp" 


/**
 * Capture frames from CSI camera 0 through jetson-utils, run a small OpenCV
 * CUDA pipeline on each frame (RGB -> HSV, binary threshold on the H channel,
 * HSV -> RGB) and render the result to display 0.
 *
 * The command-line arguments are accepted but not used.
 *
 * Returns 0 when either stream stops, -1 if the input stream cannot be
 * created, -2 if the output stream cannot be created, and 3 if the first
 * frame cannot be captured.
 */
int main(int argc, char **argv) {

	// create input stream
	videoOptions opt;
	opt.width  = 3264;
	opt.height = 2464;
	opt.frameRate = 21;
	opt.zeroCopy = false; // GPU access only for better speed
	videoSource* input = videoSource::Create("csi://0", opt);
	if( !input ) {
		std::cerr << "Error: Failed to create input stream" << std::endl;
		return -1;
	}

	// create output stream
	videoOutput* output = videoOutput::Create("display://0");
	if( !output ) {
		std::cerr << "Error: Failed to create output stream" << std::endl;
		delete input;
		return -2;
	}

	// Read one frame so that the width/height queried below describe the
	// frames actually being delivered.
	uchar3* image = nullptr;
	if( !input->Capture(&image, 1000) )
	{
		std::cerr << "Error: failed to capture first video frame" << std::endl;
		delete output;
		delete input;
		return 3;
	}

	// The frame size is queried once and reused; the HSV buffer below is
	// allocated for this size as well.
	const int width  = static_cast<int>(input->GetWidth());
	const int height = static_cast<int>(input->GetHeight());

	/*
	 * processing loop
	 */
	cv::cuda::GpuMat d_Mat_HSV(height, width, CV_8UC3);
	std::vector<cv::cuda::GpuMat> d_hsv(3);
	double prev = static_cast<double>(cv::getTickCount());
	while( true )
	{
		// capture next image
		if( !input->Capture(&image, 1000) )
		{
			// A failed capture is either a transient failure worth retrying
			// or the end of the stream. Without this check the loop would
			// spin forever once the source has stopped.
			if( !input->IsStreaming() )
				break;
			std::cerr << "Error: failed to capture video frame" << std::endl;
			continue;
		}

		// log the time elapsed since the previous successfully captured frame
		const double cur = static_cast<double>(cv::getTickCount());
		const double delta = (cur - prev) / cv::getTickFrequency();
		std::cout << "delta=" << delta << '\n';
		prev = cur;

		// Some OpenCV processing. frame_in wraps the captured buffer rather
		// than copying it, so the final cvtColor writes the result back into
		// the memory pointed to by image.
		cv::cuda::GpuMat frame_in(height, width, CV_8UC3, image);
		cv::cuda::cvtColor(frame_in, d_Mat_HSV, cv::COLOR_RGB2HSV);
		cv::cuda::split(d_Mat_HSV, d_hsv);
		cv::cuda::threshold(d_hsv[0], d_hsv[0], 100, 255, cv::THRESH_BINARY);
		cv::cuda::merge(d_hsv, d_Mat_HSV);
		cv::cuda::cvtColor(d_Mat_HSV, frame_in, cv::COLOR_HSV2RGB);

		// Display result (image is the same pointer as frame_in.data)
		output->Render(image, width, height);
		if( !output->IsStreaming() )
			break;
		if( !input->IsStreaming() )
			break;
	}

	delete input;
	delete output;
	return 0;
}

I built against opencv-4.4.0-pre installed in /usr/local/opencv-4.4.0-pre, so:

g++ -std=c++11 -Wall -I/usr/local/opencv-4.4.0-pre/include/opencv4 -I/usr/local/cuda/targets/aarch64-linux/include test-jetson-utils-opencv.cpp -L/usr/local/opencv-4.4.0-pre/lib -lopencv_core -lopencv_cudaarithm -lopencv_cudaimgproc -ljetson-utils -o test-jetson-utils-opencv

My camera can only run at 21fps with this resolution, but it seems to work fine.

5 Likes