I ran the following code, with CUDA support for OpenCV -
#include <iostream>
#include "opencv2/opencv.hpp"
using namespace cv;
using namespace std;
int main (int argc, char* argv[])
{
int64 work_begin = getTickCount();
cv::cuda::GpuMat d_result, d_img;
//open the Webcam
VideoCapture cap(0,0);
// if not success, exit program
if (cap.isOpened() == false)
{
cout << "Cannot open Webcam" << endl;
return -1;
}
//get the frames rate of the video from webcam
double frames_per_second = cap.get(CAP_PROP_FPS);
cout << "Frames per seconds : " << frames_per_second << endl;
cout<<"Press Q to Quit" <<endl;
String win_name = "Webcam Video";
namedWindow(win_name); //create a window
while (true)
{
Mat frame;
bool flag = cap.read(frame); // read a new frame from video
d_img.upload(frame);
cv::cuda::threshold(d_img, d_result, 128.0, 255.0, cv::THRESH_BINARY);
cv::Mat h_result;
d_result.download(h_result);
//show the frame in the created window
imshow(win_name, frame);
imshow("Binary image", h_result);
//Measure difference in time ticks
int64 delta = getTickCount() - work_begin;
double freq = getTickFrequency();
//Measure frames per second
double work_fps = freq / delta;
std::cout <<"Performance of Thresholding on GPU: " <<std::endl;
std::cout <<"Time: " << (1/work_fps) <<std::endl;
std::cout <<"FPS: " <<work_fps <<std::endl;
if (waitKey(1) == 'q')
{
break;
}
}
return 0;
}
Based on the second output you shared, OpenCV does use GPU but does not fully utilize it.
In some intervals you can see the GR3D_FREQ utilization rise to 36~38%.
That’s because the only part that actually runs on the GPU is the cv::cuda::threshold(.) function.
However, there is a memory copy before (d_img.upload) and after (d_result.download) the CUDA task.
Thresholding is a relatively fast job so the data transfer might be the bottleneck of your use case.
Although the GPU accelerates the thresholding job, the memory copy is an extra cost to run a task on GPU.
That might be the reason you cannot see an obvious improvement on the GPU.