How to get performance of Video processing application

Hi all,
It is all in the title. I want to test the performance of my video processing algorithm on CPU and GPU. I have already developed my code for both CPU and GPU and it works fine. Now I want to generate results so I can compare them in terms of frames per second (FPS) and GFLOPS.
Any help please
Thanks in advance

Hi k.sehairi,

You can take a look at https://devblogs.nvidia.com/how-implement-performance-metrics-cuda-cc/. Besides, there are several samples under ~/NVIDIA_CUDA-9.0_Samples/ on the Jetson platform after installing JetPack.

Hi vickyy,
Thanks for your reply
when I try to use #include <cuda_runtime.h> I get fatal error : cuda_runtime.h: No such file or directory
This is my code. Any help is very appreciated
Thanks in advance

// Background subtraction with Background=empty scene
    #include <opencv2/core/core.hpp>
    #include <opencv2/imgproc/imgproc.hpp>
    #include <opencv2/highgui/highgui.hpp>
    #include <opencv2/video/video.hpp>
    #include <opencv2/video/background_segm.hpp>
    #include "opencv2/gpu/gpu.hpp"
    #include "opencv2/opencv.hpp"

    #include <iostream>
    #include <sstream>
    #include <stdlib.h>
    #include <stdio.h>
    #include <string.h>
    #include <math.h>
    #include <GL/glew.h>
    #include <GL/freeglut.h>

    #include <stdio.h>
    #include <string>

// CUDA utilities and system includes
#include <cuda_runtime.h>
#include <cuda_gl_interop.h>

#include </usr/local/cuda-6.5/targets/armv7-linux-gnueabihf/include/helper_cuda.h>       // CUDA device initialization helper functions
#include <helper_cuda_gl.h>    // CUDA device + OpenGL initialization functions

// Shared Library Test Functions
#include <helper_functions.h>  // CUDA SDK Helper functions
#include <helper_cuda.h>      // includes for cuda initialization and error checking


//using namespace cv;
using namespace std;

int fpsCount = 0;        // Frames counted since the window title was last refreshed
int fpsLimit = 1;        // Number of frames to accumulate before the next refresh
unsigned int frameCount = 0;  // Total number of frames processed so far

/**
 * Per-frame FPS bookkeeping: call once after each processed frame.
 *
 * Increments the frame counters and, every fpsLimit frames, converts the
 * stopwatch's average session time (milliseconds) into frames per second,
 * writes it into the GLUT window title and resets the stopwatch.
 *
 * Relies on a file-scope stopwatch named `timer` that the caller has created
 * and started/stopped around each frame.
 */
void computeFPS()
{
    frameCount++;
    fpsCount++;

    if (fpsCount == fpsLimit)
    {
        char fps[256];

        // Guard the division: with no recorded time the average is 0, which
        // would yield +inf and make the float-to-int conversion below undefined.
        const float avgMs = sdkGetAverageTimerValue(&timer);
        const float ifps = (avgMs > 0.0f) ? 1.0f / (avgMs / 1000.0f) : 0.0f;

        snprintf(fps, sizeof(fps), "Background subtraction: %3.f fps ", ifps);

        glutSetWindowTitle(fps);
        fpsCount = 0;
        // Refresh roughly once per second: wait for about `ifps` frames (at least 1).
        fpsLimit = static_cast<int>(MAX(ifps, 1.0f));

        sdkResetTimer(&timer);
    }
}


int main()
{
cv::Mat frame,threshCPU;
cv::Mat backgr = cv::imread("/home/ubuntu/KouidriRakhrour/MD0/background720576.jpg"); 	
cv::VideoCapture input("/home/ubuntu/KouidriRakhrour/MD0/768x576.avi");


if (backgr.empty())
{
    std::cerr << "Can't load image";
    exit(-1);
}

    if (!input.isOpened()) {
    fprintf(stderr, "Failed to open input capture.\n");
    exit(-1);
    }

cv::gpu::GpuMat d_src(backgr); 
cv::gpu::GpuMat frame0,img_prev, gray_img, thresh, frameDelta;


//convert to grayscale and set the first frame
cv::gpu::cvtColor(d_src, img_prev, CV_BGR2GRAY);
// Apply Gaussian blur filter
cv::gpu::GaussianBlur(img_prev, img_prev, cv::Size(5, 5), 0);


    cv::VideoWriter output("768x576MD.avi",CV_FOURCC('X','V','I','D'),30,cv::Size(input.get(CV_CAP_PROP_FRAME_WIDTH),input.get(CV_CAP_PROP_FRAME_HEIGHT)),0);

	while(input.read(frame)) 
		{
                cv::gpu::GpuMat frame0(frame); // transfer the empty image from CPU memory to GPU memory
		//convert to grayscale
		cv::gpu::cvtColor(frame0, gray_img, CV_BGR2GRAY);
		cv::gpu::GaussianBlur(gray_img, gray_img, cv::Size(5, 5), 0);
		//compute difference between first frame and current frame
		cv::gpu::absdiff(img_prev, gray_img, frameDelta);
		cv::gpu::threshold(frameDelta, thresh, 25, 255, cv::THRESH_BINARY);

		cv::Mat threshCPU(thresh);

		output.write(threshCPU);

		cv::putText(threshCPU, "Motion Detected", cv::Point(10, 20), cv::FONT_HERSHEY_SIMPLEX, 0.75, cv::Scalar(255,255,255),2);

		cv::imshow("Camera", threshCPU);
                sdkStopTimer(&timer);
                computeFPS();
                char c=cv::waitKey(1);
		if(c == 27)
			{
			//exit if ESC is pressed
			break;
			}
		}
}

You may compile with flag -I/usr/local/cuda/include and link with -L/usr/local/cuda/lib64 -lcudart.
You may also have to set LD_LIBRARY_PATH to have /usr/local/cuda/lib64 in shell before execution.

Hi Honey_Patouceul,
I tried the CUDA samples and they work fine.
I still get this error: fatal error: cuda_runtime.h: No such file or directory #include <cuda_runtime.h>.
Honestly, I don’t know how to compile with flags. I build with "cmake .." followed by "make", using a CMakeLists.txt, under Ubuntu 14.04 LTS.
When I run "cmake .." I can see that CUDA is found correctly:
-- The C compiler identification is GNU 4.8.4
-- The CXX compiler identification is GNU 4.8.4
-- Check for working C compiler: /usr/bin/cc
-- Check for working C compiler: /usr/bin/cc -- works
-- Detecting C compiler ABI info
-- Detecting C compiler ABI info - done
-- Check for working CXX compiler: /usr/bin/c++
-- Check for working CXX compiler: /usr/bin/c++ -- works
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Found CUDA: /usr/local/cuda-6.5 (found suitable exact version "6.5")
-- Configuring done
-- Generating done
-- Build files have been written to: /home/ubuntu/MyExamples/MD0GPUper/build

My CMakeLists.txt contains the lines below. My source file is backgr0.cpp and the generated program is named cv_backgr0:

# Minimal project: builds cv_backgr0 from backgr0.cpp against OpenCV only.
# NOTE(review): only OpenCV's include directories are added below; nothing here
# adds the CUDA toolkit include path, which is presumably why <cuda_runtime.h>
# is not found when compiling -- confirm.
cmake_minimum_required (VERSION 2.8)
project(backgr0)
find_package(OpenCV REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS})

# Single-source executable, linked against the OpenCV libraries.
add_executable(cv_backgr0 backgr0.cpp)
target_link_libraries(cv_backgr0 ${OpenCV_LIBS})

Should I change anything in my CMakeLists or my program.
Any help is greatly appreciated
Thanks in advance

You may try something like this (sorry, I’m on a TX2, so the CUDA version and paths may be different):

# Builds cv_backgr0 from backgr0.cpp against OpenCV, the CUDA toolkit and GLUT.
cmake_minimum_required (VERSION 2.8)
project(backgr0)
find_package(OpenCV REQUIRED)
find_package(CUDA REQUIRED)
find_package(GLUT REQUIRED)
# The CUDA sample helper headers (helper_cuda.h, helper_functions.h, ...) live in
# samples/common/inc. Look under the toolkit that find_package(CUDA) actually
# located, and keep the conventional /usr/local/cuda path as a fallback.
include_directories(
    ${OpenCV_INCLUDE_DIRS}
    ${CUDA_INCLUDE_DIRS}
    "${CUDA_TOOLKIT_ROOT_DIR}/samples/common/inc"
    "/usr/local/cuda/samples/common/inc")

add_executable(cv_backgr0 backgr0.cpp)
# CUDA_LIBRARIES provides the CUDA runtime (cudart) for direct cuda* calls;
# GLUT_LIBRARIES is the documented result variable of FindGLUT.
target_link_libraries(cv_backgr0 ${OpenCV_LIBS} ${CUDA_LIBRARIES} ${GLUT_LIBRARIES})

You also need to declare the timer:

// File-scope stopwatch handle passed to the sdk*Timer helpers; starts out as NULL.
StopWatchInterface *timer = NULL;

Hi
thank you Honey_Patouceul for your help
I added also the initGL initialisation function and the instruction StopWatchInterface *timer = NULL;

void initGL(int *argc, char **argv)
{
    glutInit(argc, argv);
    glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE);

    glewInit();

    /*if (!glewIsSupported("GL_VERSION_1_5 GL_ARB_vertex_buffer_object GL_ARB_pixel_buffer_object"))
    {
        fprintf(stderr, "Error: failed to get minimal extensions for demo\n");
        fprintf(stderr, "This sample requires:\n");
        fprintf(stderr, "  OpenGL version 1.5\n");
        fprintf(stderr, "  GL_ARB_vertex_buffer_object\n");
        fprintf(stderr, "  GL_ARB_pixel_buffer_object\n");
        exit(EXIT_FAILURE);
    }*/
}

It compiles correctly, but linking the executable fails with this error:
In function `initGL(int*, char**)’:


backgr0.cpp:(.text+0x6ec): undefined reference to `glewInit’
collect2: error: ld returned 1 exit status
make[2]: *** [cv_backgr0] Error 1
make[1]: *** [CMakeFiles/cv_backgr0.dir/all] Error 2
make: *** [all] Error 2


It is always related to the GLUT initialization function.
Thanks in advance for any help

So you would probably try this one as you also need GLEW now:

# Builds cv_backgr0 from backgr0.cpp against OpenCV, the CUDA toolkit, GLUT and GLEW.
cmake_minimum_required (VERSION 2.8)
project(backgr0)
find_package(OpenCV REQUIRED)
find_package(CUDA REQUIRED)
find_package(GLUT REQUIRED)
find_package(GLEW REQUIRED)
# The CUDA sample helper headers (helper_cuda.h, helper_functions.h, ...) live in
# samples/common/inc. Look under the toolkit that find_package(CUDA) actually
# located, and keep the conventional /usr/local/cuda path as a fallback.
include_directories(
    ${OpenCV_INCLUDE_DIRS}
    ${CUDA_INCLUDE_DIRS}
    ${GLEW_INCLUDE_DIRS}
    "${CUDA_TOOLKIT_ROOT_DIR}/samples/common/inc"
    "/usr/local/cuda/samples/common/inc")

add_executable(cv_backgr0 backgr0.cpp)
# CUDA_LIBRARIES provides the CUDA runtime (cudart) for direct cuda* calls.
# GLUT_LIBRARIES / GLEW_LIBRARIES are the documented result variables of the
# FindGLUT / FindGLEW modules.
target_link_libraries(cv_backgr0 ${OpenCV_LIBS} ${CUDA_LIBRARIES} ${GLUT_LIBRARIES} ${GLEW_LIBRARIES})

Hi Honey_Patouceul,
Sorry for the late reply. I was traveling.
I managed somehow to generate the result in FPS using the code below

// Measure the elapsed time of the whole run with a pair of CUDA events and
// derive the average FPS once the last frame (1700) has been processed.
cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);

cudaEventRecord(start);

// your code 

if (frameCount == 1700)
{
    cudaEventRecord(stop);
    cudaEventSynchronize(stop);  // wait until the stop event has actually completed

    float milliseconds = 0;
    cudaEventElapsedTime(&milliseconds, start, stop);
    fps = float(frameCount) / (milliseconds / 1000);

    FILE *f = fopen("result.txt", "w");
    if (f != NULL)
    {
        fprintf(f, "frame per second: %3.2f fps \n elapsed time: %3.2f ms", fps, milliseconds);
        fclose(f);  // flush and release the file handle
    }
    else
    {
        fprintf(stderr, "Failed to open result.txt for writing.\n");
    }

    // The events are no longer needed once the measurement has been taken.
    cudaEventDestroy(start);
    cudaEventDestroy(stop);
}

The video used has 1700 frames and the result is stored in .txt file.
For CPU time, I used this code:

// Measure the run with clock() and derive the average FPS once the last frame
// (1700) has been processed.
// NOTE: clock() reports processor time consumed by the program, not wall-clock
// time. The two differ whenever the process waits (I/O, display, GPU) or uses
// several threads, so treat this figure as CPU time.
clock_t cpu_startTime, cpu_endTime;
double cpu_ElapseTime = 0;
cpu_startTime = clock();

// your code 	

if (frameCount == 1700) //
{
    cpu_endTime = clock();
    cpu_ElapseTime = (cpu_endTime - cpu_startTime) / static_cast<double>(CLOCKS_PER_SEC);
    fps = float(frameCount) / (cpu_ElapseTime);

    FILE *f = fopen("result.txt", "w");
    if (f != NULL)
    {
        fprintf(f, "frame per second: %3.2f fps \n elapsed time: %3.2f s", fps, cpu_ElapseTime);
        fclose(f);  // flush and release the file handle
    }
    else
    {
        fprintf(stderr, "Failed to open result.txt for writing.\n");
    }
}

I don’t know if it is accurate, but the results seem logical to me. Can somebody confirm that?
I also tried CPU timing using myCPUTimer(), but I get this error: ‘myCPUTimer’ was not declared in this scope.

Now I want to show the FPS (for each frame) along with the result window, as shown in the CUDA imaging samples. I tried to modify my code to use OpenGL, but I had many difficulties displaying the result in the existing OpenGL window. This is my code; can anyone help, please?

// Tuning constants.
// NOTE(review): MAX_EPSILON_ERROR is not referenced in the visible part of this file.
#define MAX_EPSILON_ERROR 5.0f
// Delay handed to glutTimerFunc by timerEvent().
#define REFRESH_DELAY     10 //ms


// Window and image geometry. wWidth/wHeight are passed to glutInitWindowSize().
static int wWidth   = 768; // Window width
static int wHeight  = 576; // Window height
static int imWidth  = 0;   // Image width
static int imHeight = 0;   // Image height

// Frame counters, stopwatch and verification-related state.
// NOTE(review): frameCheckNumber, g_TotalErrors, g_Bpp, g_Index and g_bQAReadback
// are not referenced in the visible code -- presumably left over from a CUDA sample; confirm.
const int frameCheckNumber = 4;
int fpsCount = 0;      // Frames counted since the window title was last refreshed
int fpsLimit = 1;      // Number of frames to accumulate before the next refresh
unsigned int frameCount = 0; // Total number of frames processed
unsigned int g_TotalErrors = 0;
StopWatchInterface *timer = NULL; // Stopwatch handle used by the sdk*Timer calls
unsigned int g_Bpp;
unsigned int g_Index = 0;

bool g_bQAReadback = false;

// Display Data
static GLuint pbo_buffer = 0;  // OpenGL buffer object name; deleted in cleanup()
struct cudaGraphicsResource *cuda_pbo_resource; // CUDA graphics resource handle; unregistered in cleanup()

static GLuint texid = 0;       // Texture for display
unsigned char *pixels = NULL;  // Image pixel data on the host
float imageScale = 1.f;        // Image exposure

// NOTE(review): pArgc/pArgv are never assigned in the visible code.
int *pArgc   = NULL;
char **pArgv = NULL;


// Turns an integer offset into a char* by offsetting from NULL.
#define OFFSET(i) ((char *)NULL + (i))


//using namespace cv;
using namespace std;

/**
 * Per-frame FPS bookkeeping: call once after each processed frame.
 *
 * Increments the frame counters and, every fpsLimit frames, converts the
 * stopwatch's average session time (milliseconds) into frames per second,
 * writes it into the GLUT window title and resets the stopwatch.
 */
void computeFPS()
{
    frameCount++;
    fpsCount++;

    if (fpsCount == fpsLimit)
    {
        char fps[256];

        // Guard the division: with no recorded time the average is 0 and the
        // title would otherwise read "inf fps".
        const float avgMs = sdkGetAverageTimerValue(&timer);
        const float ifps = (avgMs > 0.f) ? 1.f / (avgMs / 1000.f) : 0.f;

        snprintf(fps, sizeof(fps), "CUDA motion Detection (%s): %3.1f fps", "motion Detection", ifps);

        glutSetWindowTitle(fps);
        fpsCount = 0;

        sdkResetTimer(&timer);
    }
}

/**
 * GLUT timer callback.
 *
 * While a GLUT window is current, requests a redisplay and re-arms itself to
 * fire again after REFRESH_DELAY milliseconds. Does nothing once there is no
 * current window. The `value` argument is not used.
 */
void timerEvent(int value)
{
    if (!glutGetWindow())
    {
        return;
    }

    glutPostRedisplay();
    glutTimerFunc(REFRESH_DELAY, timerEvent, 0);
}


/**
 * Releases the CUDA/OpenGL interop resources, the stopwatch and the CUDA device state.
 *
 * Safe to call when the PBO / graphics resource were never created, and safe
 * to call more than once: each handle is cleared after it has been released.
 */
void cleanup(void)
{
    // Only unregister a resource that was actually registered.
    if (cuda_pbo_resource != NULL)
    {
        cudaGraphicsUnregisterResource(cuda_pbo_resource);
        cuda_pbo_resource = NULL;
    }

    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
    if (pbo_buffer != 0)
    {
        glDeleteBuffers(1, &pbo_buffer);
        pbo_buffer = 0;
    }
    //glDeleteTextures(1, &texid);
    //deleteTexture();

    sdkDeleteTimer(&timer);

    // cudaDeviceReset causes the driver to clean up all state. While
    // not mandatory in normal operation, it is good practice.  It is also
    // needed to ensure correct operation when the application is being
    // profiled. Calling cudaDeviceReset causes all profile data to be
    // flushed before the application exits
    cudaDeviceReset();
}


void initGL(int *argc, char **argv)
{
    glutInit(argc, argv);  // Initialiaze GLUT
    glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE); // set RGBA format and two framebuffer. One is used for drawing, the other one for display.
    glutInitWindowSize(wWidth, wHeight);
    glutCreateWindow("CUDA motion Detection");

    glewInit(); // Initialiaze OpenGL Extension Wrangler Library (provides efficient run-time mechanisms for determining which OpenGL extensions are supported on the target platform)

    if (!glewIsSupported("GL_VERSION_1_5 GL_ARB_vertex_buffer_object GL_ARB_pixel_buffer_object"))
    {
        fprintf(stderr, "Error: failed to get minimal extensions for demo\n");
        fprintf(stderr, "This sample requires:\n");
        fprintf(stderr, "  OpenGL version 1.5\n");
        fprintf(stderr, "  GL_ARB_vertex_buffer_object\n");
        fprintf(stderr, "  GL_ARB_pixel_buffer_object\n");
        exit(EXIT_FAILURE);
    }
}


int main(int argc, char **argv)
{




    // First initialize OpenGL context, so we can properly set the GL for CUDA.
    // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop.
    initGL(&argc, argv);
    cudaGLSetGLDevice(gpuGetMaxGflopsDeviceId()); // sets a CUDA device to use OpenGL interoperability.

    sdkCreateTimer(&timer);
    sdkResetTimer(&timer);

    sdkStartTimer(&timer);


cv::Mat frame,threshCPU;
cv::Mat backgr = cv::imread("/home/ubuntu/KouidriRakhrour/MD0/background720576.jpg"); 	
cv::VideoCapture input("/home/ubuntu/KouidriRakhrour/MD0/768x576.avi");

cv::gpu::GpuMat d_src(backgr); 
cv::gpu::GpuMat frame0,img_prev, gray_img, thresh, frameDelta;


cv::gpu::cvtColor(d_src, img_prev, CV_BGR2GRAY);
cv::gpu::GaussianBlur(img_prev, img_prev, cv::Size(5, 5), 0);


    cv::VideoWriter output("768x576MD.avi",CV_FOURCC('X','V','I','D'),30,cv::Size(input.get(CV_CAP_PROP_FRAME_WIDTH),input.get(CV_CAP_PROP_FRAME_HEIGHT)),0);

	while(input.read(frame)) 
		{
                cv::gpu::GpuMat frame0(frame); // transfer the empty image from CPU memory to GPU memory
		cv::gpu::cvtColor(frame0, gray_img, CV_BGR2GRAY);
		cv::gpu::GaussianBlur(gray_img, gray_img, cv::Size(5, 5), 0);
		cv::gpu::absdiff(img_prev, gray_img, frameDelta);
		cv::gpu::threshold(frameDelta, thresh, 25, 255, cv::THRESH_BINARY);

		cv::Mat threshCPU(thresh);
		output.write(threshCPU);
		cv::imshow("CUDA motion Detection", threshCPU);

                glutSwapBuffers();
                sdkStopTimer(&timer);
                computeFPS();

                glutCloseFunc(cleanup);
                glutTimerFunc(REFRESH_DELAY, timerEvent,0);
                //glutMainLoop();
                char c=cv::waitKey(1);
		if(c == 27)
			{
			//exit if ESC is pressed
			break;
			}
		}
}

Hi Kamal,
Please go to http://answers.opencv.org/questions/
Not sure but probably there is existing implementation in OpenCV.

Hi DaneLLL,
Thanks for your reply,
I will try to find a solution; the problem is with the display function.
It seems that I am using two display mechanisms: OpenGL (GLUT) and the OpenCV display functions.
I don’t know how to display the results in the GLUT window that was created earlier.