Benchmark data for a few OpenCV functions

Hi,

I am planning to use JETSON TX1 Board for my machine vision application.

Can anybody help me by measuring the processing time, on the attached image, of the functions in the following code on the JETSON TX1, using CUDA (GPU) and ARM NEON?

[code]// TestOpenCv.cpp : Defines the entry point for the console application.
//

#include “stdafx.h”

// Calibration state shared by the functions in this file: both matrices are
// passed to cv::calibrateCamera in generateBarrelTemplate and to cv::undistort
// in _tmain.
// 3x3 single-channel float matrix; allocated here but not given initial values.
cv::Mat intrinsic(3, 3, CV_32FC1);
// Starts out empty.
cv::Mat distcoeffs;

// Calibrates the camera from a single chessboard view found in `img` and
// stores the result in the file-level `intrinsic` and `distcoeffs` matrices.
//
// img      Input image; it is converted with CV_BGR2GRAY, so a BGR image is
//          expected.
// board_w  Pattern width passed to cv::findChessboardCorners.
// board_h  Pattern height passed to cv::findChessboardCorners.
//
// Returns false, without touching `intrinsic` or `distcoeffs`, when the image
// is empty, a board dimension is not positive, or no chessboard is detected.
// Returns true once cv::calibrateCamera has been run on the detected corners.
bool generateBarrelTemplate(cv::Mat &img, int board_w, int board_h){
	if (img.empty() || board_w <= 0 || board_h <= 0){
		return false;
	}

	// Ideal corner positions on the board plane (z = 0), one unit per square:
	// corner j lies in row j / board_w and column j % board_w. The row index
	// must come from integer division; dividing as float yields fractional
	// rows and a sheared grid instead of a rectangular one.
	const int board_n = board_w * board_h;
	std::vector<cv::Point3f> obj;
	obj.reserve(board_n);
	for (int j = 0; j < board_n; j++){
		obj.push_back(cv::Point3f(static_cast<float>(j / board_w),
		                          static_cast<float>(j % board_w),
		                          0.0f));
	}

	cv::Mat gray;
	cv::cvtColor(img, gray, CV_BGR2GRAY);

	const cv::Size board_sz(board_w, board_h);
	std::vector<cv::Point2f> corners;
	const bool found = cv::findChessboardCorners(gray, board_sz, corners,
		CV_CALIB_CB_ADAPTIVE_THRESH | CV_CALIB_CB_FILTER_QUADS);
	if (!found){
		return false;
	}

	// cv::calibrateCamera takes one entry per view; only one view is used here.
	std::vector<std::vector<cv::Point3f> > object_points(1, obj);
	std::vector<std::vector<cv::Point2f> > image_points(1, corners);
	std::vector<cv::Mat> rvecs, tvecs;

	cv::calibrateCamera(object_points, image_points, gray.size(), intrinsic, distcoeffs, rvecs, tvecs);

	// No explicit release()/clear() calls: the local cv::Mat and std::vector
	// objects free their storage in their destructors when the function returns.
	return true;
}

// Entry point of the benchmark. Loads chessboard3.bmp, calibrates from it via
// generateBarrelTemplate, then prints the elapsed time in milliseconds of
// cv::undistort, cv::cvtColor (CV_BGR2HSV) and cv::bilateralFilter, and finally
// shows the last result in a window until a key is pressed.
//
// argc/argv are not used.
// Returns 0 on success, 1 when the image cannot be read or the calibration
// fails (the timings would otherwise run on an uninitialised camera matrix).
int _tmain(int argc, _TCHAR* argv[])
{
	//Image Size (1920 X 1200 pixels)
	cv::Mat img = cv::imread("chessboard3.bmp");
	if (img.empty()){
		fprintf(stderr, "\n Could not read chessboard3.bmp");
		return 1;
	}

	if (!generateBarrelTemplate(img, 9, 6)){
		fprintf(stderr, "\n Chessboard not found, calibration failed");
		return 1;
	}

	// Untouched BGR copy used as the input of the timed colour conversion.
	cv::Mat cpu_img2;
	img.copyTo(cpu_img2);

	// Output buffer, sized up front so that the first timed call does not also
	// pay for allocating its destination.
	cv::Mat cpu_filter;
	img.copyTo(cpu_filter);

	// HSV version of the input; cvtColor creates the destination itself, so no
	// prior copy into cpu_img is needed.
	cv::Mat cpu_img;
	cv::cvtColor(img, cpu_img, CV_BGR2HSV);

	int64 st_barrel = cv::getTickCount();
	cv::undistort(cpu_img, cpu_filter, intrinsic, distcoeffs);
	int64 ed_barrel = cv::getTickCount();
	printf("\n barrel Time : %f", ((ed_barrel - st_barrel) / cv::getTickFrequency()) * 1000);

	int64 st_hsl = cv::getTickCount();
	cv::cvtColor(cpu_img2, cpu_filter, CV_BGR2HSV);
	int64 ed_hsl = cv::getTickCount();
	printf("\n cvt Time : %f", ((ed_hsl - st_hsl) / cv::getTickFrequency()) * 1000);

	int64 st_filter = cv::getTickCount();
	cv::bilateralFilter(cpu_img, cpu_filter, 10, 20, 30);
	int64 ed_filter = cv::getTickCount();
	printf("\n filter Time : %f", ((ed_filter - st_filter) / cv::getTickFrequency()) * 1000);

	cv::imshow("processed_image", cpu_filter);
	cv::waitKey(0);
	return 0;
}

[/code]

chessboard3.bmp (6.59 MB)

This is how long it took

barrel Time : 185.057589
 cvt Time : 4.744276
 filter Time : 428.06142

But why are you not using any GPU-accelerated functions? The TX1 has a pretty powerful CPU, but that’s not really its selling point.