Hello, NVIDIA.
I want to compare the speed of float versus int/long operations in CUDA, so I made a test program (release build) like this:
#include <opencv2/imgproc/imgproc.hpp>
#include "opencv2/core.hpp"
#include <opencv2/core/utility.hpp>
#include "opencv2/highgui.hpp"
#include "opencv2/cudaarithm.hpp"
#include "device_launch_parameters.h"
#include "cuda_runtime_api.h"
#include "helper_cuda.h"
#include <stdio.h>
#include <time.h>
#define mySIZE 10000
__global__ void testFloat()
{
    float sum = 0.f;
    float tmp = 1.f;
    for (int i = 0; i < mySIZE; i++)
    {
        sum += (tmp * i * 1.123f) / 2.f;
    }
    printf("================%f\n", sum);
}
__global__ void testInt()
{
    long long sum = 0;
    long long tmp = 1;
    for (int i = 0; i < mySIZE; i++)
    {
        sum += (tmp * i * 1.123f) / 2.f;
    }
    printf("+++++++++++++++++%lld\n", sum);
}
int main()
{
    clock_t str = clock();
    testFloat<<<1, 1>>>();
    cudaDeviceSynchronize();
    clock_t end = clock();
    printf("float time%fms\n", (double)(end - str) / 1000.f);

    clock_t str1 = clock();
    testInt<<<1, 1>>>();
    cudaDeviceSynchronize();
    clock_t end1 = clock();
    printf("long time%fms\n", (double)(end1 - str1) / 1000.f);
    return 0;
}
Each kernel does 10000 add operations, each involving a multiply and a division, and I print the result at the end so the compiler does not optimize the loop away.
But I feel the way I am testing is not correct: the INT kernel appears to run more than 200 times faster than the FLOAT kernel. Did I test this correctly?
And the result:
================28072190.000000
float time248.643000ms
+++++++++++++++++28070078
long time0.902000ms
GPU: NVIDIA Tesla P4
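For reference, here is a minimal sketch of how I could time each kernel with cudaEvent plus a warm-up launch instead of clock(), in case the very first kernel launch (context creation) is what is skewing the numbers. This is just an idea, not something I have verified: timeKernelMs, launchFloat and launchInt are helper names I made up, and it assumes the two kernels above are in the same .cu file (it would replace my main()).

// Sketch only: cudaEvent timing with an untimed warm-up launch per kernel.
// Assumes testFloat/testInt from the code above are defined in this file.
#include <stdio.h>
#include "cuda_runtime_api.h"

__global__ void testFloat();   // same kernels as above
__global__ void testInt();

static void launchFloat() { testFloat<<<1, 1>>>(); }
static void launchInt()   { testInt<<<1, 1>>>(); }

static float timeKernelMs(void (*launch)())
{
    cudaEvent_t start, stop;
    cudaEventCreate(&start);
    cudaEventCreate(&stop);

    launch();                     // warm-up launch, not timed
    cudaDeviceSynchronize();

    cudaEventRecord(start);
    launch();                     // timed launch
    cudaEventRecord(stop);
    cudaEventSynchronize(stop);

    float ms = 0.f;
    cudaEventElapsedTime(&ms, start, stop);
    cudaEventDestroy(start);
    cudaEventDestroy(stop);
    return ms;
}

int main()
{
    printf("float time %f ms\n", timeKernelMs(launchFloat));
    printf("long  time %f ms\n", timeKernelMs(launchInt));
    return 0;
}

Would this be a fairer way to measure the two kernels, or is there still something wrong with the comparison itself?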