Time measurement

kdahm666 · September 12, 2009, 9:47pm

I’ve been testing around with Windows7, VS2008 and Cuda Project Wizard 2.0 (posted in this forum).

I noticed a performance hit when using timers instead of cudaEvents (using the default simple.cu from

the wizard).

With the following code, I got an execution time between 0.22ms and 0.26ms:

/************************************************************

************/

/* HelloCUDA															*/

/************************************************************

************/

/*
 * Entry point (cutil-timer variant).
 *
 * Launches HelloCUDA once with a single block of a single thread and prints
 * the host-side time reported by a cutil timer. The timer is started right
 * before the launch and stopped right after cudaThreadSynchronize(), so the
 * reported value covers the launch, the CUT_CHECK_ERROR call and the
 * synchronization as seen from the host.
 *
 * Returns 0 in all cases, including when InitCUDA() returns a false value.
 */
int main(int argc, char* argv[])
{
	if (!InitCUDA())
		return 0;

	// Length handed to the kernel; also the number of bytes allocated on the
	// device and copied back to the host.
	const int kMsgLen = 11;

	// One byte larger than kMsgLen and zero-filled, so the buffer is still
	// NUL-terminated after kMsgLen bytes have been copied into it.
	char hostMsg[12] = {0};
	char* devMsg = 0;
	CUDA_SAFE_CALL(cudaMalloc((void**)&devMsg, kMsgLen * sizeof(char)));

	unsigned int stopwatch = 0;
	CUT_SAFE_CALL(cutCreateTimer(&stopwatch));

	// ---- timed region: do not reorder ----
	CUT_SAFE_CALL(cutStartTimer(stopwatch));
	HelloCUDA<<<1, 1, 0>>>(devMsg, kMsgLen);
	CUT_CHECK_ERROR("Kernel execution failed\n");
	CUDA_SAFE_CALL(cudaThreadSynchronize());
	CUT_SAFE_CALL(cutStopTimer(stopwatch));
	// ---- end of timed region ----

	const float elapsedMs = cutGetTimerValue(stopwatch);
	printf("Processing time: %f (ms)\n", elapsedMs);
	CUT_SAFE_CALL(cutDeleteTimer(stopwatch));

	// Fetch the kernel's output and print it as a C string.
	CUDA_SAFE_CALL(cudaMemcpy(hostMsg, devMsg, kMsgLen * sizeof(char), cudaMemcpyDeviceToHost));
	printf("%s\n", hostMsg);

	CUDA_SAFE_CALL(cudaFree(devMsg));
	CUT_EXIT(argc, argv);
	return 0;
}

With this code, I got an execution time between 0.11ms and 0.14ms:

/************************************************************

************/

/* HelloCUDA															*/

/************************************************************

************/

/*
 * Entry point (cudaEvent variant).
 *
 * Launches HelloCUDA once with a single block of a single thread and prints
 * the time between two CUDA events recorded on stream 0 immediately before
 * and after the launch, as reported by cudaEventElapsedTime().
 *
 * All event API calls are error-checked with cutilSafeCall, the same way the
 * event-creation calls are, so a failing record/synchronize/elapsed-time call
 * cannot go unnoticed and leave a meaningless value in `elapsed`.
 *
 * Returns 0 in all cases, including when InitCUDA() returns a false value.
 */
int main(int argc, char* argv[])
{
	if(!InitCUDA()) {
		return 0;
	}

	char	*device_result	= 0;
	// 12 bytes, zero-filled: stays NUL-terminated after 11 bytes are copied in.
	char	host_result[12]	={0};

	CUDA_SAFE_CALL( cudaMalloc((void**) &device_result, sizeof(char) * 11));

	cudaEvent_t start;
	cudaEvent_t stop;
	// Initialized so that no indeterminate value can ever be printed.
	float elapsed = 0.0f;

	cutilSafeCall(cudaEventCreate(&start));
	cutilSafeCall(cudaEventCreate(&stop));

	// ---- timed region: do not reorder ----
	cutilSafeCall(cudaEventRecord(start, 0));

	// Invoke kernel
	HelloCUDA<<<1, 1, 0>>>(device_result, 11);
	cutilCheckMsg("Kernel invocation failed");

	// Measure time
	cutilSafeCall(cudaEventRecord(stop, 0));
	// ---- end of timed region ----

	// Wait until the stop event has actually been reached before reading the
	// elapsed time between the two events.
	cutilSafeCall(cudaEventSynchronize(stop));
	cutilSafeCall(cudaEventElapsedTime(&elapsed, start, stop));

	printf("Processing time: %f (ms)\n", elapsed);

	cutilSafeCall(cudaEventDestroy(start));
	cutilSafeCall(cudaEventDestroy(stop));

	// Fetch the kernel's output and print it as a C string.
	CUDA_SAFE_CALL( cudaMemcpy(&host_result, device_result, sizeof(char) * 11, cudaMemcpyDeviceToHost));
	printf("%s\n", host_result);

	CUDA_SAFE_CALL( cudaFree(device_result));
	CUT_EXIT(argc, argv);

	return 0;
}

What's the reason for this performance hit? Both versions are based on CUDA 2.2.

LSChien · September 13, 2009, 5:22am

[quote name=‘kdahm666’ post=‘587341’ date=‘Sep 12 2009, 01:47 PM’]

I’ve been testing around with Windows7, VS2008 and Cuda Project Wizard 2.0 (posted in this forum).

I noticed a performance hint by using timers instead of cudaEvents (using the default simple.cu from

the wizard).

With the following code, I got an execution time between 0.22ms and 0.26ms:

[codebox]// remove warmup time

for (int i = 0; i < numIterations; ++i){

	HelloCUDA<<<1, 1, 0>>>(device_result, 11);

}

cudaThreadSynchronize();

// evaluate performance of kernel function in average sense

 cutStartTimer(timer);

for (int i = 0; i < numIterations; ++i){

	HelloCUDA<<<1, 1, 0>>>(device_result, 11);

}

cudaThreadSynchronize();

cutStopTimer(timer);

naiveTime = cutGetTimerValue(timer);

printf("GPU time: %0.3f ms\n", naiveTime / numIterations);[/codebox]

kdahm666 · September 13, 2009, 8:22am

First, you should remove the warm-up time and then evaluate the average, since your kernel time is sub-millisecond.

[codebox]// remove warmup time
for (int i = 0; i < numIterations; ++i){

	HelloCUDA<<<1, 1, 0>>>(device_result, 11);

}

cudaThreadSynchronize();
// evaluate performance of kernel function in average sense
 cutStartTimer(timer);

for (int i = 0; i < numIterations; ++i){

	HelloCUDA<<<1, 1, 0>>>(device_result, 11);

}	 
cudaThreadSynchronize();

cutStopTimer(timer);

naiveTime = cutGetTimerValue(timer);

printf("GPU time: %0.3f ms\n", naiveTime / numIterations);[/codebox]

Ok I’ll test it.

Topic		Replies	Views
Events vs Timers - big differences measurung kernel execution time CUDA Programming and Performance	0	3849	December 20, 2010
Events vs Timers - big differences measurung kernel execution time CUDA Programming and Performance	7	2233	December 21, 2010
Timer&Event CUDA Programming and Performance	3	3650	December 1, 2009
Timing cuda code I'm sorry for small for déja-vu :-) CUDA Programming and Performance	12	36101	July 12, 2011
Mesuring Kernel Performance CUDA Programming and Performance	3	1137	September 29, 2009
how to evaluate the CUDA's performance how can i know the program is optimazed CUDA Programming and Performance	7	7418	July 24, 2008
timing performance of kernels how ? cudaprof vs cudaEventRecord vs cutStartTimer CUDA Programming and Performance	3	5360	March 21, 2009
Different GPU execution times using cuda events and cudaprof CUDA Programming and Performance	0	948	January 21, 2011
Compare Execution Times CPU vs GPU the proper way? CUDA Programming and Performance	5	6222	September 8, 2009
calculating execution time CUDA Programming and Performance	4	5622	June 22, 2009

Time measurement

Related topics