Time measurement

I’ve been experimenting with Windows 7, VS2008 and the CUDA Project Wizard 2.0 (posted in this forum).

I noticed a performance hit when using CUTIL timers instead of cudaEvents (using the default simple.cu from the wizard).

With the following code, I got an execution time between 0.22ms and 0.26ms:

/************************************************************

************/

/* HelloCUDA															*/

/************************************************************

************/

int main(int argc, char* argv[])
{
	// Number of bytes allocated on the device and later copied back to the host.
	const size_t kResultBytes = sizeof(char) * 11;

	// Nothing to do when InitCUDA() reports failure.
	if (!InitCUDA())
		return 0;

	// The host buffer is zero-filled and one byte longer than the copy below,
	// so the text printed at the end stays NUL-terminated.
	char  hostText[12] = {0};
	char* devText      = 0;
	CUDA_SAFE_CALL(cudaMalloc((void**) &devText, kResultBytes));

	// The timer is started right before the launch and stopped only after
	// cudaThreadSynchronize() has returned, so the reported value covers
	// everything that happens between those two points.
	unsigned int stopwatch = 0;
	CUT_SAFE_CALL(cutCreateTimer(&stopwatch));
	CUT_SAFE_CALL(cutStartTimer(stopwatch));

	HelloCUDA<<<1, 1, 0>>>(devText, 11);
	CUT_CHECK_ERROR("Kernel execution failed\n");
	CUDA_SAFE_CALL(cudaThreadSynchronize());

	CUT_SAFE_CALL(cutStopTimer(stopwatch));
	printf("Processing time: %f (ms)\n", cutGetTimerValue(stopwatch));
	CUT_SAFE_CALL(cutDeleteTimer(stopwatch));

	// Fetch the kernel's output and print it.
	CUDA_SAFE_CALL(cudaMemcpy(hostText, devText, kResultBytes, cudaMemcpyDeviceToHost));
	printf("%s\n", hostText);

	CUDA_SAFE_CALL(cudaFree(devText));
	CUT_EXIT(argc, argv);
	return 0;
}

With this code, I got an execution time between 0.11ms and 0.14ms:

/************************************************************

************/

/* HelloCUDA															*/

/************************************************************

************/

int main(int argc, char* argv[])
{
	// Nothing to do when InitCUDA() reports failure.
	if(!InitCUDA()) {
		return 0;
	}

	char	*device_result	= 0;
	// Zero-filled and one byte longer than the 11 bytes copied back below,
	// so the printed text stays NUL-terminated.
	char	host_result[12]	={0};

	CUDA_SAFE_CALL( cudaMalloc((void**) &device_result, sizeof(char) * 11));

	cudaEvent_t start;
	cudaEvent_t stop;
	// Initialised so that a defined value is printed even if the elapsed-time
	// query were ever skipped or failed.
	float elapsed = 0.0f;

	cutilSafeCall(cudaEventCreate(&start));
	cutilSafeCall(cudaEventCreate(&stop));

	// Every event call below is checked: each of them returns a cudaError_t,
	// and an ignored failure here would silently yield a bogus timing.
	cutilSafeCall(cudaEventRecord(start, 0));

	// Invoke kernel
	HelloCUDA<<<1, 1, 0>>>(device_result, 11);
	cutilCheckMsg("Kernel invocation failed");

	// Measure time: record the stop event in the same stream (0) as the start
	// event and the kernel, wait for it, then read the interval between both.
	cutilSafeCall(cudaEventRecord(stop, 0));
	cutilSafeCall(cudaEventSynchronize(stop));
	cutilSafeCall(cudaEventElapsedTime(&elapsed, start, stop));

	printf("Processing time: %f (ms)\n", elapsed);

	cutilSafeCall(cudaEventDestroy(start));
	cutilSafeCall(cudaEventDestroy(stop));

	// Pass the array itself (decays to char*) rather than a pointer-to-array.
	CUDA_SAFE_CALL( cudaMemcpy(host_result, device_result, sizeof(char) * 11, cudaMemcpyDeviceToHost));
	printf("%s\n", host_result);

	CUDA_SAFE_CALL( cudaFree(device_result));
	CUT_EXIT(argc, argv);

	return 0;
}

What’s the reason for this performance hit? Both code samples are based on CUDA 2.2.

[quote name=‘kdahm666’ post=‘587341’ date=‘Sep 12 2009, 01:47 PM’]

I’ve been testing around with Windows7, VS2008 and Cuda Project Wizard 2.0 (posted in this forum).

I noticed a performance hint by using timers instead of cudaEvents (using the default simple.cu from

the wizard).

With the following code, I got an execution time between 0.22ms and 0.26ms:
[/quote]

[codebox]// Warm-up pass: launch the kernel numIterations times without timing it,
// then wait for all of those launches to finish.
for (int i = 0; i < numIterations; ++i) {
	HelloCUDA<<<1, 1, 0>>>(device_result, 11);
}
CUT_CHECK_ERROR("Kernel execution failed\n");
CUDA_SAFE_CALL( cudaThreadSynchronize() );

// Timed pass: evaluate the kernel's performance in an average sense by
// timing numIterations launches together and dividing by the count.
// NOTE(review): assumes `timer` was created earlier and has not accumulated
// time from a previous start/stop pair - confirm against the caller.
CUT_SAFE_CALL( cutStartTimer(timer) );
for (int i = 0; i < numIterations; ++i) {
	HelloCUDA<<<1, 1, 0>>>(device_result, 11);
}
CUDA_SAFE_CALL( cudaThreadSynchronize() );
CUT_SAFE_CALL( cutStopTimer(timer) );
// Checked after the timer is stopped so the check itself is not measured.
CUT_CHECK_ERROR("Kernel execution failed\n");

naiveTime = cutGetTimerValue(timer);
// Plain ASCII quotes: the typographic quotes in the original do not compile.
printf("GPU time: %0.3f ms\n", naiveTime / numIterations);[/codebox]

Ok I’ll test it.