Thank you for your advice. Here is what I do:
[codebox]////////////////////////////////////////////////////////////////////////////////
// Launch the CUDA kernels to fill in the texture data
////////////////////////////////////////////////////////////////////////////////
void Render()
{
// Draw the text window
DrawTextWin();
//
// map the resources we've registered so we can access them in CUDA
//
CUDA_SAFE_CALL( cudaD3D9MapResources(1, (IDirect3DResource9 **)&g_pTexture) );
CUDA_SAFE_CALL( cudaD3D9MapResources(1, (IDirect3DResource9 **)&g_pText) );
////
//// run the kernel which will populate the contents of that texture
////
void *pCGH;
CUDA_SAFE_CALL( cudaD3D9ResourceGetMappedPointer(&pCGH, g_pTexture, 0, 0) );
size_t pitchCGH = 0;
CUDA_SAFE_CALL( cudaD3D9ResourceGetMappedPitch(&pitchCGH, NULL, g_pTexture, 0, 0) );
void *pText;
CUDA_SAFE_CALL( cudaD3D9ResourceGetMappedPointer(&pText, g_pText, 0, 0) );
size_t pitchText = 0, size = 0;
CUDA_SAFE_CALL( cudaD3D9ResourceGetMappedPitch(&pitchText, NULL, g_pText, 0, 0) );
CUDA_SAFE_CALL( cudaD3D9ResourceGetMappedSize(&size, g_pText, 0, 0) );
// Start timer
CUT_SAFE_CALL ( cutResetTimer(g_handles.hTimer) );
CUT_SAFE_CALL ( cutStartTimer(g_handles.hTimer) );
//
// Calculate the CGH
//
calcCGH(pText, size, pitchText, pCGH, pitchCGH);
// Display time elapsed
CUDA_SAFE_CALL( cudaThreadSynchronize() );
CUT_SAFE_CALL( cutStopTimer(g_handles.hTimer) );
double gpuTime = cutGetTimerValue(g_handles.hTimer);
fprintf(stderr,"GPU time: %f msecs.\n", gpuTime);
//
// unmap the resources
//
CUDA_SAFE_CALL( cudaD3D9UnmapResources(1, (IDirect3DResource9 **)&g_pTexture) );
CUDA_SAFE_CALL( cudaD3D9UnmapResources(1, (IDirect3DResource9 **)&g_pText) );
//
// draw the scene using them
//
DrawCGH();
// Compute FPS
computeFPS();
}[/codebox]
First I draw a window with some text, then I map some D3D resources, then I start the timer and calculate the hologram. calcCGH is the CUDA kernel, which sits in an external .cu file. When that has finished, I stop the timer, unmap the D3D resources and draw the hologram.
The timings for WinXP are:
GPU time: 3.567568 msecs.
GPU time: 3.582057 msecs.
GPU time: 3.636186 msecs.
GPU time: 3.579191 msecs.
GPU time: 3.591956 msecs.
…
And for Vista:
GPU time: 9.400078 msecs.
GPU time: 9.366274 msecs.
GPU time: 9.629995 msecs.
GPU time: 9.704026 msecs.
GPU time: 9.291684 msecs.
…
Any advice?