Hi,
I tried to launch the kernel of the simpleTexture sample from a separate host thread. The thread code is:
// Shared state read by Thread_Proc.

// Angle to rotate the image by (in radians); passed to transformKernel.
float angle = 0.5f;

// Pointer handed to transformKernel as its first argument.
float* d_data = NULL;

// Image dimensions; used to size the launch grid and passed to the kernel.
unsigned int width;
unsigned int height;

// Array that Thread_Proc binds to the texture reference `tex`.
cudaArray* cu_array;
// Thread entry point: selects device 0, binds cu_array to the texture
// reference `tex`, launches transformKernel over the image with 8x8 thread
// blocks, then unbinds the texture.
//
// para: unused.
// Returns 0 once every step has been issued; with CUDA_SAFE_CALL /
// CUT_CHECK_ERROR active, a failing step is reported by those macros.
//
// NOTE(review): d_data and cu_array are set up by a different (main) host
// thread. In CUDA releases where each host thread owns its own context,
// resources created in one thread are not usable from another, which would
// explain an "unspecified driver error" here. Confirm against the programming
// guide for the toolkit in use; if so, allocate and fill them in this thread.
DWORD WINAPI Thread_Proc(LPVOID para)
{
    (void)para;  // the thread takes no input

    CUDA_SAFE_CALL(cudaSetDevice(0));

    // Check the bind result so a failure is reported here rather than
    // surfacing later as an opaque kernel error.
    CUDA_SAFE_CALL(cudaBindTexture(tex, cu_array));

    // Integer division truncates: if width or height is not a multiple of
    // the block size, the trailing rows/columns are not covered by the grid.
    dim3 dimBlock(8, 8, 1);
    dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1);

    transformKernel<<< dimGrid, dimBlock, 0 >>>( d_data, width, height, angle);
    CUT_CHECK_ERROR("Kernel execution failed");

    CUDA_SAFE_CALL(cudaUnbindTexture(tex));

    // A DWORD-returning thread procedure must return a value; falling off
    // the end leaves the thread exit code undefined.
    return 0;
}
The global variables and memory are initialized by the main thread, as is done in the sample.
CUT_CHECK_ERROR reports an “unspecified driver error” in Debug mode. The EmuDebug mode works well.
Any ideas about this?