181.22 for Windows Vista x64
Toolkit and SDK v2.1 x64
Now, here’s an excerpt from the code where the error occurs (see the clarification at the bottom for an explanation of the library):
[codebox]
// Excerpt from a larger routine: size, compSize, lstart, lend, freq, coalVec and
// fieldLines are declared outside this excerpt. It stages the fieldLines data in a
// cudaMallocHost buffer, then uploads part of every element to the device with
// cudaMemcpy2D, timing both copies with QueryPerformanceCounter.
double time;
Vector3<T> * plCharges;
// Byte count of the whole array: steps*n elements of GetElemSize() bytes each.
size = steps*n*fieldLines.GetElemSize();
// Host staging buffer obtained from cudaMallocHost (page-locked host memory).
CUDA_SAFE_CALL(cudaMallocHost((void**) &plCharges, size));
// Timed section 1: host-to-host copy from the fieldLines buffer into plCharges.
QueryPerformanceCounter(&lstart);
CUDA_SAFE_CALL(cudaMemcpy(plCharges, fieldLines.GetDataPointer(), size, cudaMemcpyHostToHost));
QueryPerformanceCounter(&lend);
// Tick delta divided by freq, printed below as seconds.
// NOTE(review): presumably freq holds the QueryPerformanceFrequency result - confirm where it is set.
time = (double)(lend.QuadPart - lstart.QuadPart) / freq;
printf("\n memcpyHTH1 time: %.3f s\t batch: %.3f MB", time, (double)size/1024/1024);
printf("\n Bandwidth:\t %.3f MB/s\n", (double)size/time/1024/1024);
// Two thirds of one element's size; for a Vector3 {x, y, z} with no padding this
// is the byte size of the leading x,y pair (assumes a packed layout - TODO confirm).
compSize = (fieldLines.GetElemSize()*2)/3;
// Device buffer holds compSize bytes for each of the steps*n elements.
size = steps*n*compSize;
CUDA_SAFE_CALL(cudaMalloc((void**) &coalVec.xyInterleaved, size));
// Timed section 2: strided host-to-device copy.
QueryPerformanceCounter(&lstart);
/*CUDA_SAFE_CALL(cudaMemcpy2D(coalVec.xyInterleaved, compSize,
fieldLines.GetDataPointer(), fieldLines.GetElemSize(),
compSize, steps*n,
cudaMemcpyHostToDevice));
*/// Original copy, sourced directly from fieldLines.GetDataPointer() instead of plCharges - reported not to fail
// Each element is treated as one "row": source pitch is the full element size,
// destination pitch and row width are compSize, and the height is steps*n rows.
// The first compSize bytes of every element therefore land tightly packed in
// coalVec.xyInterleaved. The source here is the cudaMallocHost buffer.
CUDA_SAFE_CALL(cudaMemcpy2D(coalVec.xyInterleaved, compSize,
plCharges, fieldLines.GetElemSize(),
compSize, steps*n,
cudaMemcpyHostToDevice));
QueryPerformanceCounter(&lend);
time = (double)(lend.QuadPart - lstart.QuadPart) / freq;
printf("\n memcpyHTD1 time: %.3f s\t batch: %.3f MB", time, (double)size/1024/1024);
printf("\n Bandwidth:\t %.3f MB/s\n", (double)size/time/1024/1024);
// Halve compSize: now one third of the element size, used to size coalVec.z below.
compSize/=2;
size = steps*n*compSize;
//printf(cudaGetErrorString(cudaGetLastError())); // When uncommented, this printf reports "no error"
// Reported behaviour: this call fails with "unknown error"; even if it is moved above the cudaMemcpy2D, the next cuda*() call after the copy fails with "unknown error"
CUDA_SAFE_CALL(cudaMalloc((void**) &coalVec.z, size));
[/codebox]
I think the names are self-explanatory, but I will provide a brief explanation of what is going on:
Vector3 is a template of the form {T x, y, z;};
fieldLines is a smart array of Vector3
‘n’ is the width of the array
‘steps’ is the height of the array
GetDataPointer() returns the memory location of the actual data array
GetElemSize() returns the size in bytes of an element of the array, in this case, the size of a Vector3
T is of type float
EDIT (for Linux and UNIX users): QueryPerformanceCounter() is used to time the memory copy.