I looked carefully at Sarnath’s recent discussion of using arrays of pointers which made perfect sense to me before I made this post but for some reason cudaFree in the following code is returning “unknown error”. This is simple code that just zeros out an array of float arrays. I feel like I must be missing something simple but I don’t see why cudaFree is complaining. The code throws no other errors and of course it runs fine with emulation.
Thanks for any help.
- Richard
[ Host code ]
extern "C" void ciZeroFloat( float *arrays[], int numArrays, int size ) {
const int xBlockDim = 128;
const int yBlockDim = 1;
int yGridBlocks = numArrays;
int xGridBlocks;
cudaStream_t thisStream;
cudaStreamCreate( &thisStream );
dim3 dimBlock(xBlockDim,yBlockDim);
if (size % xBlockDim == 0) xGridBlocks = (size)/xBlockDim;
else xGridBlocks = (size)/xBlockDim + 1;
dim3 dimGrid(xGridBlocks,yGridBlocks);
float** d_fPtrArray;
cutilSafeCall( cudaMalloc( (void**) &d_fPtrArray, numArrays*sizeof( float* ) ) );
for (int i=0; i<numArrays; i++) d_fPtrArray[i] = arrays[i];
kZeroFloat<<< dimGrid, dimBlock, 0, thisStream >>> ( d_fPtrArray, numArrays, size );
cutilSafeCall( cudaStreamDestroy(thisStream) );
cutilSafeCall( cudaFree( d_fPtrArray ) ); // returns -- unknown error
}
[Device code]
__global__ void
kZeroFloat( float *arrays[], int numArrays, int size ) {
int ndx = threadIdx.x + __mul24(blockIdx.x,blockDim.x);
int arrNum = blockIdx.y;
__syncthreads(); // no change with our without __syncthreads()
if (blockIdx.y < numArrays && ndx < size) {
arrays[blockIdx.y][ndx] = 0.0f;
}
}