Hy,
I’m sorry about my bad english, I’m french.
I have a problem with shared memory. I want to give to a device function an array allocate in shared memory but I don’t know how to do it.
I make something like that :
__device__ void myDeviceFunction(float sdata[], int tid)
{
sdata[tid] = tid;
}
__global__ void myGlobalFunction(float *d_data)
{
__shared__ float sdata[];
int globalId = threadIdx.x + blockIdx.x * blockDim.x;
sadata[threadIdx.x] = 0;
myDeviceFunction(sdata, threadIdx.x);
d_data[globalId] = sdata[threadIdx.x];
}
void myHostFunction()
{
float *d_data;
dim3 blockSize(256);
dim3 gridSize(512);
int smem = 256 * sizeof(float);
cudaMalloc( (void **)&d_data, 256 * 512 * sizeof(float) );
myGlobalFunction<<< gridSize, blockSize, smem >>>(d_data);
cudaFree( d_data );
}
but sdata still contains 0 after the launch of myDeviceFunction.
I hope I’m clear.
Thanks for your help.