Shared memory: how to pass it to a __device__ function

Hi,

I’m sorry about my bad English; I’m French.

I have a problem with shared memory. I want to pass an array allocated in shared memory to a device function, but I don’t know how to do it.

I wrote something like this:

// Stores the caller-supplied index into the matching slot of the array.
//   sdata : array to write into (the kernel in this snippet passes its
//           shared-memory buffer)
//   tid   : slot to write; the stored value is tid converted to float
__device__ void myDeviceFunction(float sdata[], int tid)
{
	float *slot = sdata + tid;
	*slot = static_cast<float>(tid);
}

// Kernel: each thread zeroes its own shared-memory slot, lets
// myDeviceFunction overwrite that slot, then copies it to global memory.
//
// Launch requirements:
//   - 1D grid and 1D block (only the .x components are used).
//   - Dynamic shared memory of at least blockDim.x * sizeof(float) bytes,
//     passed as the third <<< >>> argument.
//   - d_data must hold at least gridDim.x * blockDim.x floats; the kernel
//     performs no bounds check.
__global__ void myGlobalFunction(float *d_data)
{
	// An unsized __shared__ array has to be declared extern; its size is
	// taken from the launch configuration.
	extern __shared__ float sdata[];

	int globalId = threadIdx.x + blockIdx.x * blockDim.x;

	// Previously misspelled "sadata", which is an undeclared identifier.
	sdata[threadIdx.x] = 0.0f;

	// Each thread reads and writes only its own slot, so no __syncthreads()
	// is needed between these statements.
	myDeviceFunction(sdata, threadIdx.x);

	d_data[globalId] = sdata[threadIdx.x];
}

// Host-side driver: allocates a device buffer of 256 * 512 floats, launches
// myGlobalFunction on 512 blocks of 256 threads with 256 floats of dynamic
// shared memory per block, then releases the buffer.
// The results are not copied back and API return codes are not inspected.
void myHostFunction()
{
	const dim3 blockSize(256);
	const dim3 gridSize(512);

	// One float of dynamic shared memory per thread in a block.
	const int smem = 256 * sizeof(float);

	float *d_data;
	cudaMalloc( (void **)&d_data, 256 * 512 * sizeof(float) );

	myGlobalFunction<<< gridSize, blockSize, smem >>>(d_data);

	cudaFree( d_data );
}

but sdata still contains 0 after the call to myDeviceFunction.

I hope I’m clear.

Thanks for your help.

It works just fine, try this code:

[codebox]

#include <stdio.h>

// Writes the given index into the matching slot of the array.
//   sdata : array to write into (the kernel in this snippet passes its
//           dynamic shared-memory buffer)
//   tid   : slot to write, and also the value stored there
// The __device__ qualifier and the pointer '*' had been stripped from the
// original line; both are restored here.
__device__ void myDeviceFunction(int *sdata, int tid) {
    sdata[tid] = tid;
}

// Kernel: each thread zeroes its own shared-memory slot, lets
// myDeviceFunction overwrite that slot, then copies it to global memory.
//
// Launch requirements:
//   - 1D grid and 1D block (only the .x components are used).
//   - Dynamic shared memory of at least blockDim.x * sizeof(int) bytes,
//     passed as the third <<< >>> argument.
//   - d_data must hold at least gridDim.x * blockDim.x ints; the kernel
//     performs no bounds check.
// The __global__ qualifier had been stripped from the original line and is
// restored here.
__global__ void myGlobalFunction(int *d_data) {
    // Sized at launch time, hence extern with empty brackets.
    extern __shared__ int sdata[];

    int globalId = threadIdx.x + blockIdx.x * blockDim.x;

    sdata[threadIdx.x] = 0;

    // Each thread reads and writes only its own slot, so no __syncthreads()
    // is needed between these statements.
    myDeviceFunction(sdata, threadIdx.x);

    d_data[globalId] = sdata[threadIdx.x];
}

// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char *what) {
    if (status == cudaSuccess)
        return true;
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Launches myGlobalFunction on one block of 256 threads, copies the 256
// results back to pinned host memory and prints them to stderr as
// "index: value" lines.
// Returns 0 on success and 1 if any CUDA call or the launch fails.
int main(int argc, char** argv) {
    const unsigned int count = 256;
    const size_t bytes = count * sizeof(int);

    int *d_data = NULL;
    int *h_data = NULL;
    int rc = 1;

    if (cudaOk(cudaMalloc((void **)&d_data, bytes), "cudaMalloc") &&
        cudaOk(cudaMallocHost((void **)&h_data, bytes), "cudaMallocHost")) {

        // Third launch argument: one int of dynamic shared memory per thread.
        myGlobalFunction<<< 1, count, bytes >>>(d_data);

        // A launch does not return a status; configuration errors surface
        // via cudaGetLastError(), execution errors via the blocking memcpy.
        if (cudaOk(cudaGetLastError(), "myGlobalFunction launch") &&
            cudaOk(cudaMemcpy(h_data, d_data, bytes, cudaMemcpyDeviceToHost),
                   "cudaMemcpy")) {
            for (unsigned int i = 0; i < count; ++i)
                fprintf(stderr,"%3d: %3d\n",i,h_data[i]);
            rc = 0;
        }
    }

    // Release whatever was successfully allocated, on every path.
    if (d_data)
        cudaFree(d_data);
    if (h_data)
        cudaFreeHost(h_data);

    return rc;
}

[/codebox]

N.

Yes, it works…

Thanks for your help.