Hi,

I recently started using cuda and I have some troubles using shared memory. I hope someone here can help me figure out the problems within my code. The following code is trying to calculate the time averaged potential which is save in avg_potential, and the array named by potential is the values calculated in each iteration. The code passed the compile and linking, but didn’t run successfully.

// define number of threads and blocks

void avgCenterLinePotential( int arraysize, float *avgpotential,float *potential, int iteration)

{

int numThreads = 256; //arraysize>=256

int numBlocks = arraysize/numThreads; // assuming arraysize%numThreads = 0

averageCenterLinePotential <<< numBlocks, numThreads >>> ( arraysize,avgpotential, potential, iteration);

cutilCheckMsg(“avgCenterLinePotential kernel execution failed”);

}

// kernel

**global**

void averageCenterLinePotential( int arraysize, float *avg_potential, float *potential, int iteration)

{

int index = __umul24( blockIdx.x,blockDim.x ) + threadIdx.x;

if ( index >= arraysize ) return;

avg_potential[index] *= iteration;

```
int sharedarraysize = 256;
for ( int i = 0; i < intDivUp(arraysize, sharedarraysize); i++ ){
__shared__ float sharedArray[256];
if ( i*sharedarraysize+threadIdx.x < arraysize ) {
sharedArray[threadIdx.x] = potential[index];
}
__syncthreads();
if ( i*sharedarraysize+threadIdx.x < arraysize ) {
avg_potential[index]+=sharedArray[threadIdx.x];
}
__syncthreads();
}
avg_potential[index] /= iteration+1;
```

}