Hi, I am new to CUDA programming.
According to the programming guide, for a dynamic array in shared memory, one has to declare it as
extern __shared__ myArray[];
As a test, I tried to create a struct with dynamic member array in the following code.
It was compiled successfully and ran fine. Although the compile does not complain, but is it correct to do that? Why is “extern” not needed?
Thanks.
__device struct MyStruct
{
float myArray[];
}
__global__ void kernel(unsigned int arraySize, float *d_input, float *d_output)
{
//copy the values in array d_input to array d_output;
__shared__ MyStruct myObj;
float *localArray = myObj.myArray;
const unsigned int tid = blockDim.x* blockIdx.x + threadIdx.x;
if (tid < arraySize)
{
localArray[tid] = d_input[tid];
localArray[tid] +=1;
d_output[tid] = localArray[tid];
}
}
int main(int argc, char** argv)
{
//.....
}