Hi.

I have a question.

In the following program,

dA is allocated by cudaMalloc as a flat (1-dimensional) buffer.

I want to use dA as a 2-dimensional array in the kernel.

Are the definitions marked (A) and (B) correct?

#include<stdio.h>

#define NX (3)

#define NY (2)

/**
 * Writes ix + iy*NX + 1 into dA[iy][ix] for every (ix, iy) inside the
 * NY x NX extent, i.e. each element receives its 1-based row-major index.
 *
 * dA: pointer to the first row of an array with NX floats per row; the
 *     parameter type float[NY][NX] is adjusted by the language to
 *     float (*)[NX], so the caller passes a `float (*)[NX]`.
 *
 * Launch layout: 2-D, one thread per element (x indexes columns, y indexes
 * rows). Threads that fall outside the NY x NX extent do nothing.
 */
__global__ void kernel(float dA[NY][NX]){
    const int ix = blockIdx.x*blockDim.x + threadIdx.x;
    const int iy = blockIdx.y*blockDim.y + threadIdx.y;

    // Guard against launch configurations larger than the array extent.
    if (ix < NX && iy < NY) {
        dA[iy][ix] = static_cast<float>(ix + iy*NX + 1);
    }
}

/*
 * Reports a failed CUDA runtime call on stderr.
 * Returns true when status is cudaSuccess, false otherwise.
 */
static bool cudaOk(cudaError_t status, const char *what){
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

/*
 * Allocates NX*NY floats on the device, fills them with `kernel`, copies the
 * result back into the host array A and prints one value per line.
 * Returns 0 on success and 1 if any CUDA call reported an error.
 */
int main(void){
    float A[NX*NY];

    // Pointer to rows of NX floats, so the kernel can index it as dA[iy][ix].
    float (*dA)[NX] = NULL; // <=========(A)

    const size_t size = NX*NY*sizeof(float);

    // cudaMalloc only needs the address of the pointer variable; the buffer it
    // returns is one contiguous run of `size` bytes.
    if (!cudaOk(cudaMalloc((void**)&dA,size), "cudaMalloc")) { // <=========(B)
        return 1;
    }

    kernel<<<1,dim3(NX,NY)>>>(dA);

    // A kernel launch returns no status itself; query it explicitly.
    bool ok = cudaOk(cudaGetLastError(), "kernel launch");

    // The blocking copy also surfaces errors raised while the kernel ran.
    ok = ok && cudaOk(cudaMemcpy(A,dA,size,cudaMemcpyDeviceToHost), "cudaMemcpy");

    if (ok) {
        for(int i=0;i<NX*NY;i++){
            printf("%f\n",A[i]);
        }
    }

    // Always release the device buffer, even when an earlier step failed.
    ok = cudaOk(cudaFree(dA), "cudaFree") && ok;

    return ok ? 0 : 1;
}