Hello,

I am a beginner of CUDA.

I have a problem about matrix summation(sample from CUDA_C_Programming_Guide).

Syntax is OK but I got the worng result.

Hope someone can tell me what is wrong?

###################

result:

0 , 0 , 0 ,

0 , 0 , 0 ,

0 , 0 , 0 ,

###################

OS: Linux

CUDA: V4

VGA: GeForce 210

The code is as below.

//

**global** void matrix_add( int a[3][3], int b[3][3], int c[3][3] ) {

```
int i = threadIdx.x;
int j = threadIdx.y;
c[i][j] = a[i][j] + b[i][j];
```

}

void matrix_2() {

int ary_size = 3;

```
int ary_a[3][3] = {1,2,3,4,5,6,7,8,9};
int ary_b[3][3] = {10,20,30,40,50,60,70,80,90};
int ary_c[3][3] = {0};
int ary_Da[3][3], ary_Db[3][3],ary_Dc[3][3];
int m_size = ary_size * ary_size * sizeof(int);
cudaMalloc( (void**)ary_Da, m_size);
cudaMalloc( (void**)ary_Db, m_size);
cudaMalloc( (void**)ary_Dc, m_size);
cudaMemcpy(ary_Da,ary_a,m_size,cudaMemcpyHostToDevice);
cudaMemcpy(ary_Db,ary_b,m_size,cudaMemcpyHostToDevice);
dim3 threadPerBlock(3,3);
dim3 blockPerGrid(1,1);
matrix_add<<<blockPerGrid,threadPerBlock>>>(ary_Da,ary_Db,ary_Dc);
cudaMemcpy(ary_c,ary_Dc,m_size,cudaMemcpyDeviceToHost);
int i = 0;
int j = 0;
while(i< ary_size) {
j =0;
while(j < ary_size) {
cout << ary_c[i][j] << " , ";
j++;
}
cout << endl;
i++;
}
cudaFree( ary_Da );
cudaFree( ary_Db );
cudaFree( ary_Dc );
```

}

int main(int argc, char** argv)

{

matrix_2();

```
return 0;
```

}