Hello,
I am a beginner of CUDA.
I have a problem about matrix summation(sample from CUDA_C_Programming_Guide).
Syntax is OK but I got the worng result.
Hope someone can tell me what is wrong?
###################
result:
0 , 0 , 0 ,
0 , 0 , 0 ,
0 , 0 , 0 ,
###################
OS: Linux
CUDA: V4
VGA: GeForce 210
The code is as below.
//
global void matrix_add( int a[3][3], int b[3][3], int c[3][3] ) {
int i = threadIdx.x;
int j = threadIdx.y;
c[i][j] = a[i][j] + b[i][j];
}
void matrix_2() {
int ary_size = 3;
int ary_a[3][3] = {1,2,3,4,5,6,7,8,9};
int ary_b[3][3] = {10,20,30,40,50,60,70,80,90};
int ary_c[3][3] = {0};
int ary_Da[3][3], ary_Db[3][3],ary_Dc[3][3];
int m_size = ary_size * ary_size * sizeof(int);
cudaMalloc( (void**)ary_Da, m_size);
cudaMalloc( (void**)ary_Db, m_size);
cudaMalloc( (void**)ary_Dc, m_size);
cudaMemcpy(ary_Da,ary_a,m_size,cudaMemcpyHostToDevice);
cudaMemcpy(ary_Db,ary_b,m_size,cudaMemcpyHostToDevice);
dim3 threadPerBlock(3,3);
dim3 blockPerGrid(1,1);
matrix_add<<<blockPerGrid,threadPerBlock>>>(ary_Da,ary_Db,ary_Dc);
cudaMemcpy(ary_c,ary_Dc,m_size,cudaMemcpyDeviceToHost);
int i = 0;
int j = 0;
while(i< ary_size) {
j =0;
while(j < ary_size) {
cout << ary_c[i][j] << " , ";
j++;
}
cout << endl;
i++;
}
cudaFree( ary_Da );
cudaFree( ary_Db );
cudaFree( ary_Dc );
}
int main(int argc, char** argv)
{
matrix_2();
return 0;
}