Hi all CUDA users
Some codes in my program are as follows:
typedef float matrix[3][3];
host device void m_add(matrix a,matrix b,matrix dest)
{
dest[XX][XX]=a[XX][XX]+b[XX][XX];
dest[XX][YY]=a[XX][YY]+b[XX][YY];
dest[XX][ZZ]=a[XX][ZZ]+b[XX][ZZ];
dest[YY][XX]=a[YY][XX]+b[YY][XX];
dest[YY][YY]=a[YY][YY]+b[YY][YY];
dest[YY][ZZ]=a[YY][ZZ]+b[YY][ZZ];
dest[ZZ][XX]=a[ZZ][XX]+b[ZZ][XX];
dest[ZZ][YY]=a[ZZ][YY]+b[ZZ][YY];
dest[ZZ][ZZ]=a[ZZ][ZZ]+b[ZZ][ZZ];
}
global void test_kernal(matrix a, matrix b,matrix c)
{
m_add(a,b,c);
}
device matrix a={{1,2,3},{4,5,6},{7,8,9}};
device matrix b={{1,2,3},{4,5,6},{7,8,9}};
device matrix c;
matrix h_c;
int main()
{
test_kernal<<<1,1>>>(a,b,c);
cudaMemcpy(h_c, c, sizeof(matrix), cudaMemcpyDeviceToHost);
for(int i=0;i<DIM;i++)
for(int j=0;j<DIM;j++)
printf(“%f\n”, h_c[i][j]);
return 0;
}
The purpose of this program is checking the using of the “typedef” name matrix.
And it seems that this program cann’t give right result. I have no idea of fixing the error.
Can someone give me some advice?
Appreciate any help in advance!