Hello,
I’ve copied the first source code from the CUDA Programming Guide which adds two matrixes, and I’ve completed it to make it a working example; I’ve included it some headers and initializated the matrix with some random entries. But when I excecute the program it does not as expected since the resulting matrix is zero. Could you please point me out why this is happening?
[codebox]#include <stdio.h>
#define N 256
float A[N][N], B[N][N], C[N][N];
int i,j;
// Kernel definition
global void MatAdd(float A[N][N], float B[N][N],
float C[N][N])
{
int i = blockIdx.x * blockDim.x + threadIdx.x;
int j = blockIdx.y * blockDim.y + threadIdx.y;
if (i < N && j < N)
C[i][j] = A[i][j] + B[i][j];
}
int main()
{
// Kernel invocation
//initialization
srand(20);
for(i=0 ; i < N ; i++)
for(j=0 ; j < N ; j++)
{
A[i][j] = (int)(10*(rand()/((double)RAND_MAX+1)));
B[i][j] = (int)(10*(rand()/((double)RAND_MAX+1)));
}
dim3 dimBlock(16, 16);
dim3 dimGrid((N + dimBlock.x - 1) / dimBlock.x,
(N + dimBlock.y - 1) / dimBlock.y);
MatAdd<<<dimGrid, dimBlock>>>(A, B, C);
for(i=0 ; i < 10 ; i++)
{
printf("\n");
for(j=0 ; j < 10 ; j++)
printf("%d ", (int)A[i][j]);
}
printf("\n");
for(i=0 ; i < 10 ; i++)
{
printf("\n");
for(j=0 ; j < 10 ; j++)
printf("%d ", (int)B[i][j]);
}
printf("\nResult:");
for(i=0 ; i < 10 ; i++)
{
printf("\n");
for(j=0 ; j < 10 ; j++)
printf("%d ", (int)C[i][j]);
}
printf("\n");
}[/codebox]
I expect that it is printed the result from the sum of the two matrix, but I get that the C matrix has all zero values.
Thank you.