Getting wrong values in tiled matrix multiplication

This is my program:

// Dimension of the square matrices: the host arrays a, b and c are declared as int[N][N].
#define N 200
// Edge length of the square shared-memory tiles, also used as the thread-block edge length.
// The kernel's tile loop runs N/TILE_WIDTH times, so N is expected to be a multiple of it.
#define TILE_WIDTH 20

#if (N % TILE_WIDTH) != 0
#error "MatMul requires N to be a multiple of TILE_WIDTH"
#endif

/*
 * Tiled integer matrix multiplication: C = A * B.
 *
 * A, B and C are device pointers to row-major N x N int matrices.
 *
 * Launch requirements:
 *   - blockDim must be (TILE_WIDTH, TILE_WIDTH, 1); the shared tiles are
 *     indexed directly with threadIdx.
 *   - gridDim must be (N / TILE_WIDTH, N / TILE_WIDTH, 1) so that every
 *     element of C is produced by exactly one thread and no thread indexes
 *     outside the matrices (there is no bounds check).
 *   - Static shared memory used: 2 * TILE_WIDTH * TILE_WIDTH * sizeof(int).
 */
__global__ void MatMul(int* A, int* B, int* C) {
    // Per-block staging tiles for the current TILE_WIDTH-wide strip of A and B.
    __shared__ int temp1[TILE_WIDTH][TILE_WIDTH];
    __shared__ int temp2[TILE_WIDTH][TILE_WIDTH];

    int idx = threadIdx.x;
    int idy = threadIdx.y;
    int bx = blockIdx.x;
    int by = blockIdx.y;

    // Global column / row of the C element this thread computes.
    int uidx = bx * TILE_WIDTH + idx;
    int uidy = by * TILE_WIDTH + idy;

    int sum = 0;

    // Walk the tiles along the shared (inner) dimension.
    for (int i = 0; i < N / TILE_WIDTH; i++) {
        // Copy one element of each tile into shared memory:
        //   temp1 <- A[row = uidy][col = i*TILE_WIDTH + idx]
        //   temp2 <- B[row = i*TILE_WIDTH + idy][col = uidx]
        temp1[idy][idx] = A[TILE_WIDTH * (by * N + i) + idx + idy * N];
        temp2[idy][idx] = B[TILE_WIDTH * (bx + N * i) + idx + idy * N];

        // Every thread must finish writing before any thread reads the tiles.
        __syncthreads();

        // Multiply the two tiles held in shared memory.
        for (int k = 0; k < TILE_WIDTH; k++) {
            sum = sum + temp1[idy][k] * temp2[k][idx];
        }

        // Every thread must finish reading before the tiles are overwritten
        // by the next iteration.
        __syncthreads();
    }

    C[uidy * N + uidx] = sum;
}

#include <cstdio>
#include <cstdlib>

/* Abort with a readable message when a CUDA runtime call fails. */
#define CUDA_CHECK(call)                                                  \
    do {                                                                  \
        cudaError_t err_ = (call);                                        \
        if (err_ != cudaSuccess) {                                        \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__, \
                    cudaGetErrorString(err_));                            \
            exit(EXIT_FAILURE);                                           \
        }                                                                 \
    } while (0)

/*
 * Host driver: fills two N x N matrices, multiplies them on the GPU with
 * MatMul, prints the product and the kernel time measured with CUDA events.
 *
 * Returns 0 on success; exits with EXIT_FAILURE if any CUDA call fails.
 */
int main( void ) {

    // Host copies of a, b, c. Static storage keeps ~480 KB off the stack.
    static int a[N][N], b[N][N], c[N][N];

    int *dev_a, *dev_b, *dev_c; // device copies of a, b, c

    // Allocate the memory on the GPU.
    CUDA_CHECK(cudaMalloc( (void**)&dev_a, N * N * sizeof(int) ));
    CUDA_CHECK(cudaMalloc( (void**)&dev_b, N * N * sizeof(int) ));
    CUDA_CHECK(cudaMalloc( (void**)&dev_c, N * N * sizeof(int) ));

    // Fill the matrices 'a' and 'b' on the CPU.
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            a[i][j] = j + 3;
            b[i][j] = i + 6;
        }
    }

    // Copy the a, b values to the device.
    CUDA_CHECK(cudaMemcpy( dev_a, a, N * N * sizeof(int), cudaMemcpyHostToDevice ));
    CUDA_CHECK(cudaMemcpy( dev_b, b, N * N * sizeof(int), cudaMemcpyHostToDevice ));

    // Prepare timer: events must be created before they can be recorded.
    cudaEvent_t start, stop;
    float time = 0.0f;
    CUDA_CHECK(cudaEventCreate(&start));
    CUDA_CHECK(cudaEventCreate(&stop));

    // Start record.
    CUDA_CHECK(cudaEventRecord(start, 0));

    // Kernel invocation: one TILE_WIDTH x TILE_WIDTH block per output tile.
    dim3 dimGrid(N / TILE_WIDTH, N / TILE_WIDTH, 1);
    dim3 dimBlock(TILE_WIDTH, TILE_WIDTH, 1);
    MatMul<<<dimGrid, dimBlock>>>(dev_a, dev_b, dev_c);
    CUDA_CHECK(cudaGetLastError()); // catches an invalid launch configuration

    // Stop record.
    CUDA_CHECK(cudaEventRecord(stop, 0));

    // The stop event must have completed before the elapsed time is read.
    CUDA_CHECK(cudaEventSynchronize(stop));

    // This is the operation time.
    CUDA_CHECK(cudaEventElapsedTime(&time, start, stop));

    // Clean up the timer.
    CUDA_CHECK(cudaEventDestroy(start));
    CUDA_CHECK(cudaEventDestroy(stop));

    // Copy the result to the host.
    CUDA_CHECK(cudaMemcpy(c, dev_c, N * N * sizeof(int), cudaMemcpyDeviceToHost ));

    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            printf("%d ", c[i][j]);
        }
    }

    // Free the allocated memory on the device.
    CUDA_CHECK(cudaFree( dev_a ));
    CUDA_CHECK(cudaFree( dev_b ));
    CUDA_CHECK(cudaFree( dev_c ));
    printf("\n multiplication done!!!\n");
    printf(" time elapsed in ms=%f\n", time);
    return 0;
}

I am getting a matrix in which every element has the value 2829400.
I checked in MATLAB, and every element should have the value 2871200.

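Every element of the product matrix is the same, because a[i][k] = k + 3 does not depend on i and b[k][j] = k + 6 does not depend on j. Each element is therefore equal to

$$\sum_{k=0}^{199} (k+3)(k+6) = \sum_{k=0}^{199} \left(k^2 + 9k + 18\right) = 2646700 + 179100 + 3600,$$

which evaluates to 2829400.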

Check your Matlab calculation again.

Thanks… I was doing the calculation wrong in MATLAB.