Matrix multiplcation peoblem

hey friends i m new to this forumn n cuda also
i m trying to make the program of matrix multiplication from many days but one or the other problem is occuring…
plz help me freinds i really need to crack this out

here is the program:-
[b][font=“Lucida Console”]

#include<stdio.h>
#include<cuda.h>
global void mult(int *a,int *b,int *c, int N)
{

    int pvalue=0;
    int r = blockIdx.y * blockDim.y + threadIdx.y;
    int col = blockIdx.x * blockDim.x + threadIdx.x;

    for(int i=0;i< N;++i)
    {
            int m = a[r * N + i];
            int n  = b[i * N + col];
            pvalue = pvalue+( m * n);
    }
    c[r* N + col] = pvalue;

}

int main()
{
int *a_h,*b_h,*c_h,n;
int *a_d,*b_d,*c_d;
int i;
int c;
printf(“enter the number of rows and columns”);
scanf(“%d”,&n);

    a_h=(int *)malloc(sizeof(int)*n*n);
    b_h=(int *)malloc(sizeof(int)*n*n);
    c_h=(int *)malloc(sizeof(int)*n*n);
    cudaMalloc((void**)&a_d,sizeof(int)*n*n);
    cudaMalloc((void**)&b_d,sizeof(int)*n*n);
    cudaMalloc((void**)&c_d,sizeof(int)*n*n);
for(int i=0;i<(n*n);i++)
    {
            a_h[i]=1;
    }

    for(int i=0;i<(n*n);i++)
    {
            b_h[i]=1;                                             
}

    c=(sizeof(int)*n*n);
    cudaMemcpy(a_d,a_h,c,cudaMemcpyHostToDevice);
    cudaMemcpy(b_d,b_h,c,cudaMemcpyHostToDevice);
    mult<<<1,9>>nclude<stdio.h>	
 cudaMemcpy(c_h,c_d,c,cudaMemcpyDeviceToHost);

//Printing*************
printf(“\n Multiplication”);
for(i=0;i<nn;i++)
{
printf(" %d “,c_d[i]);
if(i==n)printf(”\n");
}
//Releasing memmiry
**********
free(a_h);
free(b_h);
free(c_h);
cudaFree(a_d);
cudaFree(b_d);
cudaFree(c_d);
}

[/font][/b]

output is not coming correct
plz help me out
[font=“Tahoma”][i]out put that is comming:-


[/i][/font]

Looks like you suffer from a lil typo… You try reading from device-side memory instead of your host arrays you copied your results back to. :)

thanks :">