hey friends i m new to this forumn n cuda also
i m trying to make the program of matrix multiplication from many days but one or the other problem is occuring…
plz help me freinds i really need to crack this out
here is the program:-
[b][font=“Lucida Console”]
#include<stdio.h>
#include<cuda.h>
global void mult(int *a,int *b,int *c, int N)
{
int pvalue=0;
int r = blockIdx.y * blockDim.y + threadIdx.y;
int col = blockIdx.x * blockDim.x + threadIdx.x;
for(int i=0;i< N;++i)
{
int m = a[r * N + i];
int n = b[i * N + col];
pvalue = pvalue+( m * n);
}
c[r* N + col] = pvalue;
}
int main()
{
int *a_h,*b_h,*c_h,n;
int *a_d,*b_d,*c_d;
int i;
int c;
printf(“enter the number of rows and columns”);
scanf(“%d”,&n);
a_h=(int *)malloc(sizeof(int)*n*n);
b_h=(int *)malloc(sizeof(int)*n*n);
c_h=(int *)malloc(sizeof(int)*n*n);
cudaMalloc((void**)&a_d,sizeof(int)*n*n);
cudaMalloc((void**)&b_d,sizeof(int)*n*n);
cudaMalloc((void**)&c_d,sizeof(int)*n*n);
for(int i=0;i<(n*n);i++)
{
a_h[i]=1;
}
for(int i=0;i<(n*n);i++)
{
b_h[i]=1;
}
c=(sizeof(int)*n*n);
cudaMemcpy(a_d,a_h,c,cudaMemcpyHostToDevice);
cudaMemcpy(b_d,b_h,c,cudaMemcpyHostToDevice);
mult<<<1,9>>nclude<stdio.h>
cudaMemcpy(c_h,c_d,c,cudaMemcpyDeviceToHost);
//Printing*************
printf(“\n Multiplication”);
for(i=0;i<nn;i++)
{
printf(" %d “,c_d[i]);
if(i==n)printf(”\n");
}
//Releasing memmiry**********
free(a_h);
free(b_h);
free(c_h);
cudaFree(a_d);
cudaFree(b_d);
cudaFree(c_d);
}
[/font][/b]
output is not coming correct
plz help me out
[font=“Tahoma”][i]out put that is comming:-
[/i][/font]