Matrix Multiplication Help

tomw2005 · August 10, 2009, 11:56am

Hi.

I’m trying to make a basic matrix multiplication program. The program compiles but currently is giving wrong/nonsense answers. I’m not sure whether the problem lies in the CUDA code or the CPU side. If anyone could help me spot this I’d be very thankful.

Here’s the code:

[codebox]#include <stdio.h>

#include <stdlib.h>

#include <cutil.h>

#include “cublas.h”

#define BLOCK_SIZE 16

// Kernel

global void CudaMul(float *A, float *B, float *C, int noc0, int noc1, int nor1)

{

int k;

int i = blockDim.x*blockIdx.x + threadIdx.x;

int j = blockDim.y*blockIdx.y + threadIdx.y;

if(i < noc0 && j < nor1)

{

	C[j*noc1 + i] = 0;

	for(k = 0; k < noc0; k++)

	{

		C[j*noc1 + i] += A[j*noc0 + k] * B[k*noc1 + i];

	}

}

}

// Main

int main ()

{

float* matrixA;

float* matrixB;

float* matrixC;

int HA, HB, HC, WA, WB, WC, i;

// get matrix size from data file

int* size;

size = (int*)malloc(2 * sizeof(size[0]));

FILE *sizes;

if ( (sizes = fopen( "SZ.dat", "r" )) == NULL ) printf("Can't open file!\n");

for(i=0; i<2; i++)

{

	fscanf(sizes, "%d", &size[i]);

}

fclose(sizes);

// Matrix size

HA = size[0];

WA = size[1];

HB = size[1];

WB = size[0];

HC = HA;

WC = WB;

unsigned int mem_size_A = HA * WA * sizeof(matrixA[0]);

unsigned int mem_size_B = HB * WB * sizeof(matrixB[0]);

unsigned int mem_size_C = HC * WC * sizeof(matrixC[0]);

/* Allocate host memory for the matrices */

matrixA = (float*)malloc(mem_size_A);

if (matrixA == 0) {

    fprintf (stderr, "!!!! host memory allocation error (Matrix A)\n");

    return EXIT_FAILURE;

}

matrixB = (float*)malloc(mem_size_B);

if (matrixB == 0) {

    fprintf (stderr, "!!!! host memory allocation error (Matrix B)\n");

    return EXIT_FAILURE;

}

matrixC = (float*)malloc(mem_size_C);

if (matrixC == 0) {

    fprintf (stderr, "!!!! host memory allocation error (Matrix C)\n");

    return EXIT_FAILURE;

}

// Get data from fiiles

FILE *file;

if ( (file = fopen( "AW.dat", "r" )) == NULL ) printf("Can't open file!\n");

for (i=0;i<(HA*WA);i++)

{

	fscanf( file, "%f", &matrixA[i] );

}

fclose(file);



FILE *file2;

if ( (file2 = fopen( "RR.dat", "r" )) == NULL ) printf("Can't open file!\n");

for (i=0;i<(HB*WB);i++)

{

	fscanf( file2, "%f", &matrixB[i] );

}

fclose(file2);

/* Allocate device memory for the matrices */

float* d_A;

float* d_B;

float* d_C;

cublasAlloc(HA * WA, sizeof(d_A[0]), (void**)&d_A);

cublasAlloc(HB * WB, sizeof(d_B[0]), (void**)&d_B);

cublasAlloc(HC * WC, sizeof(d_C[0]), (void**)&d_C);

// Transfer matrices

cublasSetVector(HA * WA, sizeof(matrixA[0]), matrixB, 1, d_A, 1);

cublasSetVector(HB * WB, sizeof(matrixB[0]), matrixB, 1, d_B, 1);

cublasSetVector(HC * WC, sizeof(matrixC[0]), matrixC, 1, d_C, 1);

// set kernel config

dim3 threads( BLOCK_SIZE, BLOCK_SIZE );

dim3 grid( WC / threads.x,  HC / threads.y );

// invoke kernel

CudaMul<<< grid, threads >>>(d_A, d_B, d_C, WA, WB, HC);

// Copy result to device

cudaMemcpy(matrixC, d_C, mem_size_C, cudaMemcpyDeviceToHost);

// Print results to file

FILE *file3;

file3 = fopen ("DC.dat", "w");

for (i=0; i<HC*WC; i++)

{

	fprintf(file3, "%f ", matrixC[i]);

}

fclose(file3);

 // find C

float* CC;

float* CC_bp;

/* Allocate host memory for CC */

CC = (float*)malloc(HC * WC * sizeof(CC[0]));

if (CC == 0) {

	fprintf (stderr, "!!!! host memory allocation error (CC)\n");

	return EXIT_FAILURE;

}

/* Calculate CC*/

CC[0] = matrixC[0];

for (i=1; i<HC*WC; i++)

{

	CC[i] = (((1+CC[i-1]/100)*(1+matrixC[i]/100))-1)*100;

}

// Print results to file

FILE *file4;

file4 = fopen ("CC.dat", "w");

for (i=0; i<HC*WC; i++)

{

	fprintf(file4, "%f ", CC[i]);

}

fclose(file4);

/* Allocate host memory for CC_bp */

CC_bp = (float*)malloc(WC * HC * sizeof(CC_bp[0]));

if (CC_bp == 0)

{

	fprintf (stderr, "!!!! host memory allocation error (CC_bp)\n");

	return EXIT_FAILURE;

}

/* Calculate CC_bp*/

for (i=0; i<HC*WC; i++)

{

	CC_bp[i] = CC[i] * 100;

}

/* Print CC_bp */

FILE *file5;

if ( (file5 = fopen( "CCbp.dat", "w" )) == NULL ) printf("Can't open file!\n");

for (i=0; i<WC*HC; i++)

{

	fprintf(file5, "%f s\n", (i+1), CC_bp[i]);

}

fclose(file5);

// clean up memory

free(matrixA);

free(matrixB);

free(matrixC);

free(size);

free(CC);

free(CC_bp);

cudaFree(d_A);

cudaFree(d_B);

cudaFree(d_C);

return EXIT_SUCCESS;

}[/codebox]

Thank you in advance!

Tom W

David_Lisin · August 11, 2009, 9:10am

Hi, i have posted this answer that should prove to be useful. CUBLAS will give generally better results in matrix multiplication. Hope it helps, David Lisin.

http://forums.nvidia.com/index.php?showtop…;hl=david+lisin

tomw2005 · August 12, 2009, 3:31pm

Thanks I’ll have a look at that. I was however able to solve my problem in the end.

Tom W

Abdo · August 18, 2009, 8:52pm

hI Tom W

How you can solve your problem, I have the same problem, the answer for multiplication is fake.

thanks a lot

Abdo

tomw2005 · August 19, 2009, 9:31am

There are better ways of doing it using CUBLAS or the examples from the SDK.

However when I have some free time I’ll look into posting the code which works.

Tom W

Abdo · August 19, 2009, 4:36pm

thank you very much for your concern

I will try will CUBLAS and I wait your code

thanks a lot

Abdo

Topic		Replies	Views
Matrix multiplication---not getting correct answer? answer for matrix multiplicatin seems to be wron CUDA Programming and Performance	0	3217	August 1, 2009
matrix multiplication--wrong answer CUDA Programming and Performance	6	3845	August 20, 2009
Matrix Multiplication CUDA Programming and Performance	5	1701	September 17, 2009
Matrix Multiplication by cublasSgemm CUDA Programming and Performance	1	7537	March 26, 2010
CUBLAS issues Some simple question about CUBLAS CUDA Programming and Performance	1	1283	August 22, 2011
Matrix multiplcation peoblem CUDA Programming and Performance	2	1127	July 9, 2010
Matrix multiplication from CUDA programming guide CUDA Programming and Performance	0	1858	November 23, 2009
simple matrix (or matrix vector) multiplication using CUBLAS CUDA Programming and Performance	9	5698	November 25, 2009
Matrix Multiplication with CUBLAS and MATLAB CUDA Programming and Performance	9	30333	August 14, 2009
vector matrix multiplication, share my code:) CUDA Programming and Performance	1	5864	October 11, 2011

Matrix Multiplication Help

Related topics