Hey all,
So I’m trying to do a simple matrix-vector multiplication using the cuBLAS function cublasSgemv, but I am not getting back the values that I am expecting. I am using a 5x5 matrix with all values initialized to 1 (for testing purposes). I am multiplying this matrix by a 5-element vector initialized to {1, 2, 3, 4, 5}. My results vector contains the values 55, 60, 65, 70, 75. Those numbers don’t seem right to me, but maybe I am just misunderstanding the algorithm that the function is carrying out. Here is my code:
cudaSetDevice( cutGetMaxGflopsDeviceId() );
cublasInit();
float *h_Matrix, *d_Matrix, *h_Multiplier, *d_Multiplier, *h_Results, *d_Results;
float elapsedTime, selapsedTime = 0;
size_t matrix_Size, multiplier_Size;
matrix_Size = ROWS * COLS * sizeof(float);
multiplier_Size = ROWS * sizeof(float);
//allocate memory for the matrices
h_Matrix =(float*) malloc(matrix_Size);
cublasAlloc(ROWS * COLS, sizeof(float), (void**)&d_Matrix);
//allocate memory for the multipliers
h_Multiplier =(float*) malloc(multiplier_Size);
cublasAlloc(ROWS, sizeof(float), (void**)&d_Multiplier);
h_Results =(float*) malloc(multiplier_Size);
cublasAlloc(ROWS , sizeof(float), (void**)&d_Results);
//set values in Matrix to 1
for(int i = 0; i < TOTAL; i++)
h_Matrix[i]= (float)1;
for(int f = 0; f < ROWS; f++)
{
h_Multiplier[f] = (float)f + 1;
h_Results[f] = (float)1;
}
cublasSetMatrix(ROWS,COLS,sizeof(float),h_Matrix,1,d_Matrix,
1);
cublasSetVector(ROWS,sizeof(float),h_Multiplier,1,d_Multipli
er,1);
cublasSetVector(ROWS,sizeof(float),h_Results,1,d_Results,1);
cublasSgemv('n', ROWS, COLS, 1, d_Matrix, ROWS, d_Multiplier,1,0,d_Results , 1);
cudaThreadSynchronize();
h_Results =(float*) malloc(multiplier_Size);
cublasGetVector(ROWS, sizeof(float), d_Results, 1, h_Results, 1);
cublasFree(d_Matrix);
cublasFree(d_Multiplier);
cublasFree(d_Results);
cublasShutdown();
Any information would be greatly appreciated. Thanks in advance!