I’m having trouble getting the correct result using cublasSgemm. Basically, I just copied the simpleCUBLAS SDK example with some minor changes, and as far as I can tell from the documentation, I’m using the right input arguments. Any help with where I’m going wrong is appreciated. I’m guessing it’s probably something stupid I’m overlooking.
The C initialization:
[codebox] float* in1 = new float[5*3];
float* in2 = new float[3*5];
float* out1 = new float[5*5];
for (int i = 0; i < 3*5; i++) {
in1[i] = i;
in2[i] = 25-i;
}
call_MultiplyMatrices_GPU(h_in1, h_in2, 5, 3, 5);[/codebox]
The cublas calls:
[codebox]void call_MultiplyMatrices_GPU(float* h_in1, float* h_in2, uint32 in1_rows, uint32 in1_cols, uint32 in2_cols)
{
cublasStatus status;
status = cublasInit();
uint32 m = in1_rows; // = out1_rows
uint32 k = in1_cols; // = in2_rows
uint32 n = in2_cols; // = out1_cols
float *d_in1, *d_in2, *d_out1;
status = cublasAlloc(m*k, sizeof(float), (void**)&d_in1);
status = cublasAlloc(k*n, sizeof(float), (void**)&d_in2);
status = cublasAlloc(m*n, sizeof(float), (void**)&d_out1);
float* h_out1 = (float*)malloc(m*n * sizeof(float));
for (uint32 i = 0; i < m*n; i++)
h_out1[i] = 0;
status = cublasSetVector(m*k, sizeof(float), h_in1, 1, d_in1, 1);
status = cublasSetVector(k*n, sizeof(float), h_in2, 1, d_in2, 1);
status = cublasSetVector(m*n, sizeof(float), h_out1, 1, d_out1, 1);
cublasSgemm(‘n’, ‘n’, m, n, k, 1.0f, d_in1, m, d_in2, k, 0.0f, d_out1, m);
status = cublasGetVector(m*n, sizeof(float), d_out1, 1, h_out1, 1);
for (uint32 i = 0; i < 5; i++)
{
for (uint32 j = 0; j < 5; j++)
printf("%4.2f\t", h_out1[i*5 + j]);
printf("\n");
}
status = cublasFree(d_in1);
status = cublasFree(d_in2);
cublasFree(d_out1);
cublasShutdown();
}[/codebox]
Output:
[codebox]Cublas:
350.00 422.00 494.00 566.00 638.00
305.00 368.00 431.00 494.00 557.00
260.00 314.00 368.00 422.00 476.00
215.00 260.00 305.00 350.00 395.00
170.00 206.00 242.00 278.00 314.00
C:
50.00 47.00 44.00 41.00 38.00
230.00 218.00 206.00 194.00 182.00
410.00 389.00 368.00 347.00 326.00
590.00 560.00 530.00 500.00 470.00
770.00 731.00 692.00 653.00 614.00[/codebox]