Hi!
I just started with CUDA and I am very excited about all the stuff I can do. But right now I am stuck, probably because I am not a C pro. Perhaps one of you guys can help me:
I wanted to implement a complex matrix multiplication, and although every cublasStatus return value is fine, I get a totally wrong result. Actually, I don’t know what cublasCgemm is calculating there. The results are far from comprehensible and still confuse me…
thanks in advance,
bjoern
[codebox]
int main(int argc, char** argv)
{
cuComplex* h_A;
cuComplex* h_B;
cuComplex* h_C;
cuComplex* d_A = 0;
cuComplex* d_B = 0;
cuComplex* d_C = 0;
cuComplex alpha = { 1, 0 };
cuComplex beta = { 0, 0 };
/* Initialize CUBLAS */
cublasInit();
int pRowsA = 6;
int pColsA = 3;
int pRowsB = 3;
int pColsB = 2;
int pRowsC = pRowsA;
int pColsC = pColsB;
/* Allocate host memory for the matrices */
h_A = (cuComplex*)malloc(pRowsA * pColsA * sizeof(h_A[0]));
h_B = (cuComplex*)malloc(pRowsB * pColsB * sizeof(h_B[0]));
h_C = (cuComplex*)malloc(pRowsC * pColsC * sizeof(h_C[0]));
/* setting values */
[…]
int mA_lda = pRowsA;
int mA_ldb = pRowsA;
int mB_lda = pRowsB;
int mB_ldb = pRowsB;
int mC_lda = pRowsC;
int mC_ldb = pRowsC;
/* Allocate device memory for the matrices */
cublasAlloc(pRowsA * pColsA, sizeof(h_A[0]), (void**)&d_A);
cublasAlloc(pRowsB * pColsB, sizeof(h_B[0]), (void**)&d_B);
cublasAlloc(pRowsC * pColsC, sizeof(h_C[0]), (void**)&d_C);
/* Initialize the device matrices with the host matrices */
cublasStatus cpA = cublasSetMatrix(pRowsA, pColsA, sizeof(h_A[0]), h_A, mA_lda, d_A, mA_ldb);
cublasStatus cpB = cublasSetMatrix(pRowsB, pColsB, sizeof(h_B[0]), h_B, mB_lda, d_B, mB_ldb);
cublasStatus cpC = cublasSetMatrix(pRowsC, pColsC, sizeof(h_C[0]), h_C, mC_lda, d_C, mC_ldb);
cublasCgemm(‘n’, ‘n’, pRowsA, pColsA, pColsB, alpha, d_A, pRowsA, d_B, pColsB, beta, d_C, pRowsA);
cublasStatus cgRes = cublasGetError();
cublasStatus cgm = cublasGetMatrix(pRowsC, pColsC, sizeof(h_C[0]), d_C, mC_lda, h_C, mC_lda);
for(int i = 0; i < 12; i++) {
cuComplex t = h_C[i];
printf("\n re: %f o,: %f ", t.x, t.y );
}
/* Memory clean up */
free(h_A);
free(h_B);
free(h_C);
cublasFree(d_A);
cublasFree(d_B);
cublasFree(d_C);
/* Shutdown */
cublasShutdown();
return EXIT_SUCCESS;
}[/codebox]