Hi,
I am trying to use cublasZgemm on a Quadro FX 5800, but I don’t get the expected results:
A : 3.000000 3.000000 3.000000 0.000000 9.000000 1.000000 5.000000 2.000000
B : 6.000000 1.000000 4.000000 8.000000 1.000000 1.000000 6.000000 1.000000
C : 9.000000 7.000000 6.000000 3.000000 6.000000 5.000000 9.000000 1.000000
C : -104.000000 312.000000 -52.000000 164.000000 66.000000 170.000000 38.000000
instead of :
C : -8.000000 116.000000 -60.000000 108.000000 70.000000 178.000000 -58.000000 318.000000
I searched the CUBLAS programming guide, and everything I’m doing seems correct (I compile the code with -arch sm_13). Is it possible that the library simply doesn’t give the correct answers?
Thanks for your answers
Best regards
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <cublas.h>
/**
 * Measures wall-clock time elapsed since a previous timestamp.
 *
 * Reads the current time with gettimeofday(), converts it to microseconds,
 * and returns the difference to `last`. `last` is then overwritten with the
 * current timestamp so that consecutive calls measure consecutive intervals.
 *
 * @param last  in/out: previous timestamp in microseconds; updated to "now".
 * @return      microseconds elapsed between `last` (on entry) and now.
 */
unsigned long long time_log(unsigned long long & last){
    struct timeval now;
    gettimeofday(&now, NULL);
    // Widen to 64 bits *before* multiplying: where time_t/long are 32-bit,
    // tv_sec * 1000000L would overflow.
    const unsigned long long dnow =
        (unsigned long long) now.tv_sec * 1000000ULL + (unsigned long long) now.tv_usec;
    const unsigned long long diff = dnow - last;
    last = dnow;
    return diff;
}
int main(int argc, char * argv[]){
unsigned long long timeref=0;
unsigned long long diffCalc, diffInit, diffAlloc, diffEcriture, diffLecture, diffLiberation;
int m = atoi(argv[1]), n = atoi(argv[2]), k = atoi(argv[3]);
double * A = (double*) malloc(2*m*k*sizeof(double)), *B = (double*) malloc(2*k*n*sizeof(double)), *C = (double*) malloc(2*m*n*sizeof(double));
cuDoubleComplex * Ad, * Bd, * Cd;
cublasStatus retStatus;
int sizeA = 2*m*k, sizeB = 2*k*n, sizeC = 2*m*n;
srand(123);
double * start = A, *end = start+sizeA;
for(;start!=end;++start)
*start = rand()%10;
start = B;
end = start+sizeB;
for(;start!=end;++start)
*start = rand()%10;
start = C;
end = start+sizeC;
for(;start!=end;++start)
*start = rand()%10;
cuDoubleComplex alpha, beta;
alpha = make_cuDoubleComplex(atof(argv[4]), atof(argv[4]));
beta = make_cuDoubleComplex(atof(argv[5]), atof(argv[5]));
printf("A : ");
start = A;
end = start+sizeA;
for(;start!=end;++start)
printf("%f ", *start);
printf("\n");
printf("B : ");
start = B;
end = start+sizeB;
for(;start!=end;++start)
printf("%f ", *start);
printf("\n");
printf("C : ");
start = C;
end = start+sizeC;
for(;start!=end;++start)
printf("%f ", *start);
printf("\n");
time_log(timeref);
cublasInit();
diffInit = time_log(timeref);
cublasAlloc (sizeA/2, sizeof(cuDoubleComplex), (void**)&Ad);
retStatus = cublasGetError ();
if (retStatus != CUBLAS_STATUS_SUCCESS) {
printf("CUBLAS: an error occured in cublasAlloc\n");
}
cublasAlloc (sizeB/2, sizeof(cuDoubleComplex), (void**)&Bd);
retStatus = cublasGetError ();
if (retStatus != CUBLAS_STATUS_SUCCESS) {
printf("CUBLAS: an error occured in cublasAlloc\n");
}
cublasAlloc (sizeC/2, sizeof(cuDoubleComplex), (void**)&Cd);
diffAlloc = time_log(timeref);
retStatus = cublasGetError ();
if (retStatus != CUBLAS_STATUS_SUCCESS) {
printf("CUBLAS: an error occured in cublasAlloc\n");
}
cublasSetMatrix (m, k, sizeof(cuDoubleComplex), A, k, (void*)Ad, k);
retStatus = cublasGetError ();
if (retStatus != CUBLAS_STATUS_SUCCESS) {
printf("CUBLAS: an error occured in cublasSetMatrix\n");
}
cublasSetMatrix (k, n, sizeof(cuDoubleComplex), B, n, (void*)Bd, n);
retStatus = cublasGetError ();
if (retStatus != CUBLAS_STATUS_SUCCESS) {
printf("CUBLAS: an error occured in cublasSetMatrix\n");
}
cublasSetMatrix (m, n, sizeof(cuDoubleComplex), C, n, (void*)Cd, n);
diffEcriture = time_log(timeref);
retStatus = cublasGetError ();
if (retStatus != CUBLAS_STATUS_SUCCESS) {
printf("CUBLAS: an error occured in cublasSetMatrix\n");
}
cublasZgemm ('n', 'n', m, n, k, alpha, (cuDoubleComplex *) Ad, k, (cuDoubleComplex *) Bd, n, beta, (cuDoubleComplex *) Cd, n);
diffCalc = time_log(timeref);
retStatus = cublasGetError ();
if (retStatus != CUBLAS_STATUS_SUCCESS) {
printf("CUBLAS: an error occured in cublasZgemm : ");
if(retStatus == CUBLAS_STATUS_NOT_INITIALIZED)
printf("CUBLAS_STATUS_NOT_INITIALIZED\n");
else if (retStatus == CUBLAS_STATUS_INVALID_VALUE)
printf("CUBLAS_STATUS_INVALID_VALUE\n");
else if (retStatus == CUBLAS_STATUS_ARCH_MISMATCH)
printf("CUBLAS_STATUS_ARCH_MISMATCH\n");
else if (retStatus == CUBLAS_STATUS_EXECUTION_FAILED)
printf("CUBLAS_STATUS_EXECUTION_FAILED");
else
printf("IN YOUR ASS\n");
}
cublasGetMatrix (m, k, sizeof(cuDoubleComplex), Ad, k, A, k);
cublasGetMatrix (k, n, sizeof(cuDoubleComplex), Bd, n, B, n);
cublasGetMatrix (m, n, sizeof(cuDoubleComplex), Cd, n, C, n);
retStatus = cublasGetError ();
if (retStatus != CUBLAS_STATUS_SUCCESS) {
printf("CUBLAS: an error occured in cublasGetMatrix\n");
}
diffLecture = time_log(timeref);
cublasFree(Ad);
cublasFree(Bd);
cublasFree(Cd);
diffLiberation = time_log(timeref);
cublasShutdown();
printf("A : ");
start = A;
end = start+sizeA;
for(;start!=end;++start)
printf("%f ", *start);
printf("\n");
printf("B : ");
start = B;
end = start+sizeB;
for(;start!=end;++start)
printf("%f ", *start);
printf("\n");
printf("C : ");
start = C;
end = start+sizeC;
for(;start!=end;++start)
printf("%f ", *start);
printf("\n");
printf("%llu usec requises pour l'initialisation \n",diffInit);
printf("%llu usec requises pour l'allocation des données \n",diffAlloc);
printf("%llu usec requises pour l'écriture des données \n",diffEcriture);
printf("%llu usec requises pour le calcul \n",diffCalc);
printf("%llu usec requises pour la lecture des données \n",diffLecture);
printf("%llu usec requises pour la libération des données \n",diffLiberation);
free(A);
free(B);
free(C);
return 0;
}
Tested with the call ./prog 2 2 2 2 2