Hello,
I’m new here and a beginner at programming NVIDIA cards. I have read some pages about the CUBLAS implementation and, learning from the SDK examples, I wrote a sample to understand how CUBLAS is used.
Here is the sample code:
#include <stdio.h>

#include "cublas.h"
/*
 * Minimal legacy-CUBLAS sample: computes y = alpha * x + y (SAXPY) on the
 * device for two host vectors of n floats and copies the result back into y.
 *
 * Returns 0 on success and -1 if any CUBLAS call reports an error. All
 * resources acquired so far (host arrays, device buffers, the CUBLAS
 * library itself) are released on every exit path.
 *
 * NOTE(review): with n = 256 the init/alloc/copy calls likely dominate the
 * run time; when benchmarking, time only the cublasSaxpy call and use a much
 * larger n -- confirm by measurement.
 */
int main (int argc, char *argv[]) {
    (void)argc;
    (void)argv;

    const int n = 256;
    const float alpha = 1.0f;

    /* Everything is declared before the first goto so that no jump to the
     * cleanup label bypasses an initialization (ill-formed in C++). */
    int rc = -1;
    bool initialized = false;
    cublasStatus state;
    float *xptr = NULL;   /* device copy of x */
    float *yptr = NULL;   /* device copy of y */
    float *x = new float[n];
    float *y = new float[n];

    for (int k = 0; k < n; k++) {
        x[k] = 0.1f;
        y[k] = 0.1f;
    }

    /* Treat any status other than success as a failure; checking only for
     * CUBLAS_STATUS_NOT_INITIALIZED would let other error codes through. */
    if (cublasInit() != CUBLAS_STATUS_SUCCESS) {
        printf("CUBLAS init error.\n");
        goto cleanup;
    }
    initialized = true;

    state = cublasAlloc(n, sizeof(*y), (void**)&yptr);
    if (state != CUBLAS_STATUS_SUCCESS) {
        printf("Error allocation video memory.\n");
        goto cleanup;
    }

    state = cublasAlloc(n, sizeof(*x), (void**)&xptr);
    if (state != CUBLAS_STATUS_SUCCESS) {
        printf("Error allocation video memory.\n");
        goto cleanup;
    }

    /* Host -> device transfers. */
    state = cublasSetVector(n, sizeof(*x), x, 1, xptr, 1);
    if (state != CUBLAS_STATUS_SUCCESS) {
        printf("Error copy to video memory.\n");
        goto cleanup;
    }

    state = cublasSetVector(n, sizeof(*y), y, 1, yptr, 1);
    if (state != CUBLAS_STATUS_SUCCESS) {
        printf("Error copy to video memory.\n");
        goto cleanup;
    }

    /* Call CUBLAS implementation. cublasSaxpy returns void, so its status
     * is fetched separately with cublasGetError(). */
    cublasSaxpy(n, alpha, xptr, 1, yptr, 1);
    state = cublasGetError();
    if (state != CUBLAS_STATUS_SUCCESS) {
        printf("CUBLAS execution error.\n");
        goto cleanup;
    }

    /* Device -> host transfer of the result. */
    state = cublasGetVector(n, sizeof(*yptr), yptr, 1, y, 1);
    if (state != CUBLAS_STATUS_SUCCESS) {
        printf("Error copy from video memory.\n");
        goto cleanup;
    }

    rc = 0;

cleanup:
    /* Release only what was actually acquired; a failure while cleaning up
     * still turns the exit code into -1 but does not stop the remaining
     * cleanup steps. */
    if (xptr != NULL && cublasFree(xptr) != CUBLAS_STATUS_SUCCESS) {
        printf("Error freeing video memory.\n");
        rc = -1;
    }
    if (yptr != NULL && cublasFree(yptr) != CUBLAS_STATUS_SUCCESS) {
        printf("Error freeing video memory.\n");
        rc = -1;
    }
    if (initialized && cublasShutdown() != CUBLAS_STATUS_SUCCESS) {
        printf("CUBLAS shutdown error.\n");
        rc = -1;
    }

    /* delete[] on a null pointer is a no-op, so no guard is needed. */
    delete[] x;
    delete[] y;
    return rc;
}
Here is my question:
Did I understand CUBLAS correctly? Is this the right way to call it?
I ask because I benchmarked this subroutine against vecLib on OS X 10.5.2
(MacBook Pro / NV8600GT 128MB), and CUBLAS seems to be very slow.
But I don’t trust my benchmark :D I’m new to the C/C++ world, and beginners
often get things wrong…
Big thanks for any support or suggestions about this post.
Greetings from Germany
Manolo