Hi,
I’m trying to call cublasSgbmv function with the following arguments, but somehow the results seem to be incorrect (shown below). I’m also pasting the code I’m using for invoking cublasSgbmv function. I looked at the CUBLAS documentation as well as SDK samples, but couldn’t figure out where I’m wrong. Any pointer on what I might be doing wrong in the code will be much appreciated. I’ve tried this code on both Windows and Ubuntu (using emulation mode).
Thanks in advance,
Balwinder
// Code
int testCUBlas()
{
cublasStatus status;
float* h_A;
float* h_Y;
float* h_X;
float* h_ref_Y;
float* d_A;
float* d_Y;
float* d_X;
float alpha = 1.0f;
float beta = 1.0f;
int i, m=6, n=6, kl=2, ku=1, lda=kl+ku+1;
float error_norm;
float ref_norm;
float diff;
float* p = genRegularBandMatrix(m, n, kl, ku);
// Initialize CUBLAS
status = cublasInit();
if (checkStatus(status, "Initiatilzation failed.")) return EXIT_FAILURE;
// Allocate host memory for the matrices
h_A = genCompactBandMatrix(p, m, n, kl, ku);
h_X = getRandomVector(n);
h_Y = getRandomVector(n); // We use this one for passing to cublasSgbmv
// Allocate device memory for the matrices
status = cublasAlloc(lda*n, sizeof(float), (void**)&d_A);
if (checkStatus(status, "device memory allocation error (A).")) return EXIT_FAILURE;
status = cublasAlloc(n, sizeof(float), (void**)&d_Y);
if (checkStatus(status, "device memory allocation error (Y).")) return EXIT_FAILURE;
status = cublasAlloc(n, sizeof(float), (void**)&d_X);
if (checkStatus(status, "device memory allocation error (X).")) return EXIT_FAILURE;
// Initialize the device matrices with the host matrices
status = cublasSetVector(lda*n, sizeof(float), h_A, 1, d_A, 1);
if (checkStatus(status, "device access error (write A).")) return EXIT_FAILURE;
status = cublasSetVector(n, sizeof(float), h_Y, 1, d_Y, 1);
if (checkStatus(status, "device access error (write Y).")) return EXIT_FAILURE;
status = cublasSetVector(n, sizeof(float), h_X, 1, d_X, 1);
if (checkStatus(status, "device access error (write X).")) return EXIT_FAILURE;
// Clear last error
cublasGetError();
printf("Regular band matrix:");
printMatrix(p, m, n);
// Performs operation using cublas
printf("\nSGBMV using CUBLAS implementation:");
printArgs('N', m, n, kl, ku, alpha, d_A, lda, d_X, 1, beta, d_Y, 1);
cublasSgbmv('N', m, n, kl, ku, alpha, d_A, lda, d_X, 1, beta, d_Y, 1);
status = cublasGetError();
if (checkStatus(status, "kernel execution error.")) return EXIT_FAILURE;
// Allocate host memory for reading back the result from device memory
h_ref_Y = (float*)malloc(n * sizeof(float));
if (h_ref_Y == 0) {
fprintf (stderr, "!!!! host memory allocation error (h_ref_Y)\n");
return EXIT_FAILURE;
}
// Read the result back
status = cublasGetVector(n, sizeof(float), d_Y, 1, h_ref_Y, 1);
if (checkStatus(status, "device access error (read d_Y).")) return EXIT_FAILURE;
// Check results
printf("Results: ");
printVector(h_ref_Y, n);
// Memory clean up
free(h_A);
free(h_X);
free(h_Y);
free(h_ref_Y);
status = cublasFree(d_A);
if (checkStatus(status, "memory free error (A).")) return EXIT_FAILURE;
status = cublasFree(d_Y);
if (checkStatus(status, "memory free error (Y).")) return EXIT_FAILURE;
status = cublasFree(d_X);
if (checkStatus(status, "memory free error (X).")) return EXIT_FAILURE;
// Shutdown
status = cublasShutdown();
if (checkStatus(status, "shutdown error (A).")) return EXIT_FAILURE;
return EXIT_SUCCESS;
}
// Results
Starting the test ...
Regular band matrix:
{0.0, 1.0, 0.0, 0.0, 0.0, 0.0}
{1.0, 2.0, 3.0, 0.0, 0.0, 0.0}
{2.0, 3.0, 4.0, 5.0, 0.0, 0.0}
{0.0, 4.0, 5.0, 6.0, 7.0, 0.0}
{0.0, 0.0, 6.0, 7.0, 8.0, 9.0}
{0.0, 0.0, 0.0, 8.0, 9.0, 10.0}
SGBMV using CUBLAS implementation:
---------INPUT ARGUMENTS-------------
TRANS=N, M=6, N=6, KL=2, KU=1, ALPHA=1.00
LDA=4, INCX=1, INCY=1, BETA=1.00
Matrix A:
{0.0, 1.0, 3.0, 5.0, 7.0, 9.0}
{0.0, 2.0, 4.0, 6.0, 8.0, 10.0}
{1.0, 3.0, 5.0, 7.0, 9.0, 0.0}
{2.0, 4.0, 6.0, 8.0, 0.0, 0.0}
Vector X:
1.00, 2.00, 3.00, 4.00, 5.00, 6.00
Vector Y:
1.00, 2.00, 3.00, 4.00, 5.00, 6.00
-------------------------------------
Actual Results: 16.00, 35.00, 30.00, 89.00, 91.00, 92.00
Expected Results: 3.00, 16.00, 43.00, 86.00, 145.00, 143.00