Calling cublasSgbmv Incorrect results

Hi,

I’m trying to call cublasSgbmv function with the following arguments, but somehow the results seem to be incorrect (shown below). I’m also pasting the code I’m using for invoking cublasSgbmv function. I looked at the CUBLAS documentation as well as SDK samples, but couldn’t figure out where I’m wrong. Any pointer on what I might be doing wrong in the code will be much appreciated. I’ve tried this code on both Windows and Ubuntu (using emulation mode).

Thanks in advance,

Balwinder

// Code

int testCUBlas()

{	

	cublasStatus status;

	float* h_A;

	float* h_Y;

	float* h_X;

	float* h_ref_Y;

	float* d_A;

	float* d_Y;

	float* d_X;

	float alpha = 1.0f;

	float beta = 1.0f;

	int i, m=6, n=6, kl=2, ku=1, lda=kl+ku+1;

	float error_norm;

	float ref_norm;

	float diff;

	float* p = genRegularBandMatrix(m, n, kl, ku);

	// Initialize CUBLAS

	status = cublasInit();

	if (checkStatus(status, "Initiatilzation failed.")) return EXIT_FAILURE;

	// Allocate host memory for the matrices

	h_A = genCompactBandMatrix(p, m, n, kl, ku);

	h_X = getRandomVector(n);

	h_Y = getRandomVector(n); // We use this one for passing to cublasSgbmv   

	// Allocate device memory for the matrices

	status = cublasAlloc(lda*n, sizeof(float), (void**)&d_A);

	if (checkStatus(status, "device memory allocation error (A).")) return EXIT_FAILURE;

	status = cublasAlloc(n, sizeof(float), (void**)&d_Y);

	if (checkStatus(status, "device memory allocation error (Y).")) return EXIT_FAILURE;

	status = cublasAlloc(n, sizeof(float), (void**)&d_X);

	if (checkStatus(status, "device memory allocation error (X).")) return EXIT_FAILURE;

	// Initialize the device matrices with the host matrices

	status = cublasSetVector(lda*n, sizeof(float), h_A, 1, d_A, 1);

	if (checkStatus(status, "device access error (write A).")) return EXIT_FAILURE;

	status = cublasSetVector(n, sizeof(float), h_Y, 1, d_Y, 1);

	if (checkStatus(status, "device access error (write Y).")) return EXIT_FAILURE;

	status = cublasSetVector(n, sizeof(float), h_X, 1, d_X, 1);

	if (checkStatus(status, "device access error (write X).")) return EXIT_FAILURE;

	// Clear last error

	cublasGetError();

	printf("Regular band matrix:");

	printMatrix(p, m, n);

	

	// Performs operation using cublas

	printf("\nSGBMV using CUBLAS implementation:");

	printArgs('N', m, n, kl, ku, alpha, d_A, lda, d_X, 1, beta, d_Y, 1);

	cublasSgbmv('N', m, n, kl, ku, alpha, d_A, lda, d_X, 1, beta, d_Y, 1);

	status = cublasGetError();

	if (checkStatus(status, "kernel execution error.")) return EXIT_FAILURE;

	// Allocate host memory for reading back the result from device memory

	h_ref_Y = (float*)malloc(n * sizeof(float));

	if (h_ref_Y == 0) {

		fprintf (stderr, "!!!! host memory allocation error (h_ref_Y)\n");

		return EXIT_FAILURE;

	}

	// Read the result back

	status = cublasGetVector(n, sizeof(float), d_Y, 1, h_ref_Y, 1);

	if (checkStatus(status, "device access error (read d_Y).")) return EXIT_FAILURE;

	// Check results

	printf("Results: ");

	printVector(h_ref_Y, n);

	// Memory clean up

	free(h_A);

	free(h_X);

	free(h_Y);

	free(h_ref_Y);

	status = cublasFree(d_A);

	if (checkStatus(status, "memory free error (A).")) return EXIT_FAILURE;

	status = cublasFree(d_Y);

	if (checkStatus(status, "memory free error (Y).")) return EXIT_FAILURE;

	status = cublasFree(d_X);

	if (checkStatus(status, "memory free error (X).")) return EXIT_FAILURE;

	// Shutdown

	status = cublasShutdown();

	if (checkStatus(status, "shutdown error (A).")) return EXIT_FAILURE;

	return EXIT_SUCCESS;

}

// Results

Starting the test ...

Regular band matrix:

{0.0, 1.0, 0.0, 0.0, 0.0, 0.0}

{1.0, 2.0, 3.0, 0.0, 0.0, 0.0}

{2.0, 3.0, 4.0, 5.0, 0.0, 0.0}

{0.0, 4.0, 5.0, 6.0, 7.0, 0.0}

{0.0, 0.0, 6.0, 7.0, 8.0, 9.0}

{0.0, 0.0, 0.0, 8.0, 9.0, 10.0}

SGBMV using CUBLAS implementation:

---------INPUT ARGUMENTS-------------

TRANS=N, M=6, N=6, KL=2, KU=1, ALPHA=1.00

LDA=4, INCX=1, INCY=1, BETA=1.00

Matrix A:

{0.0, 1.0, 3.0, 5.0, 7.0, 9.0}

{0.0, 2.0, 4.0, 6.0, 8.0, 10.0}

{1.0, 3.0, 5.0, 7.0, 9.0, 0.0}

{2.0, 4.0, 6.0, 8.0, 0.0, 0.0}

Vector X:

1.00, 2.00, 3.00, 4.00, 5.00, 6.00

Vector Y:

1.00, 2.00, 3.00, 4.00, 5.00, 6.00

-------------------------------------

Actual Results: 16.00, 35.00, 30.00, 89.00, 91.00, 92.00

Expected Results: 3.00, 16.00, 43.00, 86.00, 145.00, 143.00