I’m unable to properly call the sbmv
BLAS function to get the required result.
This is my example:
sbmv
is y = alpha*A*x + beta*y
A is a 4x4 band matrix in the general band format { 0, 8, 9, 5, 1, 2, 3, 4 };
, where 1, 2, 3, 4
is a diagonal and 8, 9, 5
is the 1st superdiagonal (and, by symmetry, the 1st subdiagonal too). x and y are both the vector {1, 1, 1, 1}. With alpha = 2
and beta = 1
, the resulting vector is supposed to be {19. 39. 35. 19.}
However, the cublasSsbmv
result is { 35.00 31.00 13.00 15.00 }
, which is not what I need. Could you please help me make the right call?
The code snippet (the full program attached):
#define K 1       /* number of super-diagonals of the symmetric band matrix A */
#define M (K + 1) /* rows of the band storage array; also passed as lda */
#define N 4       /* order of A, i.e. number of columns of the band storage array */
int sbmv() {
// ......
float a[M * N] = { 0, 8, 9, 5, 1, 2, 3, 4 };
float x[N] = { 1, 1, 1, 1 };
float y[N] = { 1, 1, 1, 1 };
float alpha = 2.0, beta = 1.0;
for (int j = 0; j < N; j++) {
for (int i = 0; i < M; i++) printf("%7.0f", a[IDX2C(j, i, N)]);
printf("\n");
}
printf("----\n");
for (int j = 0; j < N; j++) printf("%7.0f", y[j]);
printf("\n---------------------------------\n");
float* devPtrA, * devPtrX, * devPtrY;
assert(cublasCreate(&handle) == CUBLAS_STATUS_SUCCESS);
assert(cudaMalloc((void**)&devPtrA, M * N * sizeof(*a)) == cudaSuccess);
assert(cudaMalloc((void**)&devPtrX, N * sizeof(*a)) == cudaSuccess);
assert(cudaMalloc((void**)&devPtrY, N * sizeof(*a)) == cudaSuccess);
assert(cublasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M) == CUBLAS_STATUS_SUCCESS);
assert(cublasSetVector(N, sizeof(*a), x, 1, devPtrX, 1) == CUBLAS_STATUS_SUCCESS);
assert(cublasSetVector(N, sizeof(*a), y, 1, devPtrY, 1) == CUBLAS_STATUS_SUCCESS);
// SBMV call
assert(cublasSsbmv(handle, CUBLAS_FILL_MODE_UPPER, N, K,
&alpha, devPtrA, M, devPtrX, 1, &beta, devPtrY, 1) == CUBLAS_STATUS_SUCCESS);
// ...
for (int j = 0; j < N; j++) printf("%7.0f ", y[j]);
printf("\n----\n");
return EXIT_SUCCESS;
}
sbmv_call.c (2 KB)