Hello,
I tried calling cusparseSpMV ( ) with OpenACC. So far the program runs, but the output is incorrect. My guess is that there is something wrong with the data transfer from the device to the host. But I failed to check the error.
program text_spMV
use cusparse
use openacc
implicit none
type(cusparseHandle) :: handle
type(cusparseSpMatDescr) :: matrix
type(cusparseDnVecDescr) :: vecX, vecY
real(kind=8), dimension(:), allocatable :: Values
real(kind=8), dimension(:), allocatable :: d_x, d_y
integer(kind=4) :: nx, ny, nnz, istat, i
integer(kind=4) :: alpha = 1, beta = 0
integer(kind=8), dimension(:), allocatable :: RowOffsets, ColInd
integer(kind=8) :: bufferSize
integer(1), pointer :: externalBuffer(:)
nx = 4
ny = 5
nnz = 9
ALLOCATE(RowOffsets(nx + 1), ColInd(nnz), Values(nnz))
ALLOCATE(d_x(ny), d_y(nx))
!$acc data create(RowOffsets, ColInd, Values, d_x, d_y)
RowOffsets = [1, 3, 5, 8, 10]
ColInd = [1, 2, 2, 3, 1, 4, 5, 3, 5]
Values = [1.0, 4.0, 2.0, 3.0, 5.0, 7.0, 8.0, 9.0, 6.0]
d_x = [1.0, 2.0, 3.0, 4.0, 5.0]
d_y = [0.0, 0.0, 0.0, 0.0]
!$acc update device(RowOffsets, ColInd, Values, d_x, d_y)
istat = cusparseCreate(handle)
istat = cusparseCreateCsr(matrix, nx, ny, nnz, RowOffsets, ColInd, Values, &
CUSPARSE_INDEX_64I, CUSPARSE_INDEX_64I, CUSPARSE_INDEX_BASE_ONE, CUDA_R_64F)
istat = cusparseCreateDnVec(vecX, ny, d_x, CUDA_R_64F)
istat = cusparseCreateDnVec(vecY, nx, d_y, CUDA_R_64F)
istat = cusparseSpMV_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha, matrix, vecX, &
beta, vecY, CUDA_R_64F, CUSPARSE_SPMV_CSR_ALG1, bufferSize)
ALLOCATE(externalBuffer(bufferSize))
!$acc data create(externalBuffer)
istat = cusparseSpMV(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha, matrix, vecX, beta, vecY, CUDA_R_64F, &
CUSPARSE_SPMV_CSR_ALG1, externalBuffer)
!$acc update host(d_y)
do i = 1, nx
print *, 'SOL1(', i, '):', d_y(i)
end do
!$acc end data
!$acc end data
end program text_spMV
My compile command is:
nvfortran -Mpreprocess -fast -acc=gpu -cudalib=cusparse -o text_spMV.exe text_spMV.f90
The output is all zero:
SOL1( 1 ): 0.000000000000000
SOL1( 2 ): 0.000000000000000
SOL1( 3 ): 0.000000000000000
SOL1( 4 ): 0.000000000000000
Could you please tell me the reason for my mistake? Thank you very much!