Hello,
I have written a test code that calls cusparseDgtsv2_nopivot function in cusparse library to solve tridiagonal matrix, in which openacc is used for data transmission, mainly referring to tcusparse3.f90.
Here is my program
PROGRAM TDMA
use openacc
use cusparse
implicit none
integer, parameter :: npts = 31
integer :: cusparseCreate_status
type(cusparseHandle) :: handle
integer :: m, n, ldb
real(8) :: dl(npts), d(npts), du(npts)
real(8) :: B(npts)
integer :: i
integer :: istat
integer(8) :: bufferSizeInBytes
integer(1), pointer:: buffer(:)
cusparseCreate_status = cusparseCreate(handle)
!$acc data create(dl,d,du,B)
m = npts
n = 1
ldb = npts
dl = 1.0
dl(1) = 0.0
d = 2.0
du = 1.0
du(npts) = 0.0
do i = 1, 16
B(i) = i
B(32 - i) = i
end do
!%acc update device(dl,d,du,B)
print *, 'CREATE cusparseCreate_status: '
if (cusparseCreate_status == CUSPARSE_STATUS_SUCCESS) then
print *, 'CUSPARSE_STATUS_SUCCESS'
elseif (cusparseCreate_status == CUSPARSE_STATUS_NOT_INITIALIZED) then
print *, 'CUSPARSE_STATUS_NOT_INITIALIZED'
elseif (cusparseCreate_status == CUSPARSE_STATUS_ALLOC_FAILED) then
print *, 'CUSPARSE_STATUS_ALLOC_FAILED'
elseif (cusparseCreate_status == CUSPARSE_STATUS_ARCH_MISMATCH) then
print *, 'CUSPARSE_STATUS_ARCH_MISMATCHED'
end if
istat = cusparseDgtsv2_nopivot_buffersizeext(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes)
allocate(buffer(bufferSizeInBytes))
istat = cusparseDgtsv2_nopivot(handle, m, n, dl, d, du, B, ldb, buffer)
print *, 'Dgtsv STATUS: '
if (istat == CUSPARSE_STATUS_SUCCESS) then
print *, 'CUSPARSE_STATUS_SUCCESS'
elseif (istat == CUSPARSE_STATUS_NOT_INITIALIZED) then
print *, 'CUSPARSE_STATUS_NOT_INITIALIZED'
elseif (istat == CUSPARSE_STATUS_ALLOC_FAILED) then
print *, 'CUSPARSE_STATUS_ALLOC_FAILED'
elseif (istat == CUSPARSE_STATUS_INVALID_VALUE) then
print *, 'CUSPARSE_STATUS_INVALID_VALUE'
elseif (istat == CUSPARSE_STATUS_ARCH_MISMATCH) then
print *, 'CUSPARSE_STATUS_ARCH_MISMATCHED'
elseif (istat == CUSPARSE_STATUS_EXECUTION_FAILED) then
print *, 'CUSPARSE_STATUS_EXECUTION_FAILED'
elseif (istat == CUSPARSE_STATUS_INTERNAL_ERROR) then
print *, 'CUSPARSE_STATUS_INTERNAL_ERROR'
end if
!$acc update host(dl,d,du,B)
!$acc end data
print *, 'The solution is: '
do i = 1, npts
print *, 'SOL(', i, '):', B(i)
end do
END PROGRAM TDMA
After running, it displays:
nvfortran -Mpreprocess -fast -acc=gpu -cudalib=cusparse -o gtsv2acc.exe gtsv2acc.f90
./gtsv2acc.exe
CREATE cusparseCreate_status:
CUSPARSE_STATUS_SUCCESS
Dgtsv STATUS:
CUSPARSE_STATUS_SUCCESS
Failing in Thread:1
Accelerator Fatal Error: call to cuMemcpyDtoHAsync returned error 700: Illegal address during kernel execution
File: /home/lixinyu/5555/5555/gtsv2acc.f90
Function: tdma:1
Line: 64
make: *** [makefile:15: run] Error 1
The result shows that the cusparseDgtsv2_nopivot function seems to have been called successfully, but I am still not sure how to transfer the result of the function to the host and print it.
If you have any other recommendation for the code, it would be helpful.
Looking forward your replies,thanks!