Hello,
I have recently been trying to use cuda in Fortan for a project which requires finding the inverse of a matrix. I am first finding the Bunch-Kaufman factorization of a symmetric matrix via cusolverDnDsytrf then attempting to find the inverse with cusolverDnDsytri.
I did find this post which is similar to my problem: Wrong output of Matrix Inversion using cuSOLVER. However, as far as I can tell I am not messing around with non-blocking streams. I’m also new to using cuda though, so I’m not sure..
For compilation I’m using nvfortran from nvhpc version 24.9.
Would anyone be able to help figure out what I’m doing wrong?
Here is an example with a 3 by 3 symmetric matrix:
program symmetric_inverse
use cusolverDn
use cublas
integer n,lwork_factor,lwork_solve,error,col,row
parameter(n=3)
double precision A(n,n), A_inv(n,n), I(n,n)
double precision, device :: dA_inv(n,n), dI(n,n), dA(n,n)
double precision, allocatable, device :: dwork_factor(:)
double precision, allocatable, device :: dwork_solve(:)
integer, device :: pivots(n)
integer, device :: devinfo
integer local_devinfo
type(cusolverDnHandle) :: h
A = reshape((/ -1, 2, 0, 2, -5, 0, 0, 0, -1 /), shape(A))
C send A to device so A can be inverted
dA_inv = A
C get the cusolver handle
error = cusolverDnCreate(h)
if (error .ne. CUBLAS_STATUS_SUCCESS) then
print*, 'Could not create handle', error
stop
endif
C get the buffer size needed for LDL^T calculations
error = cusolverDnDsytrf_buffersize(h,n,dA_inv,n,lwork_factor)
if (error .ne. CUBLAS_STATUS_SUCCESS) then
print*, 'Could not get get LDL^T buffer size', error
stop
endif
C allocate the workspace
allocate(dwork_factor(lwork_factor))
C Compute the LDL^T factorization
error = cusolverDnDsytrf(h,CUBLAS_FILL_MODE_UPPER,
& n,dA_inv,n,pivots,dwork_factor,
& lwork_factor,devinfo)
local_devinfo = devinfo
if ((error.ne.CUBLAS_STATUS_SUCCESS).or.(local_devinfo.ne.0)) then
print*, 'Could not find the LDL^T factorization', error
print*, 'SYTRF Info:', local_devinfo
stop
endif
C get the buffer size needed for inverse calculations
error = cusolverDnDsytri_buffersize(h,CUBLAS_FILL_MODE_UPPER,
& n,dA_inv,n,pivots,lwork_solve)
if(error.ne.CUBLAS_STATUS_SUCCESS) then
print*, 'Could not get inverse buffer size', error
stop
endif
C Find the inverse of the matrix
allocate(dwork_solve(lwork_solve))
error = cusolverDnDsytri(h,CUBLAS_FILL_MODE_UPPER,
& n,dA_inv,n,pivots,dwork_solve,
& lwork_solve,devinfo)
local_devinfo = devinfo
if((error.ne.CUBLAS_STATUS_SUCCESS).or.(local_devinfo.ne.0)) then
print*, 'Could not compute the inverse from LDL', error
print*, 'SYTRI Info:', local_devinfo
stop
endif
C get the inverse from device
A_inv = dA_inv
C fill in the lower triangle of A
C since upper is used for results in previous blas calls
do row=1, n
do col=row+1, n
A_inv(col,row) = A_inv(row,col)
enddo
enddo
C send A and the now symmetric A_inv to device
dA_inv = A_inv
dA = A
C do A*A_inv
call cublasDgemm('N','N',n,n,n,1.0d0,dA,n,dA_inv,n,0.0d0,dI,n)
C get result of A*A_inv
I = dI
C print the results
print*, A, '\n'
print*, A_inv, '\n'
print*, I, '\n'
deallocate(dwork_factor)
deallocate(dwork_solve)
error = cusolverDnDestroy(h)
end
Thank you!