Hello,
I have a symmetric matrix that I store just the upper part of it in the csr format. Since the symmetric matrices in the cuSPARSE library are not usually recommended, I convert the upper triangular stored csr matrix to a general csr format in the first part of the code. Then I multiplied it with a matrix in the shape of N=3, P=1 in this example.
Although the conversion part is not optimal, it worked fine and gave the correct result for the matrix. However, the second part of the code (matrix matrix multiplication) gave the following errors.
NVFORTRAN-S-0155-Could not resolve generic procedure cusparsecreatecsr (csr.f90: 122)
NVFORTRAN-S-0155-Could not resolve generic procedure cusparsespmm_buffersize (csr.f90: 127)
NVFORTRAN-S-0155-Could not resolve generic procedure cusparsespmm (csr.f90: 139)
0 inform, 0 warnings, 3 severes, 0 fatal for sptoden
The code is as follows:
program sptoden
use openacc
use cusparse
implicit none
! CUDA API variables
type(cusparseHandle) :: h
type(cusparseMatDescr) :: descrA
INTEGER(8) :: bsize
type(c_devptr) :: buffer
type(cusparseSpMatDescr) :: matA
type(cusparseDnMatDescr) :: matZ
type(cusparseDnMatDescr) :: matY
type(cusparseDnMatDescr) :: matV
INTEGER :: status
DOUBLE PRECISION :: alpha, beta
INTEGER, dimension(3) :: nnzPerRowZ
INTEGER :: nna
DOUBLE PRECISION, dimension(5) :: csrValA
INTEGER, dimension(5) :: csrColIndA
INTEGER, dimension(4) :: csrRowPtrA
DOUBLE PRECISION, allocatable :: csrValZ(:)
INTEGER, allocatable :: csrColIndZ(:)
INTEGER, allocatable :: csrRowPtrZ(:)
DOUBLE PRECISION, dimension(3,3) :: Z
DOUBLE PRECISION, dimension(3) :: Y, V
INTEGER :: M,I
INTEGER :: N,P
N = 3
P = 1
Y = (/ 1.0d0, 2.0d0, 3.0d0 /)
csrValA = (/ 1.0d0, 3.0d0, -2.0d0, 5.0d0, -1.0d0 /)
csrRowPtrA = (/ 1, 4, 5, 6 /)
csrColIndA = (/ 1, 2, 3, 2, 3 /)
! The expansion of the sparse matrix A (It is supposed to be upper triangular symmetric.)
! 1.0 3.0 -2.0
! 0.0 5.0 0.0
! 0.0 0.0 -1.0
! ---------------------------------------------------------------------------------------
Z = 0.0d0
! Transformation from symmetric to general matrix-----------------------------------------
status = cusparseCreate(h)
if (status /= CUSPARSE_STATUS_SUCCESS) &
write(*,*) 'cusparseCreate error for transformation: ', status
status = cusparseCreateMatDescr(descrA)
status = cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL)
status = cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ONE)
status = cusparseSetStream(h, acc_get_cuda_stream(acc_async_sync))
!$ACC data copyin(csrValA, csrRowPtrA, csrColIndA) copy(Z)
!$acc host_data use_device(csrValA, csrRowPtrA, csrColIndA, Z)
status = cusparseDcsr2dense(h, 3, 3, descrA, csrValA, csrRowPtrA, csrColIndA, Z, 3)
!$acc end host_data
!$acc end data
! ----------------------------------------------------------------------------------------
! Conversion from non-symmetrical to symmetrical------------------------------------------
DO M = 2,3
DO I = 1,(M-1)
Z(M,I) = Z(I,M)
END DO
END DO
! ----------------------------------------------------------------------------------------
!$acc data copyin(Z) create(nnzPerRowZ) copyout(nnzPerRowZ)
!$acc host_data use_device(Z, nnzPerRowZ)
status = cusparseDnnz_v2(h, CUSPARSE_DIRECTION_ROW, &
3, 3, descrA, Z, 3, nnzPerRowZ, nna)
!$acc end host_data
!$acc end data
ALLOCATE(csrValZ(nna))
ALLOCATE(csrRowPtrZ(4))
ALLOCATE(csrColIndZ(nna))
!$acc data copy(Z, nnzPerRowZ) create(csrValZ, csrRowPtrZ, csrColIndZ) copyout(csrValZ, csrRowPtrZ, csrColIndZ)
!$acc host_data use_device(Z, nnzPerRowZ, csrValZ, csrRowPtrZ, csrColIndZ)
status = cusparseDdense2csr(h, 3, 3, descrA, Z, 3, &
nnzPerRowZ, csrValZ, csrRowPtrZ, csrColIndZ)
!$acc end host_data
!$acc end data
write(*,*) csrValZ
write(*,*) csrRowPtrZ
write(*,*) csrColIndZ
DO M = 1,3
DO N = 1,3
write(*,*) Z(M,N)
END DO
END DO
! 2nd part of the code-----------------Matrix-Matrix Multiplication
!$acc data copyin(csrValZ, csrRowPtrZ, csrColIndZ) copy(Y,V)
!$acc host_data use_device(csrValZ, csrRowPtrZ, csrColIndZ,Y,V)
status = cusparseCreateDnMat(matY, N, P, N, Y, CUDA_R_64F, CUSPARSE_ORDER_COL)
status = cusparseCreateDnMat(matV, N, P, N, V, CUDA_R_64F, CUSPARSE_ORDER_COL)
status = cusparseCreateCsr(matZ, N, N, nna, csrRowPtrZ, csrColIndZ, csrValZ, &
CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, &
CUSPARSE_INDEX_BASE_ONE, CUDA_R_64F)
status = cusparseSpMM_buffersize(h, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, &
alpha, matZ, matY, beta, matV, CUDA_R_64F, CUSPARSE_CSRMM_ALG1, bsize)
IF (bsize .GT. 0) buffer = acc_malloc(bsize)
status = cusparseSpMM(h, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, &
alpha, matZ, matY, beta, matV, CUDA_R_64F, CUSPARSE_CSRMM_ALG1, buffer)
IF (bsize.gt.0) CALL acc_free(buffer)
!$acc end host_data
!$acc end data
write(*,*) V
DEALLOCATE(csrValZ)
DEALLOCATE(csrRowPtrZ)
DEALLOCATE(csrColIndZ)
end program sptoden
The Makefile is as follows:
FC=nvfortran
TIMER=/usr/bin/time
OPT=
NOPT=-fast -Minfo=opt $(OPT)
FCFLAGS = -Mpreprocess -fast -acc=gpu -cuda -cudalib=cusparse
csr: csr.o
$(TIMER) ./csr.o $(STEPS)
csr.o: csr.f90
$(FC) $(FCFLAGS) -o $@ $< $(NOPT) -Minfo=accel -acc
clean:
rm -f *.o *.exe *.s *.mod a.out
I could not figure out why it does not work.
In fact, the transformation from symmetric sparse to general sparse is pointless in the problem. I just explained it what I wanted to do. I suppose that the problem is related to the matrix matrix multiplication.
Thanks
- Yunus