# CuSPARSE MM Multiplication: Preprocess and SPMM_CSR_ALG3 Error

Hi,

I am trying to perform a sparse-matrix times dense-matrix multiplication (SpMM) using OpenACC data directives and the cuSPARSE library. The code consists of three parts: a subroutine, a main program, and a Makefile.

The main code:

``````PROGRAM MAIN
! Driver for the SpMM example: sets up a small sparse matrix in CSR form
! (one-based indices) and a dense matrix Y, then hands both to CSRSYMMV.

IMPLICIT NONE

! Fortran arrays start at 1, so the CSR index arrays below are one-based.

integer, parameter :: dp  = kind(1.0d0)   ! kind of DOUBLE PRECISION, as CSRSYMMV expects

integer, parameter :: N   = 5             ! Rows of Y (and rows/columns of the sparse matrix)
integer, parameter :: P   = 3             ! Columns of Y
integer, parameter :: NNA = 11            ! Number of stored nonzero elements

real(dp) :: Y(N,P)                        ! Dense matrix multiplied by the CSR matrix

real(dp) :: A(NNA)                        ! Nonzero values, stored row by row
integer  :: IA(N+1)                       ! CSR row pointers: row i occupies A(IA(i):IA(i+1)-1)
integer  :: JA(NNA)                       ! Column index of each stored value

! Y is filled one row at a time so the source reads like the matrix itself.
Y(1,:) = [ 1.0_dp, 2.0_dp, 3.0_dp ]
Y(2,:) = [ 0.0_dp, 1.0_dp, 5.0_dp ]
Y(3,:) = [ 1.0_dp, 4.0_dp, 9.0_dp ]
Y(4,:) = [ 1.0_dp, 1.0_dp, 0.0_dp ]
Y(5,:) = [ 0.0_dp, 2.0_dp, 8.0_dp ]

A  = [ 1.0_dp, 4.0_dp, 2.0_dp, 3.0_dp, 5.0_dp, 7.0_dp, 8.0_dp, 9.0_dp, 6.0_dp, 8.0_dp, 3.0_dp ]
IA = [ 1, 3, 5, 8, 10, 12 ]
JA = [ 1, 2, 2, 3, 1, 4, 5, 3, 5, 1, 4 ]

CALL CSRSYMMV(N,P,NNA,IA,JA,A,Y)

END PROGRAM MAIN
``````

The subroutine:

``````SUBROUTINE CSRSYMMV(N,P,NNA,IA,JA,A,Y)
! Computes V = A * Y on the GPU with the cuSPARSE generic SpMM API and prints V.
!
! A is an N x N sparse matrix in CSR form with one-based indices; Y and V are
! dense N x P matrices in column-major order. Device copies are managed with
! OpenACC data directives and their device addresses are handed to cuSPARSE
! through a host_data region.
!
! Arguments (all input):
!   N    rows/columns of the sparse matrix and rows of Y
!   P    columns of Y
!   NNA  number of stored nonzero values
!   IA   CSR row pointers, length N+1
!   JA   column index of each stored value, length NNA
!   A    stored nonzero values, length NNA
!   Y    dense right-hand matrix, N x P
!
! Every cuSPARSE status is checked and reported; execution continues after a
! failure, as before.
!
! NOTE(review): CUSPARSE_SPMM_CSR_ALG3 and cusparseSpMM_preprocess must be
! provided by the installed cusparse Fortran module. With a module that lacks
! them this routine fails to compile ("has not been explicitly declared").

use openacc
use cusparse
!$ use omp_lib

implicit none

integer, parameter :: dp = kind(1.0d0)   ! kind of DOUBLE PRECISION

integer,  intent(in) :: N
integer,  intent(in) :: P
integer,  intent(in) :: NNA
integer,  intent(in) :: IA(N+1)
integer,  intent(in) :: JA(NNA)
real(dp), intent(in) :: A(NNA)
real(dp), intent(in) :: Y(N,P)

type(cusparseHandle)     :: h
type(cusparseSpMatDescr) :: matA
type(cusparseDnMatDescr) :: matY, matV
type(c_devptr)           :: buffer
integer(8)               :: bsize
integer                  :: status
integer                  :: j
real(dp)                 :: alpha, beta
real(dp)                 :: V(N,P)       ! result matrix, filled on the device

! Create the cuSPARSE handle and run it on the stream OpenACC uses for
! synchronous work, so library calls are ordered with the data transfers.
! The legacy cusparseMatDescr is not needed: the generic API takes the index
! base directly in cusparseCreateCsr.
status = cusparseCreate(h)
if (status /= CUSPARSE_STATUS_SUCCESS) write(*,*) 'cusparseCreate error: ', status
status = cusparseSetStream(h, acc_get_cuda_stream(acc_async_sync))
if (status /= CUSPARSE_STATUS_SUCCESS) print *,"cusparseSetStream: ",status

alpha = 1.0_dp
beta  = 0.0_dp
bsize = 0_8       ! stays defined even if the buffer-size query fails

! V is output only (beta = 0), so copyout alone is sufficient; a variable must
! not appear in two data clauses of the same construct.
!$acc data copyin(IA, JA, A, Y) copyout(V)
!$acc host_data use_device(A, IA, JA, Y, V)

! Dense matrix descriptors: N x P, leading dimension N, column-major.
status = cusparseCreateDnMat(matY, N, P, N, Y, CUDA_R_64F, CUSPARSE_ORDER_COL)
if (status /= CUSPARSE_STATUS_SUCCESS) print *,"cusparseCreateDnMat: ",status

status = cusparseCreateDnMat(matV, N, P, N, V, CUDA_R_64F, CUSPARSE_ORDER_COL)
if (status /= CUSPARSE_STATUS_SUCCESS) print *,"cusparseCreateDnMat: ",status

! Sparse matrix descriptor: 32-bit one-based indices, 64-bit real values.
status = cusparseCreateCsr(matA, N, N, NNA, IA, JA, A, &
                           CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, &
                           CUSPARSE_INDEX_BASE_ONE, CUDA_R_64F)
if (status /= CUSPARSE_STATUS_SUCCESS) print *,"cusparseCreateCsr: ",status

! Ask cuSPARSE how much scratch space the chosen algorithm needs.
status = cusparseSpMM_buffersize(h, CUSPARSE_OPERATION_NON_TRANSPOSE, &
                                 CUSPARSE_OPERATION_NON_TRANSPOSE, &
                                 alpha, matA, matY, beta, matV, CUDA_R_64F, &
                                 CUSPARSE_SPMM_CSR_ALG3, bsize)
if (status /= CUSPARSE_STATUS_SUCCESS) print *,"cusparseSpMM_buffersize: ",status

print *,"SpMM buffersize required: ",bsize

! Always allocate at least one byte so that buffer is a defined device
! pointer even when cuSPARSE reports that no scratch space is required.
buffer = acc_malloc(max(bsize, 1_8))

status = cusparseSpMM_preprocess(h, CUSPARSE_OPERATION_NON_TRANSPOSE, &
                                 CUSPARSE_OPERATION_NON_TRANSPOSE, &
                                 alpha, matA, matY, beta, matV, CUDA_R_64F, &
                                 CUSPARSE_SPMM_CSR_ALG3, buffer)
if (status /= CUSPARSE_STATUS_SUCCESS) print *,"cusparseSpMM_preprocess: ",status

status = cusparseSpMM(h, CUSPARSE_OPERATION_NON_TRANSPOSE, &
                      CUSPARSE_OPERATION_NON_TRANSPOSE, &
                      alpha, matA, matY, beta, matV, CUDA_R_64F, &
                      CUSPARSE_SPMM_CSR_ALG3, buffer)
if (status /= CUSPARSE_STATUS_SUCCESS) PRINT *,"cusparseSpMM: ",status

call acc_free(buffer)

!$acc end host_data
!$acc end data

! Release the descriptors and the handle so repeated calls do not leak them.
status = cusparseDestroySpMat(matA)
if (status /= CUSPARSE_STATUS_SUCCESS) print *,"cusparseDestroySpMat: ",status
status = cusparseDestroyDnMat(matY)
if (status /= CUSPARSE_STATUS_SUCCESS) print *,"cusparseDestroyDnMat: ",status
status = cusparseDestroyDnMat(matV)
if (status /= CUSPARSE_STATUS_SUCCESS) print *,"cusparseDestroyDnMat: ",status
status = cusparseDestroy(h)
if (status /= CUSPARSE_STATUS_SUCCESS) print *,"cusparseDestroy: ",status

! Print the result one column per record, for every column of V (the loop
! bound used to be hard-coded to 3).
do j = 1, P
   write(*,'(5(1x,f7.2))') V(:,j)
end do

END SUBROUTINE CSRSYMMV
``````

The Makefile:

# Builds and runs the cuSPARSE SpMM example with nvfortran.
#   make / make all : build, run, verify
#   make clean      : remove build products

# Base name of the main-program source file and of the executable.
TEST    = document
# Base name of the subroutine source file. DAD was referenced but never
# defined; "cudamat" matches the compile command shown with the error output.
# Adjust it if the subroutine lives in a differently named file.
DAD     = cudamat
FC      = nvfortran
EXE     = exe
FCFLAGS = -Mpreprocess -fast -acc=gpu -cuda -Mcudalib=cusparse

# RUN is not set here and expands to nothing; it can be given on the command
# line to prefix the executable with a launcher.

.PHONY: all build run verify clean

all: build run verify

build: $(TEST).$(EXE)

# $< is the first prerequisite, i.e. the main-program source.
$(TEST).$(EXE): $(TEST).f90 $(DAD).f90
	$(FC) $(FCFLAGS) -o $(TEST).$(EXE) $(DAD).f90 $<

run: $(TEST).$(EXE)
	$(RUN) ./$(TEST).$(EXE)

verify:

clean:
	@echo 'Mustafying up...'
	@rm -rf *.$(EXE) *.dwf *.pdb *.mod prof

When I compile and run the code without the cusparseSpMM_buffersize call and with the CUSPARSE_CSRMM_ALG1 algorithm, it does not give any errors. When I add them, I get the following errors:

nvfortran -Mpreprocess -fast -acc=gpu -cuda -Mcudalib=cusparse -o document.exe cudamat.f90 document.f90
cudamat.f90:
NVFORTRAN-S-0155-Could not resolve generic procedure cusparsespmm_buffersize (cudamat.f90: 66)
NVFORTRAN-S-0155-Could not resolve generic procedure cusparsespmm (cudamat.f90: 80)
NVFORTRAN-S-0038-Symbol, cusparse_spmm_csr_alg3, has not been explicitly declared (cudamat.f90)
NVFORTRAN-S-0038-Symbol, cusparsespmm_preprocess, has not been explicitly declared (cudamat.f90)
0 inform, 0 warnings, 4 severes, 0 fatal for csrsymmv

Can we not use CUSPARSE_SPMM_CSR_ALG# instead of CUSPARSE_CSRMM_ALG#? I ran into the same problem with CUSPARSE_SPMM_CSR_ALG1 in this code.
Also, the compiler says cusparseSpMM_preprocess has not been explicitly declared. What should I do?

It looks like you are using some CUDA 11.2 and later features that we don’t have in our cusparse module yet. I will enter a bug to get the Fortran module updated.