CuSPARSE MM Multiplication: Preprocess and SPMM_CSR_ALG3 Error

Hi,

I am trying to perform a sparse matrix–dense matrix multiplication (SpMM) using OpenACC data directives and the cuSPARSE library. The code consists of three parts: a subroutine, a main program, and a Makefile.

The main code:

program main

  implicit none

  ! Driver: builds a small 1-based CSR matrix and a dense matrix, then hands
  ! both to csrsymmv. All index arrays below are 1-based.

  integer :: n                          ! number of rows of y (passed as the first argument of csrsymmv)
  integer :: p                          ! number of columns of y
  integer :: nna                        ! number of nonzero entries stored in a

  double precision, dimension(5,3) :: y ! dense matrix multiplied by the CSR matrix

  double precision, dimension(11) :: a  ! nonzero values of the CSR matrix
  integer, dimension(6)  :: ia          ! CSR row pointers (one more entry than rows)
  integer, dimension(11) :: ja          ! CSR column index of each nonzero

  n   = 5
  p   = 3
  nna = 11

  ! y is written out row by row for readability; reshape fills column-major,
  ! so the p-by-n intermediate is transposed into the n-by-p result.
  y = transpose(reshape([ 1.0d0, 2.0d0, 3.0d0, &
                          0.0d0, 1.0d0, 5.0d0, &
                          1.0d0, 4.0d0, 9.0d0, &
                          1.0d0, 1.0d0, 0.0d0, &
                          0.0d0, 2.0d0, 8.0d0 ], [p, n]))

  a  = [ 1.0d0, 4.0d0, 2.0d0, 3.0d0, 5.0d0, 7.0d0, &
         8.0d0, 9.0d0, 6.0d0, 8.0d0, 3.0d0 ]
  ia = [ 1, 3, 5, 8, 10, 12 ]
  ja = [ 1, 2, 2, 3, 1, 4, 5, 3, 5, 1, 4 ]

  call csrsymmv(n, p, nna, ia, ja, a, y)

end program main

The subroutine:

!> Compute V = A * Y on the GPU with the cuSPARSE generic SpMM API and print
!> the result, one column of V per output record.
!>
!> A is an N-by-N sparse matrix in CSR form with 1-based indices; Y and V are
!> dense N-by-P matrices stored column-major with leading dimension N.
!>
!> Arguments:
!>   N    order of the sparse matrix; also the row count of Y and V
!>   P    column count of Y and V
!>   NNA  number of stored nonzeros of the sparse matrix
!>   IA   CSR row pointers (N+1 entries, 1-based)
!>   JA   CSR column indices (NNA entries, 1-based)
!>   A    CSR nonzero values (NNA entries)
!>   Y    dense input matrix (N-by-P)
!>
!> Every cuSPARSE status is checked; a failure is reported on standard output
!> and execution continues (best effort, as in the original code).
!>
!> NOTE(review): CUSPARSE_SPMM_CSR_ALG3 and cusparseSpMM_preprocess must be
!> exported by the cusparse Fortran module of the installed NVIDIA HPC SDK.
!> With a module that predates them the compiler reports both as undeclared
!> and cannot resolve the SpMM generics -- confirm the SDK version in use.
SUBROUTINE CSRSYMMV(N,P,NNA,IA,JA,A,Y)

USE openacc
USE cusparse
!$ USE OMP_LIB

IMPLICIT NONE

! ---- dummy arguments ----------------------------------------------------------
INTEGER :: N
INTEGER :: P
INTEGER :: NNA
INTEGER :: IA(N+1)
INTEGER :: JA(NNA)
DOUBLE PRECISION :: A(NNA)
DOUBLE PRECISION :: Y(N,P)

! ---- locals -------------------------------------------------------------------
DOUBLE PRECISION :: V(N,P)               ! product A*Y (automatic array)

type(cusparseHandle) :: h
type(cusparseSpMatDescr) :: matA
type(cusparseDnMatDescr) :: matY, matV
type(c_devptr) :: buffer                 ! SpMM workspace on the device
integer(8) :: bsize                      ! workspace size in bytes

INTEGER :: status
REAL(8) :: alpha, beta
INTEGER :: I

alpha = 1.0d0
beta  = 0.0d0
bsize = 0                                ! stays 0 if the size query fails

! Create the library handle and run cuSPARSE on the OpenACC synchronous queue's
! CUDA stream. No legacy cusparseMatDescr is needed: the generic API takes the
! index base directly in cusparseCreateCsr.
status = cusparseCreate(h)
CALL check_status('cusparseCreate error: ')
status = cusparseSetStream(h, acc_get_cuda_stream(acc_async_sync))
CALL check_status('cusparseSetStream: ')

! copyout already allocates V on the device at region entry, so V must not be
! listed in a second (create) clause on the same directive.
!$acc data copyin(IA, JA, A, Y) copyout(V)
!$acc host_data use_device(A,IA,JA,Y,V)

! -------------------------------------------------------------------------------
! Dense descriptors for Y and V.

status = cusparseCreateDnMat(matY, N, P, N, Y, CUDA_R_64F, CUSPARSE_ORDER_COL)
CALL check_status("cusparseCreateDnMat: ")

status = cusparseCreateDnMat(matV, N, P, N, V, CUDA_R_64F, CUSPARSE_ORDER_COL)
CALL check_status("cusparseCreateDnMat: ")

! -------------------------------------------------------------------------------
! Sparse descriptor for the N-by-N CSR matrix.

status = cusparseCreateCsr(matA, N, N, NNA, IA, JA, A, &
                   CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, &
                   CUSPARSE_INDEX_BASE_ONE, CUDA_R_64F)
CALL check_status("cusparseCreateCsr: ")

! -------------------------------------------------------------------------------
! Query and allocate the workspace.

status = cusparseSpMM_buffersize(h, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, &
                 alpha, matA, matY, beta, matV, CUDA_R_64F, CUSPARSE_SPMM_CSR_ALG3, bsize)
CALL check_status("cusparseSpMM_buffersize: ")

print *,"SpMM buffersize required: ",bsize
IF (bsize .GT. 0) buffer = acc_malloc(bsize)

! -------------------------------------------------------------------------------
! Preprocess, then multiply: V = alpha*A*Y + beta*V.

status = cusparseSpMM_preprocess(h, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, &
                 alpha, matA, matY, beta, matV, CUDA_R_64F, CUSPARSE_SPMM_CSR_ALG3, buffer)
CALL check_status("cusparseSpMM_preprocess: ")

status = cusparseSpMM(h, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, &
                 alpha, matA, matY, beta, matV, CUDA_R_64F, CUSPARSE_SPMM_CSR_ALG3, buffer)
CALL check_status("cusparseSpMM: ")

! -------------------------------------------------------------------------------
! Release the workspace and the descriptors created above.

IF (bsize .GT. 0) CALL acc_free(buffer)

status = cusparseDestroySpMat(matA)
CALL check_status("cusparseDestroySpMat: ")
status = cusparseDestroyDnMat(matY)
CALL check_status("cusparseDestroyDnMat: ")
status = cusparseDestroyDnMat(matV)
CALL check_status("cusparseDestroyDnMat: ")

!$acc end host_data
!$acc end data

status = cusparseDestroy(h)
CALL check_status("cusparseDestroy: ")

! Print every column of V (P of them), not a fixed count.
DO I = 1, P
  write(*,'(5(1x,f7.2))') V(:,I)
END DO

CONTAINS

  ! Report the most recent cuSPARSE status if it signals a failure.
  SUBROUTINE check_status(label)
    CHARACTER(LEN=*), INTENT(IN) :: label
    IF (status /= CUSPARSE_STATUS_SUCCESS) PRINT *, label, status
  END SUBROUTINE check_status

END SUBROUTINE CSRSYMMV

The Makefile:

################################################################################

# Copyright (c) 2017, NVIDIA Corporation. All rights reserved.
#
# Please refer to the NVIDIA end user license agreement (EULA) associated
# with this source code for terms and conditions that govern your use of
# this software. Any use, reproduction, disclosure, or distribution of
# this software and related documentation outside the terms of the EULA
# is strictly prohibited.

################################################################################

# TEST : base name of the first source file and of the executable
# DAD  : base name of the second source file compiled and linked with it
# RUN  : optional launcher prefix for the run target (empty unless set)
TEST    = document
DAD     = cudamat
FC      = nvfortran
EXE     = exe
FCFLAGS = -Mpreprocess -fast -acc=gpu -cuda -Mcudalib=cusparse

# These targets name actions, not files.
.PHONY: all build run verify clean

all: build run verify

build: $(TEST).$(EXE)

# Real file rule so that `make run` can build the executable on its own.
# Every recipe line below must start with a TAB character.
$(TEST).$(EXE): $(TEST).f90 $(DAD).f90
	$(FC) $(FCFLAGS) -o $@ $(DAD).f90 $<

run: $(TEST).$(EXE)
	$(RUN) ./$(TEST).$(EXE)

verify:

clean:
	@echo 'Mustafying up...'
	@rm -rf *.$(EXE) *.dwf *.pdb *.mod prof

When I build and run the code without the cusparseSpMM_buffersize call and with the CUSPARSE_CSRMM_ALG1 algorithm, it does not give any errors. When I add them, the compiler reports the following errors:

nvfortran -Mpreprocess -fast -acc=gpu -cuda -Mcudalib=cusparse -o document.exe cudamat.f90 document.f90
cudamat.f90:
NVFORTRAN-S-0155-Could not resolve generic procedure cusparsespmm_buffersize (cudamat.f90: 66)
NVFORTRAN-S-0155-Could not resolve generic procedure cusparsespmm (cudamat.f90: 80)
NVFORTRAN-S-0038-Symbol, cusparse_spmm_csr_alg3, has not been explicitly declared (cudamat.f90)
NVFORTRAN-S-0038-Symbol, cusparsespmm_preprocess, has not been explicitly declared (cudamat.f90)
0 inform, 0 warnings, 4 severes, 0 fatal for csrsymmv

Can we not use CUSPARSE_SPMM_CSR_ALG# instead of CUSPARSE_CSRMM_ALG#? I ran into the same problem with CUSPARSE_SPMM_CSR_ALG1 in this code.
The compiler also says that cusparseSpMM_preprocess has not been explicitly declared. What should I do?

It looks like you are using some CUDA 11.2 and later features that we don’t have in our cusparse module yet. I will enter a bug to get the Fortran module updated.