CuSPARSE MM Multiplication: Preprocess and SPMM_CSR_ALG3 Error

yunus.altintop.2 · August 13, 2021, 7:24pm

Hi,

The code below performs a sparse-matrix by dense-matrix multiplication (SpMM) using OpenACC data directives and the cuSPARSE library. It consists of three parts: a main program, a subroutine, and a Makefile.

The main code:

PROGRAM MAIN

  IMPLICIT NONE

  ! Test driver: sets up a 5x5 sparse matrix in CSR form (1-based indices,
  ! as is natural in Fortran) together with a dense 5x3 matrix Y, and passes
  ! both to CSRSYMMV.

  INTEGER :: N                      ! Number of rows of Y (equals the number of columns of the sparse matrix)
  INTEGER :: P                      ! Number of columns of Y
  INTEGER :: NNA                    ! Number of nonzero elements of the sparse matrix

  DOUBLE PRECISION :: Y(5,3)        ! Dense matrix multiplied by the CSR matrix
  DOUBLE PRECISION :: A(11)         ! Nonzero values, stored row by row
  INTEGER          :: IA(6)         ! CSR row pointers (N+1 entries)
  INTEGER          :: JA(11)        ! CSR column indices

  N   = 5
  P   = 3
  NNA = 11

  ! The literal list below reads row by row. RESHAPE fills in column-major
  ! order, so the values are laid out as a 3x5 array and then transposed.
  Y = TRANSPOSE(RESHAPE( (/ 1.0, 2.0, 3.0,   &
                            0.0, 1.0, 5.0,   &
                            1.0, 4.0, 9.0,   &
                            1.0, 1.0, 0.0,   &
                            0.0, 2.0, 8.0 /), (/ 3, 5 /) ))

  A  = (/ 1.0, 4.0, 2.0, 3.0, 5.0, 7.0, 8.0, 9.0, 6.0, 8.0, 3.0 /)
  IA = (/ 1, 3, 5, 8, 10, 12 /)
  JA = (/ 1, 2, 2, 3, 1, 4, 5, 3, 5, 1, 4 /)

  CALL CSRSYMMV(N, P, NNA, IA, JA, A, Y)

END PROGRAM

The subroutine:

SUBROUTINE CSRSYMMV(N,P,NNA,IA,JA,A,Y)
!
! Computes V = A * Y on the GPU with the cuSPARSE generic SpMM API and prints V.
!
!   N    order of the (square) sparse matrix and number of rows of Y and V
!   P    number of columns of Y and V
!   NNA  number of stored nonzeros
!   IA   CSR row pointers, 1-based, N+1 entries
!   JA   CSR column indices, 1-based, NNA entries
!   A    CSR nonzero values, NNA entries
!   Y    dense N x P input matrix (column-major)
!
! The result V is a local array; it is copied back from the device and
! written to standard output, one column per record.
!
! Every cuSPARSE status is reported if it is not CUSPARSE_STATUS_SUCCESS;
! execution continues after a report, as in the original code.
!
USE openacc
USE cusparse
!$ USE OMP_LIB

IMPLICIT NONE

INTEGER N
INTEGER P
INTEGER NNA
INTEGER IA(N+1)
INTEGER JA(NNA)
DOUBLE PRECISION A(NNA)
DOUBLE PRECISION Y(N,P)

DOUBLE PRECISION V(N,P)

type(cusparseHandle) :: h
type(cusparseSpMatDescr) :: matA
type(cusparseDnMatDescr) :: matY, matV

integer(8) :: bsize
type(c_devptr) :: buffer

INTEGER :: status
REAL(8) :: alpha, beta
INTEGER :: I

! Initialise cuSPARSE and run it on the stream OpenACC uses for synchronous
! work, so the library calls are ordered with the OpenACC data transfers.
! No legacy cusparseMatDescr is created: the generic API takes the index
! base directly in cusparseCreateCsr.
status = cusparseCreate(h)
CALL report_status(status, 'cusparseCreate error: ')
status = cusparseSetStream(h, acc_get_cuda_stream(acc_async_sync))
CALL report_status(status, 'cusparseSetStream: ')

alpha = 1.0D0
beta  = 0.0D0
bsize = 0        ! defined value even if the buffer-size query below fails

! V is output-only, so copyout alone is enough (it allocates on entry and
! copies back on exit). Listing V in create as well was redundant.
!$acc data copyin(IA, JA, A, Y) copyout(V)
!$acc host_data use_device(A,IA,JA,Y,V)

! --- dense matrix descriptors (column-major, leading dimension N) --------------

status = cusparseCreateDnMat(matY, N, P, N, Y, CUDA_R_64F, CUSPARSE_ORDER_COL)
CALL report_status(status, "cusparseCreateDnMat: ")

status = cusparseCreateDnMat(matV, N, P, N, V, CUDA_R_64F, CUSPARSE_ORDER_COL)
CALL report_status(status, "cusparseCreateDnMat: ")

! --- sparse matrix descriptor (CSR, 32-bit 1-based indices) --------------------

status = cusparseCreateCsr(matA, N, N, NNA, IA, JA, A, &
                   CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, &
                   CUSPARSE_INDEX_BASE_ONE, CUDA_R_64F)
CALL report_status(status, "cusparseCreateCsr: ")

! --- workspace query and allocation --------------------------------------------

status = cusparseSpMM_buffersize(h, &
                 CUSPARSE_OPERATION_NON_TRANSPOSE, &
                 CUSPARSE_OPERATION_NON_TRANSPOSE, &
                 alpha, matA, matY, beta, matV, &
                 CUDA_R_64F, CUSPARSE_SPMM_CSR_ALG3, bsize)
CALL report_status(status, "cusparseSpMM_buffersize: ")

print *,"SpMM buffersize required: ",bsize
! NOTE(review): when bsize is 0 the buffer handle is passed on without being
! assigned, as in the original code - confirm cuSPARSE ignores it in that case.
IF (bsize .GT. 0) buffer = acc_malloc(bsize)

! --- preprocessing step for the chosen algorithm -------------------------------

status = cusparseSpMM_preprocess(h, &
                 CUSPARSE_OPERATION_NON_TRANSPOSE, &
                 CUSPARSE_OPERATION_NON_TRANSPOSE, &
                 alpha, matA, matY, beta, matV, &
                 CUDA_R_64F, CUSPARSE_SPMM_CSR_ALG3, buffer)
CALL report_status(status, "cusparseSpMM_preprocess: ")

! --- V = alpha * A * Y + beta * V ----------------------------------------------

status = cusparseSpMM(h, &
                 CUSPARSE_OPERATION_NON_TRANSPOSE, &
                 CUSPARSE_OPERATION_NON_TRANSPOSE, &
                 alpha, matA, matY, beta, matV, &
                 CUDA_R_64F, CUSPARSE_SPMM_CSR_ALG3, buffer)
CALL report_status(status, "cusparseSpMM: ")

! --- release the workspace and the descriptors ---------------------------------

IF (bsize.gt.0) CALL acc_free(buffer)

status = cusparseDestroySpMat(matA)
CALL report_status(status, "cusparseDestroySpMat: ")
status = cusparseDestroyDnMat(matY)
CALL report_status(status, "cusparseDestroyDnMat: ")
status = cusparseDestroyDnMat(matV)
CALL report_status(status, "cusparseDestroyDnMat: ")

!$acc end host_data
!$acc end data

status = cusparseDestroy(h)
CALL report_status(status, "cusparseDestroy: ")

! Print every column of V (the loop bound was hard-coded to 3 before).
DO I = 1, P
  write(*,'(5(1x,f7.2))') V(:,I)
END DO

CONTAINS

  ! Prints the label followed by the status code when a cuSPARSE call failed.
  SUBROUTINE report_status(stat, label)
    INTEGER, INTENT(IN) :: stat
    CHARACTER(LEN=*), INTENT(IN) :: label
    IF (stat /= CUSPARSE_STATUS_SUCCESS) PRINT *, label, stat
  END SUBROUTINE report_status

END SUBROUTINE CSRSYMMV

The Makefile:

################################################################################
#
# Copyright (c) 2017, NVIDIA Corporation. All rights reserved.
#
# Please refer to the NVIDIA end user license agreement (EULA) associated
# with this source code for terms and conditions that govern your use of
# this software. Any use, reproduction, disclosure, or distribution of
# this software and related documentation outside the terms of the EULA
# is strictly prohibited.
#
################################################################################

# TEST is the main program source, DAD the source holding the subroutine.
TEST    = document
DAD     = cudamat
FC      = nvfortran
EXE     = exe
FCFLAGS = -Mpreprocess -fast -acc=gpu -cuda -Mcudalib=cusparse

# None of these names is a file produced by the build.
.PHONY: all build run verify clean

all: build run verify

build: $(TEST).$(EXE)

# $< is $(TEST).f90; $(DAD).f90 is listed first on the command line, which
# yields: nvfortran ... -o document.exe cudamat.f90 document.f90
$(TEST).$(EXE): $(TEST).f90 $(DAD).f90
	$(FC) $(FCFLAGS) -o $@ $(DAD).f90 $<

# RUN may be set on the command line to a launcher; it is empty by default.
run: $(TEST).$(EXE)
	$(RUN) ./$(TEST).$(EXE)

verify:

clean:
	@echo 'Mustafying up...'
	@rm -rf *.$(EXE) *.dwf *.pdb *.mod prof

When I build the code without the cusparseSpMM_buffersize call and with the CUSPARSE_CSRMM_ALG1 algorithm, it compiles without any errors. When I add the call and switch to CUSPARSE_SPMM_CSR_ALG3 (as in the code above), the compiler reports the following errors:

nvfortran -Mpreprocess -fast -acc=gpu -cuda -Mcudalib=cusparse -o document.exe cudamat.f90 document.f90
cudamat.f90:
NVFORTRAN-S-0155-Could not resolve generic procedure cusparsespmm_buffersize (cudamat.f90: 66)
NVFORTRAN-S-0155-Could not resolve generic procedure cusparsespmm (cudamat.f90: 80)
NVFORTRAN-S-0038-Symbol, cusparse_spmm_csr_alg3, has not been explicitly declared (cudamat.f90)
NVFORTRAN-S-0038-Symbol, cusparsespmm_preprocess, has not been explicitly declared (cudamat.f90)
0 inform, 0 warnings, 4 severes, 0 fatal for csrsymmv

Is it not possible to use CUSPARSE_SPMM_CSR_ALG# in place of CUSPARSE_CSRMM_ALG#? I ran into the same problem with CUSPARSE_SPMM_CSR_ALG1 in this code.
The compiler also says that cusparseSpMM_preprocess has not been explicitly declared. What should I do?

bleback · August 17, 2021, 5:04pm

It looks like you are using some CUDA 11.2 and later features that we don’t have in our cusparse module yet. I will enter a bug to get the Fortran module updated.

Topic		Replies	Views
cuSPARSE Library with OpenACC data Directives: cusparseDnVecGetValues not resolvable Legacy PGI Compilers cuda	6	812	October 11, 2021
cuSPARSE generic procedure could not be resolved NVFORTRAN-S-0155 nvc, nvc++ and nvfortran cuda	9	811	November 22, 2021
Fortran CUSPARSE Bindings CUDA Programming and Performance	1	1974	May 22, 2012
cuSPARSE mixed precision throws error GPU-Accelerated Libraries cusparse	2	40	July 31, 2024
Problem of two large sparse matrices multiplication in cuParse CUDA Programming and Performance	6	3692	November 21, 2016
CUSPARSE_STATUS_INVALID_VALUE when using cusparseSpMM GPU-Accelerated Libraries	3	2290	July 14, 2019
cusparseScsrmv transpose mode is not working CUDA Programming and Performance	17	1491	July 9, 2018
Using cusparseDgtsv2_nopivot() with OpenACC in Fortran code nvc, nvc++ and nvfortran cuda	6	422	November 13, 2023
Some questions for the function "cusparseDcsrsv2_solve" nvc, nvc++ and nvfortran cuda	12	55	August 9, 2024
Problems in the use of cusparseSpGEMM in CUDA Fortran GPU-Accelerated Libraries cusparse	16	1388	October 25, 2023