Hi,
It is a matrix matrix multiplication using OpenACC data directives and cuSPARSE libraries. It consists of 3 parts: a subroutine, a main code, and a Makefile.
The main code:
PROGRAM MAIN
IMPLICIT NONE
! FORTRAN arrays start at 1
INTEGER N ! The number of rows of Y (the same as the columns of the dense A)
INTEGER P ! The number of columns of Y
INTEGER NNA ! The number of nonzero elements in the matrix
DOUBLE PRECISION, DIMENSION(5,3) :: Y ! The matrix multipled by the CSR matrix
DOUBLE PRECISION, DIMENSION(11) :: A ! Vector that holds all nonzero values
INTEGER, DIMENSION(6) :: IA ! CSR row indices
INTEGER, DIMENSION(11) :: JA ! CSR column indices
N = 5 ! 1,2,3,4,5
P = 3 ! 1,2,3
NNA = 11
Y(1,1:P) = (/ 1.0, 2.0, 3.0 /)
Y(2,1:P) = (/ 0.0, 1.0, 5.0 /)
Y(3,1:P) = (/ 1.0, 4.0, 9.0 /)
Y(4,1:P) = (/ 1.0, 1.0, 0.0 /)
Y(5,1:P) = (/ 0.0, 2.0, 8.0 /)
A = (/ 1.0, 4.0, 2.0, 3.0, 5.0, 7.0, 8.0, 9.0, 6.0, 8.0, 3.0/)
IA = (/ 1, 3, 5, 8, 10, 12 /)
JA = (/ 1, 2, 2, 3, 1, 4, 5, 3, 5, 1, 4 /)
CALL CSRSYMMV(N,P,NNA,IA,JA,A,Y)
END PROGRAM
The subroutine:
SUBROUTINE CSRSYMMV(N,P,NNA,IA,JA,A,Y)
USE openacc
USE cusparse
!$ USE OMP_LIB
IMPLICIT NONE
type(cusparseHandle) :: h
type(cusparseMatDescr) :: descrA
integer(8) :: bsize
type(c_devptr) :: buffer
type(cusparseSpMatDescr) :: matA
type(cusparseDnMatDescr) :: matY, matV
INTEGER :: status
REAL(8) :: alpha, beta
INTEGER :: I
INTEGER N
INTEGER P
INTEGER NNA
INTEGER IA(N+1)
INTEGER JA(NNA)
DOUBLE PRECISION A(NNA)
DOUBLE PRECISION Y(N,P)
DOUBLE PRECISION V(N,P)
!initialize CUSPARSE and matrix descriptor
status = cusparseCreate(h)
if (status /= CUSPARSE_STATUS_SUCCESS) &
write(*,*) 'cusparseCreate error: ', status
status = cusparseCreateMatDescr(descrA)
status = cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL)
status = cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ONE)
status = cusparseSetStream(h, acc_get_cuda_stream(acc_async_sync))
alpha = 1.0
beta = 0.0
!$acc data copyin(IA, JA, A, Y) create(V) copyout(V)
!$acc host_data use_device(A,IA,JA,Y,V)
! -------------------------------------------------------------------------------
status = cusparseCreateDnMat(matY, N, P, N, Y, CUDA_R_64F, CUSPARSE_ORDER_COL)
IF (status.ne.CUSPARSE_STATUS_SUCCESS) PRINT *,"cusparseCreateDnMat: ",status
status = cusparseCreateDnMat(matV, N, P, N, V, CUDA_R_64F, CUSPARSE_ORDER_COL)
IF (status.ne.CUSPARSE_STATUS_SUCCESS) PRINT *,"cusparseCreateDnMat: ",status
! -------------------------------------------------------------------------------
status = cusparseCreateCsr(matA, N, N, NNA, IA, JA, A, &
CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, &
CUSPARSE_INDEX_BASE_ONE, CUDA_R_64F)
IF (status.ne.CUSPARSE_STATUS_SUCCESS) print *,"cusparseCreateCsr: ",status
! -------------------------------------------------------------------------------
status = cusparseSpMM_buffersize(h, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, &
alpha, matA, matY, beta, matV, CUDA_R_64F, CUSPARSE_SPMM_CSR_ALG3, bsize)
IF (status.ne.CUSPARSE_STATUS_SUCCESS) print *,"cusparseSpMM_buffersize: ",status
print *,"SpMM buffersize required: ",bsize
IF (bsize .GT. 0) buffer = acc_malloc(bsize)
! -------------------------------------------------------------------------------
status = cusparseSpMM_preprocess(h, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, &
alpha, matA, matY, beta, matV, CUDA_R_64F, CUSPARSE_SPMM_CSR_ALG3, buffer)
! -------------------------------------------------------------------------------
status = cusparseSpMM(h, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, &
alpha, matA, matY, beta, matV, CUDA_R_64F, CUSPARSE_SPMM_CSR_ALG3, buffer)
IF (status.ne.CUSPARSE_STATUS_SUCCESS) PRINT *,"cusparseSpMM: ",status
! -------------------------------------------------------------------------------
!status = cusparseDnVecGetValues(vecV, V(:,K))
!IF (status.ne.CUSPARSE_STATUS_SUCCESS) PRINT *,"cusparseDnVecGetValues: ",status
IF (bsize.gt.0) CALL acc_free(buffer)
!$acc end host_data
!$acc end data
DO I= 1,3
write(*,'(5(1x,f7.2))') V(:,I)
END DO
END SUBROUTINE CSRSYMMV
The Makefile:
################################################################################
Copyright (c) 2017, NVIDIA Corporation. All rights reserved.
Please refer to the NVIDIA end user license agreement (EULA) associated
with this source code for terms and conditions that govern your use of
this software. Any use, reproduction, disclosure, or distribution of
this software and related documentation outside the terms of the EULA
is strictly prohibited.
################################################################################
TEST = document
DAD = cudamat
FC = nvfortran
EXE = exe
FCFLAGS = -Mpreprocess -fast -acc=gpu -cuda -Mcudalib=cusparse
all: build run verify
build: $(TEST).f90 $(DAD).f90
$(FC) $(FCFLAGS) -o $(TEST).$(EXE) $(DAD).f90 $<
run: $(TEST).$(EXE)
$(RUN) ./$(TEST).$(EXE)
verify:
clean:
@echo ‘Mustafying up…’
@rm -rf *.$(EXE) *.dwf *.pdb *.mod prof
When I execute the code without the cusparseSpMM_buffersize directive and using the CUSPARSE_CSRMM_ALG1 algorithm, it does not give any error. When I add them, it gives the errors:
nvfortran -Mpreprocess -fast -acc=gpu -cuda -Mcudalib=cusparse -o document.exe cudamat.f90 document.f90
cudamat.f90:
NVFORTRAN-S-0155-Could not resolve generic procedure cusparsespmm_buffersize (cudamat.f90: 66)
NVFORTRAN-S-0155-Could not resolve generic procedure cusparsespmm (cudamat.f90: 80)
NVFORTRAN-S-0038-Symbol, cusparse_spmm_csr_alg3, has not been explicitly declared (cudamat.f90)
NVFORTRAN-S-0038-Symbol, cusparsespmm_preprocess, has not been explicitly declared (cudamat.f90)
0 inform, 0 warnings, 4 severes, 0 fatal for csrsymmv
Cannot we use CUSPARSE_SPMM_CSR_ALG# instead of CUSPARSE_CSRMM_ALG# because I faced with the same problem with CUSPARSE_SPMM_CSR_ALG1 in this code?
Also it says cusparseSpMM_preprocess has not been explicitly declared. What should I do?