Problems in the use of cusparseSpGEMM in OpenACC

Hello,I tried to call cusparseSpGEMM and I followed this example and wrote a code using openacc (Problems in the use of cusparseSpGEMM in CUDA Fortran).
The following error occurred: Segmentation fault (core dumped)
I don’t know how to solve it. Looking forward to your answers. Thank you very much!
Here is my code:
The computational environment is V100 16GB with CUDA 12.0

program spgemm_text
    use openacc
	use cusparse
	implicit none
	
	type(cusparseHandle) :: handle
	type(cusparseSpMatDescr) :: A_matrix,B_matrix,C_matrix
	type(cusparseSpGEMMDescr) :: SpGEMMDescr
	
	integer(kind=8):: istat,i
	integer(kind=8):: A_num_rows = 4
	integer(kind=8):: A_num_cols = 4
	integer(kind=8):: A_nnz = 9
	integer(kind=8):: B_num_rows = 4 
	integer(kind=8):: B_num_cols = 4
	integer(kind=8):: B_nnz = 8
	integer(kind=8):: C_num_rows = 4 
	integer(kind=8):: C_num_cols = 4
	integer(kind=8):: C_nnz
	
	integer(kind=8), dimension(:), allocatable :: A_csrOffsets ,A_columns
    integer(kind=8), dimension(:), allocatable :: B_csrOffsets ,B_columns
	integer(kind=8), dimension(:), allocatable :: C_csrOffsets ,C_columns
    real(kind=8), dimension(:), allocatable :: A_values, B_values, C_values
	real(kind=8)::alpha=1.0,beta=0.0
	
	integer(kind=8) :: bufferSize1
	integer(1), pointer :: buffer1(:)
	integer(kind=8) :: bufferSize2
	integer(1), pointer :: buffer2(:)
	
	ALLOCATE(A_csrOffsets(A_num_rows + 1), A_columns(A_nnz), A_values(A_nnz))
	ALLOCATE(B_csrOffsets(B_num_rows + 1), B_columns(B_nnz), B_values(B_nnz))
	
	A_csrOffsets = [1, 4, 5, 8, 10]
    A_columns = [1, 3, 4, 2, 1, 3, 4, 2, 4]
    A_values = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]
	B_csrOffsets = [1, 3, 5, 8, 10]
    B_columns = [1, 4, 2, 4, 1, 2, 3, 2]
    B_values = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]	
	C_columns=0
	C_csrOffsets=0
	C_values=0.0
	
	!$acc data copyin(A_csrOffsets, B_csrOffsets, A_columns, B_columns, A_values, B_values, C_csrOffsets, C_columns, C_values)
	istat = cusparseCreate(handle)
	istat=cusparseSpGEMM_createDescr(SpGEMMDescr)
		
	istat=cusparseCreateCsr(A_matrix, A_num_rows, A_num_cols, A_nnz, A_csrOffsets, A_columns,A_values,&
	                        CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ONE, CUDA_R_64F)
	istat=cusparseCreateCsr(B_matrix, B_num_rows, B_num_cols, B_nnz, B_csrOffsets, B_columns,B_values,&
	                        CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ONE, CUDA_R_64F)
	istat=cusparseCreateCsr(C_matrix, C_num_rows, C_num_cols, 0,& 
	                        null(),null(),null(),&
							!C_csrOffsets, C_columns,C_values,&
                            CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ONE, CUDA_R_64F)
	if(istat/=CUSPARSE_STATUS_SUCCESS) print *, 'cusparseCreateCsr error: ', istat
	nullify(buffer1)
	istat=cusparseSpGEMM_workEstimation(handle,&
                                        CUSPARSE_OPERATION_NON_TRANSPOSE,CUSPARSE_OPERATION_NON_TRANSPOSE,&
                                        alpha,A_matrix,B_matrix,beta,C_matrix,&
                                        CUDA_R_64F,CUSPARSE_SPGEMM_DEFAULT,&
                                        SpGEMMDescr,bufferSize1,buffer1)
										!SpGEMMDescr,bufferSize1,null())
	if(istat/=CUSPARSE_STATUS_SUCCESS) print *, 'cusparseSpGEMM_workEstimation error: ', istat
	ALLOCATE(buffer1(bufferSize1))
	!$acc data create(buffer1)
	istat=cusparseSpGEMM_workEstimation(handle,&
                                        CUSPARSE_OPERATION_NON_TRANSPOSE,CUSPARSE_OPERATION_NON_TRANSPOSE,&
                                        alpha,A_matrix,B_matrix,beta,C_matrix,&
                                        CUDA_R_64F,CUSPARSE_SPGEMM_DEFAULT,&
                                        SpGEMMDescr,bufferSize1,buffer1)
    if(istat/=CUSPARSE_STATUS_SUCCESS) print *, 'cusparseSpGEMM_workEstimation error: ', istat
	nullify(buffer2)
    istat=cusparseSpGEMM_compute(handle,&
                                CUSPARSE_OPERATION_NON_TRANSPOSE,CUSPARSE_OPERATION_NON_TRANSPOSE,&
                                alpha,A_matrix,B_matrix,beta,C_matrix,&
                                CUDA_R_64F,CUSPARSE_SPGEMM_DEFAULT,&
                                SpGEMMDescr,bufferSize2,buffer2)
								!SpGEMMDescr,bufferSize2,null())
	ALLOCATE(buffer2(bufferSize2))
	!$acc data create(buffer2)
    istat=cusparseSpGEMM_compute(handle,&
                                CUSPARSE_OPERATION_NON_TRANSPOSE,CUSPARSE_OPERATION_NON_TRANSPOSE,&
                                alpha,A_matrix,B_matrix,beta,C_matrix,&
                                CUDA_R_64F,CUSPARSE_SPGEMM_DEFAULT,&
                                SpGEMMDescr,bufferSize2,buffer2)
	if(istat/=CUSPARSE_STATUS_SUCCESS) print *, 'cusparseSpGEMM_workEstimation error: ', istat
	
	istat=cusparseSpMatGetSize(C_matrix,C_num_rows,C_num_cols,C_nnz)
    allocate(C_csrOffsets(C_num_rows+1),C_columns(C_nnz),C_values(C_nnz))
	
	istat=cusparseCsrSetPointers(C_matrix, C_csrOffsets, C_columns, C_values)
    istat=cusparseSpGEMM_copy(handle,&
                             CUSPARSE_OPERATION_NON_TRANSPOSE,CUSPARSE_OPERATION_NON_TRANSPOSE,&
                             alpha,A_matrix,B_matrix,beta,C_matrix,&
                             CUDA_R_64F,CUSPARSE_SPGEMM_DEFAULT,SpGEMMDescr)
	
	deallocate(buffer1(bufferSize1))
    deallocate(buffer2(bufferSize2))
	istat=cusparseSpGEMM_destroyDescr(SpGEMMDescr)
    istat=cusparseDestroySpMat(A_matrix)
    istat=cusparseDestroySpMat(B_matrix)
    istat=cusparseDestroySpMat(C_matrix)
    istat=cusparseDestroy(handle)
	
	!$acc update host(C_csrOffsets,C_columns,C_values)
    print*,'C_csrOffsets:',C_csrOffsets
	print*,'C_columns:',C_columns
    print*,'C_values:',C_values
	!$acc end data
	!$acc end data	
    !$acc end data
end program spgemm_text

Hi @lxyonline2887 . Can you check at which step that the segmentation fault happens?

Hi, I’ve tried to inspect this code piecewise. The problem may arise in the initialization of the three arrays defining the matrix C, but because the size of two arrays in the vector C is unknown, it is not clear to me how to allocate their memory on the GPU.

Do you mean these allocations? At this point, the size of C is known.