Problem with cusparseDcsrsv_solve() function

Hi all!

I have tried using cusparseDcsrsv_solve() to solve a matrix-vector equation, with the matrix stored in CSR (Compressed Sparse Row) format. If I set the matrix type to CUSPARSE_MATRIX_TYPE_TRIANGULAR, cusparseDcsrsv_solve() returns status = 0 (CUSPARSE_STATUS_SUCCESS) but the output csr_X1 is {0.0, 0.0, 0.0}; if I set the matrix type to CUSPARSE_MATRIX_TYPE_GENERAL, the status becomes 5 (CUSPARSE_STATUS_MAPPING_ERROR, I suppose). Please help me out with this.
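(For reference, a minimal sketch of a helper that turns a raw cusparseStatus_t value into its enum name, so the printed status numbers are readable. The enum names are the standard ones from cusparse.h; the helper itself is only illustrative.)

// Sketch: map a cusparseStatus_t to its enum name for readable prints.
#include <cusparse.h>

static const char* statusName(cusparseStatus_t s)
{
    switch (s) {
    case CUSPARSE_STATUS_SUCCESS:                   return "CUSPARSE_STATUS_SUCCESS";
    case CUSPARSE_STATUS_NOT_INITIALIZED:           return "CUSPARSE_STATUS_NOT_INITIALIZED";
    case CUSPARSE_STATUS_ALLOC_FAILED:              return "CUSPARSE_STATUS_ALLOC_FAILED";
    case CUSPARSE_STATUS_INVALID_VALUE:             return "CUSPARSE_STATUS_INVALID_VALUE";
    case CUSPARSE_STATUS_ARCH_MISMATCH:             return "CUSPARSE_STATUS_ARCH_MISMATCH";
    case CUSPARSE_STATUS_MAPPING_ERROR:             return "CUSPARSE_STATUS_MAPPING_ERROR";
    case CUSPARSE_STATUS_EXECUTION_FAILED:          return "CUSPARSE_STATUS_EXECUTION_FAILED";
    case CUSPARSE_STATUS_INTERNAL_ERROR:            return "CUSPARSE_STATUS_INTERNAL_ERROR";
    case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED: return "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
    default:                                        return "unknown cusparseStatus_t";
    }
}
// e.g. printf("Solve status: %s\n", statusName(status));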
A simple matrix I have considered is:
3.0 0.0 0.0
4.0 1.0 0.0
0.0 3.0 2.0
The right hand side vector is:
3.0
6.0
10.0

The matrix when converted to CSR format will be:
Value-- 3.0 4.0 1.0 3.0 2.0
Column- 0 0 1 1 2
Ptr---- 0 1 3 5
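(For completeness, a quick host-side sketch of how those three arrays can be built from the row-major dense matrix; the function name and signature are just for illustration. For the 3x3 matrix above it reproduces exactly the Value/Column/Ptr arrays listed.)

// Sketch: build zero-based CSR arrays (value / column index / row pointer)
// from a small row-major dense n x n matrix. Illustrative only.
void dense_to_csr(const double *A, int n,
                  double *val, int *colInd, int *rowPtr)
{
    int nnz = 0;
    for (int i = 0; i < n; ++i) {
        rowPtr[i] = nnz;                 // index of first nonzero of row i
        for (int j = 0; j < n; ++j) {
            if (A[i*n + j] != 0.0) {
                val[nnz]    = A[i*n + j];
                colInd[nnz] = j;         // zero-based column index
                ++nnz;
            }
        }
    }
    rowPtr[n] = nnz;                     // total number of nonzeros
}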

The following is the code for the same.

void fun1()
{
  --------
  --------
  --------
  int espmat1=5,neq1=3; //espmat1 is equivalent of nnz (number of non-zero elements) in standard definitions
  double csr_MATGLB1[] = {3.0,4.0,1.0,3.0,2.0};
  int csr_colIND1[] = {0,0,1,1,2};
  int csr_rowPTR1[] = {0,1,3,5};
  double csr_X1[neq1];
  double csr_R1[] = {3.0,6.0,10.0};
  
  
  getSolver(neq1,espmat1,csr_MATGLB1,csr_colIND1,csr_rowPTR1,csr_X1,csr_R1,ret_csr_MATGLB1,ret_csr_colIND1,ret_csr_rowPTR1);

  v_print(csr_X1,3,"X1 solved"); 
  --------------
  --------------
}

cudaError_t getSolver(int neq,int espmat,double *matrix,int *index,int *ptr,double *Xvec,double *Rvec,double *retmat,int *retind,int *retptr)
{
	cudaError_t cudaStatus;

	double *dev_csr_MATGLB=NULL,*dev_X=NULL,*dev_RGLB=NULL;
	int *dev_csr_rowPTR=NULL,*dev_csr_colINDEX=NULL;
	

	cudaMalloc(&dev_csr_MATGLB,espmat*sizeof(double));
	cudaMemcpy(dev_csr_MATGLB,matrix,espmat*sizeof(double),cudaMemcpyHostToDevice);
	cudaStatus = cudaGetLastError();
        if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "1---kernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
        return cudaStatus;
        }


	cudaMalloc(&dev_csr_colINDEX,espmat*sizeof(int));
	cudaMemcpy(dev_csr_colINDEX,index,espmat*sizeof(int),cudaMemcpyHostToDevice);
	cudaStatus = cudaGetLastError();
        if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "2---kernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
        return cudaStatus;
        }


	cudaMalloc(&dev_csr_rowPTR,(neq+1)*sizeof(int));
	cudaMemcpy(dev_csr_rowPTR,ptr,(neq+1)*sizeof(int),cudaMemcpyHostToDevice);
	cudaStatus = cudaGetLastError();
        if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "3---kernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
        return cudaStatus;
        }


	cudaMalloc(&dev_X,neq*sizeof(double));


	cudaMalloc(&dev_RGLB,neq*sizeof(double));
	cudaMemcpy(dev_RGLB,Rvec,neq*sizeof(double),cudaMemcpyHostToDevice);
	cudaStatus = cudaGetLastError();
        if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "4---kernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
        return cudaStatus;
        }

	printf("\n****************SUCCESS Solver part Initialised**************************\n");


	cusparseStatus_t status;
        cusparseHandle_t handle=0;
        cusparseMatDescr_t descr=0;
	cusparseSolveAnalysisInfo_t info=0;
	cusparseOperation_t trans = CUSPARSE_OPERATION_NON_TRANSPOSE;
	double alpha = 1.00;

	status= cusparseCreate(&handle); 
	if (status != CUSPARSE_STATUS_SUCCESS) 
	{ 
		printf("CUSPARSE Library initialization failed\n");  
	} 
	
	// create and setup matrix descriptor 
	status= cusparseCreateMatDescr(&descr); 
	if (status != CUSPARSE_STATUS_SUCCESS) 
	{ 
		printf("Matrix descriptor initialization failed\n"); 
	} 
	
	status = cusparseCreateSolveAnalysisInfo(&info);
	

	if (status != CUSPARSE_STATUS_SUCCESS) 
	{ 
		printf("Solve analysis info initialization failed\n");  
	} 

	cusparseSetMatType(descr,CUSPARSE_MATRIX_TYPE_TRIANGULAR); 
	cusparseSetMatIndexBase(descr,CUSPARSE_INDEX_BASE_ZERO); 
	

	//SOLVER
	status = cusparseDcsrsv_solve(handle,trans,neq,&alpha,descr,dev_csr_MATGLB,dev_csr_rowPTR,dev_csr_colINDEX,info,dev_RGLB,dev_X); 
	printf("Solve status: %d\n",status);
	

	cudaMemcpy(Xvec,dev_X,neq*sizeof(double),cudaMemcpyDeviceToHost);
	cudaStatus = cudaGetLastError();
        if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "5---kernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
        return cudaStatus;
        }
	

	printf("\n****************SUCCESS Solver part Solved**************************\n");
	

	cudaStatus = cudaDeviceSynchronize();
        if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel!\n", cudaStatus);
        return cudaStatus;
        } 
	

	cudaFree(dev_csr_MATGLB);
	cudaFree(dev_X);
	cudaFree(dev_RGLB);
	cudaFree(dev_csr_rowPTR);
	cudaFree(dev_csr_colINDEX);


}

Thanks!

Your code is missing a proper analysis phase.

Please read the cusparse documentation:

http://docs.nvidia.com/cuda/cusparse/index.html#cusparse-lt-t-gt-csrsvsolve

“This function performs the solve phase of the solution of a sparse triangular linear system”

and

“info structure with information collected during the analysis phase (that should have been passed to the solve phase unchanged).”
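In other words, cusparseDcsrsv_analysis() has to be called first, on the same matrix, descriptor, and info object that are then passed to cusparseDcsrsv_solve(). Using the variable names from your code, the required call order is roughly:

// Sketch of the required call order (same handle/descr/info for both calls):
status = cusparseDcsrsv_analysis(handle, trans, neq, espmat, descr,
                                 dev_csr_MATGLB, dev_csr_rowPTR, dev_csr_colINDEX, info);
status = cusparseDcsrsv_solve(handle, trans, neq, &alpha, descr,
                              dev_csr_MATGLB, dev_csr_rowPTR, dev_csr_colINDEX, info,
                              dev_RGLB, dev_X);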

Here is a complete code with that issue fixed:

$ cat t852.cu
#include <stdio.h>
#include <cusparse.h>

cudaError_t getSolver(int neq,int espmat,double *matrix,int *index,int *ptr,double *Xvec,double *Rvec,double *retmat,int *retind,int *retptr)
{
        cudaError_t cudaStatus;

        double *dev_csr_MATGLB=NULL,*dev_X=NULL,*dev_RGLB=NULL;
        int *dev_csr_rowPTR=NULL,*dev_csr_colINDEX=NULL;

        cudaMalloc(&dev_csr_MATGLB,espmat*sizeof(double));
        cudaMemcpy(dev_csr_MATGLB,matrix,espmat*sizeof(double),cudaMemcpyHostToDevice);
        cudaStatus = cudaGetLastError();
        if (cudaStatus != cudaSuccess) {
                fprintf(stderr, "1---kernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
                return cudaStatus;
        }

        cudaMalloc(&dev_csr_colINDEX,espmat*sizeof(int));
        cudaMemcpy(dev_csr_colINDEX,index,espmat*sizeof(int),cudaMemcpyHostToDevice);
        cudaStatus = cudaGetLastError();
        if (cudaStatus != cudaSuccess) {
                fprintf(stderr, "2---kernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
                return cudaStatus;
        }

        cudaMalloc(&dev_csr_rowPTR,(neq+1)*sizeof(int));
        cudaMemcpy(dev_csr_rowPTR,ptr,(neq+1)*sizeof(int),cudaMemcpyHostToDevice);
        cudaStatus = cudaGetLastError();
        if (cudaStatus != cudaSuccess) {
                fprintf(stderr, "3---kernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
                return cudaStatus;
        }

        cudaMalloc(&dev_X,neq*sizeof(double));

        cudaMalloc(&dev_RGLB,neq*sizeof(double));
        cudaMemcpy(dev_RGLB,Rvec,neq*sizeof(double),cudaMemcpyHostToDevice);
        cudaStatus = cudaGetLastError();
        if (cudaStatus != cudaSuccess) {
                fprintf(stderr, "4---kernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
                return cudaStatus;
        }

        printf("\n****************SUCCESS Solver part Initialised**************************\n");

        cusparseStatus_t status;
        cusparseHandle_t handle=0;
        cusparseMatDescr_t descr=0;
        cusparseSolveAnalysisInfo_t info=0;
        cusparseOperation_t trans = CUSPARSE_OPERATION_NON_TRANSPOSE;
        double alpha = 1.00;

        status= cusparseCreate(&handle);
        if (status != CUSPARSE_STATUS_SUCCESS)
        {
                printf("CUSPARSE Library initialization failed\n");
        }

        // create and setup matrix descriptor
        status= cusparseCreateMatDescr(&descr);
        if (status != CUSPARSE_STATUS_SUCCESS)
        {
                printf("Matrix descriptor initialization failed\n");
        }

        status = cusparseCreateSolveAnalysisInfo(&info);
        if (status != CUSPARSE_STATUS_SUCCESS)
        {
                printf("Solve analysis info initialization failed\n");
        }

        cusparseSetMatType(descr,CUSPARSE_MATRIX_TYPE_GENERAL);
        cusparseSetMatIndexBase(descr,CUSPARSE_INDEX_BASE_ZERO);

        //ANALYSIS
        status = cusparseDcsrsv_analysis(handle, trans, neq, espmat, descr, dev_csr_MATGLB, dev_csr_rowPTR, dev_csr_colINDEX, info);
        printf("Analysis status: %d\n",status);

        //SOLVER
        status = cusparseDcsrsv_solve(handle,trans,neq,&alpha,descr,dev_csr_MATGLB,dev_csr_rowPTR,dev_csr_colINDEX,info,dev_RGLB,dev_X);
        printf("Solve status: %d\n",status);

        cudaMemcpy(Xvec,dev_X,neq*sizeof(double),cudaMemcpyDeviceToHost);
        cudaStatus = cudaGetLastError();
        if (cudaStatus != cudaSuccess) {
                fprintf(stderr, "5---kernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
                return cudaStatus;
        }

        printf("\n****************SUCCESS Solver part Solved**************************\n");

        cudaStatus = cudaDeviceSynchronize();
        if (cudaStatus != cudaSuccess) {
                fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel!\n", cudaStatus);
                return cudaStatus;
        }

        cudaFree(dev_csr_MATGLB);
        cudaFree(dev_X);
        cudaFree(dev_RGLB);
        cudaFree(dev_csr_rowPTR);
        cudaFree(dev_csr_colINDEX);

        return cudaStatus;
}

void fun1()
{
  int espmat1=5,neq1=3; //espmat1 is equivalent of nnz (number of non-zero elements) in standard definitions
  double csr_MATGLB1[] = {3.0,4.0,1.0,3.0,2.0};
  int csr_colIND1[] = {0,0,1,1,2};
  int csr_rowPTR1[] = {0,1,3,5};
  double csr_X1[neq1];
  double csr_R1[] = {3.0,6.0,10.0};
  double ret_csr_MATGLB1[] = {3.0,4.0,1.0,3.0,2.0};
  int ret_csr_colIND1[] = {0,0,1,1,2};
  int ret_csr_rowPTR1[] = {0,1,3,5};

getSolver(neq1,espmat1,csr_MATGLB1,csr_colIND1,csr_rowPTR1,csr_X1,csr_R1,ret_csr_MATGLB1,ret_csr_colIND1,ret_csr_rowPTR1);

  // printf(csr_X1,3,"X1 solved");
}

int main(){

  fun1();
}
$ nvcc -o t852 t852.cu -lcusparse
$ ./t852

****************SUCCESS Solver part Initialised**************************
Analysis status: 0
Solve status: 0

****************SUCCESS Solver part Solved**************************
$
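As a quick sanity check (not part of the posted program), the expected forward-substitution result for this lower-triangular system is x = {1.0, 2.0, 2.0}. A small host-side check of the returned x against the CSR data could look like this (illustrative names only):

// Sketch: verify the returned x by recomputing A*x on the host from the CSR
// arrays and comparing against the right-hand side b.
#include <math.h>
int check_csr_solution(int n, const double *val, const int *colInd,
                       const int *rowPtr, const double *x, const double *b)
{
    for (int i = 0; i < n; ++i) {
        double ax = 0.0;
        for (int k = rowPtr[i]; k < rowPtr[i+1]; ++k)
            ax += val[k] * x[colInd[k]];
        if (fabs(ax - b[i]) > 1e-10) return 0;   // mismatch in row i
    }
    return 1;   // A*x matches b within tolerance
}
// For the example above, x = {1.0, 2.0, 2.0} satisfies the check.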

Note that after the analysis phase, if the matrix is not changed, the solve phase may be repeated with different RHS vectors without rerunning the analysis phase. Apparently the empty info object created by cusparseCreateSolveAnalysisInfo() is enough to avoid an error with CUSPARSE_MATRIX_TYPE_TRIANGULAR but not with CUSPARSE_MATRIX_TYPE_GENERAL (hence the CUSPARSE_STATUS_MAPPING_ERROR you saw). Either way, even if the call appears to succeed for this particular example with CUSPARSE_MATRIX_TYPE_TRIANGULAR, the analysis phase should not be skipped.
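A minimal sketch of that reuse pattern, assuming a second right-hand side dev_RGLB2 and a second output buffer dev_X2 (both hypothetical device arrays of length neq) and everything else set up as in the code above:

// Sketch: run the analysis once, then reuse it for several right-hand sides.
status = cusparseDcsrsv_analysis(handle, trans, neq, espmat, descr,
                                 dev_csr_MATGLB, dev_csr_rowPTR, dev_csr_colINDEX, info);

// First RHS
status = cusparseDcsrsv_solve(handle, trans, neq, &alpha, descr,
                              dev_csr_MATGLB, dev_csr_rowPTR, dev_csr_colINDEX, info,
                              dev_RGLB, dev_X);

// Second RHS: no new analysis needed as long as the matrix is unchanged
status = cusparseDcsrsv_solve(handle, trans, neq, &alpha, descr,
                              dev_csr_MATGLB, dev_csr_rowPTR, dev_csr_colINDEX, info,
                              dev_RGLB2, dev_X2);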

Thank you very much for fixing the issue.