As the cusparse function was correct, I focused and checked again my matrix and I fixed a bug so now the incomplete cholesky looks correct (no more indefinite values).
Also, in the documentation it is written that the dev_rowPtrR has m+1 which contains the beginning of all row and the end of the last one+1. My pointer contained only the beginning of each row, I changed it to have the last element.
But I still have some problem with my residual. I will check it again.
This is the rest of the conjugate gradient (I didn’t follow exactly what Maxim Naumov did):
// create the info and analyse the lower and upper triangular factors
cusparseSolveAnalysisInfo_t inforRt = 0;
cusparseSolveAnalysisInfo_t inforR = 0;
cusparseCreateSolveAnalysisInfo(&inforRt ) ;
cusparseCreateSolveAnalysisInfo(&inforR ) ;
cusparseStatus=cusparseScsrsv_analysis(cusparseHandle , CUSPARSE_OPERATION_TRANSPOSE ,N , nnz_Symmetric, descrR , dev_valR , dev_row_ptrR , dev_colIndR , inforRt);
if(cusparseStatus!=CUSPARSE_STATUS_SUCCESS)
fprintf(stderr, "cusparseScsrsv_analysis inforRt returned error code %d !", cusparseStatus);
cusparseStatus=cusparseScsrsv_analysis(cusparseHandle , CUSPARSE_OPERATION_NON_TRANSPOSE ,N , nnz_Symmetric, descrR , dev_valR , dev_row_ptrR , dev_colIndR , inforR);
if(cusparseStatus!=CUSPARSE_STATUS_SUCCESS)
fprintf(stderr, "cusparseScsrsv_analysis inforR returned error code %d !", cusparseStatus);
// Ax = A*x
cusparseStatus = cusparseScsrmv(cusparseHandle,CUSPARSE_OPERATION_NON_TRANSPOSE, N, N, nnz, &alpha, descr, dev_val, dev_row_ptr, dev_colInd, dev_alphaLaplacian, &beta, dev_Ax);
// r= r - Ax
cublasStatus = cublasSaxpy(cublasHandle, N, &alpham1, dev_Ax, 1, d_r, 1);
// z_{0} = C^{-1}*r_{0}
cusparseScsrsv_solve(cusparseHandle, CUSPARSE_OPERATION_TRANSPOSE , N, &alpha, descrR, dev_valR, dev_row_ptrR, dev_colIndR, inforRt, d_r, d_y);
cusparseScsrsv_solve(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE , N, &alpha, descrR, dev_valR, dev_row_ptrR, dev_colIndR, inforR, d_y, dev_z);
cublasStatus = cublasSdot(cublasHandle, N, d_r, 1, dev_z, 1, &r1);
k = 1;
printf("residual = %e", sqrt(r1));
while (r1 > tol*tol && k <= 20 )//max_iter)
{
if (k > 1) {
//b_{i} = (r_{i+1}, z_{i+1}/(r_{i} , z_{i})
b = r1 / r0;
// p_{i+1}= z_{i+1} + b_{i}* p_{i}
cublasStatus = cublasSscal(cublasHandle, N, &b, dev_p, 1);
cublasStatus = cublasSaxpy(cublasHandle, N, &alpha, dev_z, 1, dev_p, 1);
} else {
// p = z
cublasStatus = cublasScopy(cublasHandle, N, dev_z, 1, dev_p, 1);
}
// Ax_{i} = A*p^{T}_{i}
cusparseScsrmv(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, N, N, nnz, &alpha, descr, dev_val, dev_row_ptr, dev_colInd, dev_p, &beta, dev_Ax);
// Ax_{i} = p_{i}*Ax_{i}
cublasStatus = cublasSdot(cublasHandle, N, dev_p, 1, dev_Ax, 1, &dot);
//a = (r_{i} , z_{i})/(Ap_{i} , p_{i})
a = r1 / dot;
// x_{i+1}= x_{i} + a*p_{i}
cublasStatus = cublasSaxpy(cublasHandle, N, &a, dev_p, 1, dev_alphaLaplacian, 1);
na = -a;
// r_{i+1} = r_{i} - a *Ax_{i}
cublasStatus = cublasSaxpy(cublasHandle, N, &na, dev_Ax, 1, d_r, 1);
r0 = r1;
// z_{i+1} = C^{-1}*r_{i+1}
cusparseScsrsv_solve(cusparseHandle, CUSPARSE_OPERATION_TRANSPOSE , N, &alpha, descrR, dev_valR, dev_row_ptrR, dev_colIndR,inforRt, d_r, d_y);
cusparseScsrsv_solve(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE , N, &alpha, descrR, dev_valR, dev_row_ptrR, dev_colIndR, inforR, d_y, dev_z);
// r1_{i+1} = r_{i+1} * z_{i+1}
cublasStatus = cublasSdot(cublasHandle, N, d_r, 1, dev_z, 1, &r1);
printf("iteration = %3d, residual = %e", k, sqrt(r1));
k++;
}
The matrix (dev_val, dev_row_ptr, dev_colInd) is the same matrix than dev_valR but containing all data. I will try the method of maxim Naumov, maybe it will resolve my problem.
EDIT:
My residual r1 is < 0, so I guess my matrix R is still not correct.