Does the cusparseSpSV CSR have any built-in preconditioner? I am attempting to use cusparseSpSV CSR along with cusparseDcsrilu02, but my code results in NaN. If I do not use cusparseDcsrilu02, I get real values but my code takes much longer. I am trying to convert from using cusparseDcsrsv2_solve and other deprecated functions, but can not find a good guide on how to mesh the new function of cusparseSpSV with a preconditioner.
Can you please report the main API calls that you are using?
Also, cusparseSpSV and cusparseDcsrsv2_solve compute the same thing. It could be that one parameter of cusparseSpSV is not correct or the matrix attributes (cusparseSpMatSetAttribute) are not set in the right way.
This is the part of my code utilizing the calls. I had a working version in 21.3 with cusparseDcsrsv2_solve, but not sure where I am going wrong in this version.
void load_lusol_cusparse2(int* sparse_offsets, int* sparse_col,
double* sparse_val, int nc, int nzero,
double* dense_Y)
{
// Setup cusparse
cusparseCreate(&cusparseHandle);
// Set up M
cusparseCreateMatDescr(&M_described);
cusparseSetMatIndexBase(M_described, CUSPARSE_INDEX_BASE_ONE);
cusparseSetMatType(M_described, CUSPARSE_MATRIX_TYPE_GENERAL);
cusparseCreateCsrilu02Info(&M_analyzed);
// Get algorithm buffer sizes
cusparseDcsrilu02_bufferSize(cusparseHandle, nc, nzero,
M_described, sparse_val, sparse_offsets, sparse_col,
M_analyzed, &Mbuf_size);
// Allocate buffers
cudaMalloc((void**)&Mbuffer, Mbuf_size);
//Analyze M
cusparseDcsrilu02_analysis(cusparseHandle, nc, nzero,
M_described, sparse_val, sparse_offsets, sparse_col,
M_analyzed, M_policy, Mbuffer);
cusparseXcsrilu02_zeroPivot(cusparseHandle,M_analyzed, &struct_zero);
// Create the sparse matrixes
cusparseCreateCsr(&L_SparseA, nc, nc, nzero,
(void*)sparse_offsets, (void*)sparse_col, (void*)sparse_val,
CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
CUSPARSE_INDEX_BASE_ONE, CUDA_R_64F);
cusparseCreateCsr(&U_SparseA, nc, nc, nzero,
(void*)sparse_offsets, (void*)sparse_col, (void*)sparse_val,
CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
CUSPARSE_INDEX_BASE_ONE, CUDA_R_64F);
// Create the dense vectors
cusparseCreateDnVec(&Dense_Y, nc,(void*)dense_Y, CUDA_R_64F);
// Initialize the data structures
cusparseSpSV_createDescr(&L_spsv_described);
cusparseSpSV_createDescr(&U_spsv_described);
// Set filling mode types
cusparseFillMode_t L_fill_mode = CUSPARSE_FILL_MODE_LOWER;
cusparseSpMatSetAttribute(L_SparseA, CUSPARSE_SPMAT_FILL_MODE,
&L_fill_mode, sizeof(L_fill_mode));
cusparseFillMode_t U_fill_mode = CUSPARSE_FILL_MODE_UPPER;
cusparseSpMatSetAttribute(U_SparseA, CUSPARSE_SPMAT_FILL_MODE,
&U_fill_mode, sizeof(U_fill_mode));
// Set diagonal unit types
cusparseDiagType_t L_type_diag = CUSPARSE_DIAG_TYPE_UNIT;
cusparseSpMatSetAttribute(L_SparseA, CUSPARSE_SPMAT_DIAG_TYPE,
&L_type_diag, sizeof(L_type_diag));
cusparseDiagType_t U_type_diag = CUSPARSE_DIAG_TYPE_NON_UNIT;
cusparseSpMatSetAttribute(U_SparseA, CUSPARSE_SPMAT_DIAG_TYPE,
&U_type_diag, sizeof(U_type_diag));
// allocate buffers
cusparseSpSV_bufferSize(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha, L_SparseA, DenseY, DenseY, CUDA_R_64F,
CUSPARSE_SPSV_ALG_DEFAULT, L_spsv_described,&L_SpSv_bufferSize);
cusparseSpSV_bufferSize(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha, U_SparseA, DenseY, DenseY, CUDA_R_64F,
CUSPARSE_SPSV_ALG_DEFAULT, U_spsv_described,&U_SpSv_bufferSize);
cudaMalloc(&L_SpSv_Buffer, L_SpSv_bufferSize);
cudaMalloc(&U_SpSv_Buffer, U_SpSv_bufferSize);
// Analyze the Sparse Matrixes
//Analyze L
cusparseSpSV_analysis(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha, L_SparseA, DenseY, DenseY, CUDA_R_64F,
CUSPARSE_SPSV_ALG_DEFAULT, L_spsv_described, L_SpSv_Buffer);
//Analyze U
cusparseSpSV_analysis(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha, U_SparseA, DenseY, DenseY, CUDA_R_64F,
CUSPARSE_SPSV_ALG_DEFAULT, U_spsv_described, U_SpSv_Buffer);
// Set preconditioner (M=LU)
cusparseDcsrilu02(cusparseHandle, nc, nzero,
M_described, sparse_val, sparse_offsets,
sparse_col, M_analyzed, M_policy, Mbuffer);
cusparseDestroySpMat(L_SparseA);
cusparseDestroySpMat(U_SparseA);
// Create the sparse matrixes
cusparseCreateCsr(&L_SparseA, nr, nc, nzero,
(void*)sparse_offsets, (void*)sparse_col, (void*)sparse_val,
CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
CUSPARSE_INDEX_BASE_ONE, CUDA_R_64F);
cusparseCreateCsr(&U_SparseA, nr, nc, nzero,
(void*)sparse_offsets, (void*)sparse_col, (void*)sparse_val,
CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
CUSPARSE_INDEX_BASE_ONE, CUDA_R_64F);
// Set filling mode types
cusparseSpMatSetAttribute(L_SparseA, CUSPARSE_SPMAT_FILL_MODE,
&L_fill_mode, sizeof(L_fill_mode));
cusparseSpMatSetAttribute(U_SparseA, CUSPARSE_SPMAT_FILL_MODE,
&U_fill_mode, sizeof(U_fill_mode));
// Set diagonal unit types
cusparseSpMatSetAttribute(L_SparseA, CUSPARSE_SPMAT_DIAG_TYPE,
&L_type_diag, sizeof(L_type_diag));
cusparseSpMatSetAttribute(U_SparseA, CUSPARSE_SPMAT_DIAG_TYPE,
&U_type_diag, sizeof(U_type_diag));
cudaDeviceSynchronize();
}
void lusol_cusparse2(double* dense_Y)
{
// dense_Y = values for the dense vector Y and X
// Set values of the dense vector Y
void* temp=(void*)dense_Y;
cusparseDnVecSetValues(DenseY,(void*)dense_Y);
// Forward solve (Lx=y)
cusparseSpSV_solve(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha, L_SparseA, DenseY, DenseY, CUDA_R_64F,
CUSPARSE_SPSV_ALG_DEFAULT, L_spsv_described);
// Backward solve (Uy=x)
cusparseSpSV_solve(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha, U_SparseA, DenseY, DenseY, CUDA_R_64F,
CUSPARSE_SPSV_ALG_DEFAULT, U_spsv_described);
// Get the values of the dense vector into dense_Y
cusparseDnVecGetValues(DenseY,&temp);
dense_Y = (double*)temp;
cudaDeviceSynchronize();
}
- Miko
The workflow seems correct. Can you please provide a minimal working code and the input matrix? in this way, we can better identify the problem
Thank you for the help!
I was most concerned if my workflow was correct. Since it appears to be, I now have a better idea of where to find my potential error. Between my two versions, I had to make a small change to my main code, which in all likelihood is the error. I had to pass in variables in a slightly different way, and this may be the culprit. I will post back if it is not the case.
- Miko
We did a further investigation. cusparseSpSV does not support in-place computation d_X == d_Y (DenseYin your case).
cusparseSpSV_solve(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha, L_SparseA, DenseY, DenseY, CUDA_R_64F,
CUSPARSE_SPSV_ALG_DEFAULT, L_spsv_described);
I have updated my code as follows, but I think I narrowed my problem down to not copying or updating dense_Y correctly between my C and Fortran code. I have included my Fortran call code.
C:
void lusol_cusparse2(double* dense_Y)
{
// dense_Y = values for the dense vector Y and X
// Set values of the dense vector Y
cusparseDnVecSetValues(DenseY,(void*)dense_Y);
cusparseDnVecSetValues(DenseX,(void*)dense_Y);
// Forward solve (Lx=y)
cusparseSpSV_solve(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha, L_SparseA, DenseX, DenseY, CUDA_R_64F,
CUSPARSE_SPSV_ALG_DEFAULT, L_spsv_described);
void* temp=(void*)dense_Y;
cusparseDnVecGetValues(DenseY,&temp);
cusparseDnVecSetValues(DenseX,temp);
// Backward solve (Uy=x)
cusparseSpSV_solve(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha, U_SparseA, DenseX, DenseY, CUDA_R_64F,
CUSPARSE_SPSV_ALG_DEFAULT, U_spsv_described);
// Get the values of the dense vector into dense_Y
cusparseDnVecGetValues(DenseY,&temp);
dense_Y = (double*)temp;
cudaDeviceSynchronize();
}
Fortran:
c#######################################################################
subroutine prec_inv (dense_Y)
c
c-----------------------------------------------------------------------
c
use ....
implicit none
real(r_typ), dimension(N) :: dense_Y
c
c-----------------------------------------------------------------------
c
!$acc host_data use_device(dense_Y)
c
call lusol_cusparse2(C_LOC(dense_Y(1)))
!$acc end host_data
c
return
end
Actually that is not problem. Sorry
- Miko
Hello. I finally found my bug. It was caused by the order of my functions. I had to set the preconditioner (M=LU) before doing any analysis on L and U.My code works fine with d_X==d_Y. If anyone has an issue similar to mine, here is my pseudocode:
void load_lusol_cusparse(int* sparse_offsets, int* sparse_col,
double* sparse_val, int nc, int nzero, double* dense_Y)
{
// Setup cusparse.
// Set up M
// Get algorithm buffer size for M
// Allocate buffer for M
// Analyze M
// Set preconditioner (M=LU)
////////////////////////////////////////////////////////////////////////////////////
//
// Set up L
//
////////////////////////////////////////////////////////////////////////////////////
// Creat the sparse matrix
// Initialize the data structure
// Set filling mode types
// Set diagonal unit types
////////////////////////////////////////////////////////////////////////////////////
//
// Set up U
//
////////////////////////////////////////////////////////////////////////////////////
// Creat the sparse matrix
// Initialize the data structure
// Set filling mode types
// Set diagonal unit types
////////////////////////////////////////////////////////////////////////////////////
// Set up Dense Vector
// Get algorithm buffer sizes for L and U
// Allocate buffers for L and U
// Analyze L
// Analyze U
cudaDeviceSynchronize();
}
void lusol_cusparse(double* dense_Y)
{
cusparseDnVecSetValues(DenseVec,(void*)dense_Y);
// Forward solve (Lx=y)
// Backward solve (Uy=x)
cudaDeviceSynchronize();
}
perfect. Happy that the problem has been fixed