Does the cusparseSpSV CSR
have any built-in preconditioner? I am attempting to use cusparseSpSV CSR
along with cusparseDcsrilu02, but my code results in NaN. If I do not use cusparseDcsrilu02, I get real values but my code takes much longer. I am trying to convert from using cusparseDcsrsv2_solve and other deprecated functions, but can not find a good guide on how to mesh the new function of cusparseSpSV with a preconditioner.
Can you please report the main API calls that you are using?
Also, cusparseSpSV
and cusparseDcsrsv2_solve
compute the same thing. It could be that one parameter of cusparseSpSV
is not correct or the matrix attributes (cusparseSpMatSetAttribute
) are not set in the right way.
This is the part of my code utilizing the calls. I had a working version in 21.3 with cusparseDcsrsv2_solve, but not sure where I am going wrong in this version.
void load_lusol_cusparse2(int* sparse_offsets, int* sparse_col,
double* sparse_val, int nc, int nzero,
double* dense_Y)
{
// Setup cusparse
cusparseCreate(&cusparseHandle);
// Set up M
cusparseCreateMatDescr(&M_described);
cusparseSetMatIndexBase(M_described, CUSPARSE_INDEX_BASE_ONE);
cusparseSetMatType(M_described, CUSPARSE_MATRIX_TYPE_GENERAL);
cusparseCreateCsrilu02Info(&M_analyzed);
// Get algorithm buffer sizes
cusparseDcsrilu02_bufferSize(cusparseHandle, nc, nzero,
M_described, sparse_val, sparse_offsets, sparse_col,
M_analyzed, &Mbuf_size);
// Allocate buffers
cudaMalloc((void**)&Mbuffer, Mbuf_size);
//Analyze M
cusparseDcsrilu02_analysis(cusparseHandle, nc, nzero,
M_described, sparse_val, sparse_offsets, sparse_col,
M_analyzed, M_policy, Mbuffer);
cusparseXcsrilu02_zeroPivot(cusparseHandle,M_analyzed, &struct_zero);
// Create the sparse matrixes
cusparseCreateCsr(&L_SparseA, nc, nc, nzero,
(void*)sparse_offsets, (void*)sparse_col, (void*)sparse_val,
CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
CUSPARSE_INDEX_BASE_ONE, CUDA_R_64F);
cusparseCreateCsr(&U_SparseA, nc, nc, nzero,
(void*)sparse_offsets, (void*)sparse_col, (void*)sparse_val,
CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
CUSPARSE_INDEX_BASE_ONE, CUDA_R_64F);
// Create the dense vectors
cusparseCreateDnVec(&Dense_Y, nc,(void*)dense_Y, CUDA_R_64F);
// Initialize the data structures
cusparseSpSV_createDescr(&L_spsv_described);
cusparseSpSV_createDescr(&U_spsv_described);
// Set filling mode types
cusparseFillMode_t L_fill_mode = CUSPARSE_FILL_MODE_LOWER;
cusparseSpMatSetAttribute(L_SparseA, CUSPARSE_SPMAT_FILL_MODE,
&L_fill_mode, sizeof(L_fill_mode));
cusparseFillMode_t U_fill_mode = CUSPARSE_FILL_MODE_UPPER;
cusparseSpMatSetAttribute(U_SparseA, CUSPARSE_SPMAT_FILL_MODE,
&U_fill_mode, sizeof(U_fill_mode));
// Set diagonal unit types
cusparseDiagType_t L_type_diag = CUSPARSE_DIAG_TYPE_UNIT;
cusparseSpMatSetAttribute(L_SparseA, CUSPARSE_SPMAT_DIAG_TYPE,
&L_type_diag, sizeof(L_type_diag));
cusparseDiagType_t U_type_diag = CUSPARSE_DIAG_TYPE_NON_UNIT;
cusparseSpMatSetAttribute(U_SparseA, CUSPARSE_SPMAT_DIAG_TYPE,
&U_type_diag, sizeof(U_type_diag));
// allocate buffers
cusparseSpSV_bufferSize(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha, L_SparseA, DenseY, DenseY, CUDA_R_64F,
CUSPARSE_SPSV_ALG_DEFAULT, L_spsv_described,&L_SpSv_bufferSize);
cusparseSpSV_bufferSize(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha, U_SparseA, DenseY, DenseY, CUDA_R_64F,
CUSPARSE_SPSV_ALG_DEFAULT, U_spsv_described,&U_SpSv_bufferSize);
cudaMalloc(&L_SpSv_Buffer, L_SpSv_bufferSize);
cudaMalloc(&U_SpSv_Buffer, U_SpSv_bufferSize);
// Analyze the Sparse Matrixes
//Analyze L
cusparseSpSV_analysis(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha, L_SparseA, DenseY, DenseY, CUDA_R_64F,
CUSPARSE_SPSV_ALG_DEFAULT, L_spsv_described, L_SpSv_Buffer);
//Analyze U
cusparseSpSV_analysis(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha, U_SparseA, DenseY, DenseY, CUDA_R_64F,
CUSPARSE_SPSV_ALG_DEFAULT, U_spsv_described, U_SpSv_Buffer);
// Set preconditioner (M=LU)
cusparseDcsrilu02(cusparseHandle, nc, nzero,
M_described, sparse_val, sparse_offsets,
sparse_col, M_analyzed, M_policy, Mbuffer);
cusparseDestroySpMat(L_SparseA);
cusparseDestroySpMat(U_SparseA);
// Create the sparse matrixes
cusparseCreateCsr(&L_SparseA, nr, nc, nzero,
(void*)sparse_offsets, (void*)sparse_col, (void*)sparse_val,
CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
CUSPARSE_INDEX_BASE_ONE, CUDA_R_64F);
cusparseCreateCsr(&U_SparseA, nr, nc, nzero,
(void*)sparse_offsets, (void*)sparse_col, (void*)sparse_val,
CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
CUSPARSE_INDEX_BASE_ONE, CUDA_R_64F);
// Set filling mode types
cusparseSpMatSetAttribute(L_SparseA, CUSPARSE_SPMAT_FILL_MODE,
&L_fill_mode, sizeof(L_fill_mode));
cusparseSpMatSetAttribute(U_SparseA, CUSPARSE_SPMAT_FILL_MODE,
&U_fill_mode, sizeof(U_fill_mode));
// Set diagonal unit types
cusparseSpMatSetAttribute(L_SparseA, CUSPARSE_SPMAT_DIAG_TYPE,
&L_type_diag, sizeof(L_type_diag));
cusparseSpMatSetAttribute(U_SparseA, CUSPARSE_SPMAT_DIAG_TYPE,
&U_type_diag, sizeof(U_type_diag));
cudaDeviceSynchronize();
}
void lusol_cusparse2(double* dense_Y)
{
// dense_Y = values for the dense vector Y and X
// Set values of the dense vector Y
void* temp=(void*)dense_Y;
cusparseDnVecSetValues(DenseY,(void*)dense_Y);
// Forward solve (Lx=y)
cusparseSpSV_solve(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha, L_SparseA, DenseY, DenseY, CUDA_R_64F,
CUSPARSE_SPSV_ALG_DEFAULT, L_spsv_described);
// Backward solve (Uy=x)
cusparseSpSV_solve(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha, U_SparseA, DenseY, DenseY, CUDA_R_64F,
CUSPARSE_SPSV_ALG_DEFAULT, U_spsv_described);
// Get the values of the dense vector into dense_Y
cusparseDnVecGetValues(DenseY,&temp);
dense_Y = (double*)temp;
cudaDeviceSynchronize();
}
- Miko
The workflow seems correct. Can you please provide a minimal working code and the input matrix? in this way, we can better identify the problem
Thank you for the help!
I was most concerned if my workflow was correct. Since it appears to be, I now have a better idea of where to find my potential error. Between my two versions, I had to make a small change to my main code, which in all likelihood is the error. I had to pass in variables in a slightly different way, and this may be the culprit. I will post back if it is not the case.
- Miko
We did a further investigation. cusparseSpSV
does not support in-place computation d_X == d_Y
(DenseY
in your case).
cusparseSpSV_solve(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha, L_SparseA, DenseY, DenseY, CUDA_R_64F,
CUSPARSE_SPSV_ALG_DEFAULT, L_spsv_described);
I have updated my code as follows, but I think I narrowed my problem down to not copying or updating dense_Y correctly between my C and Fortran code. I have included my Fortran call code.
C:
void lusol_cusparse2(double* dense_Y)
{
// dense_Y = values for the dense vector Y and X
// Set values of the dense vector Y
cusparseDnVecSetValues(DenseY,(void*)dense_Y);
cusparseDnVecSetValues(DenseX,(void*)dense_Y);
// Forward solve (Lx=y)
cusparseSpSV_solve(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha, L_SparseA, DenseX, DenseY, CUDA_R_64F,
CUSPARSE_SPSV_ALG_DEFAULT, L_spsv_described);
void* temp=(void*)dense_Y;
cusparseDnVecGetValues(DenseY,&temp);
cusparseDnVecSetValues(DenseX,temp);
// Backward solve (Uy=x)
cusparseSpSV_solve(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha, U_SparseA, DenseX, DenseY, CUDA_R_64F,
CUSPARSE_SPSV_ALG_DEFAULT, U_spsv_described);
// Get the values of the dense vector into dense_Y
cusparseDnVecGetValues(DenseY,&temp);
dense_Y = (double*)temp;
cudaDeviceSynchronize();
}
Fortran:
c#######################################################################
subroutine prec_inv (dense_Y)
c
c-----------------------------------------------------------------------
c
use ....
implicit none
real(r_typ), dimension(N) :: dense_Y
c
c-----------------------------------------------------------------------
c
!$acc host_data use_device(dense_Y)
c
call lusol_cusparse2(C_LOC(dense_Y(1)))
!$acc end host_data
c
return
end
Actually that is not problem. Sorry
- Miko
Hello. I finally found my bug. It was caused by the order of my functions. I had to set the preconditioner (M=LU) before doing any analysis on L and U.My code works fine with d_X==d_Y. If anyone has an issue similar to mine, here is my pseudocode:
void load_lusol_cusparse(int* sparse_offsets, int* sparse_col,
double* sparse_val, int nc, int nzero, double* dense_Y)
{
// Setup cusparse.
// Set up M
// Get algorithm buffer size for M
// Allocate buffer for M
// Analyze M
// Set preconditioner (M=LU)
////////////////////////////////////////////////////////////////////////////////////
//
// Set up L
//
////////////////////////////////////////////////////////////////////////////////////
// Creat the sparse matrix
// Initialize the data structure
// Set filling mode types
// Set diagonal unit types
////////////////////////////////////////////////////////////////////////////////////
//
// Set up U
//
////////////////////////////////////////////////////////////////////////////////////
// Creat the sparse matrix
// Initialize the data structure
// Set filling mode types
// Set diagonal unit types
////////////////////////////////////////////////////////////////////////////////////
// Set up Dense Vector
// Get algorithm buffer sizes for L and U
// Allocate buffers for L and U
// Analyze L
// Analyze U
cudaDeviceSynchronize();
}
void lusol_cusparse(double* dense_Y)
{
cusparseDnVecSetValues(DenseVec,(void*)dense_Y);
// Forward solve (Lx=y)
// Backward solve (Uy=x)
cudaDeviceSynchronize();
}
perfect. Happy that the problem has been fixed