In my research, I need to use SSOR as the preconditioner for the PCG method. I created a CSR matrix, in which the L and the U parts contain the SSOR information. I actually use two SpMV routines for solving the preconditioned system.
I created the L mat in the following manner:
// A struct encapsulated the information for a CSR matrix, and csrMat is the original linear operator.
spMat<T> L{nullptr, csrMat.rowOffsetsPtr, csrMat.colIndPtr, nullptr};
// Copy the values of SSOR matrix from the host.
CHECK_CUDA_ERROR(cudaMalloc(reinterpret_cast<void **>(&L.valuesPtr), nnz * sizeof(T)));
CHECK_CUDA_ERROR(cudaMemcpy(reinterpret_cast<void *>(L.valuesPtr), reinterpret_cast<void *>(ssorValues), nnz * sizeof(T), cudaMemcpyHostToDevice));
// Create the mat.
CHECK_CUDA_ERROR(cusparseCreateCsr(&L.descr, static_cast<int64_t>(size), static_cast<int64_t>(size), static_cast<int64_t>(nnz), reinterpret_cast<void *>(L.rowOffsetsPtr), reinterpret_cast<void *>(L.colIndPtr), reinterpret_cast<void *>(L.valuesPtr), CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, cuTraits<T>::valueType));
// Set attributes, note for L mat, the diagonal entries are treated as one.
cusparseFillMode_t fill_mode{CUSPARSE_FILL_MODE_LOWER};
cusparseDiagType_t diag_type{CUSPARSE_DIAG_TYPE_UNIT};
CHECK_CUDA_ERROR(cusparseSpMatSetAttribute(L.descr, CUSPARSE_SPMAT_FILL_MODE, &fill_mode, sizeof(fill_mode)));
CHECK_CUDA_ERROR(cusparseSpMatSetAttribute(L.descr, CUSPARSE_SPMAT_DIAG_TYPE, &diag_type, sizeof(diag_type)));
Then, I create U mat in the following manner:
// U mat shared the same sparsity pattern with the L and the original mat.
spMat<T> U{nullptr, csrMat.rowOffsetsPtr, csrMat.colIndPtr, L.valuesPtr};
// Create the U mat.
CHECK_CUDA_ERROR(cusparseCreateCsr(&U.descr, static_cast<int64_t>(size), static_cast<int64_t>(size), static_cast<int64_t>(nnz), reinterpret_cast<void *>(U.rowOffsetsPtr), reinterpret_cast<void *>(U.colIndPtr), reinterpret_cast<void *>(U.valuesPtr), CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, cuTraits<T>::valueType));
// Set attributes.
fill_mode = CUSPARSE_FILL_MODE_UPPER;
diag_type = CUSPARSE_DIAG_TYPE_NON_UNIT;
CHECK_CUDA_ERROR(cusparseSpMatSetAttribute(U.descr, CUSPARSE_SPMAT_FILL_MODE, &fill_mode, sizeof(fill_mode)));
CHECK_CUDA_ERROR(cusparseSpMatSetAttribute(U.descr, CUSPARSE_SPMAT_DIAG_TYPE, &diag_type, sizeof(diag_type)));
Prepare the triangular matrix solvers for the L and U mat:
// Create the description.
cusparseSpSVDescr_t spsvDescrL{nullptr}, spsvDescrU{nullptr};
CHECK_CUDA_ERROR(cusparseSpSV_createDescr(&spsvDescrL));
CHECK_CUDA_ERROR(cusparseSpSV_createDescr(&spsvDescrU));
// Malloc the buffer.
void *bufferSV{nullptr};
size_t bufferSizeL{0}, bufferSizeU{0};
alpha = 1;
CHECK_CUDA_ERROR(cusparseSpSV_bufferSize(sprHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, L.descr, r.descr, aux.descr, cuTraits<T>::valueType, CUSPARSE_SPSV_ALG_DEFAULT, spsvDescrL, &bufferSizeL));
CHECK_CUDA_ERROR(cusparseSpSV_bufferSize(sprHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, U.descr, aux.descr, z.descr, cuTraits<T>::valueType, CUSPARSE_SPSV_ALG_DEFAULT, spsvDescrU, &bufferSizeU));
CHECK_CUDA_ERROR(cudaMalloc(reinterpret_cast<void **>(&bufferSV), std::max(bufferSizeL, bufferSizeU)));
// Do the analysis routine.
CHECK_CUDA_ERROR(cusparseSpSV_analysis(sprHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, L.descr, r.descr, aux.descr, cuTraits<T>::valueType, CUSPARSE_SPSV_ALG_DEFAULT, spsvDescrL, bufferSV));
CHECK_CUDA_ERROR(cusparseSpSV_analysis(sprHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, U.descr, aux.descr, z.descr, cuTraits<T>::valueType, CUSPARSE_SPSV_ALG_DEFAULT, spsvDescrU, bufferSV));
The following codes show the solving process:
// Reset the memory for the solution vector.
CHECK_CUDA_ERROR(cudaMemset(reinterpret_cast<void *>(aux.ptr), 0, size * sizeof(T)));
// Solve.
CHECK_CUDA_ERROR(cusparseSpSV_solve(sprHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, L.descr, r.descr, aux.descr, cuTraits<T>::valueType, CUSPARSE_SPSV_ALG_DEFAULT, spsvDescrL));
// Reset.
CHECK_CUDA_ERROR(cudaMemset(reinterpret_cast<void *>(z.ptr), 0, size * sizeof(T)));
// Solve.
CHECK_CUDA_ERROR(cusparseSpSV_solve(sprHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, U.descr, aux.descr, z.descr, cuTraits<T>::valueType, CUSPARSE_SPSV_ALG_DEFAULT, spsvDescrU));