Hi all, I’ve implemented cusolverSpScsreigvsi
to compute the largest eigenvalue of a sparse matrix formatted in CSR. For matrices of size 12080x12080, it works. But, it throws a segmentation fault for 33000x33000. Since I cannot allocate the buffer, so I suspect it might be the issue. Can anybody rescue me?
Here is the code:
// C++ DEPENDENCIES
#include <iostream>
#include <fstream>
#include <sstream>
// CUDA TOOLKIT DEPENDENCIES
#include <cuda_runtime_api.h>
#include <cusparse.h>
#include <cusolverSp.h>
#include <thrust/device_vector.h>
#include <thrust/transform.h>
#include <thrust/random.h>
// To check if CUDA API calls are successful
#define CHECK_CUDA(func) \
{ \
cudaError_t status = (func); \
if (status != cudaSuccess) { \
printf("CUDA API failed at line %d with error: %s (%d)\n", \
__LINE__, cudaGetErrorString(status), status); \
exit(EXIT_FAILURE); \
} \
}
// To check if cuSPARSE API calls are successful
#define CHECK_CUSPARSE(func) \
{ \
cusparseStatus_t status = (func); \
if (status != CUSPARSE_STATUS_SUCCESS) { \
printf("CUSPARSE API failed at line %d with error: %s (%d)\n", \
__LINE__, cusparseGetErrorString(status), status); \
exit(EXIT_FAILURE); \
} \
}
// To check if cuSOLVER API calls are successful
#define CHECK_CUSOLVER(func) \
{ \
cusolverStatus_t status = (func); \
if (status != CUSOLVER_STATUS_SUCCESS) { \
printf("CUSOLVER API failed at line %d with error: %s (%d)\n", \
__LINE__, status, status); \
exit(EXIT_FAILURE); \
} \
}
// Generate random number in the range [0, 1)
struct genRandomNumber {
__device__
float operator () (int idx) {
thrust::default_random_engine randGen;
thrust::uniform_real_distribution<float> uniDist;
randGen.discard(idx);
return uniDist(randGen);
}
};
struct CSThrust {
int m; // number of rows
int n; // number of columns
int nnz; // number of non-zero elements
std::string format; // Either "CSC" or "CSR"
thrust::device_vector<int> pointers; // can be column pointer (also called column offsets) or row pointer (also called row offsets)
thrust::device_vector<int> indices; // can be row indices or column indices
thrust::device_vector<float> values; // can be cscValues or csrValues
};
// Read a matrix market file and return a CSR matrix
// See https://math.nist.gov/MatrixMarket/formats.html for more details
void readMTXFile2CSR(const std::string& filepath, CSThrust& csr) {
std::ifstream mtxfile(filepath.c_str(), std::ios::in);
if (!mtxfile.is_open()) {
std::cout << "Error opening file: " << filepath << std::endl;
exit(EXIT_FAILURE);
}
// Reading header
std::string header;
std::getline(mtxfile, header);
assert(mtxfile.good());
std::stringstream formatheader(header);
std::string substr[5];
formatheader >> substr[0] >> substr[1] >> substr[2] >> substr[3] >> substr[4];
assert(substr[0] == "%%MatrixMarket");
assert(substr[1] == "matrix");
assert(substr[2] == "coordinate");
if (substr[3].compare("complex") == 0) {
std::cout << "Only real and integer valued matrices are supported" << std::endl;
exit(EXIT_FAILURE);
}
// Get symmetry (matrix needs to be square matrix)
bool is_symmetric = false;
if (substr[4] == "symmetric") {
is_symmetric = true;
} else if (substr[4] == "general") {
is_symmetric = false;
} else {
std::cout << "Only symmetric and general matrices are supported" << std::endl;
exit(EXIT_FAILURE);
}
// Ignore comments afterwards
while (mtxfile.peek() == '%') {
mtxfile.ignore(2048, '\n');
}
// Read nrows, ncols, nnz
int nrows, ncols, nnz;
mtxfile >> nrows >> ncols >> nnz;
// Set and resize CSR variables
csr.format = "CSR";
csr.m = nrows;
csr.n = ncols;
// Symmetric matrix is a square matrix
if (is_symmetric) {
assert(nrows == ncols);
csr.nnz = 2* nnz - nrows;
} else {
csr.nnz = nnz;
}
// Read the matrix from mtx file, if it's symmetric, then recreate upper triangular part.
// Store in row-major order
std::vector<float> values(nrows * ncols, 0.0);
int row, col;
float val;
int diag_count = 0;
for (int coeff = 0; coeff < nnz; coeff++) {
mtxfile >> row >> col >> val;
if (row == col) {
diag_count++;
values[(row - 1) * ncols + (col - 1)] = val;
if (is_symmetric) {
values[(col - 1) * ncols + (row - 1)] = val;
}
} else {
values[(row - 1) * ncols + (col - 1)] = val;
if (is_symmetric) {
values[(col - 1) * ncols + (row - 1)] = val;
}
}
}
assert(diag_count == nrows);
mtxfile.close();
// Convert to CSR
csr.pointers.resize(csr.m + 1);
csr.indices.resize(csr.nnz);
csr.values.resize(csr.nnz);
// Setting row offset start with 0 index
csr.pointers[0] = 0;
// Extract out the CSR variables
int nnz_idx = 0;
for (int row = 0; row < csr.m; row++) {
for (int col = 0; col < csr.n; col++) {
if (values[row * csr.n + col] != 0.0) {
csr.indices[nnz_idx] = col;
csr.values[nnz_idx] = values[row * csr.n + col];
nnz_idx++;
}
}
csr.pointers[row + 1] = nnz_idx;
}
}
float computeMaxEigenvalue(const CSThrust& M) {
assert(M.format == "CSR"); // We only use CSR format
assert(M.m == M.n);
cusolverSpHandle_t solver_handle = NULL;
CHECK_CUSOLVER( cusolverSpCreate(&solver_handle) )
cusparseMatDescr_t M_descr;
CHECK_CUSPARSE( cusparseCreateMatDescr(&M_descr) )
//! @note CuSolverSp only supports the matrix is general type. i.e. CUSPARSE_MATRIX_TYPE_GENERAL
//! So, if the Matrix is stored only in the upper or lower triangular part, then we need to
//! extend into its missing lower or upper part, otherwise the result would be wrong.
//! Fortunately, we don't need to do this, because cusparseSpGEMM() will automatically
//! store its result in the general format.
//! See: https://docs.nvidia.com/cuda/cusolver/index.html#cusolversp-t-csreigvsi
CHECK_CUSPARSE( cusparseSetMatType(M_descr, CUSPARSE_MATRIX_TYPE_GENERAL) )
CHECK_CUSPARSE( cusparseSetMatIndexBase(M_descr, CUSPARSE_INDEX_BASE_ZERO) )
// Define required intial values and threshold
float tol = 1e-3; // tolerance to determine the convergence
int max_iter = 1000; // maximum number of iterations
//! @note To set the simple version of upper bound of the largest eigenvalue of the (graph-)Laplacian.
//! We use Gershgorin circle theorem such that
//! |\lambda_max_computed| <= |lambda_max_guess| = d_i^max + R_i where,
//! d_i^max is largest degree of the vertex at ith vertex, and
//! R_i is the radius of the Gershgorin circle at ith vertex := \sum_{j \ne i} |M_ij|
//! For simplicity, we replace R_i by 2*(nnz - m)/m. lambda_max_guess doesn't needs to be perfect because it's
//! just a initial guess, but it should be as close as possible to the actual max. eigenvalue.
//! Let me show you how I got this formula:
//! 1. Probability of nnz elements on the (i, j) excluding diagonal (i.e. i != j) is (nnz - m)/(m^2 - m) = (nnz - m)/(m(m - 1)).
//! 2. Total possible nnz elements on the ith row excluding diagonal ii is (m - 1) * (nnz - m)/(m(m -1)) = (nnz - m)/m.
//! 3. Total possible nnz elements on the ith column excluding diagonal ii is (m - 1) * (nnz - m)/(m(m -1)) = (nnz - m)/m.
//! 4. Total possible nnz elements either on the row or column excluding its diagonal element is 2*(nnz - m)/m.
float lambda_max_guess = *thrust::max_element(M.values.begin(), M.values.end()) + ((M.nnz - M.m)/M.m)*2;
std::cout << "Initial guess for the largest eigenvalue: " << lambda_max_guess << std::endl;
thrust::device_vector<float> x0(M.m); // Get the random initial vector
thrust::transform(thrust::make_counting_iterator(0),
thrust::make_counting_iterator(M.m),
x0.begin(),
genRandomNumber());
thrust::device_vector<float> x(M.m, 0); // the eigenvector
// Setting the variable for the largest eigenvalue to store
//! @note cuSolver has incorrect documentation for cusolverSpScsreigvsi. mu (i.e. lambda_max_computed)
//! should be a device pointer, not a host pointer.
float *d_lambda_max_computed;
CHECK_CUDA(cudaMalloc(&d_lambda_max_computed, sizeof(float)));
CHECK_CUDA(cudaMemset(d_lambda_max_computed, 0, sizeof(float)));
// Get eigenvalue and eigenvector near to the upper bound of the largest eigenvalue
CHECK_CUSOLVER( cusolverSpScsreigvsi(solver_handle,
M.m, M.nnz,
M_descr,
thrust::raw_pointer_cast(M.values.data()),
thrust::raw_pointer_cast(M.pointers.data()),
thrust::raw_pointer_cast(M.indices.data()),
lambda_max_guess,
thrust::raw_pointer_cast(x0.data()),
max_iter, tol,
d_lambda_max_computed,
thrust::raw_pointer_cast(x.data())) ) // This line throws segmentation fault for larger matrix sizes. RESCUE ME?
// Copy to host
float lambda_max_computed;
CHECK_CUDA(cudaMemcpy(&lambda_max_computed, d_lambda_max_computed, sizeof(float), cudaMemcpyDeviceToHost));
// Destroy the descriptor, handle
CHECK_CUSPARSE( cusparseDestroyMatDescr(M_descr) )
// CHECK_CUSPARSE( cusparseDestroy(sparse_handle) )
CHECK_CUSOLVER( cusolverSpDestroy(solver_handle) )
return lambda_max_computed;
}
int main(int argc, char** argv) {
std::string mtx_filepath;
if (argc > 1) {
mtx_filepath = argv[1];
} else {
std::cout << "Please provide a path to a matrix market file path" << std::endl;
return EXIT_FAILURE;
}
CSThrust M; // Create a sparse matrix instance
readMTXFile2CSR(mtx_filepath, M); // Read the matrix market file and convert it to csr
float lambda_max = computeMaxEigenvalue(M); // Compute the largest eigenvalue
std::cout << "Max eigenvalue: " << lambda_max << std::endl;
return EXIT_SUCCESS;
}
I have also attached the matrix in MTX format for testing purpose.
dL22.mtx (9.3 MB)
Thanks!