Problem with cusparseXcoo2csr and cuda-gdb

kslimes · January 11, 2012, 8:16pm

I’m trying to convert a COO matrix with ~200,000 elements to CSR format. The code I use is:

#include<iostream>

#include<cstdlib>

#include<cmath>

#include<iomanip>

#include"hamiltonian.h" //my header for code which makes the matrix

#include"cuda.h"

#include"cuda_runtime.h"

#include"cublas_v2.h"

#include"cusparse_v2.h"

#include"cuComplex.h"

using namespace std;

/*
 * Prepares the library state for a Lanczos run over `how_many` matrices and
 * converts each matrix's COO row indices into CSR row pointers.
 *
 * For every i in [0, how_many):
 *   - a cuSPARSE matrix descriptor is created (general type, zero-based),
 *   - a device buffer of (Hamiltonian[i].sectordim + 1) ints is allocated,
 *   - cusparseXcoo2csr() compresses Hamiltonian[i].rows (num_Elem[i] entries)
 *     into that buffer.
 *
 * Parameters:
 *   how_many     number of matrices in `Hamiltonian` / entries in `num_Elem`
 *   num_Elem     number of stored elements of each matrix
 *   Hamiltonian  array of matrices; only `.sectordim` and `.rows` are read here
 *   max_Iter, num_Eig, conv_req
 *                not used by the code in this block
 *
 * NOTE(review): cusparseXcoo2csr expects `Hamiltonian[i].rows` to be a DEVICE
 * pointer to row indices sorted by row -- confirm against the code that fills
 * the struct.
 *
 * Errors are reported on std::cout. After the first failure no further
 * library calls are made with the affected objects; everything acquired so
 * far is released before returning. Nothing is handed back to the caller, so
 * the row-pointer buffers are freed at the end of the function as well.
 */
__host__ void lanczos(const int how_many, const int* num_Elem, d_hamiltonian*& Hamiltonian, int max_Iter, const int num_Eig, const double conv_req)
{
  // Treat a non-positive count as "no matrices" so the size arithmetic below
  // can never wrap around.
  const size_t count = (how_many > 0) ? (size_t)how_many : 0;

  cublasHandle_t linalghandle;
  cusparseHandle_t sparsehandle;
  bool have_cublas = false;    // true once linalghandle must be destroyed
  bool have_cusparse = false;  // true once sparsehandle must be destroyed
  int created_descr = 0;       // number of entries of H_descr that are valid

  // Host-side bookkeeping lives on the heap: runtime-sized stack arrays are
  // not standard C++ and are undefined for a zero length.
  int* dim = (int*)malloc(count*sizeof(int));
  cusparseMatDescr_t* H_descr = (cusparseMatDescr_t*)malloc(count*sizeof(cusparseMatDescr_t));
  // calloc so every slot starts out NULL and can be passed to cudaFree()
  // unconditionally during cleanup.
  int** d_H_rowptrs = (int**)calloc(count, sizeof(int*));

  // malloc/calloc may legitimately return NULL for a zero-byte request.
  bool ok = (count == 0) || (dim != NULL && H_descr != NULL && d_H_rowptrs != NULL);
  if (!ok)
  {
    std::cout<<"Error allocating host bookkeeping arrays"<<std::endl;
  }

  if (ok)
  {
    for(int i = 0; i < how_many; i++)
    {
      dim[i] = Hamiltonian[i].sectordim; //pull dimension of matrix out of custom struct
    }

    cublasStatus_t cublas_status = cublasCreate(&linalghandle);
    if (cublas_status != CUBLAS_STATUS_SUCCESS)
    {
      std::cout<<"Initializing CUBLAS failed! Error: "<<cublas_status<<std::endl;
      ok = false;
    }
    else
    {
      have_cublas = true;
    }
  }

  if (ok)
  {
    cusparseStatus_t cusparse_status = cusparseCreate(&sparsehandle);
    if (cusparse_status != CUSPARSE_STATUS_SUCCESS)
    {
      std::cout<<"Failed to initialize CUSPARSE! Error: "<<cusparse_status<<std::endl;
      ok = false;
    }
    else
    {
      have_cusparse = true;
    }
  }

  // One descriptor per matrix: general matrix, zero-based indexing.
  for(int i = 0; ok && i < how_many; i++)
  {
    cusparseStatus_t cusparse_status = cusparseCreateMatDescr(&H_descr[i]);
    if (cusparse_status != CUSPARSE_STATUS_SUCCESS)
    {
      std::cout<<"Error creating matrix description: "<<cusparse_status<<std::endl;
      ok = false;
      break;
    }
    created_descr = i + 1;

    cusparse_status = cusparseSetMatType(H_descr[i], CUSPARSE_MATRIX_TYPE_GENERAL);
    if (cusparse_status != CUSPARSE_STATUS_SUCCESS)
    {
      std::cout<<"Error setting matrix type: "<<cusparse_status<<std::endl;
      ok = false;
      break;
    }

    cusparse_status = cusparseSetMatIndexBase(H_descr[i], CUSPARSE_INDEX_BASE_ZERO);
    if (cusparse_status != CUSPARSE_STATUS_SUCCESS)
    {
      std::cout<<"Error setting matrix index base: "<<cusparse_status<<std::endl;
      ok = false;
      break;
    }
  }

  if (ok)
  {
    std::cout<<"Done creating descriptions"<<std::endl;
  }

  // Allocate the CSR row-pointer array (dim + 1 entries) for each matrix and
  // fill it from the COO row indices.
  for(int i = 0; ok && i < how_many; i++)
  {
    // Runtime-API calls return cudaError_t, so compare against cudaSuccess
    // (CUDA_SUCCESS belongs to the driver API's CUresult).
    cudaError_t status = cudaMalloc(&d_H_rowptrs[i], ((size_t)dim[i] + 1)*sizeof(int));
    if (status != cudaSuccess)
    {
      std::cout<<"Error allocating d_H_rowptrs: "<<cudaGetErrorString(status)<<std::endl;
      d_H_rowptrs[i] = NULL; // never hand an invalid pointer to cuSPARSE or cudaFree
      ok = false;
      break;
    }

    cusparseStatus_t cusparse_status = cusparseXcoo2csr(sparsehandle, Hamiltonian[i].rows, num_Elem[i], dim[i], d_H_rowptrs[i], CUSPARSE_INDEX_BASE_ZERO);
    if (cusparse_status != CUSPARSE_STATUS_SUCCESS)
    {
      std::cout<<"Error converting to CSR: "<<cusparse_status<<std::endl;
      ok = false;
      break;
    }
  }

  // ---- cleanup: release everything that was acquired, in reverse order ----
  if (d_H_rowptrs != NULL)
  {
    for(int i = 0; i < how_many; i++)
    {
      cudaFree(d_H_rowptrs[i]); // no-op for slots that are still NULL
    }
  }

  for(int i = 0; i < created_descr; i++)
  {
    cusparseDestroyMatDescr(H_descr[i]);
  }

  if (have_cusparse)
  {
    cusparseDestroy(sparsehandle);
  }

  if (have_cublas)
  {
    cublasDestroy(linalghandle);
  }

  free(d_H_rowptrs);
  free(H_descr);
  free(dim);
}

When I try to run this, I get a CUSPARSE_STATUS_EXECUTION_FAILED error. I compiled it (this function is part of a larger project, but I am sure the matrix I am passing it is correct) using nvcc -w -g -G -gencode arch=compute_20,code=sm_21 -lcublas -lcusparse lanczos.cu

When I try to examine the problem in cuda-gdb, I get

warning: no loadable sections found in added symbol-file /tmp/cuda-dbg/27861/elf.76ffb0.8895a0.o.FuXAwf

warning: no loadable sections found in added symbol-file /tmp/cuda-dbg/27861/elf.76ffb0.88bb50.o.t0xXIx

warning: no loadable sections found in added symbol-file /tmp/cuda-dbg/27861/elf.76ffb0.a70a60.o.ugEDcJ

warning: no loadable sections found in added symbol-file /tmp/cuda-dbg/27861/elf.76ffb0.b3e0d0.o.imoyDd

I’m running Ubuntu 10.10 x86_64 with CUDA 4.1 and 2.6.35-31-generic as my kernel. I’m trying to test this on a 560Ti.

Any idea what’s wrong?

kslimes · January 11, 2012, 8:16pm

I’m trying to convert a COO matrix with ~200,000 elements to CSR format. The code I use is:

#include<iostream>

#include<cstdlib>

#include<cmath>

#include<iomanip>

#include"hamiltonian.h" //my header for code which makes the matrix

#include"cuda.h"

#include"cuda_runtime.h"

#include"cublas_v2.h"

#include"cusparse_v2.h"

#include"cuComplex.h"

using namespace std;

/*
 * Prepares the library state for a Lanczos run over `how_many` matrices and
 * converts each matrix's COO row indices into CSR row pointers.
 *
 * For every i in [0, how_many):
 *   - a cuSPARSE matrix descriptor is created (general type, zero-based),
 *   - a device buffer of (Hamiltonian[i].sectordim + 1) ints is allocated,
 *   - cusparseXcoo2csr() compresses Hamiltonian[i].rows (num_Elem[i] entries)
 *     into that buffer.
 *
 * Parameters:
 *   how_many     number of matrices in `Hamiltonian` / entries in `num_Elem`
 *   num_Elem     number of stored elements of each matrix
 *   Hamiltonian  array of matrices; only `.sectordim` and `.rows` are read here
 *   max_Iter, num_Eig, conv_req
 *                not used by the code in this block
 *
 * NOTE(review): cusparseXcoo2csr expects `Hamiltonian[i].rows` to be a DEVICE
 * pointer to row indices sorted by row -- confirm against the code that fills
 * the struct.
 *
 * Errors are reported on std::cout. After the first failure no further
 * library calls are made with the affected objects; everything acquired so
 * far is released before returning. Nothing is handed back to the caller, so
 * the row-pointer buffers are freed at the end of the function as well.
 */
__host__ void lanczos(const int how_many, const int* num_Elem, d_hamiltonian*& Hamiltonian, int max_Iter, const int num_Eig, const double conv_req)
{
  // Treat a non-positive count as "no matrices" so the size arithmetic below
  // can never wrap around.
  const size_t count = (how_many > 0) ? (size_t)how_many : 0;

  cublasHandle_t linalghandle;
  cusparseHandle_t sparsehandle;
  bool have_cublas = false;    // true once linalghandle must be destroyed
  bool have_cusparse = false;  // true once sparsehandle must be destroyed
  int created_descr = 0;       // number of entries of H_descr that are valid

  // Host-side bookkeeping lives on the heap: runtime-sized stack arrays are
  // not standard C++ and are undefined for a zero length.
  int* dim = (int*)malloc(count*sizeof(int));
  cusparseMatDescr_t* H_descr = (cusparseMatDescr_t*)malloc(count*sizeof(cusparseMatDescr_t));
  // calloc so every slot starts out NULL and can be passed to cudaFree()
  // unconditionally during cleanup.
  int** d_H_rowptrs = (int**)calloc(count, sizeof(int*));

  // malloc/calloc may legitimately return NULL for a zero-byte request.
  bool ok = (count == 0) || (dim != NULL && H_descr != NULL && d_H_rowptrs != NULL);
  if (!ok)
  {
    std::cout<<"Error allocating host bookkeeping arrays"<<std::endl;
  }

  if (ok)
  {
    for(int i = 0; i < how_many; i++)
    {
      dim[i] = Hamiltonian[i].sectordim; //pull dimension of matrix out of custom struct
    }

    cublasStatus_t cublas_status = cublasCreate(&linalghandle);
    if (cublas_status != CUBLAS_STATUS_SUCCESS)
    {
      std::cout<<"Initializing CUBLAS failed! Error: "<<cublas_status<<std::endl;
      ok = false;
    }
    else
    {
      have_cublas = true;
    }
  }

  if (ok)
  {
    cusparseStatus_t cusparse_status = cusparseCreate(&sparsehandle);
    if (cusparse_status != CUSPARSE_STATUS_SUCCESS)
    {
      std::cout<<"Failed to initialize CUSPARSE! Error: "<<cusparse_status<<std::endl;
      ok = false;
    }
    else
    {
      have_cusparse = true;
    }
  }

  // One descriptor per matrix: general matrix, zero-based indexing.
  for(int i = 0; ok && i < how_many; i++)
  {
    cusparseStatus_t cusparse_status = cusparseCreateMatDescr(&H_descr[i]);
    if (cusparse_status != CUSPARSE_STATUS_SUCCESS)
    {
      std::cout<<"Error creating matrix description: "<<cusparse_status<<std::endl;
      ok = false;
      break;
    }
    created_descr = i + 1;

    cusparse_status = cusparseSetMatType(H_descr[i], CUSPARSE_MATRIX_TYPE_GENERAL);
    if (cusparse_status != CUSPARSE_STATUS_SUCCESS)
    {
      std::cout<<"Error setting matrix type: "<<cusparse_status<<std::endl;
      ok = false;
      break;
    }

    cusparse_status = cusparseSetMatIndexBase(H_descr[i], CUSPARSE_INDEX_BASE_ZERO);
    if (cusparse_status != CUSPARSE_STATUS_SUCCESS)
    {
      std::cout<<"Error setting matrix index base: "<<cusparse_status<<std::endl;
      ok = false;
      break;
    }
  }

  if (ok)
  {
    std::cout<<"Done creating descriptions"<<std::endl;
  }

  // Allocate the CSR row-pointer array (dim + 1 entries) for each matrix and
  // fill it from the COO row indices.
  for(int i = 0; ok && i < how_many; i++)
  {
    // Runtime-API calls return cudaError_t, so compare against cudaSuccess
    // (CUDA_SUCCESS belongs to the driver API's CUresult).
    cudaError_t status = cudaMalloc(&d_H_rowptrs[i], ((size_t)dim[i] + 1)*sizeof(int));
    if (status != cudaSuccess)
    {
      std::cout<<"Error allocating d_H_rowptrs: "<<cudaGetErrorString(status)<<std::endl;
      d_H_rowptrs[i] = NULL; // never hand an invalid pointer to cuSPARSE or cudaFree
      ok = false;
      break;
    }

    cusparseStatus_t cusparse_status = cusparseXcoo2csr(sparsehandle, Hamiltonian[i].rows, num_Elem[i], dim[i], d_H_rowptrs[i], CUSPARSE_INDEX_BASE_ZERO);
    if (cusparse_status != CUSPARSE_STATUS_SUCCESS)
    {
      std::cout<<"Error converting to CSR: "<<cusparse_status<<std::endl;
      ok = false;
      break;
    }
  }

  // ---- cleanup: release everything that was acquired, in reverse order ----
  if (d_H_rowptrs != NULL)
  {
    for(int i = 0; i < how_many; i++)
    {
      cudaFree(d_H_rowptrs[i]); // no-op for slots that are still NULL
    }
  }

  for(int i = 0; i < created_descr; i++)
  {
    cusparseDestroyMatDescr(H_descr[i]);
  }

  if (have_cusparse)
  {
    cusparseDestroy(sparsehandle);
  }

  if (have_cublas)
  {
    cublasDestroy(linalghandle);
  }

  free(d_H_rowptrs);
  free(H_descr);
  free(dim);
}

When I try to run this, I get a CUSPARSE_STATUS_EXECUTION_FAILED error. I compiled it (this function is part of a larger project, but I am sure the matrix I am passing it is correct) using nvcc -w -g -G -gencode arch=compute_20,code=sm_21 -lcublas -lcusparse lanczos.cu

When I try to examine the problem in cuda-gdb, I get

warning: no loadable sections found in added symbol-file /tmp/cuda-dbg/27861/elf.76ffb0.8895a0.o.FuXAwf

warning: no loadable sections found in added symbol-file /tmp/cuda-dbg/27861/elf.76ffb0.88bb50.o.t0xXIx

warning: no loadable sections found in added symbol-file /tmp/cuda-dbg/27861/elf.76ffb0.a70a60.o.ugEDcJ

warning: no loadable sections found in added symbol-file /tmp/cuda-dbg/27861/elf.76ffb0.b3e0d0.o.imoyDd

I’m running Ubuntu 10.10 x86_64 with CUDA 4.1 and 2.6.35-31-generic as my kernel. I’m trying to test this on a 560Ti.

Any idea what’s wrong?

njuffa · January 12, 2012, 12:54am

The library team recommends to double check that the input matrix satisfies the requirements of the CUSPARSE library:

“Sparse matrices are assumed to be stored in row-major COO format, in other words, the index arrays are first sorted by row indices and then within the same row by column indices. Also it is assumed that each pair of row and column indices appears only once.” (see the documentation section on coordinate format for details).

If the matrix satisfies the requirements but conversion still fails, I would suggest filing a bug. In that case, the library team would prefer that the matrix be attached as a file in matrix market format (Matrix Market: File Formats).

njuffa · January 12, 2012, 12:54am

The library team recommends to double check that the input matrix satisfies the requirements of the CUSPARSE library:

“Sparse matrices are assumed to be stored in row-major COO format, in other words, the index arrays are first sorted by row indices and then within the same row by column indices. Also it is assumed that each pair of row and column indices appears only once.” (see the documentation section on coordinate format for details).

If the matrix satisfies the requirements but conversion still fails, I would suggest filing a bug. In that case, the library team would prefer that the matrix be attached as a file in matrix market format (Matrix Market: File Formats).

kslimes · January 12, 2012, 3:09pm

I’ve looked at the matrix again - it is sorted by row, but within a row the elements are not column-ordered. Could this be enough to cause the function to fail? Here’s what the matrix looks like:

(0,0) - 4

(0,4) - 0.5

(0,11) - 0.5

(0,46) - 0.5

(0,165) - 0.5

(0,502) - 0.5

(0,1293) - 0.5

(0,3008) - 0.5

(0,6439) - 0.5

(1,1) - 2

(1,5) - 0.5

(1,18) - 0.5

(1,2) - 0.5

(1,53) - 0.5

(1,4) - 0.5

(1,9) - 0.5

(1,495) - 0.5

(1,165) - 0.5

(1,509) - 0.5

(1,1300) - 0.5

(1,3015) - 0.5

(1,6446) - 0.5

Where I’ve written the matrix as (row, col) - value. I’ve gone through and ensured that (row,col) pairs are not repeated. Would writing the matrix like this prevent Xcoo2csr from working?

Edit: the problem persists when I change my earlier code so that the matrix looks like:

(0,0) - 4

(0,4) - 0.5

(0,11) - 0.5

(0,46) - 0.5

(0,165) - 0.5

(0,502) - 0.5

(0,1293) - 0.5

(0,3008) - 0.5

(0,6439) - 0.5

(1,1) - 2

(1,2) - 0.5

(1,4) - 0.5

(1,5) - 0.5

(1,9) - 0.5

(1,18) - 0.5

(1,53) - 0.5

(1,165) - 0.5

(1,495) - 0.5

(1,509) - 0.5

(1,1300) - 0.5

(1,3015) - 0.5

(1,6446) - 0.5

Now I get a segfault instead of EXECUTION_FAILED. As inputs to the function, I’m passing:

Num_elem: 232518

Dim: 12870

row indices: 0x2027a0000

row pointers: 0x200800000

The number of elements and matrix dimension are both correct.

kslimes · January 12, 2012, 3:09pm

I’ve looked at the matrix again - it is sorted by row, but within a row the elements are not column-ordered. Could this be enough to cause the function to fail? Here’s what the matrix looks like:

(0,0) - 4

(0,4) - 0.5

(0,11) - 0.5

(0,46) - 0.5

(0,165) - 0.5

(0,502) - 0.5

(0,1293) - 0.5

(0,3008) - 0.5

(0,6439) - 0.5

(1,1) - 2

(1,5) - 0.5

(1,18) - 0.5

(1,2) - 0.5

(1,53) - 0.5

(1,4) - 0.5

(1,9) - 0.5

(1,495) - 0.5

(1,165) - 0.5

(1,509) - 0.5

(1,1300) - 0.5

(1,3015) - 0.5

(1,6446) - 0.5

Where I’ve written the matrix as (row, col) - value. I’ve gone through and ensured that (row,col) pairs are not repeated. Would writing the matrix like this prevent Xcoo2csr from working?

Edit: the problem persists when I change my earlier code so that the matrix looks like:

(0,0) - 4

(0,4) - 0.5

(0,11) - 0.5

(0,46) - 0.5

(0,165) - 0.5

(0,502) - 0.5

(0,1293) - 0.5

(0,3008) - 0.5

(0,6439) - 0.5

(1,1) - 2

(1,2) - 0.5

(1,4) - 0.5

(1,5) - 0.5

(1,9) - 0.5

(1,18) - 0.5

(1,53) - 0.5

(1,165) - 0.5

(1,495) - 0.5

(1,509) - 0.5

(1,1300) - 0.5

(1,3015) - 0.5

(1,6446) - 0.5

Now I get a segfault instead of EXECUTION_FAILED. As inputs to the function, I’m passing:

Num_elem: 232518

Dim: 12870

row indices: 0x2027a0000

row pointers: 0x200800000

The number of elements and matrix dimension are both correct.

njuffa · January 12, 2012, 4:31pm

I have never used CUSPARSE. Violating the matrix layout requirements presumably causes some sort of error status to be returned by CUSPARSE, but I don’t know what the “correct” error status is.

The fact that you are now getting a segfault suggests that there may be a problem of a different kind, namely that somewhere in your code host and device pointers are getting mixed up. Passing a host pointer instead of a device pointer to a kernel would cause it to fail (the kernel experiences the equivalent of a segfault), which is the problem you encountered initially. Likewise, de-referencing a device pointer in host code would cause a segfault, which is what you are seeing now.

njuffa · January 12, 2012, 4:31pm

I have never used CUSPARSE. Violating the matrix layout requirements presumably causes some sort of error status to be returned by CUSPARSE, but I don’t know what the “correct” error status is.

The fact that you are now getting a segfault suggests that there may be a problem of a different kind, namely that somewhere in your code host and device pointers are getting mixed up. Passing a host pointer instead of a device pointer to a kernel would cause it to fail (the kernel experiences the equivalent of a segfault), which is the problem you encountered initially. Likewise, de-referencing a device pointer in host code would cause a segfault, which is what you are seeing now.

kslimes · January 12, 2012, 5:10pm

After some more testing, I think the real problem I was having was actually occurring later on. If I comment out a make_cuDoubleComplex, the conversion works. If not, it fails. It’s strange.

Thanks a lot for your help!

kslimes · January 12, 2012, 5:10pm

After some more testing, I think the real problem I was having was actually occurring later on. If I comment out a make_cuDoubleComplex, the conversion works. If not, it fails. It’s strange.

Thanks a lot for your help!

Topic		Replies	Views
cusparse coo2csr function hangs GPU-Accelerated Libraries	0	1406	July 16, 2013
CUSPARSE conversion routines not working... cusparseSnnz and cusparseSdense2csr misbehaving... CUDA Programming and Performance	11	4202	February 28, 2011
cusparse cusparseXcoo2csr CUDA Programming and Performance	1	3089	February 27, 2012
Problem in basic dense to csr format conversion using CUSPARSE GPU-Accelerated Libraries	3	946	July 28, 2015
Problem of two large sparse matrices multiplication in cuParse CUDA Programming and Performance	6	3733	November 21, 2016
cuSPARSE (cusparseXcoo2csr) problem GPU-Accelerated Libraries	0	882	March 10, 2015
Fortran CUSPARSE Bindings CUDA Programming and Performance	1	1990	May 22, 2012
Problem using cusparseScsric0 function GPU-Accelerated Libraries	22	6965	November 29, 2012
Problems allocating sparse Matrix CUDA Programming and Performance	3	2487	March 15, 2010
analysis on dense2csr created sparse matrix problem CUDA Programming and Performance	2	1363	May 4, 2011

Problem with cusparseXcoo2csr and cuda-gdb

Related topics