I’m trying to convert a COO matrix with ~200,000 elements to CSR format. The code I use is:
#include<cmath>
#include<cstdlib>
#include<iomanip>
#include<iostream>
#include<vector>
#include"hamiltonian.h" //my header for code which makes the matrix
#include"cuda.h"
#include"cuda_runtime.h"
#include"cublas_v2.h"
#include"cusparse_v2.h"
#include"cuComplex.h"
using namespace std;
// Convert each sector Hamiltonian from COO to CSR row pointers on the GPU.
// how_many  : number of symmetry sectors / matrices to process
// num_Elem  : host array, num_Elem[i] = number of nonzeros in matrix i
// Hamiltonian: per-sector struct holding device COO arrays and sectordim
// max_Iter, num_Eig, conv_req: Lanczos parameters (not used in this stage)
__host__ void lanczos(const int how_many, const int* num_Elem, d_hamiltonian*& Hamiltonian, int max_Iter, const int num_Eig, const double conv_req)
{
    // Pull the dimension of each sector matrix out of the custom struct.
    // std::vector replaces the original malloc + variable-length-array mix:
    // runtime-sized stack arrays (e.g. `cublasStatus_t cublas_status[how_many]`)
    // are a non-standard GNU extension in C++, and the malloc'd buffers were
    // never freed. (The unused `cudaStream_t stream[how_many]` was dropped.)
    std::vector<int> dim(how_many);
    for (int i = 0; i < how_many; i++)
    {
        dim[i] = Hamiltonian[i].sectordim;
    }

    std::vector<cublasStatus_t> cublas_status(how_many);
    cublasHandle_t linalghandle;
    cublas_status[0] = cublasCreate(&linalghandle);
    if (cublas_status[0] != CUBLAS_STATUS_SUCCESS)
    {
        std::cout<<"Initializing CUBLAS failed! Error: "<<cublas_status[0]<<std::endl;
    }

    cusparseHandle_t sparsehandle;
    std::vector<cusparseStatus_t> cusparse_status(how_many);
    cusparse_status[0] = cusparseCreate(&sparsehandle);
    if (cusparse_status[0] != CUSPARSE_STATUS_SUCCESS)
    {
        std::cout<<"Failed to initialize CUSPARSE! Error: "<<cusparse_status[0]<<std::endl;
    }

    // One general, zero-indexed matrix description per sector.
    std::vector<cusparseMatDescr_t> H_descr(how_many);
    for (int i = 0; i < how_many; i++)
    {
        cusparse_status[i] = cusparseCreateMatDescr(&H_descr[i]);
        if (cusparse_status[i] != CUSPARSE_STATUS_SUCCESS)
        {
            std::cout<<"Error creating matrix description: "<<cusparse_status[i]<<std::endl;
        }
        cusparse_status[i] = cusparseSetMatType(H_descr[i], CUSPARSE_MATRIX_TYPE_GENERAL);
        if (cusparse_status[i] != CUSPARSE_STATUS_SUCCESS)
        {
            std::cout<<"Error setting matrix type: "<<cusparse_status[i]<<std::endl;
        }
        cusparse_status[i] = cusparseSetMatIndexBase(H_descr[i], CUSPARSE_INDEX_BASE_ZERO);
        if (cusparse_status[i] != CUSPARSE_STATUS_SUCCESS)
        {
            std::cout<<"Error setting matrix index base: "<<cusparse_status[i]<<std::endl;
        }
    }

    std::vector<cudaError_t> status(how_many);
    cout<<"Done creating descriptions"<<endl;

    std::vector<int*> d_H_rowptrs(how_many, (int*)0);
    for (int i = 0; i < how_many; i++)
    {
        // CSR row-pointer array has dim+1 entries.
        status[i] = cudaMalloc(&d_H_rowptrs[i], (dim[i] + 1)*sizeof(int));
        // Bug fix: the runtime API returns cudaSuccess (cudaError_t); the
        // original compared against CUDA_SUCCESS, the driver API's CUresult
        // enumerator — same value, but the wrong enum.
        if (status[i] != cudaSuccess)
        {
            std::cout<<"Error allocating d_H_rowptrs: "<<cudaGetErrorString(status[i])<<std::endl;
        }
        // cusparseXcoo2csr requires DEVICE pointers: Hamiltonian[i].rows must
        // be num_Elem[i] row indices in GPU memory, sorted by row, and
        // num_Elem[i] itself must be readable on the host.
        // NOTE(review): if num_Elem is actually a device pointer, indexing it
        // here reads garbage and would explain CUSPARSE_STATUS_EXECUTION_FAILED
        // — verify against the caller.
        cusparse_status[i] = cusparseXcoo2csr(sparsehandle, Hamiltonian[i].rows, num_Elem[i], dim[i], d_H_rowptrs[i], CUSPARSE_INDEX_BASE_ZERO);
        if (cusparse_status[i] != CUSPARSE_STATUS_SUCCESS)
        {
            std::cout<<"Error converting to CSR: "<<cusparse_status[i]<<std::endl;
        }
        // The conversion launches asynchronously; synchronize so any
        // execution error surfaces here rather than at a later, unrelated call.
        status[i] = cudaDeviceSynchronize();
        if (status[i] != cudaSuccess)
        {
            std::cout<<"Error in coo2csr conversion: "<<cudaGetErrorString(status[i])<<std::endl;
        }
    }
}
When I try to run this, I get a CUSPARSE_STATUS_EXECUTION_FAILED error. This function is part of a larger project, but I am sure the matrix I am passing to it is correct. I compiled it using nvcc -w -g -G -gencode arch=compute_20,code=sm_21 -lcublas -lcusparse lanczos.cu
When I try to examine the problem in cuda-gdb, I get
warning: no loadable sections found in added symbol-file /tmp/cuda-dbg/27861/elf.76ffb0.8895a0.o.FuXAwf
warning: no loadable sections found in added symbol-file /tmp/cuda-dbg/27861/elf.76ffb0.88bb50.o.t0xXIx
warning: no loadable sections found in added symbol-file /tmp/cuda-dbg/27861/elf.76ffb0.a70a60.o.ugEDcJ
warning: no loadable sections found in added symbol-file /tmp/cuda-dbg/27861/elf.76ffb0.b3e0d0.o.imoyDd
I’m running Ubuntu 10.10 x86_64 with CUDA 4.1 and kernel 2.6.35-31-generic. I’m trying to test this on a GeForce GTX 560 Ti.
Any idea what’s wrong?