I am quite new to CUDA, and I am interested in using its sparse solver for a project.
From the documentation I understand that I need to convert my COO-formatted sparse matrices to CSR format for use in the sparse solver, so I am using the cusparseXcoo2csr function supplied by the cuSPARSE library:
// Prototype of the cuSPARSE conversion routine as quoted in this post
// (declaration only; the implementation lives in the library).
// Returns a cusparseStatus_t that callers should inspect.
cusparseStatus_t
cusparseXcoo2csr(cusparseHandle_t handle,
// Read-only array of COO row indices; the snippets below pass nnz entries.
const int* cooRowInd,
// Number of entries in cooRowInd (9 in the snippets below).
int nnz,
// The snippets below pass n = 4 here — presumably the number of matrix rows; confirm against the cuSPARSE docs.
int m,
// Result array; the snippets below size it as (m + 1) ints and read the converted row pointer from it.
int* csrRowPtr,
// Index base selector; the snippets below pass CUSPARSE_INDEX_BASE_ZERO.
cusparseIndexBase_t idxBase)
From testing it out, it seems to work when the cooRowInd and csrRowPtr parameters are in device memory:
// Working case: the COO row indices and the CSR row pointer both live in
// device memory when cusparseXcoo2csr is called; the result is then copied
// back to the host for inspection.
// Device buffer that receives the CSR row pointer (allocated below).
int* csrRowPtr =0;
// Number of entries in temp_coo_ind.
int nnz = 9;
// Passed as the `m` argument of cusparseXcoo2csr; csrRowPtr holds n+1 ints.
int n = 4;
// Device copy of the COO row indices (allocated below).
int* cooRowIndex = 0;
// Host-side COO row indices: rows 0..3 hold 3, 1, 3 and 2 entries respectively.
std::vector<int> temp_coo_ind = {0,0,0,1,2,2,2,3,3};
// NOTE(review): cudaStat1 is overwritten by each call and never checked in this snippet.
cudaError_t cudaStat1 = cudaMalloc ((void**)&cooRowIndex , nnz * sizeof(cooRowIndex[0]));
// Upload the row indices to the device buffer.
cudaStat1 = cudaMemcpy (cooRowIndex , temp_coo_ind.data() ,
(size_t) (nnz * sizeof (cooRowIndex[0])) , cudaMemcpyHostToDevice ) ;
cudaStat1 = cudaMalloc ((void**) &csrRowPtr , (n+1) * sizeof (csrRowPtr[ 0 ])) ;
// Host staging buffer for the result; not freed within this snippet.
int* h_csrRowPtr = (int*)malloc((n+1)*sizeof(int));
// NOTE(review): `status` is never inspected in this snippet.
auto status = cusparseXcoo2csr(cusparseHandle, /* used in residual evaluation */
cooRowIndex, //on device memory
nnz,
n,
csrRowPtr, //on device memory
CUSPARSE_INDEX_BASE_ZERO );
// Copy the converted row pointer back to the host (return value is discarded here).
cudaMemcpy (h_csrRowPtr , csrRowPtr ,
(size_t) ((n+1) * sizeof (h_csrRowPtr[0])) , cudaMemcpyDeviceToHost );
std::vector<int> vec_csrRow(h_csrRowPtr,h_csrRowPtr+n+1); // vec_csrRow = [0,3,4,7,9] - OK! (row counts 3,1,3,2 accumulated)
However, when the cooRowInd and csrRowPtr parameters are in host memory, csrRowPtr remains unchanged:
// Failing case: cusparseXcoo2csr is called with host pointers for both the
// COO row indices and the CSR row pointer.
// NOTE(review): the cuSPARSE documentation appears to require both arrays to
// be device pointers — confirm; that would explain why the host output buffer
// is left untouched. Check `status` to see what the library reports.
// Device buffer for the CSR row pointer; allocated below but not passed to the call.
int* csrRowPtr =0;
// Number of entries in temp_coo_ind.
int nnz = 9;
// Passed as the `m` argument of cusparseXcoo2csr; the output buffers hold n+1 ints.
int n = 4;
// Device copy of the COO row indices; filled below but not passed to the call.
int* cooRowIndex = 0;
// Host-side COO row indices: rows 0..3 hold 3, 1, 3 and 2 entries respectively.
std::vector<int> temp_coo_ind = {0,0,0,1,2,2,2,3,3};
// NOTE(review): cudaStat1 is overwritten by each call and never checked in this snippet.
cudaError_t cudaStat1 = cudaMalloc ((void**)&cooRowIndex , nnz * sizeof(cooRowIndex[0]));
cudaStat1 = cudaMemcpy (cooRowIndex , temp_coo_ind.data() ,
(size_t) (nnz * sizeof (cooRowIndex[0])) , cudaMemcpyHostToDevice ) ;
cudaStat1 = cudaMalloc ((void**) &csrRowPtr , (n+1) * sizeof (csrRowPtr[ 0 ])) ;
// Host output buffer; malloc leaves its contents uninitialised.
int* h_csrRowPtr = (int*)malloc((n+1)*sizeof(int));
// NOTE(review): `status` is never inspected, so any error reported by the call goes unnoticed.
auto status = cusparseXcoo2csr(cusparseHandle, /* used in residual evaluation */
temp_coo_ind.data(), //on host memory
nnz,
n,
h_csrRowPtr, //on host memory
CUSPARSE_INDEX_BASE_ZERO );
// If the call did not write to h_csrRowPtr, this vector holds the uninitialised malloc contents.
std::vector<int> vec_csrRow(h_csrRowPtr,h_csrRowPtr+n+1); // vec_csrRow != [0,3,4,7,9]
Why is this? What am I doing wrong here, and how can I get the conversion to work in host memory? Thanks!