Hello
I am using this function inside a larger project, so it doesn't make sense to post that code here. Instead, here is a small test program I wrote that reproduces the same problem:
#include <assert.h>
#include <cstdlib>
#include <iostream>

#include <cuda_runtime.h>
#include <cusparse.h>

using namespace std;
/*
The 5x5 sparse matrix A (stored below in zero-based CSR form) is
1 0 2 0 3
0 4 0 5 0
0 0 6 0 0
0 7 0 8 0
9 0 10 0 11
the vector x is
1
1
1
1
1
*/
/**
 * Writes `value` into rowVector_d[0 .. num_elements-1].
 *
 * Launch layout: 1D grid of 1D blocks. Each thread writes at most one
 * element (index = threadIdx.x + blockIdx.x * blockDim.x), so the launch must
 * provide at least num_elements threads in total for every entry to be set.
 * Threads whose index is >= num_elements do nothing.
 *
 * @param rowVector_d   buffer written by the kernel (must be addressable from
 *                      device code and hold at least num_elements floats)
 * @param value         value stored into every element
 * @param num_elements  number of leading elements to fill
 */
__global__ void d_set_value(float* rowVector_d , float value, int num_elements){
    int i = threadIdx.x + blockIdx.x*blockDim.x;
    // Bounds guard: the grid is normally rounded up past num_elements.
    if (i<num_elements)
        rowVector_d[i] = value;
}
// Prints a diagnostic and terminates the process if a CUDA runtime call failed.
static void checkCuda(cudaError_t status, const char* what)
{
    if (status != cudaSuccess) {
        std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
        std::exit(EXIT_FAILURE);
    }
}

// Prints a diagnostic and terminates the process if a cuSPARSE call failed.
static void checkCusparse(cusparseStatus_t status, const char* what)
{
    if (status != CUSPARSE_STATUS_SUCCESS) {
        std::cerr << what << " failed with cusparseStatus_t " << status << std::endl;
        std::exit(EXIT_FAILURE);
    }
}

/*
 * Test driver: uploads a 5x5 matrix A in zero-based CSR form, fills a
 * length-5 vector x with ones on the device, computes y = A^T * x with
 * cuSPARSE and prints y, one "index: value" line per entry.
 *
 * For this matrix and x = ones, A^T * x is the vector of column sums of A:
 * 10, 11, 18, 13, 14.
 *
 * NOTE(review): the original code called cusparseScsrmv_mp with
 * CUSPARSE_OPERATION_TRANSPOSE. The legacy cuSPARSE documentation lists only
 * CUSPARSE_OPERATION_NON_TRANSPOSE as supported for the _mp variant, so that
 * call is expected to be rejected. cusparseScsrmv takes the same arguments
 * and supports the transpose operation, so it is used here instead.
 *
 * Returns 0 on success; exits with EXIT_FAILURE on the first failing
 * CUDA / cuSPARSE call.
 */
int main(int argc,char **argv)
{
    (void)argc;
    (void)argv;

    const int numRows = 5;   // m: rows of A
    const int numCols = 5;   // n: columns of A
    const int nnz     = 11;  // stored non-zeros of A

    // Host-side CSR description of A. Values are 1..11 in row-major order.
    float h_val[nnz];
    for (int i = 0; i < nnz; i++) h_val[i] = i + 1;
    const int h_col[nnz]         = {0, 2, 4, 1, 3, 2, 1, 3, 0, 2, 4};
    const int h_row[numRows + 1] = {0, 3, 5, 6, 8, 11};

    // Device copies of the CSR arrays.
    float* csrval_d = NULL;
    int*   csrcol_d = NULL;
    int*   csrrow_d = NULL;
    checkCuda(cudaMalloc((void**)&csrval_d, nnz * sizeof(float)), "cudaMalloc(csrval_d)");
    checkCuda(cudaMalloc((void**)&csrrow_d, (numRows + 1) * sizeof(int)), "cudaMalloc(csrrow_d)");
    checkCuda(cudaMalloc((void**)&csrcol_d, nnz * sizeof(int)), "cudaMalloc(csrcol_d)");
    checkCuda(cudaMemcpy(csrrow_d, h_row, (numRows + 1) * sizeof(int), cudaMemcpyHostToDevice),
              "cudaMemcpy(csrrow_d)");
    checkCuda(cudaMemcpy(csrcol_d, h_col, nnz * sizeof(int), cudaMemcpyHostToDevice),
              "cudaMemcpy(csrcol_d)");
    checkCuda(cudaMemcpy(csrval_d, h_val, nnz * sizeof(float), cudaMemcpyHostToDevice),
              "cudaMemcpy(csrval_d)");

    // Input vector x. With op(A) = A^T it has one entry per row of A.
    float* rowvector_d = NULL;
    checkCuda(cudaMalloc((void**)&rowvector_d, numRows * sizeof(float)), "cudaMalloc(rowvector_d)");

    // Size the grid from the vector length (the original used the nnz count).
    const int threadsPerBlock = 256;
    const int numBlocks = (numRows + threadsPerBlock - 1) / threadsPerBlock;
    d_set_value<<<numBlocks, threadsPerBlock>>>(rowvector_d, 1.0f, numRows);
    // Kernel launches return no status; launch errors surface via cudaGetLastError().
    checkCuda(cudaGetLastError(), "d_set_value launch");

    // cuSPARSE setup: general matrix, zero-based indices.
    cusparseMatDescr_t descrA;
    checkCusparse(cusparseCreateMatDescr(&descrA), "cusparseCreateMatDescr");
    checkCusparse(cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL), "cusparseSetMatType");
    checkCusparse(cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ZERO),
                  "cusparseSetMatIndexBase");
    cusparseHandle_t cusparseHandle;
    checkCusparse(cusparseCreate(&cusparseHandle), "cusparseCreate");

    const float alpha = 1.0f;
    const float beta  = 0.0f;

    // Output vector y. With op(A) = A^T it has one entry per column of A.
    float* norm_d = NULL;
    checkCuda(cudaMalloc((void**)&norm_d, numCols * sizeof(float)), "cudaMalloc(norm_d)");
    checkCuda(cudaMemset(norm_d, 0, numCols * sizeof(float)), "cudaMemset(norm_d)");

    // y = alpha * A^T * x + beta * y
    checkCusparse(cusparseScsrmv(cusparseHandle, CUSPARSE_OPERATION_TRANSPOSE,
                                 numRows, numCols, nnz, &alpha, descrA, csrval_d,
                                 csrrow_d, csrcol_d, rowvector_d, &beta, norm_d),
                  "cusparseScsrmv");

    // Blocking copy into a dedicated result buffer (the original overwrote csrval).
    float h_result[numCols];
    checkCuda(cudaMemcpy(h_result, norm_d, numCols * sizeof(float), cudaMemcpyDeviceToHost),
              "cudaMemcpy(norm_d)");
    for (int i = 0; i < numCols; i++) std::cout << i << ": " << h_result[i] << std::endl;

    // Release everything that was acquired above.
    checkCusparse(cusparseDestroyMatDescr(descrA), "cusparseDestroyMatDescr");
    checkCusparse(cusparseDestroy(cusparseHandle), "cusparseDestroy");
    checkCuda(cudaFree(norm_d), "cudaFree(norm_d)");
    checkCuda(cudaFree(rowvector_d), "cudaFree(rowvector_d)");
    checkCuda(cudaFree(csrcol_d), "cudaFree(csrcol_d)");
    checkCuda(cudaFree(csrrow_d), "cudaFree(csrrow_d)");
    checkCuda(cudaFree(csrval_d), "cudaFree(csrval_d)");
    return 0;
}
I hope this helps.
Thanks