Got error “Address 0x4068 is out of bounds”, when matrix is large when using cusparseSpMV

The code is mostly from the [CUDALibrarySamples] [spmv_coo_example.c].
I changed the code to use data from “suitesparse matrix collection”, and data type from float to double.

$ nvidia-smi -L
GPU 0: Quadro T1000 (UUID: GPU-5ca07869-c0e5-2ead-481e-f1a16cdf92f7)
$ nvcc -V
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2024 NVIDIA Corporation
Built on Thu_Mar_28_02:18:24_PDT_2024
Cuda compilation tools, release 12.4, V12.4.131
Build cuda_12.4.r12.4/compiler.34097967_0

It is ok when i use 494_bus/494_bus.mtx(NNZ: 1080, M: 494, N: 494) and 662_bus/662_bus.mtx(NNZ: 1568, M: 662, N: 662), but when i changed to 1138_bus/1138_bus.mtx(NNZ: 2596, M: 1138, N: 1138), I got the following error:
compute-sanitizer ./spmv -f …/data/1138_bus/1138_bus.mtx
========= COMPUTE-SANITIZER
matrix: NNZ: 2596, M: 1138, N: 1138
buf size: 0
========= Invalid shared write of size 8 bytes
========= at void cusparse::coomv_kernel<(unsigned int)512, (unsigned int)4, (bool)0, int, double, double, double, double>(cusparse::KernelCoeffs, T4, const T4 *, const T4 *, const T5 *, const T6 *, T7 *, T4 *, T8 *)+0x1740
========= by thread (488,0,0) in block (0,0,0)
========= Address 0x4068 is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame: [0x3344df]
========= in /lib/x86_64-linux-gnu/libcuda.so.1
========= Host Frame: [0x9bf16d]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0xa228ad]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0x199030]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0x1b1726]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame:cusparseSpMV [0xfd71f]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame:spmv_csr in /home/gaoxing/Projects/spmv/cusparse/code/spmv.c:98 [0xbc4b]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:main in /home/gaoxing/Projects/spmv/cusparse/code/main.c:91 [0xb0ab]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:__libc_start_call_main in …/sysdeps/nptl/libc_start_call_main.h:58 [0x29d8f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:__libc_start_main in …/csu/libc-start.c:392 [0x29e3f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:_start [0xac44]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv

========= Invalid shared write of size 8 bytes
========= at void cusparse::coomv_kernel<(unsigned int)512, (unsigned int)4, (bool)0, int, double, double, double, double>(cusparse::KernelCoeffs, T4, const T4 *, const T4 *, const T5 *, const T6 *, T7 *, T4 *, T8 *)+0x1740
========= by thread (494,0,0) in block (0,0,0)
========= Address 0x4040 is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame: [0x3344df]
========= in /lib/x86_64-linux-gnu/libcuda.so.1
========= Host Frame: [0x9bf16d]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0xa228ad]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0x199030]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0x1b1726]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame:cusparseSpMV [0xfd71f]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame:spmv_csr in /home/gaoxing/Projects/spmv/cusparse/code/spmv.c:98 [0xbc4b]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:main in /home/gaoxing/Projects/spmv/cusparse/code/main.c:91 [0xb0ab]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:__libc_start_call_main in …/sysdeps/nptl/libc_start_call_main.h:58 [0x29d8f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:__libc_start_main in …/csu/libc-start.c:392 [0x29e3f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:_start [0xac44]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv

========= Invalid shared write of size 8 bytes
========= at void cusparse::coomv_kernel<(unsigned int)512, (unsigned int)4, (bool)0, int, double, double, double, double>(cusparse::KernelCoeffs, T4, const T4 *, const T4 *, const T5 *, const T6 *, T7 *, T4 *, T8 *)+0x1740
========= by thread (0,0,0) in block (1,0,0)
========= Address 0xffffff98 is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame: [0x3344df]
========= in /lib/x86_64-linux-gnu/libcuda.so.1
========= Host Frame: [0x9bf16d]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0xa228ad]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0x199030]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0x1b1726]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame:cusparseSpMV [0xfd71f]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame:spmv_csr in /home/gaoxing/Projects/spmv/cusparse/code/spmv.c:98 [0xbc4b]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:main in /home/gaoxing/Projects/spmv/cusparse/code/main.c:91 [0xb0ab]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:__libc_start_call_main in …/sysdeps/nptl/libc_start_call_main.h:58 [0x29d8f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:__libc_start_main in …/csu/libc-start.c:392 [0x29e3f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:_start [0xac44]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv

========= Invalid shared write of size 8 bytes
========= at void cusparse::coomv_kernel<(unsigned int)512, (unsigned int)4, (bool)0, int, double, double, double, double>(cusparse::KernelCoeffs, T4, const T4 *, const T4 *, const T5 *, const T6 *, T7 *, T4 *, T8 *)+0x1740
========= by thread (1,0,0) in block (1,0,0)
========= Address 0xffffffb0 is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame: [0x3344df]
========= in /lib/x86_64-linux-gnu/libcuda.so.1
========= Host Frame: [0x9bf16d]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0xa228ad]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0x199030]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0x1b1726]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame:cusparseSpMV [0xfd71f]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame:spmv_csr in /home/gaoxing/Projects/spmv/cusparse/code/spmv.c:98 [0xbc4b]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:main in /home/gaoxing/Projects/spmv/cusparse/code/main.c:91 [0xb0ab]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:__libc_start_call_main in …/sysdeps/nptl/libc_start_call_main.h:58 [0x29d8f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:__libc_start_main in …/csu/libc-start.c:392 [0x29e3f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:_start [0xac44]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv

========= Invalid shared write of size 8 bytes
========= at void cusparse::coomv_kernel<(unsigned int)512, (unsigned int)4, (bool)0, int, double, double, double, double>(cusparse::KernelCoeffs, T4, const T4 *, const T4 *, const T5 *, const T6 *, T7 *, T4 *, T8 *)+0x1740
========= by thread (2,0,0) in block (1,0,0)
========= Address 0xffffffc0 is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame: [0x3344df]
========= in /lib/x86_64-linux-gnu/libcuda.so.1
========= Host Frame: [0x9bf16d]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0xa228ad]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0x199030]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0x1b1726]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame:cusparseSpMV [0xfd71f]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame:spmv_csr in /home/gaoxing/Projects/spmv/cusparse/code/spmv.c:98 [0xbc4b]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:main in /home/gaoxing/Projects/spmv/cusparse/code/main.c:91 [0xb0ab]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:__libc_start_call_main in …/sysdeps/nptl/libc_start_call_main.h:58 [0x29d8f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:__libc_start_main in …/csu/libc-start.c:392 [0x29e3f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:_start [0xac44]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv

========= Invalid shared write of size 8 bytes
========= at void cusparse::coomv_kernel<(unsigned int)512, (unsigned int)4, (bool)0, int, double, double, double, double>(cusparse::KernelCoeffs, T4, const T4 *, const T4 *, const T5 *, const T6 *, T7 *, T4 *, T8 *)+0x1740
========= by thread (3,0,0) in block (1,0,0)
========= Address 0xffffffe0 is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame: [0x3344df]
========= in /lib/x86_64-linux-gnu/libcuda.so.1
========= Host Frame: [0x9bf16d]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0xa228ad]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0x199030]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0x1b1726]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame:cusparseSpMV [0xfd71f]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame:spmv_csr in /home/gaoxing/Projects/spmv/cusparse/code/spmv.c:98 [0xbc4b]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:main in /home/gaoxing/Projects/spmv/cusparse/code/main.c:91 [0xb0ab]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:__libc_start_call_main in …/sysdeps/nptl/libc_start_call_main.h:58 [0x29d8f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:__libc_start_main in …/csu/libc-start.c:392 [0x29e3f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:_start [0xac44]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv

========= Invalid shared write of size 8 bytes
========= at void cusparse::coomv_kernel<(unsigned int)512, (unsigned int)4, (bool)0, int, double, double, double, double>(cusparse::KernelCoeffs, T4, const T4 *, const T4 *, const T5 *, const T6 *, T7 *, T4 *, T8 *)+0x1740
========= by thread (4,0,0) in block (1,0,0)
========= Address 0xffffffd8 is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame: [0x3344df]
========= in /lib/x86_64-linux-gnu/libcuda.so.1
========= Host Frame: [0x9bf16d]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0xa228ad]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0x199030]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0x1b1726]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame:cusparseSpMV [0xfd71f]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame:spmv_csr in /home/gaoxing/Projects/spmv/cusparse/code/spmv.c:98 [0xbc4b]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:main in /home/gaoxing/Projects/spmv/cusparse/code/main.c:91 [0xb0ab]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:__libc_start_call_main in …/sysdeps/nptl/libc_start_call_main.h:58 [0x29d8f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:__libc_start_main in …/csu/libc-start.c:392 [0x29e3f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:_start [0xac44]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv

========= Invalid shared write of size 8 bytes
========= at void cusparse::coomv_kernel<(unsigned int)512, (unsigned int)4, (bool)0, int, double, double, double, double>(cusparse::KernelCoeffs, T4, const T4 *, const T4 *, const T5 *, const T6 *, T7 *, T4 *, T8 *)+0x1740
========= by thread (5,0,0) in block (1,0,0)
========= Address 0xffffffe0 is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame: [0x3344df]
========= in /lib/x86_64-linux-gnu/libcuda.so.1
========= Host Frame: [0x9bf16d]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0xa228ad]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0x199030]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0x1b1726]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame:cusparseSpMV [0xfd71f]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame:spmv_csr in /home/gaoxing/Projects/spmv/cusparse/code/spmv.c:98 [0xbc4b]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:main in /home/gaoxing/Projects/spmv/cusparse/code/main.c:91 [0xb0ab]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:__libc_start_call_main in …/sysdeps/nptl/libc_start_call_main.h:58 [0x29d8f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:__libc_start_main in …/csu/libc-start.c:392 [0x29e3f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:_start [0xac44]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv

========= Program hit cudaErrorLaunchFailure (error 719) due to “unspecified launch failure” on CUDA API call to cudaMemcpy.
========= Saved host backtrace up to driver entry point at error
========= Host Frame: [0x4808d5]
========= in /lib/x86_64-linux-gnu/libcuda.so.1
========= Host Frame:cudaMemcpy [0x71669]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:spmv_csr in /home/gaoxing/Projects/spmv/cusparse/code/spmv.c:110 [0xbe19]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:main in /home/gaoxing/Projects/spmv/cusparse/code/main.c:91 [0xb0ab]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:__libc_start_call_main in …/sysdeps/nptl/libc_start_call_main.h:58 [0x29d8f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:__libc_start_main in …/csu/libc-start.c:392 [0x29e3f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:_start [0xac44]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv

CUDA API failed at line 110 with error: unspecified launch failure (719)
========= Target application returned an error
========= ERROR SUMMARY: 9 errors

code:
int spmv_csr(const struct matrix_coo *mc, double hX, double hY)
{
// Host problem definition
int A_num_rows = mc->m;
int A_num_cols = mc->n;
int A_nnz = mc->nnz;
int hA_rows = mc->row_indices;
int hA_columns = mc->col_indices;
double hA_values = mc->values;
//double hX[] = { 1.0f, 2.0f, 3.0f, 4.0f };
//double hY[] = { 0.0f, 0.0f, 0.0f, 0.0f };
//double hY_result[] = { 19.0f, 8.0f, 51.0f, 52.0f };
double alpha = 1.0;
double beta = 0.0;
//--------------------------------------------------------------------------
// Device memory management
int dA_rows, dA_columns;
double dA_values, dX, dY;
CHECK_CUDA( cudaMalloc((void
) &dA_rows, A_nnz * sizeof(int)) )
CHECK_CUDA( cudaMalloc((void
) &dA_columns, A_nnz * sizeof(int)) )
CHECK_CUDA( cudaMalloc((void
) &dA_values, A_nnz * sizeof(double)) )
CHECK_CUDA( cudaMalloc((void
) &dX, A_num_cols * sizeof(double)) )
CHECK_CUDA( cudaMalloc((void
) &dY, A_num_rows * sizeof(double)) )

CHECK_CUDA( cudaMemcpy(dA_rows, hA_rows, A_nnz * sizeof(int),
                       cudaMemcpyHostToDevice) )
CHECK_CUDA( cudaMemcpy(dA_columns, hA_columns, A_nnz * sizeof(int),
                       cudaMemcpyHostToDevice) )
CHECK_CUDA( cudaMemcpy(dA_values, hA_values, A_nnz * sizeof(double),
                       cudaMemcpyHostToDevice) )
CHECK_CUDA( cudaMemcpy(dX, hX, A_num_cols * sizeof(double),
                       cudaMemcpyHostToDevice) )
CHECK_CUDA( cudaMemcpy(dY, hY, A_num_rows * sizeof(double),
                       cudaMemcpyHostToDevice) )
//--------------------------------------------------------------------------
// CUSPARSE APIs
cusparseHandle_t     handle = NULL;
cusparseSpMatDescr_t matA;
cusparseDnVecDescr_t vecX, vecY;
void*                dBuffer    = NULL;
size_t               bufferSize = 0;
CHECK_CUSPARSE( cusparseCreate(&handle) )
// Create sparse matrix A in CSR format
CHECK_CUSPARSE( cusparseCreateCoo(&matA, A_num_rows, A_num_cols, A_nnz,
                                  dA_rows, dA_columns, dA_values,
                                  CUSPARSE_INDEX_32I,
                                  CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F) )
// Create dense vector X
CHECK_CUSPARSE( cusparseCreateDnVec(&vecX, A_num_cols, dX, CUDA_R_64F) )
// Create dense vector y
CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, A_num_rows, dY, CUDA_R_64F) )
// allocate an external buffer if needed
CHECK_CUSPARSE( cusparseSpMV_bufferSize(
                             handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
                             &alpha, matA, vecX, &beta, vecY, CUDA_R_64F,
                             CUSPARSE_SPMV_ALG_DEFAULT, &bufferSize) )
printf("buf size: %ld\n", bufferSize);
CHECK_CUDA( cudaMalloc(&dBuffer, bufferSize) )

// execute SpMV
//*
CHECK_CUSPARSE( cusparseSpMV(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
                             &alpha, matA, vecX, &beta, vecY, CUDA_R_64F,
                             CUSPARSE_SPMV_ALG_DEFAULT, dBuffer) )
//*/
// destroy matrix/vector descriptors
CHECK_CUSPARSE( cusparseDestroySpMat(matA) )
CHECK_CUSPARSE( cusparseDestroyDnVec(vecX) )
CHECK_CUSPARSE( cusparseDestroyDnVec(vecY) )
CHECK_CUSPARSE( cusparseDestroy(handle) )

//--------------------------------------------------------------------------
// device result check
CHECK_CUDA( cudaMemcpy(hY, dY, A_num_rows * sizeof(double),
                       cudaMemcpyDeviceToHost) )
                       /*
int correct = 1;
for (int i = 0; i < A_num_rows; i++) {
    if (hY[i] != hY_result[i]) { // direct doubleing point comparison is not
        correct = 0;             // reliable
        break;
    }
}
if (correct)
    printf("spmv_coo_example test PASSED\n");
else
    printf("spmv_coo_example test FAILED: wrong result\n");
*/
//--------------------------------------------------------------------------
// device memory deallocation
CHECK_CUDA( cudaFree(dBuffer) )
CHECK_CUDA( cudaFree(dA_rows) )
CHECK_CUDA( cudaFree(dA_columns) )
CHECK_CUDA( cudaFree(dA_values) )
CHECK_CUDA( cudaFree(dX) )
CHECK_CUDA( cudaFree(dY) )
return EXIT_SUCCESS;

}

code has been uploaded to github. https://github.com/yolaiyoqu2012/spmv
you can “make” and run to reproduce the crash.

Any help will be greatly appreciated.

Hi @yolaiyoqu2008,
The error you get is because the COO row indices are not sorted. Based on cuSPARSE, the COO format is assumed to be sorted by row. Both sorted and unsorted column indices within a given row are supported.

Notice that the col indices are sorted. I suggest switching the row and col indices, if no specific problem is intended in this sample code or the matrix is symmetric.

Thanks

Great! I’ve tried your tips and it works. Thanks a lot.

You are so welcome