The code is mostly from the [CUDALibrarySamples] [spmv_coo_example.c].
I changed the code to use data from “suitesparse matrix collection”, and data type from float to double.
$ nvidia-smi -L
GPU 0: Quadro T1000 (UUID: GPU-5ca07869-c0e5-2ead-481e-f1a16cdf92f7)
$ nvcc -V
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2024 NVIDIA Corporation
Built on Thu_Mar_28_02:18:24_PDT_2024
Cuda compilation tools, release 12.4, V12.4.131
Build cuda_12.4.r12.4/compiler.34097967_0
It is ok when i use 494_bus/494_bus.mtx(NNZ: 1080, M: 494, N: 494) and 662_bus/662_bus.mtx(NNZ: 1568, M: 662, N: 662), but when i changed to 1138_bus/1138_bus.mtx(NNZ: 2596, M: 1138, N: 1138), I got the following error:
compute-sanitizer ./spmv -f …/data/1138_bus/1138_bus.mtx
========= COMPUTE-SANITIZER
matrix: NNZ: 2596, M: 1138, N: 1138
buf size: 0
========= Invalid shared write of size 8 bytes
========= at void cusparse::coomv_kernel<(unsigned int)512, (unsigned int)4, (bool)0, int, double, double, double, double>(cusparse::KernelCoeffs, T4, const T4 *, const T4 *, const T5 *, const T6 *, T7 *, T4 *, T8 *)+0x1740
========= by thread (488,0,0) in block (0,0,0)
========= Address 0x4068 is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame: [0x3344df]
========= in /lib/x86_64-linux-gnu/libcuda.so.1
========= Host Frame: [0x9bf16d]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0xa228ad]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0x199030]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0x1b1726]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame:cusparseSpMV [0xfd71f]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame:spmv_csr in /home/gaoxing/Projects/spmv/cusparse/code/spmv.c:98 [0xbc4b]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:main in /home/gaoxing/Projects/spmv/cusparse/code/main.c:91 [0xb0ab]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:__libc_start_call_main in …/sysdeps/nptl/libc_start_call_main.h:58 [0x29d8f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:__libc_start_main in …/csu/libc-start.c:392 [0x29e3f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:_start [0xac44]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Invalid shared write of size 8 bytes
========= at void cusparse::coomv_kernel<(unsigned int)512, (unsigned int)4, (bool)0, int, double, double, double, double>(cusparse::KernelCoeffs, T4, const T4 *, const T4 *, const T5 *, const T6 *, T7 *, T4 *, T8 *)+0x1740
========= by thread (494,0,0) in block (0,0,0)
========= Address 0x4040 is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame: [0x3344df]
========= in /lib/x86_64-linux-gnu/libcuda.so.1
========= Host Frame: [0x9bf16d]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0xa228ad]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0x199030]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0x1b1726]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame:cusparseSpMV [0xfd71f]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame:spmv_csr in /home/gaoxing/Projects/spmv/cusparse/code/spmv.c:98 [0xbc4b]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:main in /home/gaoxing/Projects/spmv/cusparse/code/main.c:91 [0xb0ab]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:__libc_start_call_main in …/sysdeps/nptl/libc_start_call_main.h:58 [0x29d8f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:__libc_start_main in …/csu/libc-start.c:392 [0x29e3f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:_start [0xac44]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Invalid shared write of size 8 bytes
========= at void cusparse::coomv_kernel<(unsigned int)512, (unsigned int)4, (bool)0, int, double, double, double, double>(cusparse::KernelCoeffs, T4, const T4 *, const T4 *, const T5 *, const T6 *, T7 *, T4 *, T8 *)+0x1740
========= by thread (0,0,0) in block (1,0,0)
========= Address 0xffffff98 is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame: [0x3344df]
========= in /lib/x86_64-linux-gnu/libcuda.so.1
========= Host Frame: [0x9bf16d]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0xa228ad]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0x199030]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0x1b1726]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame:cusparseSpMV [0xfd71f]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame:spmv_csr in /home/gaoxing/Projects/spmv/cusparse/code/spmv.c:98 [0xbc4b]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:main in /home/gaoxing/Projects/spmv/cusparse/code/main.c:91 [0xb0ab]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:__libc_start_call_main in …/sysdeps/nptl/libc_start_call_main.h:58 [0x29d8f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:__libc_start_main in …/csu/libc-start.c:392 [0x29e3f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:_start [0xac44]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Invalid shared write of size 8 bytes
========= at void cusparse::coomv_kernel<(unsigned int)512, (unsigned int)4, (bool)0, int, double, double, double, double>(cusparse::KernelCoeffs, T4, const T4 *, const T4 *, const T5 *, const T6 *, T7 *, T4 *, T8 *)+0x1740
========= by thread (1,0,0) in block (1,0,0)
========= Address 0xffffffb0 is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame: [0x3344df]
========= in /lib/x86_64-linux-gnu/libcuda.so.1
========= Host Frame: [0x9bf16d]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0xa228ad]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0x199030]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0x1b1726]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame:cusparseSpMV [0xfd71f]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame:spmv_csr in /home/gaoxing/Projects/spmv/cusparse/code/spmv.c:98 [0xbc4b]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:main in /home/gaoxing/Projects/spmv/cusparse/code/main.c:91 [0xb0ab]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:__libc_start_call_main in …/sysdeps/nptl/libc_start_call_main.h:58 [0x29d8f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:__libc_start_main in …/csu/libc-start.c:392 [0x29e3f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:_start [0xac44]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Invalid shared write of size 8 bytes
========= at void cusparse::coomv_kernel<(unsigned int)512, (unsigned int)4, (bool)0, int, double, double, double, double>(cusparse::KernelCoeffs, T4, const T4 *, const T4 *, const T5 *, const T6 *, T7 *, T4 *, T8 *)+0x1740
========= by thread (2,0,0) in block (1,0,0)
========= Address 0xffffffc0 is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame: [0x3344df]
========= in /lib/x86_64-linux-gnu/libcuda.so.1
========= Host Frame: [0x9bf16d]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0xa228ad]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0x199030]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0x1b1726]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame:cusparseSpMV [0xfd71f]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame:spmv_csr in /home/gaoxing/Projects/spmv/cusparse/code/spmv.c:98 [0xbc4b]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:main in /home/gaoxing/Projects/spmv/cusparse/code/main.c:91 [0xb0ab]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:__libc_start_call_main in …/sysdeps/nptl/libc_start_call_main.h:58 [0x29d8f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:__libc_start_main in …/csu/libc-start.c:392 [0x29e3f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:_start [0xac44]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Invalid shared write of size 8 bytes
========= at void cusparse::coomv_kernel<(unsigned int)512, (unsigned int)4, (bool)0, int, double, double, double, double>(cusparse::KernelCoeffs, T4, const T4 *, const T4 *, const T5 *, const T6 *, T7 *, T4 *, T8 *)+0x1740
========= by thread (3,0,0) in block (1,0,0)
========= Address 0xffffffe0 is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame: [0x3344df]
========= in /lib/x86_64-linux-gnu/libcuda.so.1
========= Host Frame: [0x9bf16d]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0xa228ad]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0x199030]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0x1b1726]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame:cusparseSpMV [0xfd71f]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame:spmv_csr in /home/gaoxing/Projects/spmv/cusparse/code/spmv.c:98 [0xbc4b]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:main in /home/gaoxing/Projects/spmv/cusparse/code/main.c:91 [0xb0ab]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:__libc_start_call_main in …/sysdeps/nptl/libc_start_call_main.h:58 [0x29d8f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:__libc_start_main in …/csu/libc-start.c:392 [0x29e3f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:_start [0xac44]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Invalid shared write of size 8 bytes
========= at void cusparse::coomv_kernel<(unsigned int)512, (unsigned int)4, (bool)0, int, double, double, double, double>(cusparse::KernelCoeffs, T4, const T4 *, const T4 *, const T5 *, const T6 *, T7 *, T4 *, T8 *)+0x1740
========= by thread (4,0,0) in block (1,0,0)
========= Address 0xffffffd8 is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame: [0x3344df]
========= in /lib/x86_64-linux-gnu/libcuda.so.1
========= Host Frame: [0x9bf16d]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0xa228ad]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0x199030]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0x1b1726]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame:cusparseSpMV [0xfd71f]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame:spmv_csr in /home/gaoxing/Projects/spmv/cusparse/code/spmv.c:98 [0xbc4b]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:main in /home/gaoxing/Projects/spmv/cusparse/code/main.c:91 [0xb0ab]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:__libc_start_call_main in …/sysdeps/nptl/libc_start_call_main.h:58 [0x29d8f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:__libc_start_main in …/csu/libc-start.c:392 [0x29e3f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:_start [0xac44]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Invalid shared write of size 8 bytes
========= at void cusparse::coomv_kernel<(unsigned int)512, (unsigned int)4, (bool)0, int, double, double, double, double>(cusparse::KernelCoeffs, T4, const T4 *, const T4 *, const T5 *, const T6 *, T7 *, T4 *, T8 *)+0x1740
========= by thread (5,0,0) in block (1,0,0)
========= Address 0xffffffe0 is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame: [0x3344df]
========= in /lib/x86_64-linux-gnu/libcuda.so.1
========= Host Frame: [0x9bf16d]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0xa228ad]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0x199030]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame: [0x1b1726]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame:cusparseSpMV [0xfd71f]
========= in /usr/local/cuda/targets/x86_64-linux/lib/libcusparse.so.12
========= Host Frame:spmv_csr in /home/gaoxing/Projects/spmv/cusparse/code/spmv.c:98 [0xbc4b]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:main in /home/gaoxing/Projects/spmv/cusparse/code/main.c:91 [0xb0ab]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:__libc_start_call_main in …/sysdeps/nptl/libc_start_call_main.h:58 [0x29d8f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:__libc_start_main in …/csu/libc-start.c:392 [0x29e3f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:_start [0xac44]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Program hit cudaErrorLaunchFailure (error 719) due to “unspecified launch failure” on CUDA API call to cudaMemcpy.
========= Saved host backtrace up to driver entry point at error
========= Host Frame: [0x4808d5]
========= in /lib/x86_64-linux-gnu/libcuda.so.1
========= Host Frame:cudaMemcpy [0x71669]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:spmv_csr in /home/gaoxing/Projects/spmv/cusparse/code/spmv.c:110 [0xbe19]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:main in /home/gaoxing/Projects/spmv/cusparse/code/main.c:91 [0xb0ab]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
========= Host Frame:__libc_start_call_main in …/sysdeps/nptl/libc_start_call_main.h:58 [0x29d8f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:__libc_start_main in …/csu/libc-start.c:392 [0x29e3f]
========= in /lib/x86_64-linux-gnu/libc.so.6
========= Host Frame:_start [0xac44]
========= in /home/gaoxing/Projects/spmv/cusparse/code/./spmv
CUDA API failed at line 110 with error: unspecified launch failure (719)
========= Target application returned an error
========= ERROR SUMMARY: 9 errors
code:
int spmv_csr(const struct matrix_coo *mc, double hX, double hY)
{
// Host problem definition
int A_num_rows = mc->m;
int A_num_cols = mc->n;
int A_nnz = mc->nnz;
int hA_rows = mc->row_indices;
int hA_columns = mc->col_indices;
double hA_values = mc->values;
//double hX[] = { 1.0f, 2.0f, 3.0f, 4.0f };
//double hY[] = { 0.0f, 0.0f, 0.0f, 0.0f };
//double hY_result[] = { 19.0f, 8.0f, 51.0f, 52.0f };
double alpha = 1.0;
double beta = 0.0;
//--------------------------------------------------------------------------
// Device memory management
int dA_rows, dA_columns;
double dA_values, dX, dY;
CHECK_CUDA( cudaMalloc((void) &dA_rows, A_nnz * sizeof(int)) )
CHECK_CUDA( cudaMalloc((void) &dA_columns, A_nnz * sizeof(int)) )
CHECK_CUDA( cudaMalloc((void) &dA_values, A_nnz * sizeof(double)) )
CHECK_CUDA( cudaMalloc((void) &dX, A_num_cols * sizeof(double)) )
CHECK_CUDA( cudaMalloc((void) &dY, A_num_rows * sizeof(double)) )
CHECK_CUDA( cudaMemcpy(dA_rows, hA_rows, A_nnz * sizeof(int),
cudaMemcpyHostToDevice) )
CHECK_CUDA( cudaMemcpy(dA_columns, hA_columns, A_nnz * sizeof(int),
cudaMemcpyHostToDevice) )
CHECK_CUDA( cudaMemcpy(dA_values, hA_values, A_nnz * sizeof(double),
cudaMemcpyHostToDevice) )
CHECK_CUDA( cudaMemcpy(dX, hX, A_num_cols * sizeof(double),
cudaMemcpyHostToDevice) )
CHECK_CUDA( cudaMemcpy(dY, hY, A_num_rows * sizeof(double),
cudaMemcpyHostToDevice) )
//--------------------------------------------------------------------------
// CUSPARSE APIs
cusparseHandle_t handle = NULL;
cusparseSpMatDescr_t matA;
cusparseDnVecDescr_t vecX, vecY;
void* dBuffer = NULL;
size_t bufferSize = 0;
CHECK_CUSPARSE( cusparseCreate(&handle) )
// Create sparse matrix A in CSR format
CHECK_CUSPARSE( cusparseCreateCoo(&matA, A_num_rows, A_num_cols, A_nnz,
dA_rows, dA_columns, dA_values,
CUSPARSE_INDEX_32I,
CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F) )
// Create dense vector X
CHECK_CUSPARSE( cusparseCreateDnVec(&vecX, A_num_cols, dX, CUDA_R_64F) )
// Create dense vector y
CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, A_num_rows, dY, CUDA_R_64F) )
// allocate an external buffer if needed
CHECK_CUSPARSE( cusparseSpMV_bufferSize(
handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha, matA, vecX, &beta, vecY, CUDA_R_64F,
CUSPARSE_SPMV_ALG_DEFAULT, &bufferSize) )
printf("buf size: %ld\n", bufferSize);
CHECK_CUDA( cudaMalloc(&dBuffer, bufferSize) )
// execute SpMV
//*
CHECK_CUSPARSE( cusparseSpMV(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
&alpha, matA, vecX, &beta, vecY, CUDA_R_64F,
CUSPARSE_SPMV_ALG_DEFAULT, dBuffer) )
//*/
// destroy matrix/vector descriptors
CHECK_CUSPARSE( cusparseDestroySpMat(matA) )
CHECK_CUSPARSE( cusparseDestroyDnVec(vecX) )
CHECK_CUSPARSE( cusparseDestroyDnVec(vecY) )
CHECK_CUSPARSE( cusparseDestroy(handle) )
//--------------------------------------------------------------------------
// device result check
CHECK_CUDA( cudaMemcpy(hY, dY, A_num_rows * sizeof(double),
cudaMemcpyDeviceToHost) )
/*
int correct = 1;
for (int i = 0; i < A_num_rows; i++) {
if (hY[i] != hY_result[i]) { // direct doubleing point comparison is not
correct = 0; // reliable
break;
}
}
if (correct)
printf("spmv_coo_example test PASSED\n");
else
printf("spmv_coo_example test FAILED: wrong result\n");
*/
//--------------------------------------------------------------------------
// device memory deallocation
CHECK_CUDA( cudaFree(dBuffer) )
CHECK_CUDA( cudaFree(dA_rows) )
CHECK_CUDA( cudaFree(dA_columns) )
CHECK_CUDA( cudaFree(dA_values) )
CHECK_CUDA( cudaFree(dX) )
CHECK_CUDA( cudaFree(dY) )
return EXIT_SUCCESS;
}