I’m writing a test code to use the gtsv2 function from cuSparse to find the inverse of a tridiagonal matrix but get an access error when calling the bufferSizeExt function. I’ve tried to stick to documentation as much as possible but just can’t find the source of the error. I put the code below. I get the error “Exception thrown at 0x00007FFDAC27A47A (cusparse64_11.dll) in cusparse_test.exe: 0xC0000005: Access violation writing location 0x0000000000000000.”
Any input will be much appreciated!
int main()
{
const int N = 512;
// lower diagonal
cuDoubleComplex* d_ld = NULL;
// diagonal
cuDoubleComplex* d_d = NULL;
// upper diagonal
cuDoubleComplex* d_ud = NULL;
cuDoubleComplex* h_ld = NULL;
cuDoubleComplex* h_d = NULL;
cuDoubleComplex* h_ud = NULL;
cuDoubleComplex* h_ID_matrix = NULL;
cuDoubleComplex*d_ID_matrix = NULL;
cudaMallocHost((void**)&h_ID_matrix, N * N * sizeof(cuDoubleComplex));
check_cuda_error("Malloc for id matrix on device failed !");
cudaMalloc((void**)&d_ID_matrix, N * N * sizeof(cuDoubleComplex));
check_cuda_error("Malloc for id matrix on device failed !");
cudaMallocHost((void**)&h_ld, N * sizeof(cuDoubleComplex));
check_cuda_error("Malloc for ld on host failed !");
cudaMallocHost((void**)&h_d, N * sizeof(cuDoubleComplex));
check_cuda_error("Malloc for d on host failed !");
cudaMallocHost((void**)&h_ud, N * sizeof(cuDoubleComplex));
check_cuda_error("Malloc for ud on host failed !");
cudaMalloc((void**)&d_ld, N * sizeof(cuDoubleComplex));
check_cuda_error("Malloc for ld on device failed !");
cudaMalloc((void**)&d_d, N * sizeof(cuDoubleComplex));
check_cuda_error("Malloc for d on device failed !");
cudaMalloc((void**)&d_ud, N * sizeof(cuDoubleComplex));
check_cuda_error("Malloc for ud on device failed !");
// fill dense arrays with ld[0]=ud[size-1]=0
fill_arrays_host(h_ld, h_d, h_ud, N);
// make ID matrix as 1D dense array
make_ID_matrix(h_ID_matrix, N);
cudaMemcpy(d_ld, h_ld, N * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
check_cuda_error("memcpy for ld failed !");
cudaMemcpy(d_d, h_d, N * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
check_cuda_error("memcpy for d failed !");
cudaMemcpy(d_ud, h_ud, N * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
check_cuda_error("memcpy for ud failed!");
cudaMemcpy(d_ID_matrix, h_ID_matrix, N * N *sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
check_cuda_error("memcpy for d_ID_matrix failed !");
cudaDeviceSynchronize();
size_t* buffer_size = 0;
cusparseHandle_t handle = NULL;
// variables to check success of operations
cusparseStatus_t status = CUSPARSE_STATUS_SUCCESS;
// create handle
status = cusparseCreate(&handle);
cudaDeviceSynchronize();
// check if cuSparse initialized properly
assert(CUSPARSE_STATUS_SUCCESS == status);
// get buffer size needed for the gtsv function. square matrix so N is rows and cols
status = cusparseZgtsv2_bufferSizeExt(handle, N, N, d_ld, d_d, d_ud, d_ID_matrix, N, buffer_size);
assert(CUSPARSE_STATUS_SUCCESS == status);
cudaDeviceSynchronize();
// solve the equation A*X = B for X, where A is our input matrix and B is the identity matrix.
// solution is overwritten on B
status = cusparseZgtsv2(handle, N, N, d_ld, d_d, d_ud, d_ID_matrix, N, buffer_size);
assert(CUSPARSE_STATUS_SUCCESS == status);
cudaDeviceSynchronize();
cusparseDestroy(handle);
cudaDeviceSynchronize();
cudaFree(d_d); cudaFree(d_ud); cudaFree(d_ld); cudaFree(d_ID_matrix);
cudaFreeHost(h_d); cudaFreeHost(h_ud); cudaFreeHost(h_ld); cudaFreeHost(h_ID_matrix);
return 0;
}
void check_cuda_error(const char* errormsg)
{
cudaError_t error = cudaGetLastError();
if (error != cudaSuccess)
{
fprintf(stderr, "%s\n", errormsg);
fprintf(stderr, "Cuda: %s\n", cudaGetErrorString(error));
exit(EXIT_FAILURE);
}
}
int fill_arrays_host(cuDoubleComplex* ld, cuDoubleComplex* d, cuDoubleComplex* up, int size)
{
for (int i = 0; i < size; i ++)
{
ld[i].x = 1.0 * i; ld[i].y = 0;
d[i].x = 0.5; d[i].y = 0;
up[i].x = -1.0 * i; up[i].y = 0;
}
ld[0].x = 0; ld[0].y = 0;
up[size - 1].x = 0; up[size - 1].y = 0;
return 0;
}
int make_ID_matrix(cuDoubleComplex* ID_matrix, int rows)
{
// identity matrix is the same in column-major or row-major format
for (int r = 0; r < rows * rows; r++)
{
if (r % (rows + 1) == 0) { ID_matrix[r].x = 1; ID_matrix[r].y = 0; }
else { ID_matrix[r].x = 0; ID_matrix[r].y = 0; }
if (r == (rows * rows) - 1) { printf("ID matrix initialized \n"); }
}
return 0;
}