Access error using cuSPARSE gtsv2

I’m writing a test code to use the gtsv2 function from cuSparse to find the inverse of a tridiagonal matrix but get an access error when calling the bufferSizeExt function. I’ve tried to stick to documentation as much as possible but just can’t find the source of the error. I put the code below. I get the error “Exception thrown at 0x00007FFDAC27A47A (cusparse64_11.dll) in cusparse_test.exe: 0xC0000005: Access violation writing location 0x0000000000000000.”

Any input will be much appreciated!

int main()
{
    const int N = 512;
    // lower diagonal
    cuDoubleComplex* d_ld = NULL;
    // diagonal
    cuDoubleComplex* d_d = NULL;
    // upper diagonal
    cuDoubleComplex* d_ud = NULL;

    cuDoubleComplex* h_ld = NULL;
    cuDoubleComplex* h_d = NULL;
    cuDoubleComplex* h_ud = NULL;

    cuDoubleComplex* h_ID_matrix = NULL;
    cuDoubleComplex*d_ID_matrix = NULL;

    cudaMallocHost((void**)&h_ID_matrix, N * N * sizeof(cuDoubleComplex));
    check_cuda_error("Malloc for id matrix on device failed !");

    cudaMalloc((void**)&d_ID_matrix, N * N * sizeof(cuDoubleComplex));
    check_cuda_error("Malloc for id matrix on device failed !");

    cudaMallocHost((void**)&h_ld, N * sizeof(cuDoubleComplex));
    check_cuda_error("Malloc for ld on host failed !");

    cudaMallocHost((void**)&h_d, N * sizeof(cuDoubleComplex));
    check_cuda_error("Malloc for d on host failed !");

    cudaMallocHost((void**)&h_ud, N * sizeof(cuDoubleComplex));
    check_cuda_error("Malloc for ud on host failed !");

    cudaMalloc((void**)&d_ld, N * sizeof(cuDoubleComplex));
    check_cuda_error("Malloc for ld on device failed !");

    cudaMalloc((void**)&d_d, N * sizeof(cuDoubleComplex));
    check_cuda_error("Malloc for d on device failed !");

    cudaMalloc((void**)&d_ud, N * sizeof(cuDoubleComplex));
    check_cuda_error("Malloc for ud on device failed !");

    // fill dense arrays with ld[0]=ud[size-1]=0
    fill_arrays_host(h_ld, h_d, h_ud, N);

    // make ID matrix as 1D dense array
    make_ID_matrix(h_ID_matrix, N);

    cudaMemcpy(d_ld, h_ld, N * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
    check_cuda_error("memcpy for ld failed !");

    cudaMemcpy(d_d, h_d, N * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
    check_cuda_error("memcpy for d failed !");

    cudaMemcpy(d_ud, h_ud, N * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
    check_cuda_error("memcpy for ud failed!");

    cudaMemcpy(d_ID_matrix, h_ID_matrix, N * N *sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
    check_cuda_error("memcpy for d_ID_matrix failed !");

    cudaDeviceSynchronize();

    size_t* buffer_size = 0;
    cusparseHandle_t handle = NULL;
    // variables to check success of operations
    cusparseStatus_t status = CUSPARSE_STATUS_SUCCESS;

    // create handle
    status = cusparseCreate(&handle);
    cudaDeviceSynchronize();
    // check if cuSparse initialized properly
    assert(CUSPARSE_STATUS_SUCCESS == status);

    // get buffer size needed for the gtsv function. square matrix so N is rows and cols
    status = cusparseZgtsv2_bufferSizeExt(handle, N, N, d_ld, d_d, d_ud, d_ID_matrix, N, buffer_size);
    assert(CUSPARSE_STATUS_SUCCESS == status);
    cudaDeviceSynchronize();

    // solve the equation A*X = B for X, where A is our input matrix and B is the identity matrix.
    // solution is overwritten on B
    status = cusparseZgtsv2(handle, N, N, d_ld, d_d, d_ud, d_ID_matrix, N, buffer_size);
    assert(CUSPARSE_STATUS_SUCCESS == status);
    cudaDeviceSynchronize();
    cusparseDestroy(handle);
    cudaDeviceSynchronize();

    cudaFree(d_d); cudaFree(d_ud); cudaFree(d_ld); cudaFree(d_ID_matrix);
    cudaFreeHost(h_d); cudaFreeHost(h_ud); cudaFreeHost(h_ld); cudaFreeHost(h_ID_matrix);

    return 0;
}
   

void check_cuda_error(const char* errormsg)
{
    cudaError_t error = cudaGetLastError();
    if (error != cudaSuccess)
    {
        fprintf(stderr, "%s\n", errormsg);
        fprintf(stderr, "Cuda: %s\n", cudaGetErrorString(error));
        exit(EXIT_FAILURE);
    }
}


int fill_arrays_host(cuDoubleComplex* ld, cuDoubleComplex* d, cuDoubleComplex* up, int size)
{
    for (int i = 0; i < size; i ++)
    {
        ld[i].x = 1.0 * i; ld[i].y = 0;
        d[i].x = 0.5; d[i].y = 0;
        up[i].x = -1.0 * i; up[i].y = 0;

    }

    ld[0].x = 0; ld[0].y = 0;
    up[size - 1].x = 0; up[size - 1].y = 0;

    return 0;
}

int make_ID_matrix(cuDoubleComplex* ID_matrix, int rows)
{
 
    // identity matrix is the same in column-major or row-major format
    for (int r = 0; r < rows * rows; r++)
    {
        if (r % (rows + 1) == 0) { ID_matrix[r].x = 1; ID_matrix[r].y = 0; }

        else { ID_matrix[r].x = 0; ID_matrix[r].y = 0; }

        if (r == (rows * rows) - 1) { printf("ID matrix initialized \n"); }
    }
    return 0;
}

Problem solved by not initializing the buffer_size variable as a pointer, and using the & operator to give it’s address to the bufferSizeExt function as follows:

size_t buffer_size = 0;
cusparseHandle_t handle = NULL;

// variables to check success of operations
cusparseStatus_t status = CUSPARSE_STATUS_SUCCESS;

// create handle
status = cusparseCreate(&handle);

cudaDeviceSynchronize();
// check if cuSparse initialized properly
assert(CUSPARSE_STATUS_SUCCESS == status);
// get buffer size needed for the gtsv function. square matrix so N is rows and cols
status = cusparseZgtsv2_bufferSizeExt(handle, N, N, d_ld, d_d, d_ud, d_ID_matrix, N, &buffer_size);

assert(CUSPARSE_STATUS_SUCCESS == status);

cudaDeviceSynchronize();



// solve the equation A*X = B for X, where A is our input matrix and B is the identity matrix.

// solution is overwritten on B

status = cusparseZgtsv2(handle, N, N, d_ld, d_d, d_ud, d_ID_matrix, N, &buffer_size);

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.