Cuda Driver API and CUSolver internal error

cheivan · January 16, 2022, 12:05pm

Hello everyone! Is it possible to use cuda driver api and cusolver?

In my project ( Rust lang) I use Cuda driver api through FFI and need to compute eigenvalues . I thought that CUSovler could help me, but it seems to me that it is impossible to use with driver API. How can I pass gpu arrays to cusolverDnSsyevd?

Currently I am getting INTERNAL_ERROR.

Thanks.

cheivan · January 16, 2022, 7:31pm

Here I created same code but on C++ and get same iternal error of cusolver on call cusolverDnDsyevd. How to pass data from cuda driver to cusolver?
Cuda version - 10.1

  cusolverDnHandle_t cusolverH = NULL;
    CUstream stream = NULL;
    CUcontext ctx = NULL;
    CUdevice device = NULL;
    cuInit(0);
    cuDeviceGet(&device, 0);
    cuCtxCreate_v2(&ctx,0,device);
    cuStreamCreate(&stream, 0);

    const int m = 3;
    const int lda = m;
    /*
     *       | 3.5 0.5 0.0 |
     *   A = | 0.5 3.5 0.0 |
     *       | 0.0 0.0 2.0 |
     *
     */
    const std::vector<double> A = { 3.5, 0.5, 0.0, 0.5, 3.5, 0.0, 0.0, 0.0, 2.0 };
    const std::vector<double> lambda = { 2.0, 3.0, 4.0 };

    std::vector<double> V(lda * m, 0); // eigenvectors
    std::vector<double> W(m, 0);       // eigenvalues

    CUdeviceptr d_A;
    CUdeviceptr d_W;
    CUdeviceptr d_info;

    int info = 0;

    int lwork = 0;            /* size of workspace */
    CUdeviceptr d_work; /* device workspace*/

    std::printf("A = (matlab base-1)\n");
    //print_matrix(m, m, A.data(), lda);
    std::printf("=====\n");

    /* step 1: create cusolver handle, bind a stream */
    CUSOLVER_CHECK(
        cusolverDnCreate(&cusolverH)
    );

    (cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
    CUSOLVER_CHECK(
        cusolverDnSetStream(cusolverH, stream)
    );

    CU_CHECK(cuMemAlloc_v2(&d_A, sizeof(double) * A.size()));
    CU_CHECK(cuMemAlloc_v2(&d_W, sizeof(double) * W.size()));
    CU_CHECK(cuMemAlloc_v2(&d_info, sizeof(int)));

    CU_CHECK(
        cuMemcpyHtoDAsync_v2(d_A, static_cast<const void *>(A.data()), sizeof(double) * A.size(),
             stream));

    // step 3: query working space of syevd
    cusolverEigMode_t jobz = CUSOLVER_EIG_MODE_VECTOR; // compute eigenvalues and eigenvectors.
    cublasFillMode_t uplo = CUBLAS_FILL_MODE_LOWER;

    CUSOLVER_CHECK(
    cusolverDnDsyevd_bufferSize(cusolverH, jobz, uplo, m, reinterpret_cast<double*>(d_A), 
        lda, reinterpret_cast<double *>(d_W), &lwork)
    );
    printf("workspace size %d", lwork);
    CU_CHECK(cuMemAlloc_v2(&d_work, sizeof(double) * lwork));

    // step 4: compute spectrum
    CUSOLVER_CHECK(
        cusolverDnDsyevd(cusolverH, jobz, uplo, m, reinterpret_cast<double *>(&d_A), lda, 
            reinterpret_cast<double*>(&d_W), reinterpret_cast<double*>(&d_work),
            lwork, reinterpret_cast<int*>(&d_info))
    );

    CU_CHECK(
        cuMemcpyDtoHAsync_v2(reinterpret_cast<void *>(V.data()), d_A, sizeof(double) * V.size(), stream)
    );
    CU_CHECK(
        cuMemcpyDtoHAsync_v2(reinterpret_cast<void*>(W.data()), d_W, sizeof(double) * W.size(), stream)
    ); 
    CU_CHECK(
        cuMemcpyDtoHAsync_v2(reinterpret_cast<void*>(&info), d_info, sizeof(int), stream)
    );

    (cuStreamSynchronize(stream));

    std::printf("after syevd: info = %d\n", info);
    if (0 > info) {
        std::printf("%d-th parameter is wrong \n", -info);
        exit(1);
    }

    std::printf("eigenvalue = (matlab base-1), ascending order\n");
    int idx = 1;
    for (auto const& i : W) {
        std::printf("W[%i] = %E\n", idx, i);
        idx++;
    }

    std::printf("V = (matlab base-1)\n");
    //print_matrix(m, m, V.data(), lda);
    std::printf("=====\n");

    // step 4: check eigenvalues
    double lambda_sup = 0;
    for (int i = 0; i < m; i++) {
        double error = fabs(lambda[i] - W[i]);
        lambda_sup = (lambda_sup > error) ? lambda_sup : error;
    }
    std::printf("|lambda - W| = %E\n", lambda_sup);

    /* free resources */
    (cuMemFree_v2(d_A));
    (cuMemFree_v2(d_W));
    (cuMemFree_v2(d_info));
    (cuMemFree_v2(d_work));

    //CUSOLVER_CHECK(
    cusolverDnDestroy(cusolverH);
    //);

    (cuStreamDestroy_v2(stream));

    (cuCtxDestroy_v2(ctx));

    //return EXIT_SUC0CESS;

cheivan · January 20, 2022, 11:48am

I’ve solved it. CUdeviceptr is pointer itself , so what I need is to remove adress operator in syevd call.

        cusolverDnDsyevd(cusolverH, jobz, uplo, m, reinterpret_cast<double *>(d_A), lda, 
            reinterpret_cast<double*>(d_W), reinterpret_cast<double*>(d_work),
            lwork, reinterpret_cast<int*>(d_info))

system · February 3, 2022, 11:49am

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.

Topic		Replies	Views
CUDA graph capture mode with cuSolver get CUSOLVER_STATUS_INTERNAL_ERROR CUDA Programming and Performance cuda	2	37	October 9, 2024
Wrong output of Matrix Inversion using cuSOLVER GPU-Accelerated Libraries cuda	7	59	July 22, 2024
Warning: Cuda API error detected: cuModuleLoadFatBinary returned (0xd1) CUDA-GDB	25	1062	July 30, 2024
Cuda Error Illegal Address DeepStream SDK	10	725	June 7, 2023
New installation Multiple Failues DeepStream SDK	18	1120	June 28, 2022
How to create opencv gpumat from nvstream? DeepStream SDK	36	14419	July 27, 2021
Getting “CUDA_ERROR_INVALID_VALUE: invalid argument” in python with Tensorflow 1.14 cuDNN cuda	3	2446	April 29, 2020
Cuda cannot find my graphic card? CUDA Setup and Installation	5	2396	April 9, 2019
Error: CUDA driver version is insufficient for CUDA runtime version CUDA Setup and Installation	11	16831	July 19, 2018
Error 719 (failure to launch) for JCUDA and PyCUDA; How to run GPU consecutive times for 'large' data blocks CUDA Programming and Performance	0	2327	December 13, 2016

Cuda Driver API and CUSolver internal error

Related topics