Hi NVIDIA team! I have some code in which a cuSOLVER call returns the cusolverStatus_t value 6. I checked the official page (cuSOLVER :: CUDA Toolkit Documentation), and I think that error 6 is CUSOLVER_STATUS_INTERNAL_ERROR, since SUCCESS is 0.

Why am I getting this error?

Here is the code with the relevant cuSOLVER parts:

#include <cusolverDn.h>
#include <cuda_runtime.h>
#include <assert.h>

double *meanSpect;
double *Cov;
double *Corr;
double *CovEigVal;
double *CorrEigVal;
double *U;
double *VT;

double *image = (double*)malloc(lines*samples*bands*sizeof(double)); // IMAGE IS FILLED WITH SOME CODE HERE
meanSpect		= (double*) malloc(bands * sizeof(double));
Cov			= (double*) malloc(bands * bands * sizeof(double));
Corr			= (double*) malloc(bands * bands * sizeof(double));
CovEigVal		= (double*) malloc(bands * sizeof(double));
CorrEigVal	= (double*) malloc(bands * sizeof(double));
U		= (double*) malloc(bands * bands * sizeof(double));
VT	= (double*) malloc(bands * bands * sizeof(double));

    // Status cuSOLVER
    cusolverStatus_t cusolver_status = CUSOLVER_STATUS_SUCCESS;

    // Handler cuSOLVER
    cusolverDnHandle_t cusolverHandle = NULL;
    cusolver_status = cusolverDnCreate(&cusolverHandle);
    assert(CUSOLVER_STATUS_SUCCESS == cusolver_status);

    // Stream-Link cuSOLVER
    cudaStream_t streamCusolver;
    cusolver_status = cusolverDnSetStream(cusolverHandle, streamCusolver);
    assert(CUSOLVER_STATUS_SUCCESS == cusolver_status); // This seems to work fine

    int lwork = MAX(1,MAX(3*MIN(bands, bands)+MAX(bands,bands),5*MIN(bands,bands)));
    int *info;
    double *work = (double*)malloc(lwork*sizeof(double));
    double *rwork  = (double*)malloc(lwork*sizeof(double));
    int cublas_error;

// OpenACC data directive: the host arrays listed in the copyin clause are named
// together with their element ranges. The directive continues on the following
// line (trailing backslash); the rest of it is elided in this excerpt.
#pragma acc data copyin(meanSpect[0:bands], Cov[0:bands*bands], Corr[0:bands*bands], CovEigVal[0:bands], CorrEigVal[0:bands], U[0:bands*bands], VT[0:bands*bands], image[0:lines*samples*bands]) \
... // some code...

    #pragma acc host_data use_device(image, Cov)
      //                                dgemm_("T", "N", &bands, &bands, &N, &alpha, image, &N, image, &N, &beta, Cov, &bands);
      cublas_error = cublasDgemm(handle_gemm,CUBLAS_OP_T, CUBLAS_OP_N, bands, bands, N, &alpha, image, N, image, N, &beta, Cov, bands);
      if( cublas_error != CUBLAS_STATUS_SUCCESS )
        printf( "failed cuBLAS execution %d\n", cublas_error );
    cublasGetStream(handle_gemm, &stream);

...// some other code that isnt relevant

    #pragma acc host_data use_device(Cov, CovEigVal, U, VT)
                            //dgesvd_("N", "N", &bands, &bands, Cov, &bands, CovEigVal, U, &bands, VT, &bands, work, &lwork, &info);
      cusolver_status = cusolverDnDgesvd(cusolverHandle,'N', 'N', bands, bands, Cov, bands, CovEigVal, U, bands, VT, bands, work, lwork, rwork, info); // Execution dies here
      if( cusolver_status != CUSOLVER_STATUS_SUCCESS )
        printf( "failed cuSOLVER execution %d\n", cusolver_status );

The dgesvd_() call (shown commented out above) is the one that has been replaced by its cuSOLVER equivalent.

OK, I didn’t know about the cusolverDnDgesvd_bufferSize() function, or that some of the last parameters had to be on the device; that is why I got the memory error (error no. 6). I only found a code example in one PDF of the official documentation, so it was well hidden!

For future reference, we have more examples on GitHub.

