Hi NVIDIA team! Today I have a code, and I’m getting cusolverStatus error value 6 in a cuSOLVER call, I went to the official page → cuSOLVER :: CUDA Toolkit Documentation and I think that error 6 is CUSOLVER_STATUS_INTERNAL_ERROR, since SUCCESS is 0.
Why am I getting this error?
Here is the code with the relevant cuSOLVER parts:
#include <cusolverDn.h>
#include <cuda_runtime.h>
#include <assert.h>
double *meanSpect;
double *Cov;
double *Corr;
double *CovEigVal;
double *CorrEigVal;
double *U;
double *VT;
double *image = (double*)malloc(lines*samples*bands*sizeof(double)); // IMAGE IS FILLED WITH SOME CODE HERE
meanSpect = (double*) malloc(bands * sizeof(double));
Cov = (double*) malloc(bands * bands * sizeof(double));
Corr = (double*) malloc(bands * bands * sizeof(double));
CovEigVal = (double*) malloc(bands * sizeof(double));
CorrEigVal = (double*) malloc(bands * sizeof(double));
U = (double*) malloc(bands * bands * sizeof(double));
VT = (double*) malloc(bands * bands * sizeof(double));
// Status cuSOLVER
cusolverStatus_t cusolver_status = CUSOLVER_STATUS_SUCCESS;
// Handler cuSOLVER
cusolverDnHandle_t cusolverHandle = NULL;
cusolver_status = cusolverDnCreate(&cusolverHandle);
assert(CUSOLVER_STATUS_SUCCESS == cusolver_status);
// Stream-Link cuSOLVER
cudaStream_t streamCusolver;
cudaStreamCreate(&streamCusolver);
cusolver_status = cusolverDnSetStream(cusolverHandle, streamCusolver);
assert(CUSOLVER_STATUS_SUCCESS == cusolver_status); // This seems to work fine
int lwork = MAX(1,MAX(3*MIN(bands, bands)+MAX(bands,bands),5*MIN(bands,bands)));
int *info;
double *work = (double*)malloc(lwork*sizeof(double));
double *rwork = (double*)malloc(lwork*sizeof(double));
int cublas_error;
#pragma acc data copyin(meanSpect[0:bands], Cov[0:bands*bands], Corr[0:bands*bands], CovEigVal[0:bands], CorrEigVal[0:bands], U[0:bands*bands], VT[0:bands*bands], image[0:lines*samples*bands]) \
copyout(count[0:FPS])
{
... // some code...
#pragma acc host_data use_device(image, Cov)
{
// dgemm_("T", "N", &bands, &bands, &N, &alpha, image, &N, image, &N, &beta, Cov, &bands);
cublas_error = cublasDgemm(handle_gemm,CUBLAS_OP_T, CUBLAS_OP_N, bands, bands, N, &alpha, image, N, image, N, &beta, Cov, bands);
if( cublas_error != CUBLAS_STATUS_SUCCESS )
{
printf( "failed cuBLAS execution %d\n", cublas_error );
exit(1);
}
}
cublasGetStream(handle_gemm, &stream);
cudaStreamSynchronize(stream);
...// some other code that isnt relevant
#pragma acc host_data use_device(Cov, CovEigVal, U, VT)
{
//dgesvd_("N", "N", &bands, &bands, Cov, &bands, CovEigVal, U, &bands, VT, &bands, work, &lwork, &info);
cusolver_status = cusolverDnDgesvd(cusolverHandle,'N', 'N', bands, bands, Cov, bands, CovEigVal, U, bands, VT, bands, work, lwork, rwork, info); // Execution dies here
if( cusolver_status != CUSOLVER_STATUS_SUCCESS )
{
printf( "failed cuSOLVER execution %d\n", cusolver_status );
exit(1);
}
}
...
That dgesvd() function is the one that has been replaced by cuSOLVER.
Tell me if you need something else.
Thanks!