I am trying to use cusolverDnSgeqrf in CUDA 7.0. The code below always returns CUSOLVER_STATUS_EXECUTION_FAILED. Can someone please explain what I am doing wrong? By the way, should there be example code with the new CUSOLVER capabilities? I can’t seem to find any examples.
OS : Ubuntu 14.04
Driver : 346.46
GPU : 690
void gpuAssert(cudaError_t code, char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) { exit(code); }
}
}
void gpuErrchk(cudaError_t ans) { gpuAssert((ans), __FILE__, __LINE__); }
int main()
{
cusolverStatus_t status;
cusolverDnHandle_t solver_handle;
cusolverDnCreate(&solver_handle);
int M = 4;
int N = 3;
int minMN = min(M,N);
float *h_A = (float *)malloc(M * N * sizeof(float));
h_A[0] = 1.0f;
h_A[1] = 4.0f;
h_A[2] = 7.0f;
h_A[3] = 10.0f;
h_A[4] = 2.0f;
h_A[5] = 5.0f;
h_A[6] = 8.0f;
h_A[7] = 11.0f;
h_A[8] = 3.0f;
h_A[9] = 6.0f;
h_A[10] = 9.0f;
h_A[11] = 12.0f;
float *d_A;
gpuErrchk(cudaMalloc(&d_A, M * N * sizeof(float)));
gpuErrchk(cudaMemcpy(d_A, h_A, M * N * sizeof(float), cudaMemcpyHostToDevice));
int work_size = 0;
status = cusolverDnSgeqrf_bufferSize(solver_handle, M, N, d_A, M, &work_size);
cout << "status = " << status << endl;
cout << "work_size = " << work_size << endl;
if (status != CUSOLVER_STATUS_SUCCESS )
cout << "Initialization of cuSolver failed." << endl;
int devInfo = 0;
float* TAU;
gpuErrchk(cudaMalloc(&TAU, minMN * sizeof(float)));
float* Workspace;
gpuErrchk(cudaMalloc(&Workspace, work_size * sizeof(float)));
status = cusolverDnSgeqrf(solver_handle, M, N, d_A, M, TAU, Workspace, work_size, &devInfo);
cout << "devInfo = " << devInfo << endl;
cout << "status = " << status << endl;
switch(status)
{
case CUSOLVER_STATUS_SUCCESS:
cout << "SVD computation success" << endl;
break;
case CUSOLVER_STATUS_NOT_INITIALIZED :
cout << "Library cuSolver not initialized correctly" << endl;
break;
case CUSOLVER_STATUS_INVALID_VALUE:
cout << "Invalid parameters passed" << endl;
break;
case CUSOLVER_STATUS_INTERNAL_ERROR:
cout << "Internal operation failed" << endl;
break;
case CUSOLVER_STATUS_EXECUTION_FAILED:
cout << "Execution failed" << endl;
break;
}
cudaDeviceSynchronize();
float* h_U = (float *)malloc(M * N * sizeof(float));
gpuErrchk(cudaMemcpy(h_U, d_A, M * N * sizeof(float), cudaMemcpyDeviceToHost));
cusolverDnDestroy(solver_handle);
return 0;
}