Hi, I am trying to use cusolverDnSsyevd to solve eigenvalues of matrices. The code runs ok with the input of example (array AA below). However, If I change the input a little bit, like use eye(3) or ones(3), most of the output become nan .
My code shows as follows. Could somebody help me to check where is the problem. Thanks!
__host__ void Eig(cusolverDnHandle_t cusolverH,
const unsigned int dim,
float *Sigma,float *working){
cusolverStatus_t cusolver_status;
assert(dim<31);
float *d_W=&working[0];
int *d_Info = (int *)&working[dim];
float *d_work = &working[32];
int h_lwork = 0;
cusolver_status = cusolverDnSsyevd_bufferSize(
cusolverH,
CUSOLVER_EIG_MODE_VECTOR,
CUBLAS_FILL_MODE_LOWER,
dim,
Sigma,
dim,
d_W,
&h_lwork);
assert(cusolver_status == CUSOLVER_STATUS_SUCCESS);
cusolver_status = cusolverDnSsyevd(
cusolverH,
CUSOLVER_EIG_MODE_VECTOR,
CUBLAS_FILL_MODE_LOWER,
dim,
Sigma,
dim,
d_W,
d_work,
h_lwork,
d_Info);
cudaDeviceSynchronize();
assert(cusolver_status == CUSOLVER_STATUS_SUCCESS);
int info_gpu = 0;
CudaCheck(cudaMemcpy(&info_gpu, d_Info, sizeof(int), cudaMemcpyDeviceToHost));
assert(info_gpu==0);
}
float* mallocOnGpu(const size_t N) {
float* device_A;
float ABytes = N * sizeof(float);
CudaCheck(cudaMalloc(&device_A, ABytes));
return device_A;
}
int main(){
cusolverStatus_t cusolver_status;
cusolverDnHandle_t cusolverH;
cusolver_status = cusolverDnCreate(&cusolverH);
assert(CUSOLVER_STATUS_SUCCESS == cusolver_status);
int dim=3;
float *d_mat=mallocOnGpu(dim*dim);
float *d_d=mallocOnGpu(dim*dim*20096);
float A[dim*dim] = { 3.5, 0.5, 0, 0.5, 3.5, 0, 0, 0, 2.0};
CudaCheck(cudaMemcpy(
&d_mat[0], A,
dim*dim*sizeof(float),
cudaMemcpyHostToDevice));
Eig(cusolverH, dim,d_mat, d_d);
cudaDeviceSynchronize();
/* My result:
d_mat[0] = 0
d_mat[1] = 0
d_mat[2] = 1
d_mat[3] = -0.707107
d_mat[4] = 0.707107
d_mat[5] = 0
d_mat[6] = -0.707107
d_mat[7] = -0.707107
d_mat[8] = 0
d_d[0] = 2
d_d[1] = 3
d_d[2] = 4
*/
float B[dim*dim] = { 2, 0, 0, 0, 2, 0, 0, 0, 2};
CudaCheck(cudaMemcpy(
&d_mat[0], B,
dim*dim*sizeof(float),
cudaMemcpyHostToDevice));
Eig(cusolverH, dim,d_mat, d_d);
cudaDeviceSynchronize();
/* My result:
d_mat[0] = 1
d_mat[1] = nan
d_mat[2] = nan
d_mat[3] = -nan
d_mat[4] = nan
d_mat[5] = nan
d_mat[6] = -nan
d_mat[7] = nan
d_mat[8] = nan
d_d[0] = 2
d_d[1] = -nan
d_d[2] = -nan
*/
}