Eigen decomposition of Hermitian Matrix using CuSolver does not match the result with matlab

I am following the example of eigen decomposition from here, CUDALibrarySamples/cusolver_syevd_example.cu at master · NVIDIA/CUDALibrarySamples · GitHub

I need to do it for Hermatian complex matrix. The problem is the eigen vector is not matching at all with the result with Matlab result.

Does anyone have any idea about it why this mismatch is happening?

I have also tried cusolverdn svd method to get eigen values and vector that is giving another result.

My code is here for convenience,


#include <cstdio>
#include <cstdlib>
#include <vector>

#include <cuda_runtime.h>
#include <cusolverDn.h>

#include "cusolver_utils.h"

int N = 16;
void BuildMatrix(cuComplex* input);

void main()
{
	cusolverDnHandle_t cusolverH = NULL;
	cudaStream_t stream = NULL;
	printf("*******************\n");

	cuComplex* h_idata = (cuComplex*)malloc(sizeof(cuComplex) * N);
	cuComplex* h_eigenVector = (cuComplex*)malloc(sizeof(cuComplex) * N); // eigen vector
	float* h_eigenValue = (float*)malloc(sizeof(float) * 4);	// eigen Value
	BuildMatrix(h_idata);
	int count = 0;
	for (int i = 0; i < N / 4; i++)
	{
		for (int j = 0; j < 4; j++)
		{
			printf("%f + %f\t", h_idata[count].x, h_idata[count].y);
			count++;
		}
		printf("\n");
	}
	printf("\n*****************\n");

	/* step 1: create cusolver handle, bind a stream */
	CUSOLVER_CHECK(cusolverDnCreate(&cusolverH));

	CUDA_CHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
	CUSOLVER_CHECK(cusolverDnSetStream(cusolverH, stream));

	// step 2: reserve memory in cuda and copy input data from host to device
	cuComplex* d_idata;
	float* d_eigenValue = nullptr;
	int* d_info = nullptr;

	CUDA_CHECK(cudaMalloc((void**)&d_idata, N * sizeof(cuComplex)));
	CUDA_CHECK(cudaMalloc(reinterpret_cast<void**>(&d_eigenValue), N * sizeof(float)));
	CUDA_CHECK(cudaMalloc(reinterpret_cast<void**>(&d_info), sizeof(int)));

	CUDA_CHECK(cudaMemcpyAsync(d_idata, h_idata, N * sizeof(cuComplex), cudaMemcpyHostToDevice, stream));

	// step 3: query working space of syevd
	cusolverEigMode_t jobz = CUSOLVER_EIG_MODE_VECTOR; // compute eigenvalues and eigenvectors.
	cublasFillMode_t uplo = CUBLAS_FILL_MODE_LOWER;

	int lwork = 0;            /* size of workspace */
	cuComplex* d_work = nullptr; /* device workspace*/
	const int m = 4;
	const int lda = m;

	cusolverDnCheevd_bufferSize(cusolverH, jobz, uplo, m, d_idata, lda, d_eigenValue, &lwork);

	CUDA_CHECK(cudaMalloc(reinterpret_cast<void**>(&d_work), sizeof(cuComplex) * lwork));

	// step 4: compute spectrum
	cusolverDnCheevd(cusolverH, jobz, uplo, m, d_idata, lda, d_eigenValue, d_work, lwork, d_info);

	CUDA_CHECK(
		cudaMemcpyAsync(h_eigenVector, d_idata, N * sizeof(cuComplex), cudaMemcpyDeviceToHost, stream));
	CUDA_CHECK(
		cudaMemcpyAsync(h_eigenValue, d_eigenValue, 4 * sizeof(double), cudaMemcpyDeviceToHost, stream));

	int info = 0;
	CUDA_CHECK(cudaMemcpyAsync(&info, d_info, sizeof(int), cudaMemcpyDeviceToHost, stream));

	CUDA_CHECK(cudaStreamSynchronize(stream));

	std::printf("after syevd: info = %d\n", info);
	if (0 > info)
	{
		std::printf("%d-th parameter is wrong \n", -info);
		exit(1);
	}

	count = 0;
	for (int i = 0; i < N / 4; i++)
	{
		for (int j = 0; j < 4; j++)
		{
			printf("%f + %f\t", h_eigenVector[count].x, h_eigenVector[count].y);
			count++;
		}
		printf("\n");
	}
	printf("\n");
	for (int i = 0; i < N / 4; i++)
	{
		std::cout << h_eigenValue[i] << std::endl;
	}
	printf("\n*****************\n");

	/* free resources */
	CUDA_CHECK(cudaFree(d_idata));
	CUDA_CHECK(cudaFree(d_eigenValue));
	CUDA_CHECK(cudaFree(d_info));
	CUDA_CHECK(cudaFree(d_work));

	CUSOLVER_CHECK(cusolverDnDestroy(cusolverH));

	CUDA_CHECK(cudaStreamDestroy(stream));

	CUDA_CHECK(cudaDeviceReset());
}

//0.5560 + 0.0000i - 0.4864 + 0.0548i   0.8592 + 0.2101i - 1.5374 - 0.2069i
//- 0.4864 - 0.0548i   0.4317 + 0.0000i - 0.7318 - 0.2698i   1.3255 + 0.3344i
//0.8592 - 0.2101i - 0.7318 + 0.2698i   1.4099 + 0.0000i - 2.4578 + 0.2609i
//- 1.5374 + 0.2069i   1.3255 - 0.3344i - 2.4578 - 0.2609i   4.3333 + 0.0000i
void BuildMatrix(cuComplex* input)
{
	std::vector<float> realVector = { 0.5560, -0.4864, 0.8592, -1.5374, -0.4864, 0.4317, -0.7318, 1.3255,
									0.8592, -0.7318, 1.4099, -2.4578, -1.5374, 1.3255, -2.4578, 4.3333 };
	std::vector<float> imagVector = { 0, -0.0548, -0.2101, 0.2069, 0.0548, 0.0000, 0.2698, -0.3344,
									 0.2101, -0.2698, 0, -0.2609, -0.2069, 0.3344, 0.2609, 0 };

	for (int i = 0; i < N; i++)
	{
		input[i].x = realVector.at(i) * std::pow(10, 11);
		input[i].y = imagVector.at(i) * std::pow(10, 11);
	}
}

I checked with MATLAB, CUDA result does not match with MATLAB. I validated MATLAB result by doing A*EigenVector = EigenValue*EigenVector . But the CUDA Eigen decomposition result does not validate this eigen decomposition equation.

I raised this issue in their git ( Eigen decomposition for Hermitian complex matrix does not match the result with matlab · Issue #58 · NVIDIA/CUDALibrarySamples · GitHub), but unfortunately did not get any answer yet.

What output are you expecting, or receiving from MATLAB?

MATLAB output:
EigenVector =

-0.5166 + 0.0698i -0.0592 - 0.3800i -0.0503 - 0.7038i -0.2847 - 0.0384i
-0.2829 + 0.2908i -0.6225 + 0.2729i 0.5561 - 0.0010i 0.2455 + 0.0619i
-0.5750 + 0.1692i 0.3426 + 0.4283i -0.0116 + 0.3610i -0.4551 + 0.0483i
-0.4521 + 0.0000i 0.2987 + 0.0000i -0.2499 + 0.0000i 0.8024 + 0.0000i

EigenValues = 2.1882e+05, 2.4830e+06, 1.1528e+08, 6.7297e+11

When I input your code into Scipy Linalg Lapack I get the same answer as your CUDA code.

Python

from scipy import linalg
import numpy as np

B = np.array([55600001024.00000 + 0.00000j,   -48640000000.00000 + -5480000000.00000j,        85919997952.00000 + -21009999872.00000j,     -153740001280.00000 + 20689999872.00000j,                                                                    -48640000000.00000 + 5480000000.00000j, 43170000896.00000 + 0.00000j,   -73180004352.00000 + 26979999744.00000j,    132550000640.00000 + -33440000000.00000j,                                                                             85919997952.00000 + 21009999872.00000j, -73180004352.00000 + -26979999744.00000j,       140989988864.00000 + 0.00000j,       -245779988480.00000 + -26089998336.00000j,                                                                   -153740001280.00000 + -20689999872.00000j,      132550000640.00000 + 33440000000.00000j,        -245779988480.00000 + 26089998336.00000j,    433329995776.00000 + 0.00000j])
B = np.reshape(B, (-1,4))
linalg.lapack.cheevd(B, lower=1)

(array([-8.75528300e+06, -2.49890359e+05,  1.24810856e+08,  6.72974176e+11],
      dtype=float32), array([[ 0.30125734+0.j        ,  0.56122553-0.j        ,
         0.7153639 +0.j        ,  0.28727114+0.j        ],
       [ 0.5458933 +0.30618015j, -0.08478781+0.49962962j,
        -0.06235069-0.5323986j , -0.2515595 +0.02859632j],
       [ 0.26433572+0.6163572j ,  0.10433488-0.43609095j,
        -0.37171417+0.03892629j,  0.44460592+0.10866661j],
       [ 0.15056351+0.21227252j,  0.29652867-0.37311813j,
         0.02330746+0.24635124j, -0.7952453 -0.10713243j]],
      dtype=complex64), 0)

I had also seen this result in python using np.linalg.eigh(A) It gave me almost the same result except the imaginary part sign changed.

However, considering the result of your python code result, I can take the second EigenValue and EigenVector from your result, and If I put this in matlab then It does not satisff A*EigenVector = EigenValue*EigenVector .

You can simply run the matlab code in Octave Online · Cloud IDE compatible with MATLAB (octave-online.net)


A = [55595377712.1934+0i, -48635598883.6933 + 5484312756.01085i, 85924488819.1992 + 21012522254.9437i, -153740551560.668 - 20694279467.1970i;
-48635598883.6933 - 5484312756.01085i, 43172875570.6568+0i, -73176493270.6451 - 26976536858.3616i, 132548010434.967 + 33444032207.8740i;
85924488819.1992 - 21012522254.9437i, -73176493270.6451 + 26976536858.3616i, 140992836298.019+0i, -245778206651.103 + 26091218800.1993i;
-153740551560.668 + 20694279467.1970i, 132548010434.967 - 33444032207.8740i, -245778206651.103 - 26091218800.1993i, 433330178855.009];

EVec = [0.5458933+0.30618015i,-0.08478781+0.49962962i,-0.06235069-0.5323986i,-0.2515595+0.02859632i];
EVec = EVec.';
A* EVec
-2.49890359e+05 * EVec

It seems to satisfy Av=lambdav

from scipy import linalg
import numpy as np

A = np.array([55600001024.00000 + 0.00000j,   -48640000000.00000 + -5480000000.00000j,        85919997952.00000 + -21009999872.00000j,     -153740001280.00000 + 20689999872.00000j,                                                                    -48640000000.00000 + 5480000000.00000j, 43170000896.00000 + 0.00000j,   -73180004352.00000 + 26979999744.00000j,    132550000640.00000 + -33440000000.00000j,                                                                             85919997952.00000 + 21009999872.00000j, -73180004352.00000 + -26979999744.00000j,       140989988864.00000 + 0.00000j,       -245779988480.00000 + -26089998336.00000j,                                                                   -153740001280.00000 + -20689999872.00000j,      132550000640.00000 + 33440000000.00000j,        -245779988480.00000 + 26089998336.00000j,    433329995776.00000 + 0.00000j])
A = np.reshape(A, (-1,4))

print("A",A,"\n")

w, v=np.linalg.eigh(A) 
print("Eigenvalues",w,"\n")
print("Eigenvector",v,"\n")

print("A v-lambda v","\n")
for i in range(4):
	print(i,"\n",np.dot(A, v[:, i]) - w[i] * v[:, i])

Yes @mfatica , This equation should satisfy. As you can see in your python program also, it is not satisfying. I also tried to show same thing in Matlab in my previous comment.

Then where is the problem?

I get the correct answer, each eigenvector satisfies the relationship ( A v= lambda v)

python forum.py

A [[ 5.56000010e+10+0.00000000e+00j -4.86400000e+10-5.48000000e+09j
8.59199980e+10-2.10099999e+10j -1.53740001e+11+2.06899999e+10j]
[-4.86400000e+10+5.48000000e+09j 4.31700009e+10+0.00000000e+00j
-7.31800044e+10+2.69799997e+10j 1.32550001e+11-3.34400000e+10j]
[ 8.59199980e+10+2.10099999e+10j -7.31800044e+10-2.69799997e+10j
1.40989989e+11+0.00000000e+00j -2.45779988e+11-2.60899983e+10j]
[-1.53740001e+11-2.06899999e+10j 1.32550001e+11+3.34400000e+10j
-2.45779988e+11+2.60899983e+10j 4.33329996e+11+0.00000000e+00j]]

Eigenvalues [-8.74542021e+06 -2.40613244e+05 1.24829717e+08 6.72974143e+11]

Eigenvector [[ 0.30129972+0.j -0.56124842+0.j -0.71532789+0.j
0.28727114+0.j ]
[ 0.54590924+0.3049116j 0.08491319-0.50030929j 0.06229187+0.53245859j
-0.25155953+0.02859633j]
[ 0.2662484 +0.61588758j -0.1032465 +0.43585908j 0.3717036 -0.03892156j
0.44460591+0.10866662j]
[ 0.15153233+0.21227983j -0.2960243 +0.37311823j -0.02327774-0.24636006j
-0.79524533-0.10713243j]]

A v-lambda v

0
[-2.02273950e-05-7.62939453e-06j 3.92515212e-05+4.46657650e-05j
1.13747083e-05-2.65091658e-05j 1.33598223e-05+3.50405462e-05j]
1
[ 3.36203957e-05+1.52587891e-05j -6.25488974e-06-1.47584360e-05j
-5.29271347e-06-1.53461006e-05j 1.09513639e-05-3.91284120e-05j]
2
[ 5.29885292e-05+2.47955322e-05j 1.33924186e-06+3.41981649e-06j
-1.94907188e-05+4.67523932e-05j 1.41700730e-05-5.30779362e-05j]
3
[-6.10351562e-05-2.83122063e-07j -6.10351562e-05+1.14440918e-05j
-6.10351562e-05+1.52587891e-05j -1.22070312e-04-1.52587891e-05j]

1 Like

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.