Problem with inverse CuFFT calculations

I’m trying to compute an inverse Fourier transform with a 2D cuFFT plan on a Tesla K80 card, but when I compare the results from the GPU and the CPU, some of the array values from the GPU are multiplied by -1; the absolute values of the array match the CPU result. I have spent a lot of time trying to solve this problem, and I would appreciate it if someone could help me with it. You can see the code below.

// In-place inverse 2D FFT (double-precision complex) of the m x n matrix whose
// real part is R and imaginary part is I; the result is written back to R and I.
// Evaluates to 0 on success and 1 if any allocation, copy or cuFFT call failed
// (in which case R and I are left untouched).
//
// NOTE: per the cuFFT documentation the transforms are unnormalized, so the
// output is NOT divided by m*n. Scale it if the CPU reference uses a
// normalized inverse transform.
	// Byte counts must be integral; size_t also avoids int overflow in m*n.
	const size_t elem_count = (size_t)m * (size_t)n;
	const size_t mem_size = elem_count * sizeof(cufftDoubleComplex);

	// Host staging buffer holding the interleaved complex data
	cufftDoubleComplex* a = (cufftDoubleComplex *)malloc(mem_size);
	if (a == NULL) {
		return 1;
	}

	// Pack the separate real/imaginary matrices into cufftDoubleComplex
	for (int j = 0; j < n; j++) {
		for (int i = 0; i < m; i++) {
			a[IDX2C(i, j, m)].x = R[IDX2C(i, j, m)];
			a[IDX2C(i, j, m)].y = I[IDX2C(i, j, m)];
		}
	}

	int ifft_status = 0;     // becomes 1 as soon as any step fails
	int plan_created = 0;    // tracks whether `plan` must be destroyed
	cufftDoubleComplex* d_a = NULL;
	cufftHandle plan;

	// Allocate device memory for the signal
	if (cudaMalloc((void**)&d_a, mem_size) != cudaSuccess) {
		d_a = NULL;
		ifft_status = 1;
	}

	// Copy the data from host to device
	if (ifft_status == 0 &&
	    cudaMemcpy(d_a, a, mem_size, cudaMemcpyHostToDevice) != cudaSuccess) {
		ifft_status = 1;
	}

	// Create the plan. cuFFT uses row-major layout: the first size is the
	// slowest-varying dimension and the second the fastest-varying one.
	// With leading dimension m, IDX2C(i, j, m) makes i (extent m) the fastest
	// axis, so the sizes are passed as (n, m). This is identical to (m, n)
	// for square matrices.
	// NOTE(review): assumes IDX2C(i, j, ld) == j*ld + i (the usual column-major
	// macro); confirm against its definition.
	if (ifft_status == 0) {
		if (cufftPlan2d(&plan, n, m, CUFFT_Z2Z) == CUFFT_SUCCESS) {
			plan_created = 1;
		} else {
			ifft_status = 1;
		}
	}

	// Execute the inverse FFT on the device data, in place.
	// NOTE(review): every iteration transforms the output of the previous one,
	// so for k > 1 the result is not a single inverse transform — confirm that
	// this is intended (e.g. for timing) and not a source of the mismatch.
	for (int i = 0; ifft_status == 0 && i < k; i++) {
		if (cufftExecZ2Z(plan, d_a, d_a, CUFFT_INVERSE) != CUFFT_SUCCESS) {
			ifft_status = 1;
		}
	}

	// Copy the data from device to host
	if (ifft_status == 0 &&
	    cudaMemcpy(a, d_a, mem_size, cudaMemcpyDeviceToHost) != cudaSuccess) {
		ifft_status = 1;
	}

	// Unpack cufftDoubleComplex back into the real/imaginary matrices
	if (ifft_status == 0) {
		for (int j = 0; j < n; j++) {
			for (int i = 0; i < m; i++) {
				R[IDX2C(i, j, m)] = a[IDX2C(i, j, m)].x;
				I[IDX2C(i, j, m)] = a[IDX2C(i, j, m)].y;
			}
		}
	}

	// Destroy CUFFT context
	if (plan_created) {
		cufftDestroy(plan);
	}

	// cleanup memory (cudaFree on a null pointer performs no operation)
	cudaFree(d_a);
	free(a);

	return ifft_status;