Trouble with CUFFT

I’m trying to develop a 2D FFT for an Imaging App using CUFFT, but it doesn’t seem to be working. The CUDA (*.cu) code I’m working with is below.

My FFT output does not have any specturm. It looks like grainy image. No real or imaginary axis. (My FFT expertise is minimum).

Note that the image is 400 x 400(lena.pgm), and stored in an array of unsigned chars.

void runTest(unsigned char *pixels, int w, int h, unsigned char *outArray)

{

    // Allocate host memory for the image

    cufftReal *host_real_image    = (cufftReal*)malloc(sizeof(cufftReal)* w*h);

    cufftComplex* h_result = (cufftComplex*)malloc(sizeof(cufftComplex) * w*h);

   // Initalize the memory for the image

    for (unsigned int i = 0; i < (w*h); ++i)

   {

       host_real_image[i] = pixels[i];

       host_real_image[i+1] = 0;

    }

  int mem_size = sizeof(cufftReal)* w*h;

   int out_size = sizeof(cufftComplex)*w*h;

   // Allocate device memory image

    cufftReal* device_real_image;

    CUDA_SAFE_CALL(cudaMalloc((void**)&device_real_image, mem_size));

    // Copy host memory to device

    CUDA_SAFE_CALL(cudaMemcpy(device_real_image host_real_image, mem_size, cudaMemcpyHostToDevice));

   // Allocate device memory for output

   cufftComplex *d_result;

   CUDA_SAFE_CALL(cudaMalloc((void**)&d_result, out_size));

   // CUFFT plan

    cufftHandle plan;

    CUFFT_SAFE_CALL(cufftPlan2d(&plan, w, h, CUFFT_R2C));

    // Transform image  and kernel

    CUFFT_SAFE_CALL(cufftExecR2C(plan, (cufftReal *)device_real_image, (cufftComplex *)d_result));

   CUFFT_SAFE_CALL(cufftDestroy(plan));

	// CUFFT plan

    cufftHandle plan1;

    CUFFT_SAFE_CALL(cufftPlan2d(&plan1, w, h, CUFFT_C2R));

	cufftExecC2R( plan1, (cufftComplex *)d_result, (cufftReal *)device_real_image);

	CUFFT_SAFE_CALL(cufftDestroy(plan1));

   // Copy device memory to host

     CUDA_SAFE_CALL(cudaMemcpy(h_result, d_result, out_size,

                              cudaMemcpyDeviceToHost));

	// Initalize the memory for the output image

    for (unsigned int i = 0; i < (w*h); i+=2)

   {

        outArray[(2*i)] = h_result[i].x;

        outArray[(2*i)+1] = h_result[i].y;

    }

   // cleanup memory

    free( host_real_image);

    CUDA_SAFE_CALL(cudaFree(d_signal));

}

Can anyone tell me what I’m doing wrong here?

Thanks in advance

Waiting for a solution. Anybody out there.