# about cufft for 2D array

Hi, all:

``````  I made a cufft program with visual studio V++. The basic idea of the program is performing cufft for a 2D array. I used

cufftPlan2d(&plan, xsize, ysize, CUFFT_C2C)

to create a 2D plan that is spacially arranged by xsize(row) by ysize (column).

Then, I reordered the 2D array to 1D array lining up by one row to another row. Then, I applied 1D cufft to this new 1D array

cufftExecC2C(plan, d_data, d_data, CUFFT_FORWARD).

So, who can kindly explain to me when cufftExecC2C does 1D fft to 1D array, the cuda will automatically do fft on 2D pattern of data based on the plan, or just simply does 1D fft for 1D array?

When plotting the frequency spectrum of fft for 1D array, the spectrum curves show strange horizontal lines along frequency domain if using 1D plot or along both frequency domain and wavenumber domain if using 2D plotting.  The number of straight lines depends on the data size. More row or column the more straight lines.

I also have a problem using gnuplot for plotting with FILE i/o in c/c++. For reading a text data file, the figure can be plotted. However, if reading the text data file is written as a new text data file, the new text data file cannot be plotted with an error message :"Matrix does not represent a grid".

So, please read my program as below and kindly explain my problems to me.

Thank you very much in advance!
``````

Dawn

#include “cuda_runtime.h”
#include “device_launch_parameters.h”
#include “cufft.h”
#include “cuda.h”

#include “helper_functions.h”
#include “helper_cuda.h”

//#include <cutil_inline.h>
//#include <shrQATest.h>

#include
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include
#include

#ifdef WIN32
#define GNUPLOT_NAME “C:/gnuplot/bin/gnuplot -persist”
#else
#define GNUPLOT_NAME “gnuplot”
#endif

#define xsize 28
#define ysize 12
typedef float2 Complex;

global void subKernel(cuComplex *x, cuComplex *h_data, int size) {

``````int i = blockIdx.x * gridDim.x + threadIdx.x;

x[i].x = x[i].x - h_data[i].x / (float)size;
x[i].y = 0.0f;
``````

}

global void genSignal(cuComplex *x, int *ind) {

``````int i = blockIdx.x * gridDim.x + threadIdx.x;

if (ind[i] == 0) {
x[i].x = 0.0f;
x[i].y = 0.0f;
}
``````

}

global void initzero(cuComplex *a) {

``````int i = blockIdx.x * gridDim.x + threadIdx.x;

a[i].x = 0.0f;
a[i].y = 0.0f;
``````

}

void find_max(float2 *h_data, float2 *res_fft, float2 *pk_fft, int size) {
int i_max = 0;
float2 tmp;
tmp.x = 0.0;
tmp.y = 0.0;

``````for (int i = 0; i < size; i++) {
pk_fft[i].x = pk_fft[i].y = 0.0f;
if (cuCabsf(tmp) < cuCabsf(h_data[i])) {
i_max = i;
tmp.x = h_data[i].x;
tmp.y = h_data[i].y;
}
}

pk_fft[i_max].x = h_data[i_max].x;
pk_fft[i_max].y = h_data[i_max].y;

res_fft[i_max].x = res_fft[i_max].x + h_data[i_max].x;
res_fft[i_max].y = res_fft[i_max].y + h_data[i_max].y;
``````

}

int main()
{
#ifdef WIN32
FILE *pipe = _popen(GNUPLOT_NAME, “w”);
#else
FILE *pipe = popen(GNUPLOT_NAME, “w”);
#endif

``````clock_t start;
double diff;
start = clock();

int *ind, *k_ind, *d_ind;
cufftComplex *d_data, *d_x, *d_sinc, *res_data, *res_fft, *pk_fft, *d_fft, *dr_data;

typedef cufftComplex my_array[xsize];
my_array *x = (my_array *)malloc(ysize * sizeof(my_array));

typedef float array[xsize];
array *din = (array *)malloc(ysize * sizeof(array));

float2 *h_data = 0, *h_x = 0;

res_fft = (cufftComplex *)malloc(sizeof(cufftComplex) * xsize*ysize);
pk_fft = (cufftComplex *)malloc(sizeof(cufftComplex) * xsize*ysize);
res_data = (cufftComplex *)malloc(sizeof(cufftComplex) * xsize*ysize);
h_data = (float2 *)malloc(sizeof(float2) * xsize*ysize);
h_x = (float2 *)malloc(sizeof(float2) * xsize*ysize);
ind = (int *)malloc(sizeof(int) * xsize);
k_ind = (int *)malloc(sizeof(int) * xsize*ysize);

// read in 2D data
FILE *fp, *fp1;
fp = fopen("wedgegas1.txt", "r");         // read in a 2D array text data file
fp1 = fopen("inversedwedgegas.txt", "w");  // write as a 2D array  text data file

// read in data
if (fp == NULL) {
printf("fp is NULL!\n");
exit(1);
}

// read in 2D array from a data file by the order of [r][c]
for (int i = 0; i < xsize; i++) {
printf("i=%d \n ", i);
for (int j = 0; j < ysize; j++) {
fscanf(fp, "%f ", &din[i][j]);
printf("%f ", din[i][j]);
}
fscanf(fp, "\n");
printf("\n ");
}

// alloc memory for device variables and copy variables from host to device
checkCudaErrors(cudaMalloc((void **)&d_data, sizeof(cufftComplex)*xsize*ysize));
checkCudaErrors(cudaMalloc((void **)&d_x, sizeof(cufftComplex)*xsize*ysize));
checkCudaErrors(cudaMalloc((void **)&d_sinc, sizeof(cufftComplex)*xsize*ysize));
checkCudaErrors(cudaMalloc((void **)&d_ind, sizeof(int)*xsize*ysize));
checkCudaErrors(cudaMalloc((void **)&d_fft, sizeof(cufftComplex)*xsize*ysize));
checkCudaErrors(cudaMalloc((void **)&dr_data, sizeof(int)*xsize*ysize));

// generate index in which traces are deleted
for (int i = 0; i < ysize; i++) {

ind[i] = rand() % 30;
if (ind[i] == 0) {
ind[i] = 0;
}
else {
ind[i] = i;
}

}

// delete traces at ysize in which index=0
for (int i = 0; i < ysize; i++) {  // at c
if (ind[i] == 0) {
for (int j = 0; j < xsize; j++) {  // at r
x[i][j].x = 0.0f;
x[i][j].y = 0.0f;
}

}
else {
for (int j = 0; j < xsize; j++) {  // at c
x[i][j].x = din[i][j];
x[i][j].y = 0.0f;
}
}
}

// make 2D array to 1D array in the order of [r][c]
int k = 0;
for (int i = 0; i < xsize; i++) {  // at r	{
for (int j = 0; j < ysize; j++) {  // at c
h_x[k].x = x[i][j].x;
h_x[k].y = x[i][j].y;

if (x[i][j].x == 0) k_ind[k] = 0;
else k_ind[k] = k;
k++;
}
}

printf("k=%d \n", k);

if (cudaMemcpy(d_ind, k_ind, sizeof(int)*xsize*ysize, cudaMemcpyHostToDevice) != CUFFT_SUCCESS) {
printf("faild to do copy x in host to d_data on device. \n");
exit(0);
}

// initialize parameters
initzero << <1, xsize*ysize >> > (d_fft);
cudaMemcpy(res_fft, d_fft, sizeof(cufftComplex)*xsize*ysize, cudaMemcpyDeviceToHost);

/* -------------------- start iterations ---------------------------------------- */

int inc = 0;
``````

iteration:

``````// copy h_x from host to d_data in device
if (cudaMemcpy(d_data, h_x, sizeof(cufftComplex)*xsize*ysize, cudaMemcpyHostToDevice) != CUFFT_SUCCESS) {
printf("faild to do copy x in host to d_data on device. \n");
exit(0);
}

// d_data is input data
genSignal << <1, xsize*ysize >> > (d_data, d_ind);

cudaMemcpy(h_x, d_data, sizeof(cufftComplex)*xsize*ysize, cudaMemcpyDeviceToHost);

// set up 2D fft plan
cufftHandle plan;
checkCudaErrors(cufftPlan2d(&plan, xsize, ysize, CUFFT_C2C));  //xsize: # of samples; ysize: # of traces

// do fft
if (cufftExecC2C(plan, d_data, d_data, CUFFT_FORWARD) != CUFFT_SUCCESS) {
printf("faild to do cufft. \n");
exit(0);
}

// copy d_data in device to h_data in host for remove peak freq for ifft
cudaMemcpy(h_data, d_data, sizeof(cufftComplex)*xsize*ysize, cudaMemcpyDeviceToHost);

/*
1. find peak frequenct (pk_fft)
2. accumulate pk_fft in res_fft for final ifft of inversed data after finishing interations
*/
find_max(h_data, res_fft, pk_fft, xsize*ysize);

/* after putting peak freq in res_fft, copy res_fft in host to d_data in device for cuifft */
if (cudaMemcpy(d_data, pk_fft, sizeof(cufftComplex)*xsize*ysize, cudaMemcpyHostToDevice) != CUFFT_SUCCESS) {
printf("faild to do copy x in host to d_data on device. \n");
exit(0);
}

printf("\n goto next iteration %d \n", inc);

// do ifft to get peak frq data
checkCudaErrors(cufftExecC2C(plan, d_data, d_data, CUFFT_INVERSE));

// copy input data x in host to d_x in device
if (cudaMemcpy(d_x, h_x, sizeof(cufftComplex)*xsize*ysize, cudaMemcpyHostToDevice) != CUFFT_SUCCESS) {
printf("faild to do copy x in host to d_data on device. \n");
exit(0);
}

//subtract d_data from d_x to get res that is for next iteration
subKernel << < 1, xsize*ysize >> > (d_x, d_data, xsize*ysize);

/*
1. copy inversed d_x in device to x in host	for next iteration
*/
cudaMemcpy(h_x, d_x, sizeof(cufftComplex)*xsize*ysize, cudaMemcpyDeviceToHost);

inc++;

if (inc < 10)
goto iteration;

// ------------------------------------ end iteration ------------------------------------------

// copy accumulated peak frequencies (res_fft) in host to d_data in device for ifft
if (cudaMemcpy(d_data, res_fft, sizeof(cufftComplex)*xsize*ysize, cudaMemcpyHostToDevice) != CUFFT_SUCCESS) {
printf("faild to do copy x in host to d_data on device. \n");
exit(0);
}

// do ifft to get final inversed data to get interpolation
checkCudaErrors(cufftExecC2C(plan, d_data, d_data, CUFFT_INVERSE));

// copy inversed d_data in device to res_data in host
cudaMemcpy(res_data, d_data, sizeof(cufftComplex)*xsize*ysize, cudaMemcpyDeviceToHost);

/* write final inversed data to din */
k = 0;
for (int i = 0; i < xsize; i++) {
printf("%d  \n", i);
for (int j = 0; j < ysize; j++) {
din[i][j] = res_data[k].x;

fprintf(fp1, "%f ", din[i][j]);
printf("%f ", din[i][j]);

k++;

}
fprintf(fp1, "\n");
printf("\n");
}

/* ---------------------------- plot results ------------------------------------------------------- */
``````

plot:
if (pipe != NULL)
{
// plot 1
fprintf(pipe, “set multiplot layout 1,2 \n”); // set multiple plots

``````	fprintf(pipe, "set title 'Original Wedge Model' \n");
fprintf(pipe, "set xlabel 'x location' \n");
fprintf(pipe, "set ylabel 'Depth (m)' \n");

fprintf(pipe, "plot 'wedgegas1.txt' matrix with image \n"); // plot type

fprintf(pipe, "%s \n", "e");             // termination character, end a plot
fflush(pipe);                           // flush the pipe

diff = (std::clock() - start) / (double)CLOCKS_PER_SEC;
printf("Time Elapsed:%f", diff);
printf("\n");

//     -------------------------------------------------

// plot 2
fprintf(pipe, "set title 'Inversed Wedge Model \n' offset 1,2 \n");

fprintf(pipe, "plot 'inversedwedgegas.txt' matrix with image \n"); // plot type

fprintf(pipe, "%s\n", "e");             // termination character
fflush(pipe);                           // flush the pipe

// wait for key press
std::cin.clear();
std::cin.ignore(std::cin.rdbuf()->in_avail());
std::cin.get();
``````

#ifdef WIN32
_pclose(pipe);
#else
pclose(pipe);
#endif
}
else
std::cout << “Could not open pipe” << std::endl;

``````fclose(fp);
fclose(fp1);

// free
free(x);
free(h_data);
free(res_data);
free(res_fft);
free(pk_fft);
free(ind);

cufftDestroy(plan);
cudaFree(d_data);
cudaFree(res_fft);
cudaFree(d_ind);
cudaFree(d_sinc);

return 0;
``````

}