How do I run a cuFFT transform?

In the documentation I have, I don't understand what NX and BATCH are.

1D Complex-to-Complex Transforms

/* NX is the length of each 1D transform (number of complex points); it is
   the size argument passed to cufftPlan1d below. */
#define NX 256

/* BATCH is the number of independent NX-point transforms that one plan
   execution performs; it is the last argument passed to cufftPlan1d below. */
#define BATCH 10

cufftHandle plan;

cufftComplex *data;

/* Allocate room on the GPU for NX*BATCH complex values.
   NOTE: this excerpt never fills `data`; the input signal has to be copied
   into it (e.g. with cudaMemcpy) before the transforms are executed. */
cudaMalloc((void**)&data, sizeof(cufftComplex)*NX*BATCH);

/* Create a 1D complex-to-complex FFT plan for BATCH transforms of NX points. */

cufftPlan1d(&plan, NX, CUFFT_C2C, BATCH);

/* Use the CUFFT plan to transform the signal in place (forward direction). */

cufftExecC2C(plan, data, data, CUFFT_FORWARD);

/* Inverse transform the signal in place. */

cufftExecC2C(plan, data, data, CUFFT_INVERSE);

/* Note:

(1) cuFFT transforms are unnormalized: after a forward transform followed by
    an inverse transform, divide every element by NX (the transform length)
    to get back the original data.

(2) Passing the same pointer as input and output requests an in-place
    transformation.

*/

/* Destroy the CUFFT plan. */

cufftDestroy(plan);

/* Release the GPU buffer. */
cudaFree(data);

I wrote the following, but nothing happened. What's wrong?

/* Requires <stdio.h>, <stdlib.h>, <cuda_runtime.h> and <cufft.h>. */

/* Report a failed CUDA runtime call; returns nonzero when `err` is an error. */
static int cudaFailed(cudaError_t err, const char *what)
{
    if (err == cudaSuccess) {
        return 0;
    }
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(err));
    return 1;
}

/* Report a failed cuFFT call; returns nonzero when `res` is an error. */
static int cufftFailed(cufftResult res, const char *what)
{
    if (res == CUFFT_SUCCESS) {
        return 0;
    }
    fprintf(stderr, "%s failed with cuFFT status %d\n", what, (int)res);
    return 1;
}

/*
 * Runs one forward 32-point complex-to-complex FFT on the GPU and prints the
 * result.
 *
 * cuFFT operates on device memory, so the host input is copied to a buffer
 * obtained from cudaMalloc, transformed in place there, and copied back into
 * a second host buffer for printing.
 *
 * Returns EXIT_SUCCESS on success and EXIT_FAILURE if any allocation, copy
 * or cuFFT call fails.
 */
int main(int argc, char* argv[])
{
    const int n = 32;                              /* points in the transform */
    const size_t bytes = sizeof(cufftComplex) * n; /* n elements, not n bytes */
    cufftHandle plan;
    cufftComplex *memoirecpu1 = NULL;              /* host input  */
    cufftComplex *memoirecpu2 = NULL;              /* host output */
    cufftComplex *devData = NULL;                  /* device working buffer */
    int planCreated = 0;
    int status = EXIT_FAILURE;
    int i;

    (void)argc;
    (void)argv;

    memoirecpu1 = (cufftComplex *)malloc(bytes);
    memoirecpu2 = (cufftComplex *)malloc(bytes);
    if (memoirecpu1 == NULL || memoirecpu2 == NULL) {
        fprintf(stderr, "host allocation of %lu bytes failed\n",
                (unsigned long)bytes);
        goto cleanup;
    }

    /* Constant input signal: every sample is 4.1 + 0i. */
    for (i = 0; i < n; i++) {
        memoirecpu1[i].x = 4.1f;
        memoirecpu1[i].y = 0.0f;
    }

    if (cudaFailed(cudaMalloc((void **)&devData, bytes), "cudaMalloc")) {
        goto cleanup;
    }
    if (cudaFailed(cudaMemcpy(devData, memoirecpu1, bytes,
                              cudaMemcpyHostToDevice),
                   "cudaMemcpy (host to device)")) {
        goto cleanup;
    }

    /* One transform of n points (batch = 1). */
    if (cufftFailed(cufftPlan1d(&plan, n, CUFFT_C2C, 1), "cufftPlan1d")) {
        goto cleanup;
    }
    planCreated = 1;

    /* Same pointer for input and output: in-place transform on the device. */
    if (cufftFailed(cufftExecC2C(plan, devData, devData, CUFFT_FORWARD),
                    "cufftExecC2C")) {
        goto cleanup;
    }

    /* Blocking copy: it completes only after the transform has finished. */
    if (cudaFailed(cudaMemcpy(memoirecpu2, devData, bytes,
                              cudaMemcpyDeviceToHost),
                   "cudaMemcpy (device to host)")) {
        goto cleanup;
    }

    for (i = 0; i < n; i++) {
        printf("out[%d] = %f %f\n", i, memoirecpu2[i].x, memoirecpu2[i].y);
    }
    status = EXIT_SUCCESS;

cleanup:
    if (planCreated) {
        cufftDestroy(plan);
    }
    cudaFree(devData);      /* cudaFree(NULL) is a no-op */
    free(memoirecpu1);
    free(memoirecpu2);
    return status;
}

I think in that example NX is the number of data points in each array that is to be FFT'd, and the BATCH number is for Fourier transforming several arrays at once (less sure about that one).

I think your malloc wants to be 32*sizeof(float2) not just 32.

I found this, which works, but only for 128521 data points, lol.

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include <cuda.h>
#include <cuda_runtime.h>
#include <cufft.h>

/* Number of complex points in each transform. The host buffer lives on the
   heap, so this is no longer limited by the size of the thread stack. */
#define NX      128521

/* Number of NX-point transforms executed by one plan. */
#define BATCH   1

/* Report a failed CUDA runtime call; returns nonzero when `err` is an error. */
static int cudaFailed(cudaError_t err, const char *what)
{
        if (err == cudaSuccess) {
                return 0;
        }
        fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(err));
        return 1;
}

/* Report a failed cuFFT call; returns nonzero when `res` is an error. */
static int cufftFailed(cufftResult res, const char *what)
{
        if (res == CUFFT_SUCCESS) {
                return 0;
        }
        fprintf(stderr, "%s failed with cuFFT status %d\n", what, (int)res);
        return 1;
}

/*
 * Fills a host buffer with the ramp 0, 1, 2, ..., runs a forward 1D
 * complex-to-complex FFT of NX points (BATCH transforms) on the GPU in place,
 * copies the result back and prints the first four output values.
 *
 * Returns EXIT_SUCCESS on success and EXIT_FAILURE if any allocation, copy
 * or cuFFT call fails.
 */
int main(int argc, char *argv[])
{
        const size_t count = (size_t)NX * BATCH;
        const size_t bytes = sizeof(cufftComplex) * count;
        cufftHandle plan;
        cufftComplex *devPtr = NULL;
        cufftComplex *data = NULL;
        int planCreated = 0;
        int status = EXIT_FAILURE;
        size_t i;

        (void)argc;
        (void)argv;

        /* Heap allocation: an automatic array of this size (about 1 MB) can
           overflow the stack before main even starts running. */
        data = (cufftComplex *)malloc(bytes);
        if (data == NULL) {
                fprintf(stderr, "host allocation of %lu bytes failed\n",
                        (unsigned long)bytes);
                goto cleanup;
        }

        /* source data creation */
        for (i = 0; i < count; i++) {
                data[i].x = (float)i;
                data[i].y = 0.0f;
        }

        /* GPU memory allocation */
        if (cudaFailed(cudaMalloc((void **)&devPtr, bytes), "cudaMalloc")) {
                goto cleanup;
        }

        /* transfer to GPU memory */
        if (cudaFailed(cudaMemcpy(devPtr, data, bytes, cudaMemcpyHostToDevice),
                       "cudaMemcpy (host to device)")) {
                goto cleanup;
        }

        /* creates 1D FFT plan */
        if (cufftFailed(cufftPlan1d(&plan, NX, CUFFT_C2C, BATCH),
                        "cufftPlan1d")) {
                goto cleanup;
        }
        planCreated = 1;

        /* executes the forward FFT in place (same input and output pointer) */
        if (cufftFailed(cufftExecC2C(plan, devPtr, devPtr, CUFFT_FORWARD),
                        "cufftExecC2C")) {
                goto cleanup;
        }

        /* The inverse transform is intentionally left disabled, as in the
           original program:
             cufftExecC2C(plan, devPtr, devPtr, CUFFT_INVERSE);
           If enabled, the round trip returns the input scaled by NX. */

        /* transfer results from GPU memory (blocking copy) */
        if (cudaFailed(cudaMemcpy(data, devPtr, bytes, cudaMemcpyDeviceToHost),
                       "cudaMemcpy (device to host)")) {
                goto cleanup;
        }

        for (i = 0; i < 4 && i < count; i++) {
                printf("data[%d] %f %f\n", (int)i, data[i].x, data[i].y);
        }
        status = EXIT_SUCCESS;

cleanup:
        /* deletes CUFFT plan */
        if (planCreated) {
                cufftDestroy(plan);
        }

        /* frees GPU memory; cudaFree(NULL) is a no-op */
        cudaFree(devPtr);

        free(data);

        return status;
}

I think you should just be able to ignore BATCH. Don't define it, and create your plan etc. without adding an extra argument, i.e. cufftPlan1d(&plan, NX, CUFFT_C2C); at least that's how it works for the 2D case.