Example from the cuFFT manual: how to compile?

Hi, I cannot get this simple code to compile. It is exactly as shown on page 16 of the cuFFT reference manual (except for the initial includes).

#include <stdio.h>

#include <cuComplex.h>

#include "cuda.h"

#include "cufft.h"

#include "cutil.h"

#include "cutil_inline_runtime.h"

#define NX 256

#define BATCH 10

// Manual-style example: plan and run a batched 1D real-to-complex FFT in place.
// NOTE(review): the function calls below are statements written at file scope.
// C/C++ only allows them inside a function body, so this snippet cannot compile
// until it is wrapped in main().
cufftHandle plan;

cufftComplex *data;

// Allocates room for (NX/2+1) complex values per batch via cudaMalloc.
// NOTE(review): the later snippets in this post spell this wrapper
// cutilSafeCall -- confirm that cudaSafeCall exists in the included cutil headers.
cudaSafeCall(cudaMalloc((void**)&data,sizeof(cufftComplex)*(NX/2+1)*BATCH));

// NOTE(review): `Nx` is not defined anywhere in this snippet; the macro above
// is spelled `NX`.
cufftPlan1d(&plan,Nx,CUFFT_R2C,BATCH);

// In-place transform: the same buffer is passed as the real input and as the
// complex output.
cufftExecR2C(plan,(cufftReal*)data,data);

cufftDestroy(plan);

cudaFree(data);

I use the following for compiling:

nvcc -arch=sm_11 -g -c -O3 1Dreal_to_Complex.cu -I…

I forgot to add

main()

{

.

.

.

.

}

Now it compiles!

But it does not run well:

This is the complete code

#include <stdio.h>

#include <cuComplex.h>

#include "cuda.h"

#include "cufft.h"

#include "cutil.h"

#include "cutil_inline_runtime.h"

#define NX 4

#define BATCH 1

// Second attempt from the post: fills `data` on the host, then allocates it
// with cudaMalloc and runs an in-place R2C transform on it.
// NOTE(review): this version is quoted as the one that misbehaves; the comments
// below point at the cause and leave the code unchanged.
main()

{

		cufftComplex *data;

		int i;

		// NOTE(review): `data` has not been assigned anything yet, so the two
		// stores in this loop write through an uninitialized pointer
		// (undefined behavior). Whether it appears to work for a given NX
		// depends on whatever address the pointer happens to hold.
		for (i=0;i < NX; i++){

		data[i].x=2.45*i;

		data[i].y=5.67*i;

								};

cufftHandle plan;

// NOTE(review): this call overwrites `data` with the address returned by
// cudaMalloc, so the values written by the loop above are not what the
// transform reads. The buffer holds (NX/2+1)*BATCH elements (3 for NX=4,
// BATCH=1), fewer than the NX elements the loop writes, and no cudaMemcpy
// places any input data into it before the transform.
cutilSafeCall( cudaMalloc((void**)&data,sizeof(cufftComplex)*(NX/2+1)*BATCH));

cufftPlan1d(&plan,NX,CUFFT_R2C,BATCH);

// In-place transform: the same buffer is the real input and the complex output.
cufftExecR2C(plan,(cufftReal*)data,data);

cufftDestroy(plan);

cudaFree(data);

}

For NX > 3 it cannot allocate memory. BATCH can be any value if NX < 4, so it is not due to the size but to the (void**)&data part.

Is the data properly initialized?

This gives the same results whether data is declared as “cufftComplex *data” or as “Complex *data” with “typedef float2 Complex”.

Thank you for your comments.

By digging through the examples in the toolkit, I finally got this program to work properly.

#include <stdio.h>

#include <stdlib.h>

#include <cuComplex.h>

#include "cuda.h"

#include "cufft.h"

#include "cutil.h"

#include "cutil_inline_runtime.h"

#define NX 256

#define BATCH 10

/*
 * Batched 1D real-to-complex FFT with cuFFT, transformed in place.
 *
 * Generates BATCH random real signals of length NX on the host, copies them
 * to the device, runs cufftExecR2C with the same buffer as input and output,
 * and copies the NX/2+1 complex coefficients of every batch back to the host.
 *
 * Buffer layout (in-place R2C, see the cuFFT data-layout documentation):
 * each batch occupies NX/2+1 cufftComplex slots. On input the first NX floats
 * of a slot group are the real samples and the remaining floats are padding;
 * on output the group holds the NX/2+1 complex coefficients.
 *
 * Returns 0 on success, 1 if a host allocation or a cuFFT call fails.
 * CUDA runtime calls are checked with cutilSafeCall, as elsewhere in this file.
 */
int main()
{
	const size_t complexPerBatch = NX / 2 + 1;
	const size_t realPerBatch = 2 * complexPerBatch;	/* floats per batch, padding included */
	const size_t bytes = sizeof(cufftComplex) * complexPerBatch * BATCH;

	cufftComplex *h_data = (cufftComplex*)malloc(bytes);
	cufftComplex *d_data = NULL;
	cufftHandle plan;
	cufftResult status;
	int b;
	int i;

	if (h_data == NULL) {
		fprintf(stderr, "host allocation failed\n");
		return 1;
	}

	cutilSafeCall(cudaMalloc((void**)&d_data, bytes));

	/*
	 * Fill the whole host buffer: NX random samples per batch followed by
	 * zeroed padding, so no uninitialized memory is copied to the device.
	 * The buffer is viewed as floats because R2C reads real input.
	 */
	srand(2009);
	for (b = 0; b < BATCH; b++) {
		cufftReal *signal = (cufftReal*)h_data + (size_t)b * realPerBatch;

		for (i = 0; i < NX; i++)
			signal[i] = float(rand()) / float(RAND_MAX);

		for (i = NX; i < (int)realPerBatch; i++)
			signal[i] = 0.0f;
	}

	cutilSafeCall(cudaMemcpy(d_data, h_data, bytes, cudaMemcpyHostToDevice));

	status = cufftPlan1d(&plan, NX, CUFFT_R2C, BATCH);
	if (status != CUFFT_SUCCESS) {
		fprintf(stderr, "cufftPlan1d failed with status %d\n", (int)status);
		cudaFree(d_data);
		free(h_data);
		return 1;
	}

	status = cufftExecR2C(plan, (cufftReal*)d_data, d_data);
	if (status != CUFFT_SUCCESS) {
		fprintf(stderr, "cufftExecR2C failed with status %d\n", (int)status);
		cufftDestroy(plan);
		cudaFree(d_data);
		free(h_data);
		return 1;
	}

	/* Blocking copy: also waits for the transform to finish. */
	cutilSafeCall(cudaMemcpy(h_data, d_data, bytes, cudaMemcpyDeviceToHost));

	/* Uncomment to print the normalized coefficients of the first batch. */
//	for (i = 0; i < NX/2+1; i++)
//		printf(" %d  %g  %g   \n", i, h_data[i].x/NX, h_data[i].y/NX);

	cufftDestroy(plan);

	/* h_data came from malloc, so it is released with free, not cudaFree. */
	free(h_data);

	cutilSafeCall(cudaFree(d_data));

	return 0;
}

Should this type of simple example be included in the cuFFT user guide?