jam11
1
Hi, I cannot get this simple code to compile. This is exactly as in the reference manual (cuFFT) page 16 (except for the initial includes).
#include <stdio.h>
#include <cuComplex.h>
#include "cuda.h"
#include "cufft.h"
#include "cutil.h"
#include "cutil_inline_runtime.h"
#define NX 256
#define BATCH 10
// NOTE: apart from the two declarations, these are executable statements;
// they must be placed inside a function body (e.g. main) to compile.
cufftHandle plan;
cufftComplex *data;
// Device buffer for BATCH in-place real-to-complex transforms:
// NX/2+1 complex elements are reserved per transform.
cutilSafeCall(cudaMalloc((void**)&data,sizeof(cufftComplex)*(NX/2+1)*BATCH));
// The transform length is the NX macro defined above (identifiers are case-sensitive).
cufftPlan1d(&plan,NX,CUFFT_R2C,BATCH);
// In-place execution: the same buffer is passed as real input and complex output.
cufftExecR2C(plan,(cufftReal*)data,data);
cufftDestroy(plan);
cudaFree(data);
I use the following for compiling:
nvcc -arch=sm_11 -g -c -O3 1Dreal_to_Complex.cu -I…
I forgot to add
main()
{
.
.
.
.
}
Now it compiles!
jam11
2
But it does not run well:
This is the complete code
include <stdio.h>
#include <cuComplex.h>
#include "cuda.h"
#include "cufft.h"
#include "cutil.h"
#include "cutil_inline_runtime.h"
#define NX 4
#define BATCH 1
// Attempted in-place real-to-complex FFT of NX samples (BATCH transforms).
// NOTE(review): this version is the one reported as failing; the comments
// below point out what the code visibly does, the code itself is unchanged.
main()
{
// 'data' is only declared here; no memory has been assigned to it yet.
cufftComplex *data;
int i;
// NOTE(review): this loop writes NX elements through 'data' on the host
// before 'data' has been given any allocation (host or device).
for (i=0;i < NX; i++){
data[i].x=2.45*i;
data[i].y=5.67*i;
};
cufftHandle plan;
// cudaMalloc stores a fresh address in 'data', sized for (NX/2+1)*BATCH
// complex elements; nothing copies the values written above into it.
cutilSafeCall( cudaMalloc((void**)&data,sizeof(cufftComplex)*(NX/2+1)*BATCH));
// Return values of the cuFFT calls below are not checked.
cufftPlan1d(&plan,NX,CUFFT_R2C,BATCH);
// In-place execution: same buffer passed as real input and complex output.
cufftExecR2C(plan,(cufftReal*)data,data);
cufftDestroy(plan);
cudaFree(data);
}
For NX > 3 it cannot allocate memory. BATCH can be any value if NX < 4, so the failure does not seem to be due to the allocation size but to the (void**)&data part.
Is the data properly initialized?
This gives the same result whether *data is declared as “cufftComplex” or via “typedef float2 Complex”.
Thank you for your comments.
jam11
3
By digging through the examples in the toolkit, I finally got this program to work properly.
#include <stdio.h>
#include <cuComplex.h>
#include "cuda.h"
#include "cufft.h"
#include "cutil.h"
#include "cutil_inline_runtime.h"
#define NX 256
#define BATCH 10
main()
{
cufftComplex *h_data= (cufftComplex*)malloc(sizeof(cufftComplex) * (NX/2+1)*BATCH);
cufftComplex *d_data;
cufftHandle plan;
int i;
cudaMalloc((void**)&d_data,sizeof(cufftComplex)*(NX/2+1)*BATCH);
srand(2009);
for (i=0;i < NX; i++){
h_data[i].x= float(rand())/float(RAND_MAX);
h_data[i].y=0.0;
}
cutilSafeCall(cudaMemcpy(d_data, h_data, sizeof(cufftComplex)*(NX/2+1)*BATCH, cudaMemcpyHostToDevice ));
cufftPlan1d(&plan,NX,CUFFT_R2C,BATCH);
cufftExecR2C(plan,(cufftReal*)d_data,d_data);
cutilSafeCall(cudaMemcpy(h_data, d_data, sizeof(cufftComplex)*(NX/2+1)*BATCH, cudaMemcpyDeviceToHost ));
// for (i=0;i < NX; i++)
// printf(" %d %g %g \n",i,h_data[i].x/NX,h_data[i].y/NX);
cufftDestroy(plan);
cudaFree(h_data);
cudaFree(d_data);
}
Should this type of simple example be included in the cuFFT user guide?