I’ve taken the sample code and got rid of most of the non-essential parts. Which leaves me with:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <time.h>
#include <cutil.h>
#include <cufft.h>
#include <cutil_inline.h>
// Complex data type
typedef float2 Complex;
////////////////////////////////////////////////////////////////////////////////
// declaration, forward
void runTest(int argc, char** argv);
#define SIGNAL_SIZE 100
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char** argv)
{
runTest(argc, argv);
cutilExit(argc, argv);
}
void runTest(int argc, char** argv)
{
if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") )
cutilDeviceInit(argc, argv);
else
cudaSetDevice( cutGetMaxGflopsDeviceId() );
// Allocate host memory for the signal
Complex* h_signal = (Complex*)malloc(sizeof(Complex) * SIGNAL_SIZE);
// Initalize the memory for the signal
for (unsigned int i = 0; i < SIGNAL_SIZE; i++) {
h_signal[i].x = 1;
h_signal[i].y = 0;
}
// Print signal
for (unsigned int i = 0; i < SIGNAL_SIZE; i++) {
printf("%f\n", h_signal[i].x);
}
printf("end of signal\n");
// Allocate device memory for signal
Complex* d_signal;
cutilSafeCall(cudaMalloc((void**)&d_signal, SIGNAL_SIZE));
// Copy host memory to device
cutilSafeCall(cudaMemcpy(d_signal, h_signal, SIGNAL_SIZE, cudaMemcpyHostToDevice));
// CUFFT plan
cufftHandle plan;
cufftSafeCall(cufftPlan1d(&plan, SIGNAL_SIZE, CUFFT_C2C, 1));
// Transform signal
cufftSafeCall(cufftExecC2C(plan, (cufftComplex *)d_signal, (cufftComplex *)d_signal, CUFFT_FORWARD));
// Copy device memory to host
Complex* h_convolved_signal = h_signal;
cutilSafeCall(cudaMemcpy(h_convolved_signal, d_signal, SIGNAL_SIZE, cudaMemcpyDeviceToHost));
// Print result
for (unsigned int i = 0; i < SIGNAL_SIZE; i++) {
printf("%f %f\n", h_convolved_signal[i].x, h_convolved_signal[i].y);
}
printf("end of result\n");
//Destroy CUFFT context
cufftSafeCall(cufftDestroy(plan));
// cleanup memory
free(h_signal);
cutilSafeCall(cudaFree(d_signal));
cudaThreadExit();
}
Basically I create data to be transformed, print the data, let CUFFT do the transformation and print the results. The output however does not show a correct fourier transformation - but I can’t find my mistake.