Using cucomplex.h

Hi, I’m here again. I’m working with FFT, and I need to make a simple code, but it’s not working. I need to transform with cufft a sin(x) and turn back, but between the transforms, I need to multiply by 2 the result so that, when I turn back the result with the inverse transfomr, I’ll recive 2sin(x) for example. With the fftw.h, I just multiply by 2 my d_signal[i] and when I turn back, I have 2sin(x), but I used to work with the complex.h. Any idea? Thanks.

# define SIGNAL_SIZE 64
# define PI acos(-1.0)
# define x 2*PI/SIGNAL_SIZE
void runTest(int argc, char **argv)
    printf("[simpleCUFFT] is starting...\n");

    findCudaDevice(argc, (const char **)argv);

    // Allocate host memory for the signal
        cufftComplex *h_signal = (Complex *)malloc(sizeof(Complex) * SIGNAL_SIZE);

        cufftComplex *h_reversed_signal = (Complex *)malloc(sizeof(Complex) * SIGNAL_SIZE);

    // Initalize the memory for the signal
    for (unsigned int i = 0; i < SIGNAL_SIZE; ++i)
        h_signal[i].x = sin(i*x);
        h_signal[i].y = 0;
    cufftComplex *d_signal;
    checkCudaErrors(cudaMalloc((void **)&d_signal, SIGNAL_SIZE*sizeof(cufftComplex)));
    // Copy host memory to device
        checkCudaErrors(cudaMemcpy(d_signal, h_signal, SIGNAL_SIZE*sizeof(cufftComplex),
    cufftHandle plan;
    checkCudaErrors(cufftPlan1d(&plan, SIGNAL_SIZE, CUFFT_C2C, 1));

    // Transform signal and kernel
    printf("Transforming signal cufftExecC2C\n");
    checkCudaErrors(cufftExecC2C(plan, (cufftComplex *)d_signal, (cufftComplex *)d_signal, CUFFT_FORWARD));
    getLastCudaError("Kernel execution failed [ ComplexPointwiseMulAndScale ]");
    for (unsigned int i = 0; i < SIGNAL_SIZE; ++i)
        d_signal[i].x = 2*d_signal[i].x;
        d_signal[i].y = 2*d_signal[i].y;
    // Transform signal back
    printf("Transforming signal back cufftExecC2C\n");
    checkCudaErrors(cufftExecC2C(plan, (cufftComplex *)d_signal, (cufftComplex *)d_signal, CUFFT_INVERSE));

   // Copy device memory to host
    checkCudaErrors(cudaMemcpy(h_reversed_signal, d_signal, SIGNAL_SIZE*sizeof(cufftComplex),
    // check result
        for (unsigned int i = 0; i < SIGNAL_SIZE; ++i)
                h_reversed_signal[i].x = h_reversed_signal[i].x / (float)SIGNAL_SIZE;
                h_reversed_signal[i].y = h_reversed_signal[i].y/(float)SIGNAL_SIZE;
                printf("first : %f %f  after %f %f \n", h_signal[i].x, h_signal[i].y, h_reversed_signal[i].x, h_reversed_signal[i].y);
    bool bTestResult = sdkCompareL2fe((float *)h_reversed_signal, (float *)h_signal, 2 * SIGNAL_SIZE, 1e-5f);
    //Destroy CUFFT context
    // cleanup memory

// Pad data
int PadData(const Complex *signal, Complex **padded_signal, int signal_size,
            const Complex *filter_kernel, Complex **padded_filter_kernel, int filter_kernel_size)
    int minRadius = filter_kernel_size / 2;
    int maxRadius = filter_kernel_size - minRadius;
    int new_size = signal_size + maxRadius;

    // Pad signal
    Complex *new_data = (Complex *)malloc(sizeof(Complex) * new_size);
    memcpy(new_data +           0, signal,              signal_size * sizeof(Complex));
    memset(new_data + signal_size,      0, (new_size - signal_size) * sizeof(Complex));
    *padded_signal = new_data;

    // Pad filter
    new_data = (Complex *)malloc(sizeof(Complex) * new_size);
    memcpy(new_data +                    0, filter_kernel + minRadius,                       maxRadius * sizeof(Complex));
    memset(new_data +            maxRadius,                         0, (new_size - filter_kernel_size) * sizeof(Complex));
    memcpy(new_data + new_size - minRadius,             filter_kernel,                       minRadius * sizeof(Complex));
    *padded_filter_kernel = new_data;

    return new_size;

This code is illegal:

for (unsigned int i = 0; i < SIGNAL_SIZE; ++i)
        d_signal[i].x = 2*d_signal[i].x;
        d_signal[i].y = 2*d_signal[i].y;

It is not legal to attempt to directly modify data using a device pointer (d_signal) in host code. This type of usage should be generating a seg fault.

One possible approach would be to replace this code with a call to a CUDA kernel that does the same thing.

Thanks txbob, I understood now and my code is working. Thanks