I am trying to run a 1D cuFFT implementation. I want to take in a vector of real- or complex-valued entries and transform it, nothing more. What I have right now compiles with the correct arch types (I borrowed the Makefile from the CUDA samples). I want to print out the results of the transformed signal, but I have no idea how to do it: my attempts end in segmentation faults. I just want to verify the result using another program such as Maple or MATLAB. Code:
// includes, system
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// includes, project
#include <cuda_runtime.h>
#include <cufft.h>
#include <cufftXt.h>
#include "common/inc/helper_cuda.h"
#include "common/inc/helper_functions.h"
// Number of complex samples in the test signal that main() allocates,
// initialises and transforms.
#define SIGNAL_SIZE 16
// Complex sample type used for both the host and device buffers; pointers to
// it are reinterpret_cast to cufftComplex when passed to cuFFT.
typedef float2 Complex;
//--------------------------------------------------------------------------------
// Pad data function
// Copies `signal` into a freshly malloc'ed host buffer and zero-fills any
// trailing padding.
//
// The filter-kernel padding this routine was derived from is disabled, so the
// padded length currently equals `signal_size` and the zero-filled tail is
// empty.
//
// Parameters:
//   signal        - input samples; must hold at least `signal_size` elements.
//   padded_signal - out: receives the new buffer, or NULL on failure. The
//                   caller owns the buffer and must release it with free().
//   signal_size   - number of elements in `signal`.
//
// Returns the element count of the padded buffer, or 0 when nothing was
// allocated (NULL argument, non-positive size, or allocation failure).
int PadData(const Complex *signal, Complex **padded_signal, int signal_size) {
  if (padded_signal == NULL) {
    return 0;
  }
  // Give the caller a well-defined pointer on every failure path.
  *padded_signal = NULL;
  if (signal == NULL || signal_size <= 0) {
    return 0;
  }

  // No extra padding is applied at the moment.
  const int new_size = signal_size;

  // Do the byte arithmetic in size_t so it cannot wrap through a signed int.
  const size_t signal_bytes = sizeof(Complex) * static_cast<size_t>(signal_size);
  const size_t padded_bytes = sizeof(Complex) * static_cast<size_t>(new_size);

  Complex *new_data = reinterpret_cast<Complex *>(malloc(padded_bytes));
  if (new_data == NULL) {
    fprintf(stderr, "PadData: failed to allocate %zu bytes\n", padded_bytes);
    return 0;
  }

  // Signal first, then zeros for whatever padding follows it.
  memcpy(new_data, signal, signal_bytes);
  memset(new_data + signal_size, 0, padded_bytes - signal_bytes);

  *padded_signal = new_data;
  return new_size;
}
// Runs one in-place forward 1D complex-to-complex cuFFT over a
// SIGNAL_SIZE-point signal and prints both the input and the transformed
// output, so the result can be checked against another tool.
//
// Values are printed in scientific notation because the input samples are of
// order 1e-9 and would all show up as 0.000000 with a plain %f.
//
// Returns EXIT_SUCCESS on success and EXIT_FAILURE if a host allocation
// fails. CUDA/cuFFT errors are handled by checkCudaErrors.
int main() {
  // Allocate memory for the host signal.
  Complex *h_signal =
      reinterpret_cast<Complex *>(malloc(sizeof(Complex) * SIGNAL_SIZE));
  if (h_signal == NULL) {
    fprintf(stderr, "Failed to allocate host memory for the signal\n");
    return EXIT_FAILURE;
  }

  // Initialize the signal: a real-valued ramp with zero imaginary parts.
  for (unsigned int i = 0; i < SIGNAL_SIZE; ++i) {
    h_signal[i].x = i / static_cast<float>(RAND_MAX);
    h_signal[i].y = 0.0f;
  }

  printf("Input signal (%d samples):\n", SIGNAL_SIZE);
  for (int i = 0; i < SIGNAL_SIZE; ++i) {
    printf("  x[%2d] = % .9e %+.9ei\n", i, h_signal[i].x, h_signal[i].y);
  }

  // Pad the signal (currently a plain copy into a new host buffer).
  Complex *h_padded_signal = NULL;
  const int new_size = PadData(h_signal, &h_padded_signal, SIGNAL_SIZE);
  if (new_size <= 0 || h_padded_signal == NULL) {
    fprintf(stderr, "Failed to pad the signal\n");
    free(h_padded_signal);
    free(h_signal);
    return EXIT_FAILURE;
  }
  const size_t mem_size = sizeof(Complex) * static_cast<size_t>(new_size);

  // Allocate device memory for the signal.
  Complex *d_signal = NULL;
  checkCudaErrors(cudaMalloc(reinterpret_cast<void **>(&d_signal), mem_size));

  // Copy host memory to device.
  checkCudaErrors(
      cudaMemcpy(d_signal, h_padded_signal, mem_size, cudaMemcpyHostToDevice));

  // cuFFT plan, simple API: one batch of new_size complex points.
  cufftHandle plan;
  checkCudaErrors(cufftPlan1d(&plan, new_size, CUFFT_C2C, 1));

  // Transform the signal in place on the device (input == output buffer).
  printf("Transforming signal cufftExecC2C\n");
  checkCudaErrors(cufftExecC2C(plan, reinterpret_cast<cufftComplex *>(d_signal),
                               reinterpret_cast<cufftComplex *>(d_signal),
                               CUFFT_FORWARD));

  // Copy the result back, reusing the padded host buffer. This cudaMemcpy is
  // a blocking copy, so the data is ready to read once it returns.
  checkCudaErrors(cudaMemcpy(h_padded_signal, d_signal, mem_size,
                             cudaMemcpyDeviceToHost));

  // Print the spectrum. Per the cuFFT documentation the transform is
  // unnormalized, so no scaling is applied here.
  printf("Transformed signal (%d samples):\n", new_size);
  for (int k = 0; k < new_size; ++k) {
    printf("  X[%2d] = % .9e %+.9ei\n", k, h_padded_signal[k].x,
           h_padded_signal[k].y);
  }

  // Destroy the cuFFT plan.
  checkCudaErrors(cufftDestroy(plan));

  // Clean up memory.
  free(h_signal);
  free(h_padded_signal);
  checkCudaErrors(cudaFree(d_signal));

  return EXIT_SUCCESS;
}