Hi,
I just implement hilbert transform using cufft.
When I just tested with small data(width=16, height=8, total 128 elements), it worked well.
However, it doesn’t work when I used such a big data(width=2400, height=1024).
The error occurred Line #111 attached source code.
I attach the source code, plz help me!
// includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <iomanip>
// includes, project
#include <cuda_runtime.h>
#include <cufft.h>
#include <helper_functions.h>
#include <helper_cuda.h>
#include <windows.h>
#include "device_launch_parameters.h"
#define WIDTH 2400
#define HEIGHT 1024
#define INPUTSIZE WIDTH*HEIGHT
#define TILE_WIDTH 256
void File_Read(FILE *fp, cufftComplex* input_value);
void Generate_Pointwise_Coeff(int* pointwise_coeff);
__global__ void point_wise_product(cufftComplex *a, int *b, int numElements);
int main(int argc, char **argv)
{
FILE *fp=NULL;
int complexSize = sizeof(cufftComplex) * INPUTSIZE;
int normalSize = sizeof(int) * INPUTSIZE;
int* h_pointwise_coeff = new int[INPUTSIZE];
// Allocate the host memory set
cufftComplex* h_input_value = new cufftComplex[INPUTSIZE];
cufftComplex* MID_result = (cufftComplex *)malloc(complexSize);
cufftComplex* Hilbert_result = (cufftComplex *)malloc(complexSize);
//Read the input signal file(it should be the real time signal)
File_Read(fp, h_input_value);
//Pointwise-product
Generate_Pointwise_Coeff(h_pointwise_coeff);
printf("\n===============================================================\n");
printf("=================== Pointwise coeff Result ===================\n");
printf("================================================================\n");
// for(int i=0; i<INPUTSIZE; i++)
// printf("index %d: %d \n", i+1, h_pointwise_coeff[i]);
// printf("\n");
// Allocate the device memory set
cufftComplex* d_input_value;
int *d_pointwise_coeff;
checkCudaErrors(cudaMalloc((void**)&d_input_value, complexSize));
checkCudaErrors(cudaMalloc((void**)&d_pointwise_coeff, normalSize));
//Copy input value & pointwise coeff host memory to device
checkCudaErrors(cudaMemcpy(d_input_value, h_input_value, complexSize,
cudaMemcpyHostToDevice));
checkCudaErrors(cudaMemcpy(d_pointwise_coeff, h_pointwise_coeff, normalSize,
cudaMemcpyHostToDevice));
//cufft plan
cufftHandle plan;
checkCudaErrors(cufftPlan1d(&plan, INPUTSIZE, CUFFT_C2C, 1));
//FFT the input signal
printf("====================================================\n");
printf("========== FFT signal cufftexecR2C ==========\n");
printf("====================================================\n");
checkCudaErrors(cufftExecC2C(plan, d_input_value, d_input_value, CUFFT_FORWARD));
printf("\n====================================================\n");
printf("=================== FFT Result ===================\n");
printf("====================================================\n");
printf("\n=====================================================\n");
printf("=======Launching ComplexPointwiseAndScale<<< >>>======\n");
printf("======================================================\n");
dim3 dimGrids((WIDTH)/TILE_WIDTH , (HEIGHT)/TILE_WIDTH, 1);
dim3 dimBlocks(TILE_WIDTH, TILE_WIDTH, 1);
point_wise_product<<<((WIDTH)/TILE_WIDTH)*((HEIGHT)/TILE_WIDTH) ,(TILE_WIDTH*TILE_WIDTH)>>>(d_input_value, d_pointwise_coeff,(int)INPUTSIZE);
printf("\n=================================================================\n");
printf("=================== PointWise-Product result ===================\n");
printf("==================================================================\n");
printf("\n====================================================\n");
printf("=========== IFFT signal cufftexecR2C ===========\n");
printf("=====================================================\n");
//Inverse FFT cufftHandle plan
checkCudaErrors(cufftExecC2C(plan, d_input_value, d_input_value, 1));
//Copy Final result memory to host
checkCudaErrors(cudaMemcpy(Hilbert_result, d_input_value, complexSize, cudaMemcpyDeviceToHost));
printf("\n====================================================================\n");
printf("=================== Result of Hilbert Transform ===================\n");
printf("=====================================================================\n");
for(unsigned int i = 0; i<INPUTSIZE; i++)
{
printf("Index %d: Real-> %.2f, imagi-> %.2f \n", i+1, Hilbert_result[i].x/((float)INPUTSIZE), Hilbert_result[i].y/((float)INPUTSIZE));
}
cufftDestroy(plan);
free(h_pointwise_coeff);
free(h_input_value);
free(MID_result);
free(Hilbert_result);
cudaFree(d_input_value);
cudaFree(d_pointwise_coeff);
return 0;
}
void File_Read(FILE *fp, cufftComplex* input_value)
{
fp=NULL;
int i,j;
float temp;
int offset=0;
if((fp=fopen("input_full.dat","r"))==NULL)
{
fprintf(stderr,"Cannot open the file\n");
exit(1);
}
for(j=0;j<WIDTH*HEIGHT;j++){
fscanf(fp, "%f", &temp);
input_value[j].x = temp;
input_value[j].y = 0;
}
printf("============================================\n");
printf("================ Input Data ================\n");
printf("============================================\n");
}
void Generate_Pointwise_Coeff(int* pointwise_coeff)
{
pointwise_coeff[0] = 1;
pointwise_coeff[INPUTSIZE/2] = 1;
for(unsigned int i = 0; i < INPUTSIZE/2 -1; i++)
{
pointwise_coeff[i+1] = 2;
pointwise_coeff[INPUTSIZE/2 + 1 + i] = 0;
}
}
__global__ void point_wise_product(cufftComplex *a, int *b, int numElements){
int i = blockDim.x * blockIdx.x + threadIdx.x;
if(i < WIDTH*HEIGHT){
a[i].x = a[i].x * b[i];
a[i].y = a[i].y * b[i];
}
}