I have the following code, with which I am trying to replicate the SDK example; all of the functions called here come from the convolution2DFFT source code:
/*
 * FFT-based 2D convolution of an 8-bit chip (h_data, chipH rows x chipW
 * columns) with the kSize x kSize float kernel hp_kernel.
 *
 * Steps: convert the input to float, zero-pad data and kernel to the FFT
 * size, transform both with cuFFT (R2C), multiply the spectra, transform
 * back (C2R), and copy the top-left chipH x chipW region of the result into
 * a freshly malloc'ed unsigned char buffer.
 *
 * Returns the result buffer (chipH * chipW bytes, row-major, row stride
 * chipW); the caller owns it and must free() it. Returns NULL if a host
 * allocation fails.
 */
const int kSize = 3;    /* hp_kernel is kSize x kSize */
const int kernelY = 1;  /* presumably the kernel centre row, as in the SDK sample -- confirm */
const int kernelX = 1;  /* presumably the kernel centre column -- confirm */

FILE *file;
unsigned char *charData;
float *h_ResultGPU, *float_h_data;
float *d_Data, *d_PaddedData, *d_Kernel, *d_PaddedKernel;
cufftHandle fftPlanFwd, fftPlanInv;
fComplex *d_DataSpectrum, *d_KernelSpectrum;

const int fftH = snapTransformSize(chipH + kSize - 1); /* snaps to power of 2 or 512 multiple */
const int fftW = snapTransformSize(chipW + kSize - 1);

/* Byte counts computed once in size_t so large images cannot overflow int. */
const size_t chipElems      = (size_t)chipH * (size_t)chipW;
const size_t kernelBytes    = (size_t)kSize * (size_t)kSize * sizeof(float);
const size_t paddedBytes    = (size_t)fftH * (size_t)fftW * sizeof(float);
const size_t spectrumBytes  = (size_t)fftH * (size_t)(fftW / 2 + 1) * sizeof(fComplex);

setHeight(chipH);
setWidth(chipW);
printf("fftH: %i, fftW: %i \n", fftH, fftW);

/* Allocate every host buffer up front so a failure can be handled before
 * any GPU resource exists. */
float_h_data = (float *)malloc(chipElems * sizeof(float));
h_ResultGPU  = (float *)malloc(paddedBytes);
charData     = (unsigned char *)malloc(chipElems * sizeof(unsigned char));
if (float_h_data == NULL || h_ResultGPU == NULL || charData == NULL)
{
    fprintf(stderr, "host allocation failed\n");
    free(float_h_data);
    free(h_ResultGPU);
    free(charData);
    return NULL;
}

/* Convert the 8-bit input to float for the FFT. */
for (size_t idx = 0; idx < chipElems; idx++)
{
    float_h_data[idx] = (float)h_data[idx];
}

/* Debug dump: the CSV can be loaded in MATLAB and compared against
 * imread() of the same chip to verify the input. */
file = fopen("orig_float_data.csv", "wb");
if (file != NULL)
{
    for (size_t row = 0; row < (size_t)chipH; row++)
    {
        for (size_t col = 0; col < (size_t)chipW; col++)
        {
            fprintf(file, "%f,", float_h_data[row * chipW + col]);
        }
        fprintf(file, "\n");
    }
    fclose(file);
}
else
{
    fprintf(stderr, "warning: could not open orig_float_data.csv for writing\n");
}

printf("...allocating memory\n");
cutilSafeCall( cudaMalloc((void **)&d_Data, chipElems * sizeof(float)) );
cutilSafeCall( cudaMalloc((void **)&d_Kernel, kernelBytes) );
cutilSafeCall( cudaMalloc((void **)&d_PaddedData, paddedBytes) );
cutilSafeCall( cudaMalloc((void **)&d_PaddedKernel, paddedBytes) );
cutilSafeCall( cudaMalloc((void **)&d_DataSpectrum, spectrumBytes) );
cutilSafeCall( cudaMalloc((void **)&d_KernelSpectrum, spectrumBytes) );

/* PLANS */
printf("...creating R2C & C2R FFT plans for %i x %i\n", fftH, fftW);
cufftSafeCall( cufftPlan2d(&fftPlanFwd, fftH, fftW, CUFFT_R2C) );
cufftSafeCall( cufftPlan2d(&fftPlanInv, fftH, fftW, CUFFT_C2R) );

/* UPLOAD DATA TO DEVICE */
printf("...uploading to GPU and padding convolution kernel and input data\n");
cutilSafeCall( cudaMemcpy(d_Kernel, hp_kernel, kernelBytes, cudaMemcpyHostToDevice) );
cutilSafeCall( cudaMemcpy(d_Data, float_h_data, chipElems * sizeof(float), cudaMemcpyHostToDevice) );
cutilSafeCall( cudaMemset(d_PaddedKernel, 0, paddedBytes) );
cutilSafeCall( cudaMemset(d_PaddedData, 0, paddedBytes) );

/* Pad the kernel and the data (border handling) to the FFT size. */
padKernel(d_PaddedKernel, d_Kernel, fftH, fftW, kSize, kSize, kernelY, kernelX);
padDataClampToBorder(d_PaddedData, d_Data, fftH, fftW, chipH, chipW, kSize, kSize, kernelY, kernelX);

printf("...transforming convolution kernel\n");
cufftSafeCall( cufftExecR2C(fftPlanFwd, (cufftReal *)d_PaddedKernel, (cufftComplex *)d_KernelSpectrum) );

printf("...running GPU FFT convolution: ");
cufftSafeCall( cufftExecR2C(fftPlanFwd, (cufftReal *)d_PaddedData, (cufftComplex *)d_DataSpectrum) );
modulateAndNormalize(d_DataSpectrum, d_KernelSpectrum, fftH, fftW, 1);
/* Inverse transform writes the convolved image back into d_PaddedData. */
cufftSafeCall( cufftExecC2R(fftPlanInv, (cufftComplex *)d_DataSpectrum, (cufftReal *)d_PaddedData) );

printf("...reading back GPU convolution results\n");
cutilSafeCall( cudaMemcpy(h_ResultGPU, d_PaddedData, paddedBytes, cudaMemcpyDeviceToHost) );

/* Crop the padded result to the chip. The source rows have stride fftW and
 * the destination rows have stride chipW (NOT chipH: using the height as
 * the row stride shears the image and can overrun the buffer). */
for (size_t row = 0; row < (size_t)chipH; row++)
{
    for (size_t col = 0; col < (size_t)chipW; col++)
    {
        float v = h_ResultGPU[row * fftW + col];
        /* Clamp to the 8-bit range before converting: FFT round-off can
         * push values slightly outside [0, 255], and converting an
         * out-of-range float to unsigned char is undefined. The negated
         * comparison also maps NaN to 0. */
        if (!(v >= 0.0f))
        {
            v = 0.0f;
        }
        else if (v > 255.0f)
        {
            v = 255.0f;
        }
        /* Round to nearest so e.g. 99.9999 becomes 100 rather than 99. */
        charData[row * chipW + col] = (unsigned char)(v + 0.5f);
    }
}

/* Release everything except charData, which is handed to the caller. */
cufftSafeCall( cufftDestroy(fftPlanInv) );
cufftSafeCall( cufftDestroy(fftPlanFwd) );
cutilSafeCall( cudaFree(d_KernelSpectrum) );
cutilSafeCall( cudaFree(d_DataSpectrum) );
cutilSafeCall( cudaFree(d_PaddedKernel) );
cutilSafeCall( cudaFree(d_PaddedData) );
cutilSafeCall( cudaFree(d_Kernel) );
cutilSafeCall( cudaFree(d_Data) );
free(h_ResultGPU);
free(float_h_data);

return charData;
So I am basically passing in h_data, which is an unsigned char array, converting it to float, and then performing the FFTs on the padded kernel and data matrices. As I mentioned in the code comments, I verified that I am in fact passing the right data, because I can read that .csv file into MATLAB and display the image correctly before processing.
However, when I try to display charData after the return, it comes back looking like noise/snow whenever I run this on my input JPEG data (which has been reduced to grayscale).
Also, my kernel is a 3x3 do-nothing kernel, so the output should be exactly like the input.
Can anyone see anything wrong in my code?