Wiener deconvolution

Hi everyone.

I’ve been trying to develop a wiener deconvolution algorithm using CUDA. I’m following the form of it’s equation where to get the G(f) you divide the conjugate of the H(f) (the PSF after FFT) by the magnitude of H(f) plus a noise constant.
My PSF is zero padded to the size of the image, the FFT is performed as a 2D C2C cufft transform (I converse the real numbers to complex numbers first).
My functions look as follow:

global void conjugate(cufftComplex* signal_in) {

int id = blockDim.x*blockIdx.x + threadIdx.x;
int idx = threadIdx.x;
float temp_imag[256];
float temp_real[256];

temp_imag[idx] = (-1)*(signal_in[id].y);
temp_real[idx] = signal_in[id].x;



__syncthreads();
signal_in[id].y = temp_imag[idx];
signal_in[id].x = temp_real[idx];

};

global void mul(cufftComplex* signal_in, cufftComplex* signal_out, int* width, int* length) {

int id = blockDim.x*blockIdx.x + threadIdx.x;
int idx = threadIdx.x;

cuComplex temp_real2[256];

float scale = 1.0f / (float)(*width*(*length));
temp_real2[idx] = cuCmulf(signal_in[id],signal_out[id]);


	__syncthreads();
signal_in[id] = make_cuFloatComplex(scale*cuCrealf(temp_real2[idx]), scale*cuCimagf(temp_real2[idx]));

return;

}

global void addnoise(cufftComplex* signal_in, cufftComplex* signal_out) {

int id = threadIdx.x;
int idx = threadIdx.x + blockDim.x*blockIdx.x;
float temp_real[256];
temp_real[id] = signal_in[idx].x;
__syncthreads();

signal_out[idx].x = (temp_real[id] + NOISECONTS) ;

}

global void divide(cufftComplex* signal_in, cufftComplex* signal_in_noisy, int* width, int* length) {

int id = blockDim.x*blockIdx.x + threadIdx.x;
int idx = threadIdx.x;
cuComplex temp[256];

temp[idx] = cuCdivf(signal_in[id], signal_in_noisy[id]);

__syncthreads();

signal_in[id] = temp[idx];

};

global void magnitude(cufftComplex* signal_in, double* magnitude) {

int id = blockDim.x*blockIdx.x + threadIdx.x;
int idx = threadIdx.x;

double temp_magRE[256];
double temp_magIM[256];

temp_magRE[idx] = signal_in[id].x;
temp_magIM[idx] = signal_in[id].y;

__syncthreads();

magnitude[id] = temp_magRE[idx] *temp_magRE[idx] + temp_magIM[idx] * temp_magIM[idx];

}

The resulting image is just pure noise. Random-looking pixels that resemble nothing really. Not sure what am I doing wrong?
Any input would be highly appreciated, thanks.

Suggestions:

  1. If you want help from others, I think you’re more likely to get it if you provide a complete test case for your code.
  2. Instrument your code with proper cuda error checking (google that) and run your code with cuda-memcheck