Hi!
I’m facing a problem with the following kernel:
/*
 * GALfilterKern: processes kArgs->sig one sample at a time through M_DEFAULT
 * adaptive stages, using kArgs->sig_d as the reference ("desired") signal.
 * NOTE(review): the name and the update equations suggest a gradient adaptive
 * lattice (GAL) joint-process estimator -- confirm.
 *
 * Parameters:
 *   kArgs          - input signals (sig, sig_d, sig_length) plus the filter state
 *                    that is updated in place (E_f, E_b, k, h, y, err, norm_b, Energy).
 *   deviceSig_filt - output; element n receives the last-stage error err[M_DEFAULT].
 *   d_Y            - output; element n receives the last-stage estimate y[M_DEFAULT].
 *
 * c_mul/c_add/c_sub/c_con/c_abs are helpers defined elsewhere; presumably complex
 * multiply/add/subtract/conjugate/magnitude -- verify.
 *
 * The body never reads threadIdx/blockIdx: the sample loop is sequential, and every
 * launched thread would run it in full over the same buffers.
 *
 * NOTE(review): h, k, Energy and E_b[0] are read before this kernel ever writes
 * them, so they must hold valid initial values on entry, and every pointer stored
 * inside *kArgs must be dereferenceable from device code -- TODO confirm in the caller.
 */
__global__ void GALfilterKern(kernArgs *kArgs, Complex *deviceSig_filt, Complex *d_Y)
{
int i,n,m;
// ===== INPUT PARAMETERS ===
const float delta = 1e-2f; // small positive constant added to the stage-0 squared magnitude in norm_b[0]
const float beta = 0.8f; // weight of the previous value in the Energy[] running average
const float mhu = 0.08f; // step-size numerator; divided by norm_b[] or Energy[] before each update
// ===== INITIALIZATION =====
float absE_f = 0.0f;
float absE_b = 0.0f;
// ===== APPLICATION TO INPUT SIGNAL FOR EACH SAMPLE===
for(n=0; n<kArgs->sig_length; ++n){
// data in: u is the input sample, d the reference sample for time n
Complex u = {kArgs->sig[n].real,kArgs->sig[n].img};
Complex d = {kArgs->sig_d[n].real,kArgs->sig_d[n].img};
// forward and backward error initialization: stage 0 of both is the input sample
kArgs->E_f[0].real = u.real;
kArgs->E_f[0].img = u.img;
kArgs->E_b[1][0].real = u.real;
kArgs->E_b[1][0].img = u.img;
// desired response at time n and stage "-1"
// (stage-0 estimate from h[0], then a normalized update of h[0] using the new error)
kArgs->y[0] = c_mul(c_con(kArgs->h[0]),kArgs->E_b[1][0]);
kArgs->err[0] = c_sub(d,kArgs->y[0]);
absE_b = c_abs(kArgs->E_b[1][0]);
kArgs->norm_b[0] = delta + (absE_b*absE_b);
Complex mn = {mhu/kArgs->norm_b[0],0.0f};
Complex partMul = c_mul(kArgs->E_b[1][0],c_con(kArgs->err[0]));
kArgs->h[0] = c_add(kArgs->h[0],c_mul(mn,partMul));
// stages 1..M_DEFAULT; E_b[0][*] holds the values copied from E_b[1][*] at the end of the previous sample
for(m=1; m<M_DEFAULT+1; ++m){
absE_f = c_abs(kArgs->E_f[m-1]);
absE_b = c_abs(kArgs->E_b[0][m-1]);
// running average of the stage's input error energy, weighted by beta
kArgs->Energy[m-1] = beta * kArgs->Energy[m-1] + (1-beta) * ((absE_f*absE_f) + (absE_b*absE_b));
// order update of both errors; both lines use k[m-1] BEFORE it is modified below
kArgs->E_f[m] = c_add(kArgs->E_f[m-1],c_mul(c_con(kArgs->k[m-1]),kArgs->E_b[0][m-1]));
kArgs->E_b[1][m] = c_add(kArgs->E_b[0][m-1],c_mul(kArgs->k[m-1],kArgs->E_f[m-1]));
// update of k[m-1], step mhu/Energy[m-1]
// NOTE(review): Energy[m-1] is the divisor here; nothing in this kernel keeps it non-zero
Complex mE = {mhu/kArgs->Energy[m-1],0.0f};
Complex firstMul = c_mul(c_con(kArgs->E_f[m-1]),kArgs->E_b[1][m]);
Complex secondMul = c_mul(kArgs->E_b[0][m-1],c_con(kArgs->E_f[m]));
kArgs->k[m-1] = c_sub(kArgs->k[m-1],c_mul(c_add(firstMul,secondMul),mE));
// desired response: y accumulates one term per stage, err is the remaining difference from d
kArgs->y[m] = c_add(kArgs->y[m-1],c_mul(c_con(kArgs->h[m]),kArgs->E_b[1][m]));
kArgs->err[m] = c_sub(d,kArgs->y[m]);
absE_b = c_abs(kArgs->E_b[1][m]);
// norm_b accumulates the squared magnitudes of stages 0..m
kArgs->norm_b[m] = kArgs->norm_b[m-1] + (absE_b*absE_b);
Complex mn_b = {mhu/kArgs->norm_b[m],0.0f};
kArgs->h[m] = c_add(kArgs->h[m],c_mul(mn_b,c_mul(kArgs->E_b[1][m],c_con(kArgs->err[m]))));
}
// keep this sample's E_b[1][*] in E_b[0][*] for use by the next sample
for(i=0; i<M_DEFAULT+1; ++i)
kArgs->E_b[0][i]=kArgs->E_b[1][i];
// after the stage loop m == M_DEFAULT+1, so m-1 indexes the last stage
d_Y[n] = kArgs->y[m-1];
deviceSig_filt[n] = kArgs->err[m-1];
}
}
For testing purposes, I want to launch the kernel with only one thread, so in main() I wrote:
GALfilterKern<<<1,1>>>(dArgs,deviceSig_filt,d_Y); // deviceSig_filt contains results
The problem is that the results are wrong (and different at every launch), while in emulation mode they are correct. This is strange, because only a single thread executes the kernel. What should I do?