# Can someone explain why is this recursive?

recursiveGaussian.cpp

``````extern "C"
void gaussianFilterRGBA(uint *d_src, uint *d_dest, uint *d_temp, int width, int height, float sigma, int order, int nthreads)
{
// compute filter coefficients
const float
nsigma = sigma < 0.1f ? 0.1f : sigma,
alpha = 1.695f / nsigma,
ema = (float)std::exp(-alpha),
ema2 = (float)std::exp(-2*alpha),
b1 = -2*ema,
b2 = ema2;

float a0 = 0, a1 = 0, a2 = 0, a3 = 0, coefp = 0, coefn = 0;

switch (order)
{
case 0:
{
const float k = (1-ema)*(1-ema)/(1+2*alpha*ema-ema2);
a0 = k;
a1 = k*(alpha-1)*ema;
a2 = k*(alpha+1)*ema;
a3 = -k*ema2;
}
break;

case 1:
{
const float k = (1-ema)*(1-ema)/ema;
a0 = k*ema;
a1 = a3 = 0;
a2 = -a0;
}
break;

case 2:
{
const float
ea = (float)std::exp(-alpha),
k = -(ema2-1)/(2*alpha*ema),
kn = (-2*(-1+3*ea-3*ea*ea+ea*ea*ea)/(3*ea+1+3*ea*ea+ea*ea*ea));
a0 = kn;
a1 = -kn*(1+k*alpha)*ema;
a2 = kn*(1-k*alpha)*ema;
a3 = -kn*ema2;
}
break;

default:
fprintf(stderr, "gaussianFilter: invalid order parameter!\n");
return;
}

coefp = (a0+a1)/(1+b1+b2);
coefn = (a2+a3)/(1+b1+b2);

// process columns
#if USE_SIMPLE_FILTER
#else
d_recursiveGaussian_rgba<<< iDivUp(width, nthreads), nthreads >>>(d_src, d_temp, width, height, a0, a1, a2, a3, b1, b2, coefp, coefn);
#endif
getLastCudaError("Kernel execution failed");

transpose(d_temp, d_dest, width, height);
getLastCudaError("transpose: Kernel execution failed");

// process rows
#if USE_SIMPLE_FILTER
#else
d_recursiveGaussian_rgba<<< iDivUp(height, nthreads), nthreads >>>(d_dest, d_temp, height, width, a0, a1, a2, a3, b1, b2, coefp, coefn);
#endif
getLastCudaError("Kernel execution failed");

transpose(d_temp, d_dest, height, width);
}
``````

recursiveGaussian_kernel.cuh

``````__global__ void
d_recursiveGaussian_rgba(uint *id, uint *od, int w, int h, float a0, float a1, float a2, float a3, float b1, float b2, float coefp, float coefn)
{
unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;

if (x >= w) return;

id += x;    // advance pointers to correct column
od += x;

// forward pass
float4 xp = make_float4(0.0f);  // previous input
float4 yp = make_float4(0.0f);  // previous output
float4 yb = make_float4(0.0f);  // previous output by 2
#if CLAMP_TO_EDGE
xp = rgbaIntToFloat(*id);
yb = coefp*xp;
yp = yb;
#endif

for (int y = 0; y < h; y++)
{
float4 xc = rgbaIntToFloat(*id);
float4 yc = a0*xc + a1*xp - b1*yp - b2*yb;
*od = rgbaFloatToInt(yc);
id += w;
od += w;    // move to next row
xp = xc;
yb = yp;
yp = yc;
}

// reset pointers to point to last element in column
id -= w;
od -= w;

// reverse pass
// ensures response is symmetrical
float4 xn = make_float4(0.0f);
float4 xa = make_float4(0.0f);
float4 yn = make_float4(0.0f);
float4 ya = make_float4(0.0f);
#if CLAMP_TO_EDGE
xn = xa = rgbaIntToFloat(*id);
yn = coefn*xn;
ya = yn;
#endif

for (int y = h-1; y >= 0; y--)
{
float4 xc = rgbaIntToFloat(*id);
float4 yc = a2*xn + a3*xa - b1*yn - b2*ya;
xa = xn;
xn = xc;
ya = yn;
yn = yc;
*od = rgbaFloatToInt(rgbaIntToFloat(*od) + yc);
id -= w;
od -= w;  // move to previous row
}
}
``````

this function is in recursiveGaussian cuda sample, i have a problem with my ray tracing recursion, so i looked up for it. but i donâ€™t see any recursive features in the function. is there something iâ€™ve missed?

The meaning here is not that the filter implements recursion (a function calling itself). Recursive Gaussian filters are a specific category of gaussian filters that perform 2D filtering implemented using two successive 1D filters, one of which filters horizontally, the other vertically. Consequently, instead of using a 2D kernel these filters will typically have (perhaps 2) 1D kernels. The â€śkernelâ€ť here does not mean CUDA kernel, but means the filter coefficients used. To get a better explanation, look up and study the concept of recursive gaussian filters.

1 Like