Can someone explain why is this recursive?

recursiveGaussian.cpp

extern "C"
void gaussianFilterRGBA(uint *d_src, uint *d_dest, uint *d_temp, int width, int height, float sigma, int order, int nthreads)
{
    // compute filter coefficients
    const float
    nsigma = sigma < 0.1f ? 0.1f : sigma,
    alpha = 1.695f / nsigma,
    ema = (float)std::exp(-alpha),
    ema2 = (float)std::exp(-2*alpha),
    b1 = -2*ema,
    b2 = ema2;

    float a0 = 0, a1 = 0, a2 = 0, a3 = 0, coefp = 0, coefn = 0;

    switch (order)
    {
        case 0:
            {
                const float k = (1-ema)*(1-ema)/(1+2*alpha*ema-ema2);
                a0 = k;
                a1 = k*(alpha-1)*ema;
                a2 = k*(alpha+1)*ema;
                a3 = -k*ema2;
            }
            break;

        case 1:
            {
                const float k = (1-ema)*(1-ema)/ema;
                a0 = k*ema;
                a1 = a3 = 0;
                a2 = -a0;
            }
            break;

        case 2:
            {
                const float
                ea = (float)std::exp(-alpha),
                k = -(ema2-1)/(2*alpha*ema),
                kn = (-2*(-1+3*ea-3*ea*ea+ea*ea*ea)/(3*ea+1+3*ea*ea+ea*ea*ea));
                a0 = kn;
                a1 = -kn*(1+k*alpha)*ema;
                a2 = kn*(1-k*alpha)*ema;
                a3 = -kn*ema2;
            }
            break;

        default:
            fprintf(stderr, "gaussianFilter: invalid order parameter!\n");
            return;
    }

    coefp = (a0+a1)/(1+b1+b2);
    coefn = (a2+a3)/(1+b1+b2);

    // process columns
#if USE_SIMPLE_FILTER
    d_simpleRecursive_rgba<<< iDivUp(width, nthreads), nthreads >>>(d_src, d_temp, width, height, ema);
#else
    d_recursiveGaussian_rgba<<< iDivUp(width, nthreads), nthreads >>>(d_src, d_temp, width, height, a0, a1, a2, a3, b1, b2, coefp, coefn);
#endif
    getLastCudaError("Kernel execution failed");

    transpose(d_temp, d_dest, width, height);
    getLastCudaError("transpose: Kernel execution failed");

    // process rows
#if USE_SIMPLE_FILTER
    d_simpleRecursive_rgba<<< iDivUp(height, nthreads), nthreads >>>(d_dest, d_temp, height, width, ema);
#else
    d_recursiveGaussian_rgba<<< iDivUp(height, nthreads), nthreads >>>(d_dest, d_temp, height, width, a0, a1, a2, a3, b1, b2, coefp, coefn);
#endif
    getLastCudaError("Kernel execution failed");

    transpose(d_temp, d_dest, height, width);
}

recursiveGaussian_kernel.cuh

__global__ void
d_recursiveGaussian_rgba(uint *id, uint *od, int w, int h, float a0, float a1, float a2, float a3, float b1, float b2, float coefp, float coefn)
{
    unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;

    if (x >= w) return;

    id += x;    // advance pointers to correct column
    od += x;

    // forward pass
    float4 xp = make_float4(0.0f);  // previous input
    float4 yp = make_float4(0.0f);  // previous output
    float4 yb = make_float4(0.0f);  // previous output by 2
#if CLAMP_TO_EDGE
    xp = rgbaIntToFloat(*id);
    yb = coefp*xp;
    yp = yb;
#endif

    for (int y = 0; y < h; y++)
    {
        float4 xc = rgbaIntToFloat(*id);
        float4 yc = a0*xc + a1*xp - b1*yp - b2*yb;
        *od = rgbaFloatToInt(yc);
        id += w;
        od += w;    // move to next row
        xp = xc;
        yb = yp;
        yp = yc;
    }

    // reset pointers to point to last element in column
    id -= w;
    od -= w;

    // reverse pass
    // ensures response is symmetrical
    float4 xn = make_float4(0.0f);
    float4 xa = make_float4(0.0f);
    float4 yn = make_float4(0.0f);
    float4 ya = make_float4(0.0f);
#if CLAMP_TO_EDGE
    xn = xa = rgbaIntToFloat(*id);
    yn = coefn*xn;
    ya = yn;
#endif

    for (int y = h-1; y >= 0; y--)
    {
        float4 xc = rgbaIntToFloat(*id);
        float4 yc = a2*xn + a3*xa - b1*yn - b2*ya;
        xa = xn;
        xn = xc;
        ya = yn;
        yn = yc;
        *od = rgbaFloatToInt(rgbaIntToFloat(*od) + yc);
        id -= w;
        od -= w;  // move to previous row
    }
}

this function is in recursiveGaussian cuda sample, i have a problem with my ray tracing recursion, so i looked up for it. but i don’t see any recursive features in the function. is there something i’ve missed?

The meaning here is not that the filter implements recursion (a function calling itself). Recursive Gaussian filters are a specific category of gaussian filters that perform 2D filtering implemented using two successive 1D filters, one of which filters horizontally, the other vertically. Consequently, instead of using a 2D kernel these filters will typically have (perhaps 2) 1D kernels. The “kernel” here does not mean CUDA kernel, but means the filter coefficients used. To get a better explanation, look up and study the concept of recursive gaussian filters.

1 Like