How to implement in CUDA

Hi, folks! I have two functions for a wavelet transform. Could somebody help me implement them in CUDA? Thanks!

These are the functions:

// filter_fh: in-place lifting pass along the rows of a 2D int array.
//
// Rows y = 0, scale, 2*scale, ... (while y < h) are processed, and inside a
// row only the samples at multiples of `scale` are read or written.
//   * Samples at odd multiples (scale, 3*scale, ...) are replaced by
//     "sample minus a prediction built from the neighbouring even samples".
//   * Samples at even multiples then get a correction added that is built
//     from the freshly written odd samples around them.
//
// parameters:
//   p       - pointer to a 2D array of ints (modified in place)
//   w, h    - width and height of the array (in elements)
//   rowsize - distance between two consecutive rows, in ints
//             (generally equal to w)
//   scale   - spacing of the samples that take part; a call with
//             scale <= 0 returns without touching the array
//
// NOTE: `>>` is applied to values that may be negative; like the original
// code this relies on the compiler implementing it as an arithmetic shift.
static void
filter_fh(int *p, int w, int h, int rowsize, int scale)
{
    // With a non-positive step neither the row nor the column cursor would
    // ever advance, so there is nothing sensible to do.
    if (scale <= 0)
        return;

    const int s  = scale;
    const int s2 = 2 * s;
    const int s3 = 3 * s;

    for (int y = 0; y < h; y += scale) {
        int *row = p + (long)y * rowsize;
        int x = s;                          // column of the current odd sample
        int a0 = 0, a1 = 0, a2 = 0, a3 = 0; // sliding window of even samples
        int b0 = 0, b1 = 0, b2 = 0, b3 = 0; // sliding window of new odd samples

        if (x < w) {
            // First odd sample: only a two-tap average is available.
            // a2/a3 fall back to the left neighbour when the row is short.
            a1 = a2 = a3 = row[x - s];
            if (x + s < w)
                a2 = row[x + s];
            if (x + s3 < w)
                a3 = row[x + s3];
            b3 = row[x] - ((a1 + a2 + 1) >> 1);
            row[x] = b3;
            x += s2;
        }

        while (x + s3 < w) {
            // Generic case: all four even neighbours exist.
            a0 = a1;
            a1 = a2;
            a2 = a3;
            a3 = row[x + s3];
            b0 = b1;
            b1 = b2;
            b2 = b3;
            // 9*v replaces (v<<3)+v: same value, but well defined for v < 0.
            b3 = row[x] - ((9 * (a1 + a2) - a0 - a3 + 8) >> 4);
            row[x] = b3;
            row[x - s3] += (9 * (b1 + b2) - b0 - b3 + 16) >> 5;
            x += s2;
        }

        while (x < w) {
            // Odd samples close to the right edge: two-tap prediction again.
            a1 = a2;
            a2 = a3;
            b0 = b1;
            b1 = b2;
            b2 = b3;
            b3 = row[x] - ((a1 + a2 + 1) >> 1);
            row[x] = b3;
            row[x - s3] += (9 * (b1 + b2) - b0 - b3 + 16) >> 5;
            x += s2;
        }

        while (x - s3 < w) {
            // The odd cursor is past the row; flush the even samples that
            // are still waiting for their correction (missing odd = 0).
            b0 = b1;
            b1 = b2;
            b2 = b3;
            b3 = 0;
            if (x - s3 >= 0)
                row[x - s3] += (9 * (b1 + b2) - b0 - b3 + 16) >> 5;
            x += s2;
        }
    }
}
// filter_fv: in-place lifting pass along the columns of a 2D int array.
//
// Only rows at multiples of `scale` and, inside them, columns at multiples
// of `scale` take part. Counting those rows 0, 1, 2, ...:
//   1 - Delta : every odd row has a prediction built from the neighbouring
//               even rows subtracted from it.
//   2 - Update: every even row has a correction built from the neighbouring
//               odd rows added to it. It runs three rows behind the delta
//               step, so the odd rows it reads are already rewritten.
//
// parameters:
//   p       - pointer to a 2D array of ints (modified in place)
//   w, h    - width and height of the array (in elements)
//   rowsize - distance between two consecutive rows, in ints
//   scale   - spacing of the rows/columns that take part
//
// A call with scale <= 0, h <= 0 or w <= 0 returns without touching the
// array (scale == 0 would otherwise divide by zero).
//
// NOTE: `>>` is applied to values that may be negative; like the original
// code this relies on the compiler implementing it as an arithmetic shift.
static void
filter_fv(int *p, int w, int h, int rowsize, int scale)
{
    if (scale <= 0 || h <= 0 || w <= 0)
        return;

    const long s    = (long)scale * rowsize;   // offset between processed rows
    const long s3   = 3 * s;
    const int  rows = ((h - 1) / scale) + 1;   // number of processed rows

    // y is always odd: it is the row handled by the delta step, while the
    // update step handles row y-3. The loop runs until the last even row
    // has received its update.
    for (int y = 1; y - 3 < rows; y += 2) {
        // 1 - Delta
        if (y < rows) {
            int *q = p + y * s;
            if (y >= 3 && y + 3 < rows) {
                // Generic case: rows y-3, y-1, y+1 and y+3 all exist.
                for (int x = 0; x < w; x += scale) {
                    const int a = q[x - s] + q[x + s];
                    const int b = q[x - s3] + q[x + s3];
                    // 9*a replaces (a<<3)+a: same value, defined for a < 0.
                    q[x] -= (9 * a - b + 8) >> 4;
                }
            } else {
                // Near the top/bottom: two-tap average. When row y+1 does
                // not exist, row y-1 is used in its place.
                const long other = (y + 1 < rows) ? s : -s;
                for (int x = 0; x < w; x += scale) {
                    const int a = q[x - s] + q[x + other];
                    q[x] -= (a + 1) >> 1;
                }
            }
        }

        // 2 - Update
        if (y >= 3) {
            int *q = p + (y - 3) * s;
            // Which odd neighbours of row y-3 exist inside the image.
            const int has_m3 = (y >= 6);        // row y-6
            const int has_m1 = (y >= 4);        // row y-4
            const int has_p1 = (y - 2 < rows);  // row y-2
            const int has_p3 = (y < rows);      // row y

            if (has_m3 && has_p3) {
                // Generic case: all four neighbours exist.
                for (int x = 0; x < w; x += scale) {
                    const int a = q[x - s] + q[x + s];
                    const int b = q[x - s3] + q[x + s3];
                    q[x] += (9 * a - b + 16) >> 5;
                }
            } else {
                // Special cases: a missing neighbour contributes 0.
                for (int x = 0; x < w; x += scale) {
                    const int a = (has_m1 ? q[x - s] : 0)
                                + (has_p1 ? q[x + s] : 0);
                    const int b = (has_m3 ? q[x - s3] : 0)
                                + (has_p3 ? q[x + s3] : 0);
                    q[x] += (9 * a - b + 16) >> 5;
                }
            }
        }
    }
}