Bilinear interpolation problem

Hello all
I have a problem: I finished my program in C (running on the CPU), but it is too slow,
so I want to run it in CUDA instead, but I have no idea how to start.
The image has been saved in the array f.
Can someone suggest something?
My code:
/*
 * Bilinear blend, 2-D table version.
 *
 * For every pixel (i/3, j) whose GU/GV entry passes the bounds guard, the
 * four neighbouring samples in f (this pixel, the next pixel in the row,
 * and the same two one row further on) are blended with the weights
 * GFU[j][i/3] and GFV[j][i/3].  Byte offsets +0/+1/+2 inside a pixel feed
 * colB/colG/colR respectively.
 *
 * Fixes relative to the pasted text:
 *   - the `<Dmp->` comparisons and the `*` operators between the weight
 *     factors were lost when the post was rendered; they are restored here;
 *   - the blue channel read its next-row neighbours at +1203/+1206, i.e.
 *     one whole pixel to the right of where the red and green channels
 *     read theirs; it now uses +1200/+1203.
 *
 * NOTE(review): the guard tests GU/GV, but f is addressed by (i, j) only.
 * Confirm that this is intended and that the +3 / next-row reads stay
 * inside f on the last column and last row.
 * NOTE(review): colR/colG/colB are not stored anywhere in the code shown.
 */
for (j = 0; j < Dmp->Height; j++)
{
    for (i = 0; i < Dmp->Width * 3; i += 3)
    {
        if (GU[j][i/3] >= 0 && GU[j][i/3] < Dmp->Width - 1 &&
            GV[j][i/3] >= 0 && GV[j][i/3] < Dmp->Height - 1)
        {
            /* Interpolation fractions for this pixel. */
            const double fu = GFU[j][i/3];
            const double fv = GFV[j][i/3];

            /* The four bilinear weights; identical for all three channels. */
            const double w00 = (1.0 - fu) * (1.0 - fv);
            const double w10 = fu * (1.0 - fv);
            const double w01 = (1.0 - fu) * fv;
            const double w11 = fu * fv;

            /* Index of the first byte of this pixel in f. */
            const int base = i + 3 * w * j;
            /* Distance to the same pixel one row further on; hard-coded as
               in the original -- presumably equal to 3 * w, TODO confirm. */
            const int rowStep = 1200;

            /* Red: byte +2 of each neighbouring pixel. */
            ncol1 = f[base + 2];
            ncol2 = f[base + 5];
            ncol3 = f[base + rowStep + 2];
            ncol4 = f[base + rowStep + 5];
            colR = (byte)(w00 * (double)ncol1 + w10 * (double)ncol2
                        + w01 * (double)ncol3 + w11 * (double)ncol4);

            /* Green: byte +1 of each neighbouring pixel. */
            ncol1 = f[base + 1];
            ncol2 = f[base + 4];
            ncol3 = f[base + rowStep + 1];
            ncol4 = f[base + rowStep + 4];
            colG = (byte)(w00 * (double)ncol1 + w10 * (double)ncol2
                        + w01 * (double)ncol3 + w11 * (double)ncol4);

            /* Blue: byte +0 of each neighbouring pixel. */
            ncol1 = f[base];
            ncol2 = f[base + 3];
            ncol3 = f[base + rowStep];
            ncol4 = f[base + rowStep + 3];
            colB = (byte)(w00 * (double)ncol1 + w10 * (double)ncol2
                        + w01 * (double)ncol3 + w11 * (double)ncol4);
        }
    }
}

``````			for (i=0 ; i<360000 ; i+=3)
{
if(GU2[i/3]>=0 && GU2[i/3]<Dmp->Width-1 && GV2[i/3]>=0 && GV2[i/3]<Dmp->Height-1)
{
Label42->Caption=f[3];
Label43->Caption=c[3];
ncol1 = f[(int)GU2[i/3]*3+(int)GV2[i/3]*1200+2];
ncol2 = f[(int)GU2[i/3]*3+(int)GV2[i/3]*1200+5];
ncol3 = f[(int)GU2[i/3]*3+(int)GV2[i/3]*1200+1202];
ncol4 = f[(int)GU2[i/3]*3+(int)GV2[i/3]*1200+1205];
colR = (byte)((1.0-GFU2[i/3])*(1.0-GFV2[i/3])*(double)ncol1+(GFU2[i/3])*(1.0-GFV2[i/3])*(double)ncol2
+(1.0-GFU2[i/3])*(GFV2[i/3])*(double)ncol3+(GFU2[i/3])*(GFV2[i/3])*(double)ncol4);
ncol1 = f[(int)GU2[i/3]*3+(int)GV2[i/3]*1200+1];
ncol2 = f[(int)GU2[i/3]*3+(int)GV2[i/3]*1200+4];
ncol3 = f[(int)GU2[i/3]*3+(int)GV2[i/3]*1200+1201];
ncol4 = f[(int)GU2[i/3]*3+(int)GV2[i/3]*1200+1204];
colG = (byte)((1.0-GFU2[i/3])*(1.0-GFV2[i/3])*(double)ncol1+(GFU2[i/3])*(1.0-GFV2[i/3])*(double)ncol2
+(1.0-GFU2[i/3])*(GFV2[i/3])*(double)ncol3+(GFU2[i/3])*(GFV2[i/3])*(double)ncol4);
ncol1 = f[(int)GU2[i/3]*3+(int)GV2[i/3]*1200];
ncol2 = f[(int)GU2[i/3]*3+(int)GV2[i/3]*1200+3];
ncol3 = f[(int)GU2[i/3]*3+(int)GV2[i/3]*1200+1200];
ncol4 = f[(int)GU2[i/3]*3+(int)GV2[i/3]*1200+1203];
colB = (byte)((1.0-GFU2[i/3])*(1.0-GFV2[i/3])*(double)ncol1+(GFU2[i/3])*(1.0-GFV2[i/3])*(double)ncol2
+(1.0-GFU2[i/3])*(GFV2[i/3])*(double)ncol3+(GFU2[i/3])*(GFV2[i/3])*(double)ncol4);

}``````

Linear/bilinear/trilinear interpolations can be effectively performed in CUDA by using texture memory.

Do you have any material I can learn from?
For example: a website, books, or lessons?