kernel is not executed with no error

i tried two kinds of kernels for projection,one is nearest interpolation.one is cubic interpolation.both of these two kernel are tranformed using image2D.

the kernel is

__kernel 

void median(__global CB* cb,

	__read_only image2d_t SourceRgbaTex,sampler_t RowSampler,__global unsigned int* uiDest,

	unsigned int uiWidth, unsigned int uiHeight)

    {

int gx=get_global_id(0);

    int gy=get_global_id(1);

    // float4 f4Sum = (float4)0.0f;

if(gx < uiWidth && gy<uiHeight)

	{

	/*

	 * transformation

	 */

	float k = 1.0f / (cb->a * gcol + cb->b * grow + 1.0f);

	float x = (cb->c * gcol + cb->d * grow + cb->e) * k; //col

	float y = (cb->f * gcol + cb->g * grow + cb->h) * k; //row

	int pix = (int)x;

	int scn = (int)y;

	/*

	 * pixel sampling from src image

	 */

	unsigned int r, g, b;

	if ( pix < 0 || uiWidth <= pix || scn < 0 || uiHeight <= scn)

	    {

	    // out of source image, use padding instead

	    r = PADDING;

	    g = PADDING;

	    b = PADDING;

	    }

	else

	    {

             /*

	     //  nearest neighborhood

	     int x1=select(ceil(x),floor(x),fabs(ceil(x)-x)-fabs(floor(x)-x)>0);

	     int y1=select(ceil(y),floor(y),fabs(ceil(y)-y)-fabs(floor(y)-y)>0);

	     int2 pos =

	     {x1, y1};

	     uint4 p = read_imageui(SourceRgbaTex, RowSampler, pos);

	     r=p.x;

	     g=p.y;

	     b=p.z;

	     //  a=p.w;

	     */

	    //bicubic interpolation

	    float abyRed[4][4], abyGreen[4][4], abyBlue[4][4];

	    int i,j;

	    for (i = 0; i < 4; i++)

		{

		for (j = 0; j < 4; j++)

		    {

		    int2 pos=

			{scn + i - 2,pix + j - 2};

		    float4 p0 = convert_float4(read_imageui(SourceRgbaTex, RowSampler,pos));

		    abyBlue[i][j] = p0.z;

		    abyGreen[i][j] = p0.y;

		    abyRed[i][j] = p0.x;

		    /*

		     uint4 p0 = read_imageui(SourceRgbaTex, RowSampler,(int2)(scn + i - 2,pix + j - 2));

		     abyBlue[i][j] = 0;

		     abyGreen[i][j] =0;

		     abyRed[i][j] = 0;*/

		    }

		}

	    float afu[4], afv[4];

	    float fv = y - scn;//[0,1)

	    float fu = x - pix;

	    afu[0] = Sinxx(1.0f + fu);

	    afu[1] = Sinxx(fu);

	    afu[2] = Sinxx(1.0f - fu);

	    afu[3] = Sinxx(2.0f - fu);

	    afv[0] = Sinxx(1.0f + fv);

	    afv[1] = Sinxx(fv);

	    afv[2] = Sinxx(1.0f - fv);

	    afv[3] = Sinxx(2.0f - fv);

	    float afRed[4] =

		{

		0.0f, 0.0f, 0.0f, 0.0f

		};

	    float afGreen[4] =

		{

		0.0f, 0.0f, 0.0f, 0.0f

		};

	    float afBlue[4] =

		{

		0.0f, 0.0f, 0.0f, 0.0f

		};

	    for (i = 0; i < 4; i++)

		{

		for (j = 0; j < 4; j++)

		    {

		    afRed[i] += afv[j] * abyRed[j][i];

		    afGreen[i] += afv[j] * abyGreen[j][i];

		    afBlue[i] += afv[j] * abyBlue[j][i];

		    }

		}

	    r =

	    (T)(BOUND((afu[0] * afRed[0] + afu[1] * afRed[1] + afu[2]

				    * afRed[2] + afu[3] * afRed[3]), 0, 255));

	    g =

	    (T)(BOUND((afu[0] * afGreen[0] + afu[1] * afGreen[1]

				    + afu[2] * afGreen[2] + afu[3] * afGreen[3]), 0, 255));

	    b =

	    (T)(BOUND((afu[0] * afBlue[0] + afu[1] * afBlue[1] + afu[2]

				    * afBlue[2] + afu[3] * afBlue[3]), 0, 255));

	    };

	uint4 w =

	    {

	    r,

	    g,

	    b,

	    0

	    };

	//uint4 p = read_imageui(SourceRgbaTex, RowSampler, (int2)(gx,gy));

	uiDest[grow * get_global_size(0)+ get_global_id(0)] = rgbaInt4ToUint(w);

	}

    };

CB is a struct of parameters

there is no error during compile and run time. but oberviously,the kernel is not executed,even the parameter of CB is not transfored.

but the strange things is,if I tryed another kernel using nearest interpolation,which is commented out in the upper code,the kernel can run correctly.

can any one help me to fix this problem?

thanks.