CUDA error in image processing

I wrote an image processing function in CUDA, but the result is always a black image. I suspect the problem is in the near_color function. My code is as follows:
/*
 * Converts one colour from BGR to YUV.
 *
 * The input is read as x = B, y = G, z = R (the layout implied by the
 * original "BGR to YUV" conversion). The result holds Y in .x, U in .y and
 * V in .z. All arithmetic is single precision.
 */
static __host__ __device__ inline float3 bgr_to_yuv(float3 bgr)
{
    return make_float3(
         0.299f * bgr.z + 0.587f * bgr.y + 0.114f * bgr.x,   // Y
        -0.147f * bgr.z - 0.289f * bgr.y + 0.436f * bgr.x,   // U
         0.615f * bgr.z - 0.515f * bgr.y - 0.100f * bgr.x);  // V
}

/*
 * Squared Euclidean distance between two BGR colours, measured in YUV space.
 *
 * color1, color2: colours with x = B, y = G, z = R.
 * Returns (dY^2 + dU^2 + dV^2); the value is never negative for finite
 * inputs and is 0 when both colours are identical. No square root is taken.
 *
 * Callable from both host and device so the same code can serve as a CPU
 * reference when checking kernel output.
 */
__host__ __device__ float near_color(float3 color1, float3 color2)
{
    // BGR to YUV
    const float3 yuv1 = bgr_to_yuv(color1);
    const float3 yuv2 = bgr_to_yuv(color2);

    const float dY = yuv1.x - yuv2.x;
    const float dU = yuv1.y - yuv2.y;
    const float dV = yuv1.z - yuv2.z;

    return dU * dU + dV * dV + dY * dY;
}
/*
 * Per-pixel colour-gradient direction kernel.
 *
 * Launch layout: one thread per pixel on a 2D grid; threadIdx/blockIdx .x
 * maps to the column and .y to the row. The grid must cover at least
 * width x height threads; surplus threads exit without touching memory.
 *
 * dataIn  : width*height colours, row-major, indexed as [y * width + x].
 * dataOut : width*height results, same indexing.
 *
 * Only interior pixels (1 <= x <= width-2, 1 <= y <= height-2) are written.
 * Border pixels are left untouched, so the caller has to initialise dataOut
 * (e.g. with cudaMemset) if defined border values are needed.
 *
 * For each interior pixel the centre colour is compared with its left/right
 * and top/bottom neighbours via near_color():
 *   horizontal: out = (+1, 0, 0, 1) if the centre is closer to the left
 *               neighbour than to the right one, else (-1, 0, 0, 1)
 *   vertical  : out = (0, +1, 0, 1) if the centre is closer to the top
 *               neighbour than to the bottom one, else (0, -1, 0, 1)
 * The vertical test runs second and overwrites the horizontal result.
 */
__global__ void near_color_edge_detect(float3* dataIn, float4 *dataOut, int width, int height)
{
    const int xIndex = threadIdx.x + blockIdx.x * blockDim.x;
    const int yIndex = threadIdx.y + blockIdx.y * blockDim.y;

    // All four neighbours must exist, which excludes the one-pixel border
    // and any thread that falls outside the image.
    if (xIndex < 1 || yIndex < 1 || xIndex + 1 >= width || yIndex + 1 >= height) {
        return;
    }

    // Minimum neighbour-to-neighbour distance for a direction to be reported.
    // near_color() returns a sum of squares, so with a threshold of 0 both
    // tests below succeed for every non-NaN input: the default value is never
    // stored and the final result is always (0, +/-1, 0, 1).
    // NOTE(review): a positive threshold (or keeping the stronger of the two
    // directions) is probably what was intended - confirm before changing.
    const float nearcolor_value = 0.0f;

    const int center = yIndex * width + xIndex;
    const float3 colorC = dataIn[center];
    const float3 colorL = dataIn[center - 1];
    const float3 colorR = dataIn[center + 1];
    const float3 colorT = dataIn[center - width];
    const float3 colorB = dataIn[center + width];

    // Default when neither direction passes the threshold.
    float4 out = make_float4(1.0f, 1.0f, 1.0f, 1.0f);

    if (near_color(colorL, colorR) >= nearcolor_value) {
        const float signX =
            (near_color(colorL, colorC) < near_color(colorC, colorR)) ? 1.0f : -1.0f;
        out = make_float4(signX, 0.0f, 0.0f, 1.0f);
    }

    if (near_color(colorT, colorB) >= nearcolor_value) {
        const float signY =
            (near_color(colorT, colorC) < near_color(colorC, colorB)) ? 1.0f : -1.0f;
        out = make_float4(0.0f, signY, 0.0f, 1.0f);
    }

    dataOut[center] = out;
}
If you know the reason, please tell me. Thank you very much!