Error: Not-inlined non-intrinsic function calls not supported yet : randInt Why can't it inline

I’ve run into a nasty problem with OpenCL - it refuses to compile this code, claiming that randInt() in not inlined. Adding the inline keyword to the function doesn’t change this. In addition, renaming the function doesn’t change this either - in fact, if I rename all the randInt to randInt_1 except for the one in atomicExchangePoints(), it still gives the exact same error,

but it says randInt, which shouldn’t even be defined. Changing the randInt in atomicExchangePoints() to randInt_1 makes the error message display the correct randInt_1, but still doesn’t solve the problem.

Anyone have any ideas what’s going on here? I don’t have any recursion, so it appears to be a compiler bug?

#pragma OPENCL_EXTENSION cl_khr_global_int32_base_atomics : enable

global float pointPool[4*NUM_POINTS_PER_NODE*5];

const sampler_t paletteSampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;

constant int shift1[4] = {6, 2, 13, 3};

constant int shift2[4] = {13, 27, 21, 12};

constant int shift3[4] = {18, 2, 7, 13};

constant unsigned int offset[4] = {4294967294, 4294967288, 4294967280, 4294967168};

local unsigned int randStates[32];

local int pointOffset[BLOCK_SIZE];

local int oldPointOffset[BLOCK_SIZE];

local float pointStage[BLOCK_SIZE*4];

unsigned int TausStep(unsigned int z, int s1, int s2, int s3, unsigned int M)

{

	unsigned int b = (((z << s1) ^ z) >> s2);

	return (((z & M) << s3) ^b);

}

unsigned int randInt()

{

	int index = get_local_id(0)&31;

	randStates[index] = TausStep(randStates[index], shift1[index&3], shift2[index&3], shift3[index&3], offset[index&3]);

	return (randStates[index]^randStates[(index+1)&31]^randStates[(index+2)&31]^randStates[(index+3)&31]);

}

float randFloat()

{

	unsigned int y = randInt();

	return as_float((y&0x007FFFFF)|0x3F800000)-1.0f;

}

float randFloatWarp()

{

	unsigned int y = randInt();

	return as_float((randStates[(get_local_id(0)&31)]&&0x007FFFFF)|0x3F800000)-1.0f;

}

float4 loadPoint(int nodeIndex)

{

	int index = get_local_id(0);

	oldPointOffset[index] = pointOffset[index];

	pointOffset[index] = nodeIndex*NUM_POINTS_PER_NODE+randInt()%NUM_POINTS_PER_NODE;

	for (int n = (index&~3); n < ((index&~3)+4);n++)

	{

		pointStage[n*4+(index&3)]=pointPool[pointOffset[n]+(index&3)];

	}

	return (float4)(pointStage[index*4],pointStage[index*4+1],pointStage[index*4+2],pointStage[index*4+3]);

}

float4 atomicExchangePoints(float4 point, int nodeIndex)

{

	int index = get_local_id(0);

	pointOffset[index] = nodeIndex*NUM_POINTS_PER_NODE*randInt()%NUM_POINTS_PER_NODE;

	pointStage[index*4] = point.x;

	pointStage[index*4+1] = point.y;

	pointStage[index*4+2] = point.z;

	pointStage[index*4+3] = point.w;

	for (int n = (index&~3); n < ((index&~3)+4); n++)

	{

		pointStage[n*4+(index&3)] = as_float(atom_xchg((global int*)pointPool[pointOffset[n]+(index&3)],as_int(pointStage[n*4+(index&3)])));

	}

	return (float4)(pointStage[index*4],pointStage[index*4+1],pointStage[index*4+2],pointStage[index*4+3]);

}

void drawPoint(global float4* renderBuffer, image2d_t palette, int2 dim, float4 point)

{

	int2 coords = rasterizePoint(point, dim);

	if ((coords.x >= 0.0f)&&(coords.y >= 0.0f)&&(coords.x < dim.x)&&(coords.y < dim.y))

	{

		float4 color = read_imagef(palette, paletteSampler, (float2)(point.w,0.0f));

		int index = get_local_id(0);

		renderBuffer[coords.y*dim.x+coords.x]+=color;

	}

}

float2 linear(float2 pos)

{

	float xout,yout;

	float x = pos.x;

	float y = pos.y;

	

				xout = x;

				yout = y;

			

	return (float2) (xout,yout);

}

float2 sinusoidal(float2 pos)

{

	float xout,yout;

	float x = pos.x;

	float y = pos.y;

	

				xout = native_sin(x);

				yout = native_sin(y);

			

	return (float2) (xout,yout);

}

float2 custom(float2 pos, float custom1, float custom2)

{

	float xout,yout;

	float x = pos.x;

	float y = pos.y;

	

				if (custom1 > x)

					xout = y*custom2-x;

				else 

					xout = x;

				yout = y;

			

	return (float2) (xout,yout);

}

float4 xform3(float4 point)

{

	float2 outpos = (float2)(0,0);

	float2 pos = point.xx*(float2)(-1,-8.742278e-08)+point.yy*(float2)(-4.371139e-08,1)+(float2)(0.9999999,-0.9999999);

	outpos+=0.8*linear(pos);

	outpos+=0.2*sinusoidal(pos);

	

	float col = mix(point.w,(float)1,(float)0.5);

	return (float4)(outpos.x,outpos.y,0.0f,col);

}

float4 xform1(float4 point)

{

	float2 outpos = (float2)(0,0);

	float2 pos = point.xx*(float2)(0.9999998,4)+point.yy*(float2)(2,5)+(float2)(3,6);

	outpos+=1*custom(pos,0.26,-0.9);

	outpos+=0.5*custom(pos,-0.5,0.1);

	

	float col = mix(point.w,(float)0,(float)0.25);

	return (float4)(outpos.x,outpos.y,0.0f,col);

}

float4 xform2(float4 point)

{

	float2 outpos = (float2)(0,0);

	float2 pos = point.xx*(float2)(0.9999988,-22)+point.yy*(float2)(2,0.33)+(float2)(7.000003,-65);

	outpos+=0.4*sinusoidal(pos);

	

	float col = mix(point.w,(float)0.3,(float)0);

	return (float4)(outpos.x,outpos.y,0.0f,col);

}

kernel void renderBatch(global float4 *renderBuffer, image2d_t palette, float2 dimension)

{

	float rnd;

	float4 oldPoint;

	int iterations = 0;

	int discard = 0;

	float4 point = loadPoint(0);

node1:

	if ((discard==0.0f)&&(isfinite(point.x))&&(isfinite(point.y)))

	{

		atomicExchangePoints(point,0);

	}

	else

		oldPoint = loadPoint(0);

	discard = 0;

	point = xform2(oldPoint);

	if (1!=0.0f)

		drawPoint(renderBuffer,palette,dimension,point);

	iterations++;

	if (iterations >= MAX_ITERATIONS)

		return;

	rnd = randWarpFloat();

	if (rnd <= 0.07518797)

		goto node2;

	else

		goto node3;

node2:

	if ((discard==0.0f)&&(isfinite(point.x))&&(isfinite(point.y)))

	{

		atomicExchangePoints(point,1);

	}

	else

		oldPoint = loadPoint(1);

	discard = 0;

	point = xform1(oldPoint);

	if (1!=0.0f)

		drawPoint(renderBuffer,palette,dimension,point);

	iterations++;

	if (iterations >= MAX_ITERATIONS)

		return;

	rnd = randWarpFloat();

	if (rnd <= 0.3333333)

		goto node2;

	else

		goto node1;

node3:

	if ((discard==0.0f)&&(isfinite(point.x))&&(isfinite(point.y)))

	{

		atomicExchangePoints(point,2);

	}

	else

		oldPoint = loadPoint(2);

	discard = 0;

	point = xform3(oldPoint);

	if (1!=0.0f)

		drawPoint(renderBuffer,palette,dimension,point);

	iterations++;

	if (iterations >= MAX_ITERATIONS)

		return;

	rnd = randWarpFloat();

	if (rnd <= 0.4966443)

		if (rnd <= 0.2013423)

			goto node2;

		else

			if (rnd <= 0.5973154)

				goto node1;

			else

				goto node4;

	else

		goto node3;

node4:

	if ((discard==0.0f)&&(isfinite(point.x))&&(isfinite(point.y)))

	{

		atomicExchangePoints(point,3);

	}

	else

		oldPoint = loadPoint(3);

	discard = 0;

	point = xform2(oldPoint);

	if (0!=0.0f)

		drawPoint(renderBuffer,palette,dimension,point);

	iterations++;

	if (iterations >= MAX_ITERATIONS)

		return;

	rnd = randWarpFloat();

	goto node5;

node5:

	if ((discard==0.0f)&&(isfinite(point.x))&&(isfinite(point.y)))

	{

		atomicExchangePoints(point,4);

	}

	else

		oldPoint = loadPoint(4);

	discard = 1;

	point = xform3(oldPoint);

	if (1!=0.0f)

		drawPoint(renderBuffer,palette,dimension,point);

	iterations++;

	if (iterations >= MAX_ITERATIONS)

		return;

	rnd = randWarpFloat();

	if (rnd <= 0.25)

		goto node2;

	else

		goto node1;

}

I believe this is, because the function has no parameters. I had a similar problem and adding some non-used parameter like test, to foo(int test) ‘fixed’ it.

Changing the functions without parameters to int SomeFuction(void) fixed the first problem, and now I have a whole brand new problem with an even more abstruse error message:

Error: Code selection failed to select: 0AAB9A98: v2i32 = bit_convert 0B0C75A0

Looks like an internal compiler bug??

I did some further testing, and it’s pretty clear that casting to int2 from float2 is broken.