I’ve run into a nasty problem with OpenCL: the compiler refuses to build this code, claiming that randInt() is not inlined. Adding the inline keyword to the function doesn’t change this. Renaming the function doesn’t help either - in fact, if I rename every randInt to randInt_1 except for the one in atomicExchangePoints(), it still gives the exact same error,
but the message says randInt, which shouldn’t even be defined at that point. Changing the randInt in atomicExchangePoints() to randInt_1 makes the error message display the correct name, randInt_1, but still doesn’t solve the problem.
Does anyone have any idea what’s going on here? I don’t have any recursion, so is this a compiler bug?
/* Enable 32-bit atomics on global memory; atomicExchangePoints() calls atom_xchg().
 * The directive is spelled "OPENCL EXTENSION" (two tokens). The previous spelling
 * with an underscore is not a recognised pragma, so the extension was never
 * actually requested. */
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable

/* Point pool read by loadPoint() and exchanged by atomicExchangePoints().
 * Sized as 4 * NUM_POINTS_PER_NODE * 5 floats.
 * NOTE(review): OpenCL C 1.x only allows `constant` variables at program scope;
 * confirm the target compiler accepts a program-scope `global` array. */
global float pointPool[4*NUM_POINTS_PER_NODE*5];

/* Sampler used by drawPoint() for the palette lookup: normalized coordinates,
 * clamp-to-edge addressing, nearest filtering. */
const sampler_t paletteSampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;

/* Per-stream parameters passed to TausStep() by randInt(); entry (index & 3)
 * is selected for each generator state. */
constant int shift1[4] = {6, 2, 13, 3};
constant int shift2[4] = {13, 27, 21, 12};
constant int shift3[4] = {18, 2, 7, 13};
constant unsigned int offset[4] = {4294967294, 4294967288, 4294967280, 4294967168};

/* Random generator state, indexed by (get_local_id(0) & 31) in randInt().
 * Nothing in the visible source seeds this array.
 * NOTE(review): `local` variables are normally required to be declared at kernel
 * function scope; confirm these program-scope `local` arrays are accepted. */
local unsigned int randStates[32];

/* Per-work-item scratch used by loadPoint() / atomicExchangePoints():
 * current and previous pool offsets, and a 4-float staging slot per work-item. */
local int pointOffset[BLOCK_SIZE];
local int oldPointOffset[BLOCK_SIZE];
local float pointStage[BLOCK_SIZE*4];
/* One shift/mask/xor update of a generator state word.
 *
 * z          current state word
 * s1, s2, s3 shift amounts
 * M          mask applied to z before the final left shift
 *
 * Returns ((z & M) << s3) ^ (((z << s1) ^ z) >> s2).
 */
unsigned int TausStep(unsigned int z, int s1, int s2, int s3, unsigned int M)
{
    const unsigned int feedback = ((z << s1) ^ z) >> s2;
    const unsigned int advanced = (z & M) << s3;
    return advanced ^ feedback;
}
/* Advances the generator state owned by this work-item's slot
 * (get_local_id(0) & 31) and returns a 32-bit value formed by XOR-ing that
 * slot with the next three slots (wrapping at 32).
 *
 * The neighbouring slots are read without any barrier, so the value depends on
 * whatever those slots hold at the time of the read.
 */
unsigned int randInt()
{
    const int slot = get_local_id(0)&31;
    const int param = slot&3;

    randStates[slot] = TausStep(randStates[slot], shift1[param], shift2[param], shift3[param], offset[param]);

    unsigned int mixed = randStates[slot];
    mixed ^= randStates[(slot+1)&31];
    mixed ^= randStates[(slot+2)&31];
    mixed ^= randStates[(slot+3)&31];
    return mixed;
}
/* Returns a float built from the low 23 bits of randInt().
 *
 * The bits are OR-ed into 0x3F800000 (the bit pattern of 1.0f), giving a value
 * in [1, 2); subtracting 1.0f maps it to [0, 1).
 */
float randFloat()
{
    const unsigned int mantissa = randInt()&0x007FFFFF;
    return as_float(mantissa|0x3F800000)-1.0f;
}
/* Advances this work-item's generator slot via randInt() (the returned value is
 * not used) and converts the updated state word randStates[get_local_id(0) & 31]
 * into a float in [0, 1).
 *
 * The low 23 bits are OR-ed into 0x3F800000 (the bit pattern of 1.0f) and 1.0f
 * is subtracted. The mask must be applied with bitwise '&': the previous
 * logical '&&' reduced the mantissa to 0 or 1, so the result was always ~0.
 */
float randFloatWarp()
{
    /* Called only for its side effect of stepping the state. */
    randInt();
    const unsigned int state = randStates[get_local_id(0)&31];
    return as_float((state&0x007FFFFF)|0x3F800000)-1.0f;
}
/* Picks a random pool offset for this work-item inside node `nodeIndex` and
 * returns the four floats staged for it.
 *
 * The previous offset is saved in oldPointOffset[]. Work-items are handled in
 * groups of four consecutive local ids: each one copies component (id & 3) for
 * all four members of its group from pointPool into pointStage, then reads back
 * its own four-float row.
 *
 * NOTE(review): no barrier separates the pointOffset/pointStage writes from the
 * reads of other work-items' entries; this presumably relies on the group of
 * four executing in lock-step — confirm for the target device.
 * NOTE(review): the pool index is pointOffset + component, without a *4 stride,
 * although pointPool is sized with 4 floats per point — confirm this is intended.
 */
float4 loadPoint(int nodeIndex)
{
    const int lid = get_local_id(0);
    const int component = lid&3;
    const int groupBase = lid&~3;

    oldPointOffset[lid] = pointOffset[lid];
    pointOffset[lid] = nodeIndex*NUM_POINTS_PER_NODE+randInt()%NUM_POINTS_PER_NODE;

    for (int k = 0; k < 4; k++)
    {
        const int member = groupBase+k;
        pointStage[member*4+component] = pointPool[pointOffset[member]+component];
    }

    const int row = lid*4;
    return (float4)(pointStage[row],pointStage[row+1],pointStage[row+2],pointStage[row+3]);
}
/* Atomically swaps `point` with a randomly chosen pool entry of node
 * `nodeIndex` and returns the four floats that were swapped out.
 *
 * point      value to store into the pool (x, y, z, w)
 * nodeIndex  node whose range of the pool is targeted
 *
 * Work-items are handled in groups of four consecutive local ids: each one
 * exchanges component (id & 3) for all four members of its group, then reads
 * back its own four-float row from pointStage.
 *
 * Fixes relative to the earlier version:
 *  - the offset is nodeIndex*NUM_POINTS_PER_NODE + rand % NUM_POINTS_PER_NODE,
 *    matching loadPoint(); it used '*' in place of '+'.
 *  - atom_xchg() receives the ADDRESS of the pool element; the float value
 *    itself was being cast to a pointer.
 *
 * NOTE(review): no barrier separates the pointOffset/pointStage writes from the
 * accesses to other work-items' entries; this presumably relies on the group of
 * four executing in lock-step — confirm for the target device.
 * NOTE(review): the pool index is pointOffset + component, without a *4 stride,
 * although pointPool is sized with 4 floats per point — confirm this is intended.
 */
float4 atomicExchangePoints(float4 point, int nodeIndex)
{
    const int index = get_local_id(0);
    const int component = index&3;
    const int groupBase = index&~3;

    pointOffset[index] = nodeIndex*NUM_POINTS_PER_NODE+randInt()%NUM_POINTS_PER_NODE;

    /* Stage this work-item's point so group members can push its components. */
    pointStage[index*4] = point.x;
    pointStage[index*4+1] = point.y;
    pointStage[index*4+2] = point.z;
    pointStage[index*4+3] = point.w;

    for (int n = groupBase; n < groupBase+4; n++)
    {
        /* Floats are exchanged through their 32-bit integer representation. */
        global int *poolSlot = (global int*)&pointPool[pointOffset[n]+component];
        pointStage[n*4+component] = as_float(atom_xchg(poolSlot, as_int(pointStage[n*4+component])));
    }

    return (float4)(pointStage[index*4],pointStage[index*4+1],pointStage[index*4+2],pointStage[index*4+3]);
}
/* Accumulates the palette colour of `point` into the render buffer.
 *
 * renderBuffer  row-major colour buffer, dim.x entries per row
 * palette       image sampled at (point.w, 0) with paletteSampler
 * dim           buffer width (x) and height (y) in pixels
 * point         point to draw; point.w selects the palette colour
 *
 * The pixel position comes from rasterizePoint(), which is not defined in the
 * visible part of this file. Points that land outside [0, dim) are skipped.
 * The accumulation is a plain (non-atomic) read-modify-write.
 */
void drawPoint(global float4* renderBuffer, image2d_t palette, int2 dim, float4 point)
{
    const int2 coords = rasterizePoint(point, dim);

    /* coords is an int2, so compare against integer zero. */
    const int inside = (coords.x >= 0) && (coords.y >= 0) && (coords.x < dim.x) && (coords.y < dim.y);
    if (!inside)
        return;

    const float4 color = read_imagef(palette, paletteSampler, (float2)(point.w,0.0f));
    renderBuffer[coords.y*dim.x+coords.x] += color;
}
/* Identity variation: returns the input position unchanged. */
float2 linear(float2 pos)
{
    return pos;
}
/* Sinusoidal variation: applies native_sin() to each coordinate independently. */
float2 sinusoidal(float2 pos)
{
    const float sx = native_sin(pos.x);
    const float sy = native_sin(pos.y);
    return (float2)(sx, sy);
}
/* Custom variation.
 *
 * When custom1 > pos.x the x output is pos.y*custom2 - pos.x; otherwise x is
 * passed through. The y coordinate is always passed through unchanged.
 */
float2 custom(float2 pos, float custom1, float custom2)
{
    const float xout = (custom1 > pos.x) ? (pos.y*custom2-pos.x) : pos.x;
    return (float2)(xout, pos.y);
}
/* Transform 3: affine map of (point.x, point.y), then a weighted sum of
 * 0.8 * linear and 0.2 * sinusoidal. The colour coordinate point.w is blended
 * halfway towards 1. Returns (x, y, 0, colour).
 *
 * All literals carry the 'f' suffix: an unsuffixed literal is a double, and a
 * double scalar cannot be combined with a float2 (scalar rank exceeds the
 * vector element type), nor is double guaranteed to be available.
 */
float4 xform3(float4 point)
{
    float2 outpos = (float2)(0.0f, 0.0f);
    const float2 pos = point.xx*(float2)(-1.0f, -8.742278e-08f)
                     + point.yy*(float2)(-4.371139e-08f, 1.0f)
                     + (float2)(0.9999999f, -0.9999999f);
    outpos += 0.8f*linear(pos);
    outpos += 0.2f*sinusoidal(pos);
    const float col = mix(point.w, 1.0f, 0.5f);
    return (float4)(outpos.x, outpos.y, 0.0f, col);
}
/* Transform 1: affine map of (point.x, point.y), then a weighted sum of two
 * custom() variations (weights 1 and 0.5, each with its own parameters).
 * The colour coordinate point.w is blended a quarter of the way towards 0.
 * Returns (x, y, 0, colour).
 *
 * All literals carry the 'f' suffix: an unsuffixed literal is a double, and a
 * double scalar cannot be combined with a float2 (scalar rank exceeds the
 * vector element type), nor is double guaranteed to be available.
 */
float4 xform1(float4 point)
{
    float2 outpos = (float2)(0.0f, 0.0f);
    const float2 pos = point.xx*(float2)(0.9999998f, 4.0f)
                     + point.yy*(float2)(2.0f, 5.0f)
                     + (float2)(3.0f, 6.0f);
    outpos += 1.0f*custom(pos, 0.26f, -0.9f);
    outpos += 0.5f*custom(pos, -0.5f, 0.1f);
    const float col = mix(point.w, 0.0f, 0.25f);
    return (float4)(outpos.x, outpos.y, 0.0f, col);
}
/* Transform 2: affine map of (point.x, point.y), then 0.4 * sinusoidal.
 * The colour blend uses a mix factor of 0, so point.w passes through.
 * Returns (x, y, 0, colour).
 *
 * All literals carry the 'f' suffix: an unsuffixed literal is a double, and a
 * double scalar cannot be combined with a float2 (scalar rank exceeds the
 * vector element type), nor is double guaranteed to be available.
 */
float4 xform2(float4 point)
{
    float2 outpos = (float2)(0.0f, 0.0f);
    const float2 pos = point.xx*(float2)(0.9999988f, -22.0f)
                     + point.yy*(float2)(2.0f, 0.33f)
                     + (float2)(7.000003f, -65.0f);
    outpos += 0.4f*sinusoidal(pos);
    const float col = mix(point.w, 0.3f, 0.0f);
    return (float4)(outpos.x, outpos.y, 0.0f, col);
}
/* Renders one batch of points by walking a five-node state machine.
 *
 * renderBuffer  colour accumulation buffer written by drawPoint()
 * palette       palette image sampled by drawPoint()
 * dimension     buffer width/height; converted once to int2 for drawPoint()
 *
 * Every node does the same thing with different constants:
 *   1. if the previous node did not request a discard and the current point is
 *      finite, swap it into the node's pool (atomicExchangePoints) and continue
 *      from the point swapped out; otherwise load a fresh point (loadPoint);
 *   2. apply the node's transform;
 *   3. draw the point when the node's constant draw condition is true;
 *   4. stop after MAX_ITERATIONS steps, else pick the next node from a random
 *      number.
 *
 * Fixes relative to the earlier version:
 *  - the point returned by atomicExchangePoints() is now stored in oldPoint; it
 *    was discarded, so the transform read an uninitialised or stale oldPoint;
 *  - the random source is randFloatWarp(), the name this file defines
 *    (randWarpFloat does not exist);
 *  - `dimension` (float2) is converted explicitly, since implicit conversion
 *    between vector types is not allowed;
 *  - comparison thresholds are single-precision literals.
 */
kernel void renderBatch(global float4 *renderBuffer, image2d_t palette, float2 dimension)
{
    float rnd;
    float4 oldPoint;
    int iterations = 0;
    int discard = 0;
    /* NOTE(review): assumes `dimension` holds whole pixel counts — confirm. */
    const int2 dim = convert_int2(dimension);
    float4 point = loadPoint(0);

node1:
    if ((discard == 0) && isfinite(point.x) && isfinite(point.y))
        oldPoint = atomicExchangePoints(point, 0);
    else
        oldPoint = loadPoint(0);
    discard = 0;
    point = xform2(oldPoint);
    if (1!=0.0f)
        drawPoint(renderBuffer, palette, dim, point);
    iterations++;
    if (iterations >= MAX_ITERATIONS)
        return;
    rnd = randFloatWarp();
    if (rnd <= 0.07518797f)
        goto node2;
    else
        goto node3;

node2:
    if ((discard == 0) && isfinite(point.x) && isfinite(point.y))
        oldPoint = atomicExchangePoints(point, 1);
    else
        oldPoint = loadPoint(1);
    discard = 0;
    point = xform1(oldPoint);
    if (1!=0.0f)
        drawPoint(renderBuffer, palette, dim, point);
    iterations++;
    if (iterations >= MAX_ITERATIONS)
        return;
    rnd = randFloatWarp();
    if (rnd <= 0.3333333f)
        goto node2;
    else
        goto node1;

node3:
    if ((discard == 0) && isfinite(point.x) && isfinite(point.y))
        oldPoint = atomicExchangePoints(point, 2);
    else
        oldPoint = loadPoint(2);
    discard = 0;
    point = xform3(oldPoint);
    if (1!=0.0f)
        drawPoint(renderBuffer, palette, dim, point);
    iterations++;
    if (iterations >= MAX_ITERATIONS)
        return;
    rnd = randFloatWarp();
    /* Braces make the original else-binding explicit; the branching itself is
     * unchanged.
     * NOTE(review): inside the outer branch rnd <= 0.4966443, so the test
     * rnd <= 0.5973154 is always true and `goto node4` can never run, which
     * leaves node4 and node5 unreachable. The thresholds look like cumulative
     * probabilities for node2 / node1 / node4 / node3 — confirm the intended
     * mapping before restructuring. */
    if (rnd <= 0.4966443f)
    {
        if (rnd <= 0.2013423f)
            goto node2;
        else if (rnd <= 0.5973154f)
            goto node1;
        else
            goto node4;
    }
    else
        goto node3;

node4:
    if ((discard == 0) && isfinite(point.x) && isfinite(point.y))
        oldPoint = atomicExchangePoints(point, 3);
    else
        oldPoint = loadPoint(3);
    discard = 0;
    point = xform2(oldPoint);
    /* Constant-false draw condition: this node never draws. */
    if (0!=0.0f)
        drawPoint(renderBuffer, palette, dim, point);
    iterations++;
    if (iterations >= MAX_ITERATIONS)
        return;
    /* Value unused here; the call still advances the generator state. */
    rnd = randFloatWarp();
    goto node5;

node5:
    if ((discard == 0) && isfinite(point.x) && isfinite(point.y))
        oldPoint = atomicExchangePoints(point, 4);
    else
        oldPoint = loadPoint(4);
    /* Tells the next node to load a fresh point instead of exchanging. */
    discard = 1;
    point = xform3(oldPoint);
    if (1!=0.0f)
        drawPoint(renderBuffer, palette, dim, point);
    iterations++;
    if (iterations >= MAX_ITERATIONS)
        return;
    rnd = randFloatWarp();
    if (rnd <= 0.25f)
        goto node2;
    else
        goto node1;
}