Hi all,
Today, while compiling my project on my laptop, I encountered a very weird error.
This is one of the kernels where the problem occurred:
// Classifies one voxel cube per work-item.
// Each work-item samples the 8 corner values of its cube from dataGrid, builds
// an 8-bit mask (bit k set when corner k is below threshold), and writes:
//   vMask[idx]     - the mask itself
//   eMask[idx]     - edgesTable entry read at (mask, 0)
//   numVertex[idx] - numVertexTable entry read at (mask, 0)
//   occupied[idx]  - 1 when the mask is neither 0 nor 0xFF, 0 otherwise
// Depends on `smp` (sampler) and `rIndex`, both defined outside this listing.
kernel void get_used_voxels(read_only image3d_t dataGrid, //0 - Data volume
read_only image2d_t edgesTable, //1 - Edge table
read_only image2d_t numVertexTable, //2 - Vertices per voxel table
const float threshold, //3 - Threshold
global uint *vMask, //4 - Vertex mask
global uint *eMask, //5 - Edge mask
global uint *numVertex, //6 - Number of vertices per voxel cube
global uint *occupied, //7 - 1 if the voxel is used, 0 otherwise
uint4 vSize, //8 - Global size of the voxel volume
local float *cubeVertex //9 - Local scratch; indexed up to 7*wgs + threadId, so it needs at least 8 * get_local_size(0) floats
){
const uint idx = get_global_id(0);
const uint threadId = get_local_id(0);
const uint wgs = get_local_size(0);
//rIndex (not shown) maps the linear index to a voxel coordinate; per the author it
//contains a few bitwise operations for Morton-order indexing
const uint4 coord = rIndex(idx, vSize);
//Work-items whose coordinate falls outside the volume exit without writing any output
if(coord.x >= vSize.x || coord.y >= vSize.y || coord.z >= vSize.z)
return;
//Commented-out private-array declaration; the kernel uses the local cubeVertex parameter instead
// float cubeVertex[8];
//The 8 vertices of the cube. Corner k is stored at cubeVertex[k*wgs + threadId],
//so each work-item only touches its own 8 slots (assuming a 1-D work-group - TODO confirm).
//NOTE(review): the +1 offsets can address one texel past coord's valid range; what is
//read there depends on the addressing mode of smp, which is not visible here.
cubeVertex[0*wgs + threadId] = read_imagef(dataGrid, smp, (int4)(coord.x, coord.y + 1, coord.z , 0)).x;
cubeVertex[1*wgs + threadId] = read_imagef(dataGrid, smp, (int4)(coord.x + 1, coord.y + 1, coord.z , 0)).x;
cubeVertex[2*wgs + threadId] = read_imagef(dataGrid, smp, (int4)(coord.x + 1, coord.y, coord.z , 0)).x;
cubeVertex[3*wgs + threadId] = read_imagef(dataGrid, smp, (int4)(coord.x , coord.y , coord.z , 0)).x;
cubeVertex[4*wgs + threadId] = read_imagef(dataGrid, smp, (int4)(coord.x, coord.y + 1, coord.z + 1, 0)).x;
cubeVertex[5*wgs + threadId] = read_imagef(dataGrid, smp, (int4)(coord.x + 1, coord.y + 1, coord.z + 1, 0)).x;
cubeVertex[6*wgs + threadId] = read_imagef(dataGrid, smp, (int4)(coord.x + 1, coord.y, coord.z + 1, 0)).x;
cubeVertex[7*wgs + threadId] = read_imagef(dataGrid, smp, (int4)(coord.x, coord.y, coord.z + 1, 0)).x;
//Build the 8-bit corner mask: bit k is set when corner k is below the threshold
uint mask = 0;
mask |= ((cubeVertex[0*wgs + threadId] < threshold) << 0);
mask |= ((cubeVertex[1*wgs + threadId] < threshold) << 1);
mask |= ((cubeVertex[2*wgs + threadId] < threshold) << 2);
mask |= ((cubeVertex[3*wgs + threadId] < threshold) << 3);
mask |= ((cubeVertex[4*wgs + threadId] < threshold) << 4);
mask |= ((cubeVertex[5*wgs + threadId] < threshold) << 5);
mask |= ((cubeVertex[6*wgs + threadId] < threshold) << 6);
mask |= ((cubeVertex[7*wgs + threadId] < threshold) << 7);
vMask[idx] = mask;
//Both edgesTable and numVertexTable are 2D textures created with OpenGL and shared with OpenCL;
//they are read here at column = mask, row = 0
eMask[idx] = read_imageui(edgesTable, smp, (int2)(mask, 0)).x;
numVertex[idx] = read_imageui(numVertexTable, smp, (int2)(mask, 0)).x;
//Flag is 1 only for mixed cubes: mask 0 (no corner below threshold) and
//0xFF (all corners below threshold) both yield 0
occupied[idx] = (mask > 0 && mask < 0xFF);
}
When I compile this, both clcc and clBuildProgram crash with an access violation, but if I reorder the last lines like this:
//NOTE(review): as pasted, these four statements are in the same order as at the end of the
//kernel listing above, so the ordering that actually triggers the crash is not shown here.
vMask[idx] = mask;
eMask[idx] = read_imageui(edgesTable, smp, (int2)(mask, 0)).x;
numVertex[idx] = read_imageui(numVertexTable, smp, (int2)(mask, 0)).x;
occupied[idx] = (mask > 0 && mask < 0xFF);
the code compiles…
Guess this is a big bug in the compiler, isn’t it?
The platform where the crash occurred is a GeForce M9600GT, while on my primary workstation with a GeForce 470GTX the project compiles (and runs) fine.
Any ideas?