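This kernel works perfectly fine until I uncomment either of the two commented-out lines at the end of the kernel (the writes through `f4`). With one of them enabled, the kernel still compiles without warnings and runs, but returns the error “CL_INVALID_COMMAND_QUEUE”.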
I’m using toolkit_4.0.13 on Ubuntu.
/*
 * vector_add: computes c[i] = a[i] + b[i] for every i in [0, count),
 * and repeats that whole pass `loop` times.
 *
 * Indexing uses the LOCAL id and LOCAL size only, so the work-items of one
 * work-group cooperatively cover the entire [0, count) range; if more than
 * one work-group is launched, every group redundantly writes the same
 * elements of c.
 *
 * a, b            input vectors in global memory (read only here)
 * c               output vector in global memory
 * count           number of elements to process
 * local_buf_size  not used by this kernel body
 * loop            number of times the addition pass is repeated
 */
__kernel void vector_add(__global float* a, __global float* b, __global float* c,
int count, int local_buf_size, int loop
)
{
    /*
     * View of c[4..7] as a single float4.
     *
     * The pointer MUST carry the __global qualifier. A plain `float4 *` is a
     * pointer into the private address space; casting &c[4] to it discards
     * the address space, and writing through it is an invalid memory access
     * (which surfaces on the host as an error such as
     * CL_INVALID_COMMAND_QUEUE on a later call).
     *
     * NOTE(review): &c[4] is 16 bytes past c, so the float4 alignment
     * requirement is met only if c itself is at least 16-byte aligned --
     * presumably true for the start of a buffer object; confirm for
     * sub-buffers or offset pointers.
     */
    __global float4 *f4 = (__global float4 *)(&c[4]);
    const int gsize = get_local_size(0); // group size
    const int lid = get_local_id(0);

    for (int k = 0; k < loop; k++) {
        /* Each work-item handles elements lid, lid + gsize, lid + 2*gsize, ... */
        for (int i = lid; i < count; i += gsize) {
            c[i] = a[i] + b[i];
        }
        /*
         * `loop` is the same kernel argument for every work-item, so all
         * work-items of the group reach this barrier the same number of times.
         */
        barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
    }

    /*
     * With f4 declared __global, either store below is now a valid write to
     * c[4]. They are left disabled so the kernel's output is unchanged.
     * Before enabling one, make sure the buffer behind c holds at least 8
     * floats, and be aware that barrier() only synchronizes within a
     * work-group: with several work-groups, another group may still be
     * writing c[4] in the loop above.
     */
    // (*f4).x = 9;
    // f4->x = 9;
}