Hi all,
I have a problem about get_global_id() …
I try to record the current running thread ID in the Kernel function and can not get the global_id.
It seems like the kernel does not run in parallel…
see the following code:
__kernel void BuildResponseLayer(__global float* responses,
__global float* laplacian,
__global float* img,
uint h,
uint w,
uint s,
uint stride,
__global int* deviceGid)
{
[b] int gid = get_global_id(0);
deviceGid[1] = gid; [/b]
int ss = step[gid]; // step size for this filter
int b = (filter[gid] - 1) / 2 + 1; // border for this filter
int l = filter[gid] / 3; // lobe for this filter (filter size / 3)
int ww = filter[gid]; // filter size
float inverse_area = 1.f/(ww*ww); // normalisation factor
float Dxx, Dyy, Dxy;
for(int r, c, ar = 0, index = 0; ar < h * co[gid]; ++ar)
{
for(int ac = 0; ac < w * co[gid]; ++ac, index++)
{
r = ar * ss;
c = ac * ss;
// Compute response components
Dxx = BoxIntegral(img, stride, h ,ww, r - l + 1, c - b, 2*l - 1, ww)
- BoxIntegral(img, stride, h, ww, r - l + 1, c - l / 2, 2*l - 1, l)*3;
Dyy = BoxIntegral(img, stride, h, ww, r - b, c - l + 1, ww, 2*l - 1)
- BoxIntegral(img, stride, h, ww, r - l / 2, c - l + 1, l, 2*l - 1)*3;
Dxy = + BoxIntegral(img, stride, h, ww, r - l, c + 1, l, l)
+ BoxIntegral(img, stride, h, ww, r + 1, c - l, l, l)
- BoxIntegral(img, stride, h, ww, r - l, c - l, l, l)
- BoxIntegral(img, stride, h, ww, r + 1, c + 1, l, l);
// Normalise the filter responses with respect to their size
Dxx *= inverse_area;
Dyy *= inverse_area;
Dxy *= inverse_area;
// Get the determinant of hessian response & laplacian sign
responses[gid * h * w + index] = (Dxx * Dyy - 0.81f * Dxy * Dxy);
laplacian[gid * h * w + index] = (Dxx + Dyy >= 0 ? 1 : 0);
}
}
}
After we transfer the deviceGid back to the host and print it. These elements seem strange and similar to some uninitialized data.
Thanks for any help! :rolleyes: