I modified the scan the sdk “scanLargeArray” but did not get any correct result.
Even after commenting out the code I cannot get any true value.
Could someone tell me what is wrong:
This is what I did
[codebox]
device float finalScan(float* s_data, int n)
{
int offset = 1;
int thid = threadIdx.x;
s_data[thid] = 1;
__syncthreads();
for (int d = n>>1; d > 0; d >>= 1 )
{
__syncthreads();
if (thid < d)
{
int i = __mul24(__mul24(2,offset), thid);
int ai = i + offset - 1;
int bi = ai + offset;
ai += CONFLICT_FREE_OFFSET(ai);
bi += CONFLICT_FREE_OFFSET(bi);
s_data[bi] += s_data[ai];
}
offset *= 2;
}
for (int d = 1; d <= n; d *= 2)
{
offset >>= 1;
__syncthreads();
if (thid < d)
{
int i = __mul24(__mul24(2,offset), thid);
int ai = i + offset - 1;
int bi = ai + offset;
ai += CONFLICT_FREE_OFFSET(ai);
bi += CONFLICT_FREE_OFFSET(bi);
float t = s_data[ai];
s_data[ai] = s_data[bi];
s_data[bi] += t;
}
}
__syncthreads();
return s_data[n-1];
}
[/codebox]