SDK scan large array

I modified the scan the sdk “scanLargeArray” but did not get any correct result.

Even after commenting out the code I cannot get any true value.

Could someone tell me what is wrong:

This is what I did

[codebox]

device float finalScan(float* s_data, int n)

{

int offset = 1;

int thid = threadIdx.x;



s_data[thid] = 1;

__syncthreads();

for (int d = n>>1; d > 0; d >>= 1 )

{

	__syncthreads();

	if (thid < d)      

    {

		int i  = __mul24(__mul24(2,offset), thid);

        int ai = i + offset - 1;

        int bi = ai + offset;

ai += CONFLICT_FREE_OFFSET(ai);

        bi += CONFLICT_FREE_OFFSET(bi);

s_data[bi] += s_data[ai];

    }

    offset *= 2;

}



 for (int d = 1; d <= n; d *= 2)

{

    offset >>= 1;

    __syncthreads();

if (thid < d)

    {

		int i  = __mul24(__mul24(2,offset), thid);

        int ai = i + offset - 1;

        int bi = ai + offset;

ai += CONFLICT_FREE_OFFSET(ai);

        bi += CONFLICT_FREE_OFFSET(bi);

        float t  =  s_data[ai];

        s_data[ai] =  s_data[bi];

        s_data[bi] += t;

    }

}

__syncthreads();

return s_data[n-1];

}

[/codebox]

I modified the scan the sdk “scanLargeArray” but did not get any correct result.

Even after commenting out the code I cannot get any true value.

Could someone tell me what is wrong:

This is what I did

[codebox]

device float finalScan(float* s_data, int n)

{

int offset = 1;

int thid = threadIdx.x;



s_data[thid] = 1;

__syncthreads();

for (int d = n>>1; d > 0; d >>= 1 )

{

	__syncthreads();

	if (thid < d)      

    {

		int i  = __mul24(__mul24(2,offset), thid);

        int ai = i + offset - 1;

        int bi = ai + offset;

ai += CONFLICT_FREE_OFFSET(ai);

        bi += CONFLICT_FREE_OFFSET(bi);

s_data[bi] += s_data[ai];

    }

    offset *= 2;

}



 for (int d = 1; d <= n; d *= 2)

{

    offset >>= 1;

    __syncthreads();

if (thid < d)

    {

		int i  = __mul24(__mul24(2,offset), thid);

        int ai = i + offset - 1;

        int bi = ai + offset;

ai += CONFLICT_FREE_OFFSET(ai);

        bi += CONFLICT_FREE_OFFSET(bi);

        float t  =  s_data[ai];

        s_data[ai] =  s_data[bi];

        s_data[bi] += t;

    }

}

__syncthreads();

return s_data[n-1];

}

[/codebox]