Dear all;
I has downloaded the code pf bitonic sort and ran it
but when I use array of non power of two, for example 10 elements, the array is not sorted correctly
can you please help me in sorting the array?
here is the code
__global__ static void bitonicSort(int * values)
{
__shared__ int shared[SHARED_SIZE_LIMIT];
const int tid = threadIdx.x;
// Copy input to shared mem.
shared[tid] = values[tid + blockIdx.x * SHARED_SIZE_LIMIT];
// shared[(tid)+(SHARED_SIZE_LIMIT)+blockIdx.x *SHARED_SIZE_LIMIT] = values[tid + (SHARED_SIZE_LIMIT)];
__syncthreads();
// Parallel bitonic sort.
for (int k = 2; k <= SHARED_SIZE_LIMIT; k *= 2)
{
// Bitonic merge:
for (int j = k / 2; j>0; j /= 2)
{
int ixj = tid ^ j;
if (ixj > tid)
{
if ((tid & k) == 0)
{
if (shared[tid] > shared[ixj])
{
swap(shared[tid], shared[ixj]);
}
}
else
{
if (shared[tid] < shared[ixj])
{
swap(shared[tid], shared[ixj]);
}
}
}
__syncthreads();
}
}
// Write result.
values[tid + blockIdx.x * SHARED_SIZE_LIMIT] = shared[tid];
}
its downloaded from
http://developer.download.nvidia.com/compute/cuda/1.1-Beta/x86_website/Data-Parallel_Algorithms.html