/*
 * Bottom-up merge sort of one block-sized slice of `a`, in place, ascending.
 *
 * Each block works on the blockDim.x consecutive elements starting at
 * blockIdx.x * blockDim.x. Runs of width 2, 4, ... up to THREADS are merged
 * in successive passes.
 *
 * Launch assumptions (not checked here):
 *   - blockDim.x == THREADS: `temp` has THREADS slots, every thread reads
 *     temp[threadIdx.x], and the widest merge spans THREADS elements.
 *   - THREADS is a power of two; otherwise the last pass does not cover the
 *     whole slice.
 *   - `a` holds at least gridDim.x * blockDim.x elements; there is no bounds
 *     guard on the global index.
 */
__global__ void sortBlocks(int *a)
{
    // `temp` has to be __shared__: in each pass a single thread writes a whole
    // merged run into it, and after the barrier every other thread reads its
    // own slot. A plain `int temp[THREADS]` would be private to each thread,
    // so those other threads would read slots they never wrote.
    __shared__ int temp[THREADS];

    const int lane = threadIdx.x;
    const int gid = threadIdx.x + (blockIdx.x * blockDim.x);

    for (int width = 2; width <= THREADS; width *= 2)
    {
        const int half = width / 2;

        // `width` is always a power of two, so this mask test is the same as
        // (lane % width) == 0: only the first thread of each run merges.
        if ((lane & (width - 1)) == 0)
        {
            int left = gid;
            const int leftEnd = left + half;
            int right = leftEnd;
            const int rightEnd = right + half;
            int out = lane;

            // Both runs still have elements: take the strictly smaller head,
            // preferring the right run on ties.
            while ((left < leftEnd) && (right < rightEnd))
            {
                if (a[left] < a[right])
                    temp[out++] = a[left++];
                else
                    temp[out++] = a[right++];
            }

            // At most one of these drains whatever is left over.
            while (left < leftEnd)
                temp[out++] = a[left++];
            while (right < rightEnd)
                temp[out++] = a[right++];
        }

        // Make the merged runs visible to the whole block before copying back.
        __syncthreads();
        a[gid] = temp[lane];
        // Finish the copy-back before the next pass reads `a` again.
        __syncthreads();
    }
}
Here is my merge sort code. In this function, declaring the buffer in shared memory, as in the code above (`__shared__ int temp[THREADS]`), works fine.
But when I declare it without shared memory, as `int temp[THREADS]`, the output values are wrong.
Why is that? How do I allocate memory if I don’t use shared memory?
The THREADS value is 512.