Hi, i have some problem with my code.
my int tabel = 3,5,8,9,7,4,2,1
and after sorting my table value are 9 8 3 5 4 7 2 1. but i expect 1,2,3,4,5,7,8,9.
Where i make mistake.
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
__global__ static void bitonicSort(int * values, int NUM)
{
extern __shared__ int shared[];
const unsigned int tid = threadIdx.x;
shared[tid] = values[tid];
__syncthreads();
for (unsigned int k = 2; k <= 5; k *= 2){
for (unsigned int j = k / 2; j>0; j /= 2){
unsigned int ixj = tid ^ j;
if (ixj > tid){
if ((tid & k) == 0)
if (shared[tid] > shared[ixj])
{
int temp=shared[tid];
shared[tid]=shared[ixj];
shared[ixj]=temp;
}
else
if (shared[tid]< shared[ixj]){
int temp=shared[tid];
shared[tid]=shared[ixj];
shared[ixj]=temp;
}
}
__syncthreads();
}
}
// Write result.
values[tid] = shared[tid];
}
int main(void)
{
int NUM = 8;
int values[] = {3,5,8,9,7,4,2,1};
int * dvalues;
cudaMalloc((void**)&dvalues, sizeof(int) * NUM);
cudaMemcpy(dvalues, values, sizeof(int) * NUM,cudaMemcpyHostToDevice);
for(int i=0;i<8;i++){printf("%d ",values[i]);}printf("\n");
bitonicSort<<<1, NUM, sizeof(int) * NUM>>>(dvalues, NUM);
cudaMemcpy(values, dvalues, sizeof(int) * NUM, cudaMemcpyDeviceToHost);
cudaFree(dvalues);
for(int i=0;i<8;i++){printf("%d ",values[i]);}
}