I’m trying to implement a bitonic sort algorithm in my application, but calling __syncthreads() causes a run-time error. How should I synchronize the threads instead? Or can I just use CUDA alongside my OptiX context to sort an OptiX buffer?
Here is my code:
// Bitonic sort written as a single program invocation per launch index.
//
// Each invocation first copies its own element from intput_buffer into
// output_buffer, then walks every (stage, stride) pass of the bitonic network
// and compare-exchanges its element with the partner at launch_index ^ stride.
//
// NOTE(review): the author reports that the __syncthreads() calls below fail at
// run time. Presumably this kind of barrier is not usable inside this program
// type, and it is unclear whether it would cover every invocation of the launch
// anyway -- confirm. A likely alternative is one launch per (stage, stride)
// pass driven from the host.
RT_PROGRAM void bitonic_sort() {
    // Seed the working buffer with this invocation's input element.
    output_buffer[launch_index] = intput_buffer[launch_index];
    __syncthreads();

    int element_count = intput_buffer.size();

    // stage: length of the bitonic sequences being merged (2, 4, 8, ...).
    for (unsigned int stage = 2; stage <= element_count; stage <<= 1) {
        // stride: distance between the two elements compared in this pass.
        for (unsigned int stride = stage >> 1; stride > 0; stride >>= 1) {
            unsigned int partner = launch_index ^ stride;

            // Only the lower index of an in-range pair performs the exchange,
            // so each pair is handled by exactly one invocation.
            if (partner < element_count && partner > launch_index) {
                // The stage bit of the index selects the sort direction.
                const bool ascending = (launch_index & stage) == 0;

                if (ascending
                        ? (output_buffer[launch_index] > output_buffer[partner])
                        : (output_buffer[launch_index] < output_buffer[partner])) {
                    swap(output_buffer[launch_index], output_buffer[partner]);
                }
            }

            // Barrier between passes (see the review note above the function).
            __syncthreads();
        }
    }
}
Can someone give me a hand?