Hi Moderators and Developers:
I want to ask a favor about the computing scheme I’m working on. I have a series of codes that sort the certain arrays according to the keys:
file name: csort.cu
#include <thrust/device_vector.h>
#include <thrust/device_ptr.h>
#include <thrust/sort.h>
extern "C" {
void sort_int_wrapper(int *data, int *value, int N)
{
thrust::device_ptr<int> dev_ptr_data(data);
thrust::device_ptr<int> dev_ptr_value(value);
thrust::sort_by_key(dev_ptr_data, dev_ptr_data + N, dev_ptr_value);
}
}
file name: thrust_module.f90
module thrust
interface thrustsort
subroutine sort_int(input, value, N) bind(C,name='sort_int_wrapper')
use iso_c_binding
integer(c_int),device :: input(*), value(*)
integer(c_int),value :: N
end subroutine
end interface
end module
and the main file name: test_sort.f90 (main)
program testsort
use thrust
integer, allocatable :: cpuData(:), cpuData1(:), cpuData2(:), cpuData3(:)
integer, allocatable, device :: gpuData(:), gpuData1(:), gpuData2(:), gpuData3(:)
integer, allocatable, device :: tmpData(:)
integer :: N = 100
allocate(cpuData(N)); allocate(cpuData1(N)); allocate(cpuData2(N))
allocate(cpuData3(N))
allocate(gpuData(N)); allocate(gpuData1(N)); allocate(gpuData2(N))
allocate(gpuData3(N))
allocate(tmpData(N))
do i = 1, N
cpuData(i) = int(random(i)*10 + random(i)*100)
end do
do i = 1, N
cpuData1(i) = int(random(i)*7 + random(i)*2)
cpuData2(i) = int(random(i)*8 + random(i)*3)
cpuData3(i) = int(random(i)*9 + random(i)*4)
end do
gpuData = cpuData; tmpData = gpuData
gpuData1 = cpuData1; gpuData2 = cpuData2; gpuData = cpuData3
call thrustsort(gpuData, gpuData1, N)
gpuData = tmpData
call thrustsort(gpuData, gpuData2, N)
gpuData = tmpData
call thrustsort(gpuData, gpuData3, N)
end program
As you can see, the function sort_by_key in Thrust can only sort one array at one time. But I have three arrays, therefore I create the temperory array to store the initial key and do the sort operation three times in the Fortran main code. It costs much time on the repeated sort process and data transferring. Is there any possible approach that avoiding these redundant treatment? Thank you guys for offering the suggestions.