What's the best way to handle an interleaved data reduction, e.g. taking 50k samples and summing every other sample? I tried the approach from https://docs.nvidia.com/cuda/samples/6_Advanced/reduction/doc/reduction.pdf but had some issues with mapping the result at the end.
Here’s a worked example of how it might be done in thrust:
$ cat t991.cu
#include <thrust/reduce.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/device_vector.h>
#include <iostream>
// Number of elements in the input vector.
#define DSIZE 100000
// An element at index i contributes to the sum when (i & SEL_MASK) == SEL_VAL.
// With mask 1 and value 1 this picks the odd indices, i.e. every other sample.
#define SEL_MASK 1
#define SEL_VAL 1
// Element type of the data being reduced.
typedef float mytype;
// (value, index) pair handed to select_functor.
typedef thrust::tuple<mytype, int> ttype;
// Transform functor over a (value, index) tuple: yields the value when the
// masked index equals SEL_VAL and zero otherwise, so that a plain sum
// reduction over the transformed range only accumulates the selected elements.
struct select_functor : public thrust::unary_function<ttype, mytype>
{
  // Takes the tuple by const reference and is const-qualified so it can be
  // invoked on temporaries (such as a tuple produced on the fly when an
  // iterator is dereferenced) and through const functor objects; a non-const
  // lvalue reference parameter would reject both.
  __host__ __device__
  mytype operator()(const ttype &t) const {
    const bool selected = (thrust::get<1>(t) & SEL_MASK) == SEL_VAL;
    // Unselected elements contribute the additive identity.
    return selected ? thrust::get<0>(t) : mytype(0);
  }
};
int main(){
thrust::device_vector<mytype> data(DSIZE, 1.25f);
mytype sum = thrust::reduce(thrust::make_transform_iterator(thrust::make_zip_iterator(thrust::make_tuple(data.begin(), thrust::make_counting_iterator<int>(0))), select_functor()), thrust::make_transform_iterator(thrust::make_zip_iterator(thrust::make_tuple(data.end(), thrust::make_counting_iterator<int>(DSIZE))), select_functor()));
std::cout << sum << std::endl;
}
$ nvcc -o t991 t991.cu
$ ./t991
62500
$
If you’re not familiar with thrust, the quick start guide is a good place to start: