Reduction and interleaved data

What's the best way to handle a reduction over interleaved data, e.g. taking 50k samples and summing every other sample? I tried the approach in https://docs.nvidia.com/cuda/samples/6_Advanced/reduction/doc/reduction.pdf but had some issues with mapping the result at the end.

Here’s a worked example of how it might be done in thrust:

$ cat t991.cu
#include <thrust/reduce.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/device_vector.h>
#include <iostream>

// Number of elements in the input vector (also the end of the index range).
#define DSIZE 100000
// An element is selected when (index & SEL_MASK) == SEL_VAL. With both set
// to 1 this picks the odd indices, i.e. every other sample.
#define SEL_MASK 1
#define SEL_VAL  1

// Element type of the data being reduced.
typedef float mytype;


// (value, index) pair: the element paired with its position in the vector.
typedef thrust::tuple<mytype, int> ttype;

// Unary functor mapping a (value, index) tuple to the value to be summed:
// the value itself when the index matches the selection pattern, 0 otherwise.
// Applying it before a reduction sums only the selected elements.
struct select_functor : public thrust::unary_function<ttype, mytype>
{
  // t: tuple whose element 0 is the data value and element 1 is its index.
  // Returns element 0 when (index & SEL_MASK) == SEL_VAL, otherwise 0.
  //
  // The tuple is taken by const reference and the operator is a const member:
  // nothing is modified, and this lets the functor be called on temporaries
  // and through const copies of the functor.
  __host__ __device__
  mytype operator()(const ttype &t) const {
    const bool selected = (thrust::get<1>(t) & SEL_MASK) == SEL_VAL;
    return selected ? thrust::get<0>(t) : mytype(0);
  }
};


int main(){

  thrust::device_vector<mytype> data(DSIZE, 1.25f);
  mytype sum = thrust::reduce(thrust::make_transform_iterator(thrust::make_zip_iterator(thrust::make_tuple(data.begin(), thrust::make_counting_iterator<int>(0))), select_functor()), thrust::make_transform_iterator(thrust::make_zip_iterator(thrust::make_tuple(data.end(), thrust::make_counting_iterator<int>(DSIZE))), select_functor()));
  std::cout << sum << std::endl;
}
$ nvcc -o t991 t991.cu
$ ./t991
62500
$

If you’re not familiar with thrust, the quick start guide is a good place to start:

https://github.com/thrust/thrust/wiki/Quick-Start-Guide