custom Thrust iterator to combine consecutive elements of array

Hi, I am trying to come up with an iterator which traverses an array in a strided manner and treats each run of `stride` consecutive elements of the array as one big element. The stride is determined at runtime.

Let me give you a simple example.

indices = [2,0,1]
array = [0,1,2,3,4,5,6,7,8,9,10,11]
resultarray = [0,0,0,0,0,0,0,0,0,0,0,0]
stride = 4

thrust::gather(indices.begin(),
indices.end(),
CustomIterator(array, stride), //create “virtual” array [[0,1,2,3], [4,5,6,7], [8,9,10,11]]
CustomIterator(resultarray, stride));

After this, resultarray should be [8,9,10,11,0,1,2,3,4,5,6,7]

Is there a way to combine Thrust iterators to achieve this? So far, I only found the example code for strided iterators which performs strided access, but uses only 1 element instead of stride elements. https://github.com/thrust/thrust/blob/master/examples/strided_range.cu

This strided chunked iterator can return chunks of elements (i.e. a stride plus adjacent access):

https://stackoverflow.com/questions/42230488/how-make-a-stride-chunk-iterator-thrust-cuda/42235487#42235487

I don’t think it does what you want however, because it does not reorder according to an index array. It seems to me what you want can be done with just a permutation iterator with a transform iterator for the map?

$ cat t362.cu
#include <iostream>
#include <iterator>

#include <thrust/copy.h>
#include <thrust/device_vector.h>
#include <thrust/functional.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/permutation_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/sequence.h>

// Index-mapping functor for a chunk-wise reorder.
//
// A flat array is viewed as consecutive chunks of `chunk_size` elements.
// Given a flat output position `idx`, operator() returns the flat input
// position to read from when the chunks are permuted according to `order`:
//
//   idx / chunk_size   -> which output chunk `idx` falls in
//   order[that chunk]  -> which input chunk supplies it
//   idx % chunk_size   -> offset inside the chunk (kept as-is)
//
// Example: order = {2,0,1}, chunk_size = 4 maps 0,1,2,3,4,5,... to
// 8,9,10,11,0,1,...
struct my_idx : public thrust::unary_function<int,int>
{
  int *order;      // chunk permutation; dereferenced inside operator(), so it
                   // must be readable wherever the functor is invoked
  int chunk_size;  // elements per chunk; used as a divisor, so must be non-zero

  // d_order: pointer to the chunk permutation (not copied, not owned).
  // cs:      number of consecutive elements that form one chunk.
  my_idx(int *d_order, int cs) : order(d_order), chunk_size(cs) {}

  // Maps flat output index `idx` to the flat input index described above.
  // const-qualified because it only reads the members, which also lets the
  // functor be called through a const object or reference.
  __host__ __device__
  int operator()(int idx) const {
    const int chunk  = idx / chunk_size;
    const int offset = idx % chunk_size;
    return order[chunk] * chunk_size + offset;
  }
};



int main(){

  int my_cs = 4;
  int idcs[] = {2,0,1};

  int is = sizeof(idcs)/sizeof(idcs[0]);
  thrust::device_vector<int> d_idcs(idcs, idcs+is);
  thrust::device_vector<int> d_data(is*my_cs);
  thrust::sequence(d_data.begin(), d_data.end());
  thrust::device_vector<int> d_res(d_data.size());
  thrust::copy_n(thrust::make_permutation_iterator(d_data.begin(), thrust::make_transform_iterator(thrust::counting_iterator<int>(0), my_idx(thrust::raw_pointer_cast(d_idcs.data()), my_cs))), d_data.size(), d_res.begin());
  thrust::copy(d_res.begin(), d_res.end(), std::ostream_iterator<int>(std::cout, ","));
  std::cout << std::endl;
}
$ nvcc -o t362 t362.cu
$ ./t362
8,9,10,11,0,1,2,3,4,5,6,7,
$