 # Thrust question

Hi,
Is there an easy way to do a segmented conditional copy in thrust?
Suppose I have the following:

``````int partitions[] = {0, 0, 0, 1, 2, 2, 2};
int values[] = {1, 2, 3, 4, 5, 6, 7};
``````

I would like the output to be:

``````output = {2, 3, <Empty>, <Empty>, 6, 7, <Empty>}
``````

So in that example, I copy the next value into the current position, but only within the same partition.
Something along the lines of inclusive_scan_by_key, which also works per key segment.

I’m not seeing the logic in how you got output, but this link might provide some insight.

https://stackoverflow.com/questions/39240098/cuda-thrust-copy-transformed-result-only-if-it-satisfies-a-predicate

I’m trying to move the input one position left per partition…

Maybe a simpler example would be better:

``````int partitions[] = {0, 0, 0, 0, 1, 1, 1, 1};
int values[] = {1, 2, 3, 4, 5, 6, 7, 8};

output = {2, 3, 4, <NULL>, 6, 7, 8, <NULL>}
``````

I can’t guarantee this is optimal (or exactly what you want), but does the following work?

``````#include <cstdio>
#include <vector>

#include <thrust/device_vector.h>
#include <thrust/functional.h>
#include <thrust/host_vector.h>
#include <thrust/iterator/constant_iterator.h>
#include <thrust/reduce.h>
#include <thrust/transform.h>

// Binary functor that ignores its first argument and yields a copy of the
// second one. Callable from both host and device code.
struct Copy
{
    template <typename T>
    __host__ __device__ T operator()(const T& a, const T& b) const
    {
        (void)a;  // only the second operand contributes to the result
        return b;
    }
};

// Demo: within each partition (a run of equal keys in h_part) copy every value
// except the first one of the run into the output. The first slot of every run
// is masked out by the stencil and therefore keeps the -1 it was initialised
// with.
int main () {

    std::vector<int> h_part  = { 0, 0, 0, 0, 1, 1, 1, 1 };
    std::vector<int> h_data  = { 1, 2, 3, 4, 5, 6, 7, 8 };

    // Copy data to GPU
    thrust::device_vector<int> d_part ( h_part );
    thrust::device_vector<int> d_data ( h_data );

    // A key sequence of n elements contains at most n runs, so sizing the
    // outputs by the input keeps reduce_by_key in bounds for any number of
    // partitions (a fixed size of 2 only worked for exactly two partitions).
    thrust::device_vector<int> d_rle ( d_part.size() );
    thrust::device_vector<int> d_len ( d_part.size() );

    // Compute RLE: summing a constant 1 per key yields the length of each run.
    const size_t num_runs = thrust::reduce_by_key( d_part.begin(),
                                                   d_part.end(),                      // input key sequence
                                                   thrust::constant_iterator<int>(1), // input value sequence
                                                   d_rle.begin(),                     // output key sequence
                                                   d_len.begin()                      // output value sequence
                                                 ).first - d_rle.begin();             // compute the output size

    // Create stencil: 1 = copy this element, 0 = leave the output untouched.
    thrust::device_vector<int> d_stencil ( d_data.size(), 1 );

    // Clear the stencil at the first element of every run. The start of run i
    // is the sum of the lengths of all earlier runs; `i * d_len[i]` is only
    // correct when every run has the same length.
    size_t run_start = 0;
    for ( size_t i = 0; i < num_runs; i++ )
    {
        d_stencil[run_start] = 0;
        const int run_length = d_len[i];  // reads one element back from the device
        run_start += run_length;
    }

    // Output array, pre-filled with the "empty" marker -1.
    thrust::device_vector<int> d_output ( d_data.size(), -1 );
    thrust::identity<int> identity;

    // Perform transformation: wherever the stencil is non-zero, write
    // Copy()(part, data) == data; elsewhere the -1 stays in place.
    thrust::transform_if( d_part.begin(),
                          d_part.end(),
                          d_data.begin(),
                          d_stencil.begin(),
                          d_output.begin(),
                          Copy(),
                          identity );

    thrust::host_vector<int> h_output (d_output);

    for (const int value : h_output)
        std::printf("%d\n", value);

    // {-1, 2, 3, 4, -1, 6, 7, 8}
}
``````

Thanks for the help. The stencil is a nice idea, but the results are not what I wanted.
I need to copy the values at an offset (of 1…x).
So in your output the values are:

``````// {-1, 2, 3, 4, -1, 6, 7, 8}
``````

whereas the output I need is this:

``````// {2, 3, 4, -1, 6, 7, 8, -1}
``````

I guess this is a general Thrust question:
how can I iterate over/manipulate/read values from an underlying device pointer at a different
offset than the one Thrust is currently looking at?

I solved it like this, but I wanted to know if there’s a more elegant way:

``````template <typename T>
// Per-index functor: for element `index`, writes m_aggregated[index - m_offset]
// into m_out[index] when that source element exists and carries the same
// partition id as `index`; otherwise writes 0 as the "null" value.
//
// The functor only stores raw pointers; it does not own or allocate anything.
// NOTE(review): the pointers are assumed to be device-accessible and to hold at
// least m_rows elements each - confirm against the caller.
struct LagWithNulls
{
    size_t m_rows;               // element count supplied by the caller
    unsigned int m_offset;       // distance (in elements) to look back
    unsigned int *m_partitions;  // partition id per element
    T *m_aggregated;             // input values
    T *m_out;                    // output values

    LagWithNulls(size_t rows, unsigned int offset, unsigned int *partitions, T *aggergated, T *out)
        : m_rows(rows), m_offset(offset), m_partitions(partitions), m_aggregated(aggergated),
          m_out(out) {}

    __device__ void operator()(unsigned int index) const
    {
        // Never touch memory past the element count we were given.
        if (index >= m_rows)
        {
            return;
        }

        // Compare before subtracting: `index - m_offset` is unsigned arithmetic,
        // so squeezing the difference into an int misclassifies valid source
        // indices above INT_MAX as "negative".
        if (index < m_offset)
        {
            m_out[index] = 0;
            return;
        }

        const unsigned int lag_index = index - m_offset;

        if (m_partitions[index] == m_partitions[lag_index])
        {
            m_out[index] = m_aggregated[lag_index];
        }
        else
        {
            // The source element belongs to a different partition.
            m_out[index] = 0;
        }
    }
};

// Apply LagWithNulls once to every index in [0, N). The functor receives raw
// pointers obtained from the three vectors, so it reads and writes their
// storage directly.
thrust::counting_iterator<unsigned int> index(0);
thrust::for_each(index, index + N,
                 LagWithNulls<T>(N, offset,
                                 thrust::raw_pointer_cast(d_partitions.data()),
                                 thrust::raw_pointer_cast(d_aggregated.data()),
                                 thrust::raw_pointer_cast(d_sum_out.data())));
``````