Thrust for_each is not working

I am using CUDA with the OpenCV and Thrust libraries. I create the buffer array, but I am unable to get the result. It would be great if you could suggest a way forward.

#include "opencv2/opencv.hpp"
#include <opencv2\cudaarithm.hpp>
#include "thrust\device_vector.h"
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include "opencv2\core\cuda.hpp"
#include "thrust\for_each.h"

using namespace std;
using namespace cv;
using namespace cv::cuda;

// Plain record describing one cluster centre.
//
// InitClusterFunctor::operator() writes the three channels of a sampled
// pixel into l, a and b, and the pixel's coordinates into x and y.
// NOTE(review): the meaning of d is not visible in this listing; it is only
// ever set to -16.
struct ClusterCenter
{
    float l;
    float a;
    float b;
    float x;
    float y;
    float d;

    // Every argument is defaulted, so the type is default-constructible on
    // both host and device: all fields default to 0 except d, which
    // defaults to -16.
    __host__ __device__ ClusterCenter(float l = 0.f, float a = 0.f, float b = 0.f,
                                      float x = 0.f, float y = 0.f, float d = -16.f)
        : l(l), a(a), b(b), x(x), y(y), d(d) {}
};


// Functor handed to thrust::for_each in main(). It carries the image view
// and the grid parameters that operator() reads when it is invoked on a
// (ClusterCenter, index) tuple.
struct InitClusterFunctor
{
    // Image view sampled by operator() as image(row, col).
    const cv::cuda::PtrStepSz<uchar3> image;
    uint32_t nrSuperpixelPerRow;
    uint32_t nrSuperpixelPerCol;
    // Multiplier applied to (grid index + 1) to obtain the sampled pixel.
    uint32_t step;
    //const KernelPointer param;

    // Stores all four arguments unchanged in the members of the same name.
    __host__ __device__ InitClusterFunctor(const cv::cuda::PtrStepSz<uchar3> image,
                                           uint32_t nrSuperpixelPerRow,
                                           uint32_t nrSuperpixelPerCol,
                                           uint32_t step)
        : image(image),
          nrSuperpixelPerRow(nrSuperpixelPerRow),
          nrSuperpixelPerCol(nrSuperpixelPerCol),
          step(step)
    {
    }

    // Defined out of line below. The tuple is taken by value, which is the
    // second problem discussed in the answer.
    __host__ __device__ void operator()(thrust::tuple<ClusterCenter, uint32_t> element);
};


// Reads one frame from "leftFront.avi", runs InitClusterFunctor over a
// zipped (cluster element, counting index) range, and prints the x field of
// host cluster entry 100.
//
// The program logic is kept exactly as posted, because the discussion below
// refers to it; the known problems are flagged inline.
int main()
{
    uint32_t nrSuperpixelPerRow = 30;
    uint32_t nrSuperpixelPerCol = 30;
    uint32_t step = 15;
    uint32_t realNrSuperpixel = 520;

    // NOTE(review): default-constructed, so the vector holds zero elements
    // and cluster.begin() == cluster.end(). This is the first problem raised
    // in the answer.
    thrust::device_vector<ClusterCenter> cluster;

    VideoCapture cap("leftFront.avi");

    Mat frame0;
    GpuMat frame;

    cap >> frame0;
    // NOTE(review): nothing in this listing transfers frame0 into frame, so
    // frame is still a default-constructed GpuMat when the functor is built.

    thrust::counting_iterator<uint32_t> init_index(0);
    thrust::counting_iterator<uint32_t> last_index(init_index + realNrSuperpixel);

    // Pair every cluster element with its running index.
    auto begin = thrust::make_zip_iterator(thrust::make_tuple(cluster.begin(), init_index));
    auto end = thrust::make_zip_iterator(thrust::make_tuple(cluster.end(), last_index));

    thrust::for_each(begin, end, InitClusterFunctor(frame, nrSuperpixelPerRow, nrSuperpixelPerCol, step));

    thrust::host_vector<ClusterCenter> hcluster(realNrSuperpixel);

    // NOTE(review): thrust::copy(first, last, result) copies FROM hcluster
    // INTO cluster, i.e. host to device; reading results back would need the
    // arguments the other way round. cluster is also too small to receive
    // realNrSuperpixel elements.
    thrust::copy(hcluster.begin(), hcluster.end(), cluster.begin());

    std::cout << hcluster[100].x << std::endl;

    return 0;
}


// Fills one ClusterCenter from the image.
//
// element.get<1>() is the flat index; it is split into a grid column and
// row, each is shifted by one and scaled by `step` to get a pixel position,
// and that pixel's three channels plus the position are written into
// element.get<0>().
//
// NOTE(review): `element` is received by value, so these writes modify a
// local copy only. This is the second problem raised in the answer.
__host__ __device__ void InitClusterFunctor::operator()(thrust::tuple<ClusterCenter, uint32_t> element)
{
    int spx_x = element.get<1>() % nrSuperpixelPerRow;
    // NOTE(review): divides by nrSuperpixelPerCol while the column above uses
    // nrSuperpixelPerRow; both are 30 in main(), so confirm which is intended.
    int spx_y = element.get<1>() / nrSuperpixelPerCol;

    int cur_x = (spx_x + 1) * static_cast<int>(step);
    int cur_y = (spx_y + 1) * static_cast<int>(step);

    // PtrStepSz is indexed as (row, column).
    uchar3 color = image(cur_y, cur_x);
    //int2 center = getMinGradient(image, cur_x, cur_y);

    element.get<0>().l = color.x;
    element.get<0>().a = color.y;
    element.get<0>().b = color.z;
    element.get<0>().x = cur_x;
    element.get<0>().y = cur_y;
    element.get<0>().d = -16.f;
}


There may be other issues with this code but I will point out 2 things that look problematic to me:

  1. thrust::device_vector in many respects is behaviorally similar to std::vector. If you did this:
std::vector<ClusterCenter> cluster;

Would that vector object have any allocated size? (Hint: no.)
It is no different with thrust::device_vector.

  2. In C and C++, the ordinary function parameter mechanism is pass-by-value. This means a copy of the arguments is made for use within the function body. CUDA C/C++ is no different. If you are passing a tuple to the functor operator here:
__host__ __device__ void InitClusterFunctor::operator()(thrust::tuple<ClusterCenter, uint32_t> element)

and it is being passed by value, how do you expect changes to your arguments made in the operator body to show up in the calling environment?

Hi Robert,

I am very thankful for your reply. I am a beginner with CUDA. I tried to create a device pointer for cluster, but that is also not working.


Finally, it is solved by doing the following change.

__host__ __device__ void operator()(Tuple element)