Thrust::binary_search with functor in a loop not working

Hello,

I wrote a code to remove repeated ids from a nested std::vector with thrust::remove_if and thrust::binary_search scaning for repeted ids one outer vector each time. I want to remove only the ids that are most unique in the global scale . The code with thrust::find works correctly coping only the unique ids weareas for the code with thrust::binary_search I does not copy anything. I checked the input vector, and also sorted the ids before using binary search

Thank in advance

Rafael Scatena

functors:

struct nonRepeatedTriangle {
    int64_t* idSetBegin;    int64_t* idSetEnd;

    __host__ __device__
    nonRepeatedTriangle(int64_t* begin, int64_t* end) : idSetBegin(begin), idSetEnd(end) {}

    __host__ __device__
    bool operator()(const int64_t& t) const {   
    
    bool result = thrust::binary_search(thrust::device, idSetBegin, idSetEnd, t);
    printf("Checking %lld: Result = %d\n", t, result);
    return !result;}};
      
      
  struct nonRepeatedTriangle_find {
    int64_t* id_start;    int64_t* id_end;

    __host__ __device__
    nonRepeatedTriangle_find(int64_t* start, int64_t* end) : id_start(start), id_end(end) {}

    __host__ __device__
    bool operator()(const int64_t& t) const {return thrust::find(thrust::device, id_start, id_end, t) != id_end;}};

function:

__host__ void delete_repeated_triangles(std::vector<std::vector<int64_t>>& TriangleL,std::vector<OctreeNode>& vector_nodes,int MAX_LEVEL){

                std::unordered_set<int64_t> unique_ids;
                for (const auto& row : TriangleL) {for (int64_t id : row) {unique_ids.insert(id);}}
                
               // std::set<int64_t> new_ids(unique_ids.size());

                int unique_count = unique_ids.size();

                std::vector<int64_t> hi;
                hi.push_back((int)unique_ids.size()+1);
                hi.push_back((int)unique_ids.size()+2);
                thrust::device_vector<int64_t> d_id(2);
                thrust::copy(hi.begin(), hi.end(), d_id.begin());            
                thrust::device_vector<int64_t> new_id;  
                
                thrust::sort(thrust::device, d_id.begin(), d_id.end());



        //        std::cout << "unique : "<< unique_count <<  std::endl;                
              
                int counter =0;
              
            //   #pragma omp parallel for
                 
                 for (int i =0; i< TriangleL.size(); i++){
                 
                 if((vector_nodes[i].Level==MAX_LEVEL)&&(TriangleL[i].size()>0)){
                
    //            thrust::host_vector<int64_t> h_vector(TriangleL[i].size());
                
    //            thrust::copy(TriangleL[i].begin(), TriangleL[i].end(), h_vector.begin());            
                
                  
                thrust::device_vector<int64_t> d_vector(TriangleL[i].size());
     //           thrust::device_vector<int64_t> d_output(TriangleL[i].size());    
                thrust::copy(TriangleL[i].begin(), TriangleL[i].end(), d_vector.begin());            
     
                thrust::sort(thrust::device,d_vector.begin(), d_vector.end());                 
                thrust::sort(thrust::device, d_id.begin(), d_id.end());             
                
               int64_t* id_start = thrust::raw_pointer_cast(d_id.data());
               int64_t* id_end = thrust::raw_pointer_cast(d_id.data() + d_id.size());            
               
               if(counter==0){d_id = d_vector;}
               else{
               auto new_end_d_vector = thrust::remove_if(thrust::device, d_vector.begin(), d_vector.end(), 
                                                       nonRepeatedTriangle(id_start, id_end));
                d_vector.resize(new_end_d_vector - d_vector.begin());}


     
                    std::cout << "d_vector size: "<< d_vector.size() <<  std::endl;                
                    new_id.clear();                 new_id.shrink_to_fit();                new_id.resize(d_vector.size());  

                    
                    auto end = thrust::copy(thrust::device, d_vector.begin(), d_vector.end(), new_id.begin());
                    new_id.resize(end - new_id.begin());

                 d_id.insert(d_id.end(), new_id.begin(), new_id.end());
                 thrust::sort(thrust::device, d_id.begin(), d_id.end());
                 d_id.erase(thrust::unique(thrust::device, d_id.begin(), d_id.end()), d_id.end());
        
        
    //   std::cout<< "d_id: "  <<  std::endl;for (size_t j = 0; j < std::min(d_id.size(), size_t(1000000000)); j++) {std::cout<< d_id[j] << " ";}
          
                 std::cout << "new id size: "<< new_id.size() <<"   id size: "<< d_id.size() <<  std::endl;                

        TriangleL[i].assign(d_vector.begin(), d_vector.end());  counter++;

        }}}

binary search output (all negatives):

d_vector size: 5826   d_output size: 0
t: 23804, result: 0 result 2: 0
t: 23807, result: 0 result 2: 0
t: 24130, result: 0 result 2: 0
t: 24131, result: 0 result 2: 0
new id size: 0   id size: 109
t: 18180, result: 0 result 2: 0
t: 18185, result: 0 result 2: 0
t: 18186, result: 0 result 2: 0
t: 18216, result: 0 result 2: 0

find output (some positives):

Checking 2441838: Result = 1
Checking 2441875: Result = 1
Checking 2441902: Result = 1
Checking 2441928: Result = 1
Checking 2441959: Result = 1
Checking 2450944: Result = 1
Checking 2450971: Result = 1
Checking 2451006: Result = 1
Checking 2451037: Result = 1

FWIW thrust already has a vectorized binary search so its probably not necessary to create your own.

In the future, if you want help with something like this, I suggest creating a minimal but complete test case. I’ll demonstrate.

When I create a simple complete test case around the binary search, I observe that indeed with thrust::device execution policy, I don’t seem to get the proper result, but with thrust::seq execution policy, I do seem to get the proper result:

# cat t346.cu
#include <thrust/binary_search.h>
#include <thrust/copy.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <cstdint>
#include <iostream>
#include <thrust/execution_policy.h>


struct nonRepeatedTriangle {
    int64_t* idSetBegin;    int64_t* idSetEnd;

    __host__ __device__
    nonRepeatedTriangle(int64_t* begin, int64_t* end) : idSetBegin(begin), idSetEnd(end) {}

    __host__ __device__
    bool operator()(const int64_t& t) const {

    bool result = thrust::binary_search(thrust::seq, idSetBegin, idSetEnd, t);
    printf("Checking %lld: Result = %d\n", t, result);
    return !result;}
};

int main(){

  int64_t vals[] = {0LL, 1LL, 2LL, 3LL, 4LL};
  size_t sz = sizeof(vals)/sizeof(vals[0]);
  thrust::device_vector<int64_t> d(vals, vals+sz);
  thrust::device_vector<int64_t> t(3);
  t[0] = 1;
  t[1] = 8;
  t[2] = 4;
  thrust::device_vector<bool> r(3);
  thrust::transform(t.begin(), t.end(), r.begin(), nonRepeatedTriangle(thrust::raw_pointer_cast(&d[0]), thrust::raw_pointer_cast(&(d[sz]))));
  thrust::host_vector<bool> hr = r;
  thrust::copy_n(hr.begin(), hr.size(), std::ostream_iterator<bool>(std::cout, ","));
  std::cout << std::endl;
}
# nvcc -o t346 t346.cu
t346.cu: In member function ‘bool nonRepeatedTriangle::operator()(const int64_t&) const’:
t346.cu:20:8: warning: format ‘%lld’ expects argument of type ‘long long int’, but argument 2 has type ‘long int’ [-Wformat=]
   20 |     printf("Checking %lld: Result = %d\n", t, result);
      |        ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~  ~
      |                                        |
      |                                        long int
# compute-sanitizer ./t346
========= COMPUTE-SANITIZER
Checking 1: Result = 1
Checking 8: Result = 0
Checking 4: Result = 1
0,1,0,
========= ERROR SUMMARY: 0 errors
#

(CUDA 12.2)

You’re going to make it easier for others to help you if you do the work that I did. If I can do it, it’s evident you could do it.

Do you observe correct behavior in your test case, with your data, by switching from thrust::device to thrust::seq ? I’m not suggesting this explains anything, it’s merely a diagnostic, and to make sure that my simplified test case shows the same behavior as your case.