Cluster siliarity calculated with usage of array reduction optimization

Triangular matrix elements can be enumerated with a linear index. With the correct index transformation, your problem could be solved with thrust::transform_reduce like this

//nvcc --expt-relaxed-constexpr -O0 -g -std=c++17 triangular.cu -o triangular

#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/transform_reduce.h>
#include <thrust/functional.h>

#include <iostream>
#include <cmath>

struct MatrixIndex{
    int i = 0;
    int j = 0;
};

std::ostream& operator<<(std::ostream& os, const MatrixIndex& m){
    os << "(" << m.i << "," << m.j << ")";
    return os;
}

struct ConvertLinearIndexToTriangularMatrixIndex{
    int dim;

    __host__ __device__
    ConvertLinearIndexToTriangularMatrixIndex(int dimension) : dim(dimension){}

    __host__ __device__
    MatrixIndex operator()(int linear) const {
        MatrixIndex result;
       //check if those indices work for you

        //compute i and j from linear index https://stackoverflow.com/questions/27086195/linear-index-upper-triangular-matrix
        result.i = dim - 2 - floor(sqrt(-8*linear + 4*dim*(dim-1)-7)/2.0 - 0.5);
        result.j = linear + result.i + 1 - dim*(dim-1)/2 + (dim-result.i)*((dim-result.i)-1)/2;

        return result;
    }
};


struct ComputeDelta{
    //A_type A
    //B_type B

    __host__ __device__
    ComputeDelta(/* init A and B*/){
        /* init A and B*/
    }

    __host__ __device__
    float operator()(const MatrixIndex& index) const{
        
        float da = 0;
        float db = 0;

        //... compute da, db from index.i, index.j

        return (da-db) * (da-db);
    }
};

int main(){
    const int dim = 5;
    const int elems = 10; //upper triangular matrix with dim 5 without diagonal has 10 elements
    auto matrixIndexIterator = thrust::make_transform_iterator(
        thrust::make_counting_iterator(0),
        ConvertLinearIndexToTriangularMatrixIndex{dim}
    );

    for(int i = 0; i < elems; i++){
        std::cout << matrixIndexIterator[i] << " ";
    }
    std::cout << "\n";

    float result = thrust::transform_reduce(
        thrust::host,
        matrixIndexIterator, 
        matrixIndexIterator + elems, 
        ComputeDelta{}, 
        float(0), 
        thrust::plus<float>{}
    );

    return 0;
}

1 Like