I am trying to use CUB reductions on a custom datatype and having trouble creating the reduction functor.
The idea here is to reduce the array by adding each ‘pack’ of 4 ints together such that pack[0] is reduced with all other pack[0], pack[1] with all other pack[1] etc.
I followed the example here for making a functor and this is what I have so far
#include <array>
#include <cstring>
#include <vector>
#include <cuda/std/array>
#include <cub/cub.cuh>
#include <iostream>
struct pack_t
{
int pack[4] = {1,2,3,4};
pack_t()
{
}
__device__ pack_t(cuda::std::array<int,4> set)
{
memcpy(pack ,set.data(), 4 * sizeof(int));
}
int& operator[](int idx)
{
return pack[idx];
}
const int& operator[](int idx) const
{
return pack[idx];
}
};
void printPack(const pack_t& item)
{
std::cout<<item.pack[0]<<item.pack[1]<<item.pack[2]<<item.pack[3]<<"\n";
}
__device__ pack_t operator+(const pack_t& a,const pack_t& b)
{
auto w = a.pack[0]+b.pack[0];
auto x = a.pack[1]+b.pack[1];
auto y = a.pack[2]+b.pack[2];
auto z = a.pack[3]+b.pack[3];
cuda::std::array<int, 4> data = {w,x,y,z};
return pack_t(data);
}
struct customReduce
{
__device__ __forceinline__
pack_t operator()(const pack_t& a, const pack_t& b) const
{
return a+b;
}
};
int main()
{
const size_t N = 128;
std::vector<pack_t> host_data(N);
std::vector<pack_t> host_data_out(1);
pack_t * dev_data = NULL;
pack_t * dev_data_out = NULL;
void * dev_temp = NULL;
size_t tempBytes = 0;
cudaMalloc((void**)&dev_data, N * sizeof(pack_t));
cudaMalloc((void**)&dev_data_out, sizeof(pack_t));
cudaMemcpy(dev_data, host_data.data(), N * sizeof(pack_t),cudaMemcpyDefault);
customReduce reductionOp;
cub::DeviceReduce::Reduce(dev_temp, tempBytes, dev_data, dev_data_out, N, reductionOp,0);
cudaMalloc((void**)&dev_temp, tempBytes);
cub::DeviceReduce::Reduce(dev_temp, tempBytes, dev_data, dev_data_out, N, reductionOp,0);
cudaMemcpy(host_data_out.data(), dev_data_out, sizeof(pack_t), cudaMemcpyDefault);
printPack(host_data_out.at(1));
return 0;
}
I get compiler errors:
usr/local/cuda-12/bin/../targets/x86_64-linux/include/cub/detail/type_traits.cuh(59): error: class "cuda::std::__4::result_of<customReduce (int, pack_t)>" has no member "type"
typename ::cuda::std::result_of<Invokable(Args...)>::type;
^
detected during:
instantiation of type "cub::CUB_200400_520_NS::detail::invoke_result_t<customReduce, int, pack_t>" at line 67
instantiation of type "cub::CUB_200400_520_NS::detail::accumulator_t<customReduce, int, pack_t>" at line 547 of /usr/local/cuda-12/bin/../targets/x86_64-linux/include/cub/device/dispatch/dispatch_reduce.cuh
processing of template argument list for "cub::CUB_200400_520_NS::DispatchReduce" based on template arguments <pack_t *, pack_t *, OffsetT, customReduce, int> at line 209 of /usr/local/cuda-12/bin/../targets/x86_64-linux/include/cub/device/device_reduce.cuh
instantiation of "cudaError_t cub::CUB_200400_520_NS::DeviceReduce::Reduce(void *, size_t &, InputIteratorT, OutputIteratorT, NumItemsT, ReductionOpT, T, cudaStream_t) [with InputIteratorT=pack_t *, OutputIteratorT=pack_t *, ReductionOpT=customReduce, T=int, NumItemsT=size_t]" at line 81 of main.cu
/usr/local/cuda-12/bin/../targets/x86_64-linux/include/cub/util_device.cuh(868): error: incomplete type "cub::CUB_200400_520_NS::DeviceReducePolicy<<error-type>, unsigned long long, customReduce>::Policy300" is not allowed
typename PrevPolicyT::ActivePolicy,
^
detected during:
instantiation of class "cub::CUB_200400_520_NS::ChainedPolicy<PTX_VERSION, PolicyT, PrevPolicyT> [with PTX_VERSION=300, PolicyT=cub::CUB_200400_520_NS::DeviceReducePolicy<<error-type>, unsigned long long, customReduce>::Policy300, PrevPolicyT=cub::CUB_200400_520_NS::DeviceReducePolicy<<error-type>, unsigned long long, customReduce>::Policy300]" at line 447 of /usr/local/cuda-12/bin/../targets/x86_64-linux/include/cub/device/dispatch/dispatch_reduce.cuh
instantiation of class "cub::CUB_200400_520_NS::DeviceReducePolicy<AccumT, OffsetT, ReductionOpT>::Policy300 [with AccumT=<error-type>, OffsetT=unsigned long long, ReductionOpT=customReduce]" at line 868
instantiation of class "cub::CUB_200400_520_NS::ChainedPolicy<PTX_VERSION, PolicyT, PrevPolicyT> [with PTX_VERSION=350, PolicyT=cub::CUB_200400_520_NS::DeviceReducePolicy<<error-type>, unsigned long long, customReduce>::Policy350, PrevPolicyT=cub::CUB_200400_520_NS::DeviceReducePolicy<<error-type>, unsigned long long, customReduce>::Policy300]" at line 469 of /usr/local/cuda-12/bin/../targets/x86_64-linux/include/cub/device/dispatch/dispatch_reduce.cuh
instantiation of class "cub::CUB_200400_520_NS::DeviceReducePolicy<AccumT, OffsetT, ReductionOpT>::Policy350 [with AccumT=<error-type>, OffsetT=unsigned long long, ReductionOpT=customReduce]" at line 868
instantiation of class "cub::CUB_200400_520_NS::ChainedPolicy<PTX_VERSION, PolicyT, PrevPolicyT> [with PTX_VERSION=600, PolicyT=cub::CUB_200400_520_NS::DeviceReducePolicy<<error-type>, unsigned long long, customReduce>::Policy600, PrevPolicyT=cub::CUB_200400_520_NS::DeviceReducePolicy<<error-type>, unsigned long long, customReduce>::Policy350]" at line 492 of /usr/local/cuda-12/bin/../targets/x86_64-linux/include/cub/device/dispatch/dispatch_reduce.cuh
instantiation of class "cub::CUB_200400_520_NS::DeviceReducePolicy<AccumT, OffsetT, ReductionOpT>::Policy600 [with AccumT=<error-type>, OffsetT=unsigned long long, ReductionOpT=customReduce]" at line 982 of /usr/local/cuda-12/bin/../targets/x86_64-linux/include/cub/device/dispatch/dispatch_reduce.cuh
instantiation of "cudaError_t cub::CUB_200400_520_NS::DispatchReduce<InputIteratorT, OutputIteratorT, OffsetT, ReductionOpT, InitT, AccumT, SelectedPolicy, TransformOpT>::Dispatch(void *, size_t &, InputIteratorT, OutputIteratorT, OffsetT, ReductionOpT, InitT, cudaStream_t, TransformOpT) [with InputIteratorT=pack_t *, OutputIteratorT=pack_t *, OffsetT=unsigned long long, ReductionOpT=customReduce, InitT=int, AccumT=<error-type>, SelectedPolicy=cub::CUB_200400_520_NS::DeviceReducePolicy<<error-type>, unsigned long long, customReduce>, TransformOpT=cuda::std::__4::__identity]" at line 216 of /usr/local/cuda-12/bin/../targets/x86_64-linux/include/cub/device/device_reduce.cuh
instantiation of "cudaError_t cub::CUB_200400_520_NS::DeviceReduce::Reduce(void *, size_t &, InputIteratorT, OutputIteratorT, NumItemsT, ReductionOpT, T, cudaStream_t) [with InputIteratorT=pack_t *, OutputIteratorT=pack_t *, ReductionOpT=customReduce, T=int, NumItemsT=size_t]" at line 81 of main.cu
2 errors detected in the compilation of "main.cu".
How do I create functors for CUB?