Hi,
I’m getting troubles compiling device function with partial specialization template
Here is the code :
template<typename T,int shared_mem_size,int shared_mem_size_it>
struct warperMax2;
template<typename T,int shared_mem_size>
struct warperMax2<T,shared_mem_size,0> {
static __device__ void warp_reduce() {
return;
}
};
template<typename T,int shared_mem_size,int shared_mem_size_it>
struct warperMax2 {
static __device__ void warp_reduce(T smem[shared_mem_size]) {
smem[threadIdx.x] = smem[threadIdx.x+shared_mem_size_it/2] > smem[threadIdx.x] ?
smem[threadIdx.x+shared_mem_size/2] : smem[threadIdx.x];
__syncthreads();
warperMax2<T,shared_mem_size,(shared_mem_size_it/2)>::warp_reduce(smem);
}
};
template<typename T,int shared_mem_size,int shared_mem_size_it>
__device__ void warp_reduce_max3( T smem[shared_mem_size]){
warperMax2<T,shared_mem_size,shared_mem_size_it>::warp_reduce(smem);
}
the compiler show the following error when i use warp_reduce_max3 inside a kernel
error : too many arguments
for line :
warperMax2<T,shared_mem_size,(shared_mem_size_it/2)>::warp_reduce(smem);
ran some test with a similar host functions instead of device and it worked,
someone got an idea about how to resolve the problem ?
Testi