cuda and partial specialization template

Hi,

I’m getting troubles compiling device function with partial specialization template

Here is the code :

template<typename T,int shared_mem_size,int shared_mem_size_it>

struct warperMax2;

template<typename T,int shared_mem_size>

struct warperMax2<T,shared_mem_size,0> {

	static __device__ void warp_reduce() {

		return;

	} 

	

};

template<typename T,int shared_mem_size,int shared_mem_size_it>

struct warperMax2 {

	static __device__ void warp_reduce(T smem[shared_mem_size]) {

		smem[threadIdx.x] = smem[threadIdx.x+shared_mem_size_it/2] > smem[threadIdx.x] ? 

					smem[threadIdx.x+shared_mem_size/2] : smem[threadIdx.x];

                __syncthreads();

		warperMax2<T,shared_mem_size,(shared_mem_size_it/2)>::warp_reduce(smem);

	} 

};

template<typename T,int shared_mem_size,int shared_mem_size_it>

__device__ void warp_reduce_max3( T smem[shared_mem_size]){

			warperMax2<T,shared_mem_size,shared_mem_size_it>::warp_reduce(smem);

}

the compiler show the following error when i use warp_reduce_max3 inside a kernel

error : too many arguments

for line :

warperMax2<T,shared_mem_size,(shared_mem_size_it/2)>::warp_reduce(smem);

ran some test with a similar host functions instead of device and it worked,

someone got an idea about how to resolve the problem ?

Testi