Templates and global functions for unrolling

Environment : nvcc 3.0 Beta + gcc 4.3.4, Ubuntu 9.10, X64

Hi,

I’m trying to use templates to do some loop unrolling and I’m having problems. The following code is a simplified version:

template <int numberSplices> class dummycudaFDTD

  {

	public:

	static void DoIt(dim3 dimGrid, dim3 dimBlock,strAW cudaStruct) 

	{

	  cudaFDTD<numberSplices> <<<dimGrid, dimBlock>>>(cudaStruct);

	};

	

};

cudaFDTD is a __global__ function template declared as:

template <int numberOfSplices> __global__  void cudaFDTD(strAW param) { ... }

Later in the code I invoke the template function by

dummycudaFDTD<3>::DoIt(…)

and nvcc shows

error: ‘cudaFDTD’ was not declared in this scope

If I change “cudaFDTD<numberSplices>” to “cudaFDTD<3>” (or any other constant), there is no compilation problem. In principle, the template expansion should produce exactly the same thing.

My guess is that the order of preprocessing between g++ and nvcc is going wrong somewhere; the code also compiles if the template does not have the __global__ keyword, such as:

template <int numberOfSplices> void ReallyDummyCudaFDTD(strAW param) { ...}

Any thoughts? For the moment, I’m unrolling with a very ugly switch statement:

...

 switch(numberOfSplices)

	  {

		case 1:

		  cudaFDTD<1><<<dimGrid,dimBlock>>>(cudaStruct);

		  break;

		case 2:

		  cudaFDTD<2><<<dimGrid,dimBlock>>>(cudaStruct);

		  break;

		case 3:

		  cudaFDTD<3><<<dimGrid,dimBlock>>>(cudaStruct);

		  break;

...

This could be replaced by a much cleaner template such as:

template <int N> static inline void UnRollFDTD(dim3 dimGrid, dim3 dimBlock, int numberSplices, strAW  cudaStruct)

	 { 

	   if (N==numberSplices)

	   {

	 cudaFDTD<N><<<dimGrid,dimBlock>>>(cudaStruct);

	

	   }

	   else

	   {

	 UnRollFDTD<N-1>(dimGrid, dimBlock,  numberSplices,cudaStruct);

	   }

	 };

template<> static inline  void UnRollFDTD<1>(dim3 dimGrid, dim3 dimBlock, int numberSplices,strAW cudaStruct)

	 { 

	   cudaFDTD<1><<<dimGrid,dimBlock>>>(cudaStruct);

	

	 };

...

UnRollFDTD<100>(dimGrid, dimBlock, numberSplices,cudaStruct); // this replaces a 100 cases switch clause.

Thanks,

Sam