Environment : nvcc 3.0 Beta + gcc 4.3.4, Ubuntu 9.10, X64
Hi,
I’m trying to use templates to do some loop unrolling and I’m running into problems. The following code is a simplified version:
// Host-side wrapper whose template argument selects which cudaFDTD
// instantiation is launched.
// NOTE(review): `cudaFDTD<numberSplices>` only parses if cudaFDTD is already
// declared as a template at this point -- confirm the declaration order in the real file.
template <int numberSplices>
class dummycudaFDTD
{
public:
    // Launches cudaFDTD<numberSplices> with the given grid/block configuration,
    // passing cudaStruct through unchanged.
    static void DoIt(dim3 dimGrid, dim3 dimBlock, strAW cudaStruct)
    {
        cudaFDTD<numberSplices><<<dimGrid, dimBlock>>>(cudaStruct);
    }
};
cudaFDTD is a global function declared as
// Kernel template: the splice count is a compile-time template parameter. Body elided in this post.
template <int numberOfSplices> __global__ void cudaFDTD(strAW param) { ... }
Later in the code I invoke the template function by
dummycudaFDTD<3>::DoIt(…)
and nvcc shows
error: ‘cudaFDTD’ was not declared in this scope
If I change “cudaFDTD<numberSplices>” to “cudaFDTD<3>” (or any other literal value), there is no compilation problem. In principle, the template expansion should be doing the same thing.
My guess is that the order of pre-processing between g++ and nvcc is going wrong somewhere. The code also compiles if the template cudaFDTD does not have the __global__ qualifier, such as
// Same template shape as the kernel, but declared without the __global__ qualifier. Body elided in this post.
template <int numberOfSplices> void ReallyDummyCudaFDTD(strAW param) { ...}
Any thoughts? For the moment, I’m unrolling with a very ugly
...
// Hand-written dispatch: each case launches the cudaFDTD instantiation whose
// template argument equals the runtime value of numberOfSplices.
switch(numberOfSplices)
{
case 1:
cudaFDTD<1><<<dimGrid,dimBlock>>>(cudaStruct);
break;
case 2:
cudaFDTD<2><<<dimGrid,dimBlock>>>(cudaStruct);
break;
case 3:
cudaFDTD<3><<<dimGrid,dimBlock>>>(cudaStruct);
break;
...
This could be replaced by a much cleaner template such as
// Maps the runtime value numberSplices onto a compile-time kernel instantiation
// by stepping N downward, one instantiation per call, until the two match.
//
// dimGrid / dimBlock : launch configuration forwarded to the kernel.
// numberSplices      : runtime splice count to match against N.
// cudaStruct         : argument forwarded unchanged to the kernel.
template <int N>
static inline void UnRollFDTD(dim3 dimGrid, dim3 dimBlock, int numberSplices, strAW cudaStruct)
{
    // No match at this level: defer to the next lower instantiation.
    if (numberSplices != N)
    {
        UnRollFDTD<N - 1>(dimGrid, dimBlock, numberSplices, cudaStruct);
        return;
    }

    cudaFDTD<N><<<dimGrid, dimBlock>>>(cudaStruct);
}
// Terminating case of the UnRollFDTD recursion.
//
// numberSplices is not inspected here, so every value that matched no higher N
// (including values below 1 or above the starting N) launches cudaFDTD<1>.
//
// `static` is deliberately absent: an explicit specialization may not carry a
// storage-class specifier (GCC: "explicit template specialization cannot have
// a storage class"). `inline` is a function specifier and is kept.
template <>
inline void UnRollFDTD<1>(dim3 dimGrid, dim3 dimBlock, int numberSplices, strAW cudaStruct)
{
    cudaFDTD<1><<<dimGrid, dimBlock>>>(cudaStruct);
}
...
UnRollFDTD<100>(dimGrid, dimBlock, numberSplices,cudaStruct); // this replaces a 100 cases switch clause.
Thanks,
Sam