I have just found that this bug has been fixed in the 2.2 beta, but I cannot find a link to delete this post. :-(
It seems that type inference for global template functions inside a namespace has a problem. Compiling the following code with “nvcc test.cu” produces these error messages:
$ nvcc test.cu
In file included from test.cu:26:
/tmp/tmpxft_00003a06_00000000-1_test.cudafe1.stub.c:20: error: specialization of ‘template<class T> void NS::__globfunc_bar(T, int*)’ in different namespace
/tmp/tmpxft_00003a06_00000000-1_test.cudafe1.stub.h:9: error: from definition of ‘template<class T> void NS::__globfunc_bar(T, int*)’
Currently, as a workaround, I have to specify the template parameters explicitly in the call to the global function.
OS: Fedora Core 10, x86
CUDA 2.1
#include <cuda.h>
#include <stdio.h>
namespace NS {

// Kernel template that stores sizeof(T) into arr[0].
//
//   n   - never read in the body; only its type matters, because it is what
//         lets a caller have T deduced from the argument list.
//   arr - pointer the kernel writes a single int through.
//
// Every launched thread performs the same store to arr[0].
template <typename T>
__global__ void bar(T n, int *arr)
{
    *arr = sizeof(T);
}

}  // namespace NS
using namespace NS;  // bar is intentionally called unqualified in main()

// Reports a failed CUDA runtime call on stderr.
// Returns true when err is cudaSuccess, false otherwise.
static bool cudaSucceeded(cudaError_t err, const char *what)
{
    if (err == cudaSuccess)
        return true;
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(err));
    return false;
}

// Launches NS::bar with a deduced template argument and prints the value the
// kernel stored (sizeof(T) for the deduced T).
// Returns 0 on success, 1 if any CUDA runtime call or the launch failed.
int main()
{
    int *arr = NULL;
    if (!cudaSucceeded(cudaMalloc((void**)&arr, sizeof(int)), "cudaMalloc"))
        return 1;

    // T is deduced from the literal 0 (int). The template argument is left
    // implicit on purpose: this deduced, unqualified call to a namespaced
    // __global__ template is the construct being demonstrated.
    bar<<<1,1>>>(0, arr);
    // A kernel launch returns no status itself; launch errors are fetched here.
    bool ok = cudaSucceeded(cudaGetLastError(), "bar launch");

    int temp = 0;
    ok = ok && cudaSucceeded(
        cudaMemcpy(&temp, arr, sizeof(int), cudaMemcpyDeviceToHost),
        "cudaMemcpy");

    // Release the device buffer on every path that allocated it.
    ok = cudaSucceeded(cudaFree(arr), "cudaFree") && ok;

    if (!ok)
        return 1;

    printf("%d\n", temp);
    return 0;
}