Dear all, I have a simple piece of code to which I want to apply a template programming technique that uses an enum as a compile-time parameter. To keep it separate from other modules, I put the enum in a namespace.
However, the version without the namespace compiles with nvcc, while the version with the namespace gives a compile error.
This is the code with the namespace:
#include <cuda.h>
#include <iostream>
namespace foo {

// Compile-time selector; Copy_kernel takes a value of this type as its
// `strategy` template argument.
enum bar {
  BAR1 = 0,
  BAR2 = 1,
  BAR3 = 2
};

}  // namespace foo
// Copies the first n elements of d_i into d_o.
//
// Template parameters:
//   T        - element type.
//   strategy - compile-time selector; the body does not use it yet.
//
// Launch layout: 1-D grid of 1-D blocks of any size. Each thread starts at
// its global index and advances by the total number of launched threads, so
// every element in [0, n) is copied no matter how the grid relates to n.
// Uses no shared memory. Does nothing when n <= 0.
template<typename T, foo::bar strategy>
__global__ void Copy_kernel(T* d_o, T* d_i, int n) {
  // 64-bit indices: index + stride must not overflow when n is near INT_MAX.
  const long long count = n;
  const long long stride = (long long)gridDim.x * blockDim.x;
  for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x;
       i < count; i += stride) {
    d_o[i] = d_i[i];
  }
}

// Launches Copy_kernel<T, foo::BAR1> on stream 0 to copy n elements from the
// device buffer d_i to the device buffer d_o.
//
// Parameters:
//   d_o - destination device pointer, at least n elements.
//   d_i - source device pointer, at least n elements.
//   n   - number of elements to copy; n <= 0 is a no-op.
//
// The launch is asynchronous: only launch-configuration errors are detected
// here (and reported on stderr); execution errors surface at the caller's
// next synchronizing CUDA call.
template<typename T>
void Copy(T* d_o, T* d_i, int n) {
  if (n <= 0) {
    return;  // a zero-block grid is an invalid launch configuration
  }

  const int kThreadsPerBlock = 256;
  // 65535 is the per-dimension grid limit on compute capability < 3.0; the
  // kernel's stride loop covers whatever the capped grid does not reach.
  const int kMaxBlocks = 65535;

  // Ceil-divide without computing n + 255, which could overflow int.
  int num_blocks = n / kThreadsPerBlock + (n % kThreadsPerBlock != 0 ? 1 : 0);
  if (num_blocks > kMaxBlocks) {
    num_blocks = kMaxBlocks;
  }

  dim3 threads(kThreadsPerBlock);
  dim3 grids(num_blocks);
  Copy_kernel<T, foo::BAR1><<<grids, threads, 0, 0 >>>(d_o, d_i, n);

  const cudaError_t launch_status = cudaGetLastError();
  if (launch_status != cudaSuccess) {
    std::cerr << "Copy_kernel launch failed: "
              << cudaGetErrorString(launch_status) << std::endl;
  }
}
// Entry point: allocates two device float buffers of num_elements each,
// copies one into the other with Copy<float>, and reports any CUDA error.
//
// Usage: <program> num_elements
// Returns 0 on success, 1 on bad arguments or on any CUDA failure.
int main(int argc, char** argv) {
  if (argc < 2) {
    std::cerr << argv[0] << " num_elements" << std::endl;
    return 1;  // without this, atoi(argv[1]) below would read a null pointer
  }

  int n = atoi(argv[1]);
  if (n <= 0) {
    // atoi yields 0 for non-numeric text, so this also rejects garbage input.
    std::cerr << "num_elements must be a positive integer" << std::endl;
    return 1;
  }

  // Zero-initialised so the cudaFree calls below are safe on every path.
  float* dA = 0;
  float* dB = 0;

  cudaError_t status = cudaMalloc((void**)&dA, n * sizeof(float));
  if (status == cudaSuccess) {
    status = cudaMalloc((void**)&dB, n * sizeof(float));
  }
  if (status == cudaSuccess) {
    Copy<float>(dA, dB, n);
    // The kernel runs asynchronously; synchronise to surface execution errors.
    status = cudaDeviceSynchronize();
  }

  int exit_code = 0;
  if (status != cudaSuccess) {
    std::cerr << "CUDA error: " << cudaGetErrorString(status) << std::endl;
    exit_code = 1;
  }

  // cudaFree on a null pointer performs no operation.
  cudaFree(dA);
  cudaFree(dB);
  return exit_code;
}
and a similar one without the namespace:
#include <cuda.h>
#include <iostream>
// Compile-time selector; Copy_kernel takes a value of this type as its
// `strategy` template argument.
// (The unmatched `#endif` that followed this enum was removed: there is no
// opening `#if` in this listing, so it is a preprocessor error.)
enum bar {
  BAR1,
  BAR2,
  BAR3
};
// Copies the first n elements of d_i into d_o.
//
// Template parameters:
//   T        - element type.
//   strategy - compile-time selector; the body does not use it yet.
//
// Launch layout: 1-D grid of 1-D blocks of any size. Each thread starts at
// its global index and advances by the total number of launched threads, so
// every element in [0, n) is copied no matter how the grid relates to n.
// Uses no shared memory. Does nothing when n <= 0.
template<typename T, bar strategy>
__global__ void Copy_kernel(T* d_o, T* d_i, int n) {
  // 64-bit indices: index + stride must not overflow when n is near INT_MAX.
  const long long count = n;
  const long long stride = (long long)gridDim.x * blockDim.x;
  for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x;
       i < count; i += stride) {
    d_o[i] = d_i[i];
  }
}

// Launches Copy_kernel<T, BAR1> on stream 0 to copy n elements from the
// device buffer d_i to the device buffer d_o.
//
// Parameters:
//   d_o - destination device pointer, at least n elements.
//   d_i - source device pointer, at least n elements.
//   n   - number of elements to copy; n <= 0 is a no-op.
//
// The launch is asynchronous: only launch-configuration errors are detected
// here (and reported on stderr); execution errors surface at the caller's
// next synchronizing CUDA call.
template<typename T>
void Copy(T* d_o, T* d_i, int n) {
  if (n <= 0) {
    return;  // a zero-block grid is an invalid launch configuration
  }

  const int kThreadsPerBlock = 256;
  // 65535 is the per-dimension grid limit on compute capability < 3.0; the
  // kernel's stride loop covers whatever the capped grid does not reach.
  const int kMaxBlocks = 65535;

  // Ceil-divide without computing n + 255, which could overflow int.
  int num_blocks = n / kThreadsPerBlock + (n % kThreadsPerBlock != 0 ? 1 : 0);
  if (num_blocks > kMaxBlocks) {
    num_blocks = kMaxBlocks;
  }

  dim3 threads(kThreadsPerBlock);
  dim3 grids(num_blocks);
  Copy_kernel<T, BAR1><<<grids, threads, 0, 0 >>>(d_o, d_i, n);

  const cudaError_t launch_status = cudaGetLastError();
  if (launch_status != cudaSuccess) {
    std::cerr << "Copy_kernel launch failed: "
              << cudaGetErrorString(launch_status) << std::endl;
  }
}
// Entry point: allocates two device float buffers of num_elements each,
// copies one into the other with Copy<float>, and reports any CUDA error.
//
// Usage: <program> num_elements
// Returns 0 on success, 1 on bad arguments or on any CUDA failure.
int main(int argc, char** argv) {
  if (argc < 2) {
    std::cerr << argv[0] << " num_elements" << std::endl;
    return 1;  // without this, atoi(argv[1]) below would read a null pointer
  }

  int n = atoi(argv[1]);
  if (n <= 0) {
    // atoi yields 0 for non-numeric text, so this also rejects garbage input.
    std::cerr << "num_elements must be a positive integer" << std::endl;
    return 1;
  }

  // Zero-initialised so the cudaFree calls below are safe on every path.
  float* dA = 0;
  float* dB = 0;

  cudaError_t status = cudaMalloc((void**)&dA, n * sizeof(float));
  if (status == cudaSuccess) {
    status = cudaMalloc((void**)&dB, n * sizeof(float));
  }
  if (status == cudaSuccess) {
    Copy<float>(dA, dB, n);
    // The kernel runs asynchronously; synchronise to surface execution errors.
    status = cudaDeviceSynchronize();
  }

  int exit_code = 0;
  if (status != cudaSuccess) {
    std::cerr << "CUDA error: " << cudaGetErrorString(status) << std::endl;
    exit_code = 1;
  }

  // cudaFree on a null pointer performs no operation.
  cudaFree(dA);
  cudaFree(dB);
  return exit_code;
}
I can compile the second version without any problem:
nvcc -o copyTest copyTest.cu -O3
but with the first version I get the following errors:
In file included from /tmp/tmpxft_000012a9_00000000-1_copyTest.cudafe1.stub.c:2:
/tmp/tmpxft_000012a9_00000000-1_copyTest.cudafe1.stub.c:6: error: ‘_ZN3foo4BAR1E’ was not declared in this scope
/tmp/tmpxft_000012a9_00000000-1_copyTest.cudafe1.stub.c:6: error: template-id ‘__wrapper__device_stub_Copy_kernel<float, >’ for ‘void __wrapper__device_stub_Copy_kernel(float*&, float*&, int&)’ does not match any template declaration
My system is Ubuntu 10.10 with nvcc 4.0 (V0.2.121). I also tested on CentOS 6.0, and the same problem appears there.
It seems that nvcc has a problem with enums declared inside a namespace.
Could someone give me a solution for this problem?
Thank you very much