Hello everyone,
I am trying to write a c++ wrapper for a global device memory buffer. In the constructor I allocate memory using cudaMalloc and in the destructor I want to free the memory only if the destructor gets called on host side. I read about the CUDA_ARCH macro and tried to apply this to my code. In free functions the macro works perfectly fine and also when compiling the macro seems to be present.
But when the program gets executed it seems as if the preprocessor did something wrong when an object method gets called.
Here is the code I am compiling:
class cuda_buffer {
size_t n = 0;
float* dev_buffer = nullptr;
public:
cuda_buffer(size_t _n);
__host__ __device__ ~cuda_buffer();
};
//==============================================================================
/// allocate memory on gpu
cuda_buffer::cuda_buffer(size_t _n) : n{_n} {
cudaMalloc(&dev_buffer, sizeof(float) * n);
}
//------------------------------------------------------------------------------
/// free memory when object is deleted on host but do not free when on device
/// FIXME __CUDA_ARCH__ seems not to work in methods of classes
__host__ __device__ cuda_buffer::~cuda_buffer() {
#if defined(__CUDA_ARCH__)
#warning compiling device destructor
printf("do not free on device %i\n", __CUDA_ARCH__);
#else
#warning compiling host destructor
printf("free on host\n");
cudaFree(dev_buffer);
dev_buffer = nullptr;
#endif
}
//==============================================================================
/// __CUDA_ARCH__ works fine in free functions
__host__ __device__ void foo(int i) {
#if defined(__CUDA_ARCH__)
#warning compiling device foo
printf("foo on device %i\n", __CUDA_ARCH__);
#else
#warning compiling host foo
printf("foo on host %i\n", i);
#endif
}
__global__ void kernel(cuda_buffer copy) {
foo(1);
// copy gets destroyed here; host code is executed on device
}
int main() {
cuda_buffer original(8);
kernel<<<3,1>>>(original); // foo gets called three times and correctly uses device code
foo(2); // foo gets called on host and uses host code correctly
// original gets destroyed here; host code is executed as wished
}
This is the compiler output:
cuda_arch.cu:22:2: warning: #warning compiling device destructor [-Wcpp]
#warning compiling device destructor
^~~~~~~
cuda_arch.cu:35:2: warning: #warning compiling device foo [-Wcpp]
#warning compiling device foo
^~~~~~~
cuda_arch.cu:25:2: warning: #warning compiling host destructor [-Wcpp]
#warning compiling host destructor
^~~~~~~
cuda_arch.cu:38:2: warning: #warning compiling host foo [-Wcpp]
#warning compiling host foo
^~~~~~~
This is what the program prints:
free on host
foo on device 300
foo on device 300
foo on device 300
foo on host 2
free on host
The first line should be “do not free on device 300”
And some information about my system:
nvcc --version:
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2019 NVIDIA Corporation
Built on Sun_Jul_28_19:07:16_PDT_2019
Cuda compilation tools, release 10.1, V10.1.243
uname -r
5.3.5-arch1-1-ARCH
compiled with the command
nvcc cuda_arch.cu -O0
Is this a known issue or am I doing something wrong?