Multiply defined device globals (not) causing linkage problems

I’ve encountered a problem, which I’ve reduced to a minimal case. The basic setup is that multiple .cu files define the same `__device__` global variable. When they are linked together, no warnings or errors are reported, but at run time one of the kernels fails with “unknown error”. On my system, bad1 fails with “unknown error”, but bad2 succeeds.

I think the compiler should catch the fact that `__device__ char *buf` is being defined twice and emit an error, or at least a warning. I’m not sure whether it makes sense for two files to define the same `__device__` variable. If not, the compiler should catch it.

Here’s bad1.cu:

#include <stdio.h>

// Device-side global pointer that bad1() points at a device buffer and that
// badKernel1 writes through.
// NOTE(review): bad2.cu defines a __device__ variable with this exact name and
// type. That duplicate definition is the subject of this repro, so it is left
// as-is; presumably giving each copy a unique name or internal linkage
// (`static`) would avoid the clash -- TODO confirm.
__device__ char *buf;

// Stores the character 'x' into the first byte addressed by the device
// global `buf`. Performs no check on `buf`; the host is expected to have
// pointed it at valid device memory before launching.
__global__ void badKernel1() {
	*buf = 'x';
}

// Host driver for the bad1.cu half of the repro.
//
// Allocates a 4096-byte device buffer, stores its address into the device
// global `buf`, launches badKernel1 with a single thread and waits for it.
// The first CUDA failure encountered (API call, launch, or kernel execution)
// is printed as "error: <text>."; later steps are skipped once a step fails.
// The device buffer is released before returning.
void bad1() {
	void *d_Buf = NULL;
	cudaError_t err = cudaMalloc(&d_Buf, 4096);

	if (cudaSuccess == err) {
		// Resolve the device address of the global pointer `buf` so the
		// buffer address can be copied into it.
		void *d_BufPtr = NULL;
		err = cudaGetSymbolAddress(&d_BufPtr, buf);

		if (cudaSuccess == err) {
			// Copies the pointer VALUE (hence sizeof(char *)), not the
			// 4096 bytes it refers to.
			err = cudaMemcpy(d_BufPtr, &d_Buf, sizeof(char *), cudaMemcpyHostToDevice);
		}
	}

	if (cudaSuccess == err) {
		badKernel1<<<1, 1>>>();

		// A launch does not return a status; launch-time failures are
		// picked up here.
		err = cudaGetLastError();
	}

	if (cudaSuccess == err) {
		// Faults raised while the kernel runs surface at this sync.
		// cudaDeviceSynchronize replaces the deprecated cudaThreadSynchronize.
		err = cudaDeviceSynchronize();
	}

	if (cudaSuccess != err) {
		printf("error: %s.\n", cudaGetErrorString(err));
	}

	// Release the buffer on every path; cudaFree on a null pointer is a no-op.
	// The device global `buf` is left holding the freed address, so it must be
	// set again before any further kernel dereferences it.
	cudaFree(d_Buf);
}

Here’s bad2.cu:

#include <stdio.h>

// Device-side global pointer that bad2() points at a device buffer and that
// badKernel2 writes through.
// NOTE(review): bad1.cu defines a __device__ variable with this exact name and
// type. That duplicate definition is the subject of this repro, so it is left
// as-is; presumably giving each copy a unique name or internal linkage
// (`static`) would avoid the clash -- TODO confirm.
__device__ char *buf;

// Stores the character 'x' into the first byte addressed by the device
// global `buf`. Performs no check on `buf`; the host is expected to have
// pointed it at valid device memory before launching.
__global__ void badKernel2() {
	*buf = 'x';
}

// Host driver for the bad2.cu half of the repro.
//
// Allocates a 4096-byte device buffer, stores its address into the device
// global `buf`, launches badKernel2 with a single thread and waits for it.
// The first CUDA failure encountered (API call, launch, or kernel execution)
// is printed as "error: <text>."; later steps are skipped once a step fails.
// The device buffer is released before returning.
void bad2() {
	void *d_Buf = NULL;
	cudaError_t err = cudaMalloc(&d_Buf, 4096);

	if (cudaSuccess == err) {
		// Resolve the device address of the global pointer `buf` so the
		// buffer address can be copied into it.
		void *d_BufPtr = NULL;
		err = cudaGetSymbolAddress(&d_BufPtr, buf);

		if (cudaSuccess == err) {
			// Copies the pointer VALUE (hence sizeof(char *)), not the
			// 4096 bytes it refers to.
			err = cudaMemcpy(d_BufPtr, &d_Buf, sizeof(char *), cudaMemcpyHostToDevice);
		}
	}

	if (cudaSuccess == err) {
		badKernel2<<<1, 1>>>();

		// A launch does not return a status; launch-time failures are
		// picked up here.
		err = cudaGetLastError();
	}

	if (cudaSuccess == err) {
		// Faults raised while the kernel runs surface at this sync.
		// cudaDeviceSynchronize replaces the deprecated cudaThreadSynchronize.
		err = cudaDeviceSynchronize();
	}

	if (cudaSuccess != err) {
		printf("error: %s.\n", cudaGetErrorString(err));
	}

	// Release the buffer on every path; cudaFree on a null pointer is a no-op.
	// The device global `buf` is left holding the freed address, so it must be
	// set again before any further kernel dereferences it.
	cudaFree(d_Buf);
}

And here’s main.cpp:

#include <stdio.h>

void bad1();

void bad2();

// Entry point of the repro. Runs exactly one of the two host drivers,
// chosen at compile time, then waits for input before exiting.
int main(void) {
	// Compile-time switch: with `#if 0` the bad2() path is built;
	// change it to `#if 1` to build the bad1() path instead.
#if 0
	printf("calling bad1...\n");
	bad1();
#else
	printf("calling bad2...\n");
	bad2();
#endif

	// Block until a character can be read from stdin so the output stays
	// visible; the character itself is discarded.
	printf("\nPress ENTER to exit...\n");
	(void)getchar();

	return 0;
}