CUDA bug in dealing with cudaMemcpyToSymbol

This is a CUDA toolkit bug that exists in both 2.3 and 3.0. (These are the only two versions I have tried.)

For the following code, an “invalid device symbol” error message is always generated, which shouldn’t happen.

#include <stdio.h>

#include <cuda.h>

// CUT_CHECK_ERROR(errorMessage): abort the program if a CUDA runtime error is pending.
//   1. Reads the last error with cudaGetLastError().
//   2. Then calls cudaThreadSynchronize() and checks its return value as well.
// On either failure it prints errorMessage, __FILE__, __LINE__ and
// cudaGetErrorString(err) to stderr and calls exit(EXIT_FAILURE).
// errorMessage is printed with %s, so it must be a C string.
// The body is wrapped in do { ... } while (0) so the expansion is a single statement.
#  define CUT_CHECK_ERROR(errorMessage) do {							 \

	cudaError_t err = cudaGetLastError();								   \

	if( cudaSuccess != err) {										   \

		fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n",	\

				errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\

		exit(EXIT_FAILURE);											   \

	}																   \

	err = cudaThreadSynchronize();										 \

	if( cudaSuccess != err) {										   \

		fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n",	\

				errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\

		exit(EXIT_FAILURE);											   \

	} } while (0)

// CE(call): evaluate `call`, then run CUT_CHECK_ERROR with a fixed
// "------- Error ------\n" message.
// The value of `call` is discarded here; an error is only noticed through the
// checks that CUT_CHECK_ERROR performs afterwards.
#  define CE(call) do {								\

		call;CUT_CHECK_ERROR("------- Error ------\n"); \

	 } while (0)

// Namespace that holds the __constant__ variable used by the reproduction.
namespace HMM_GPU{

// char* variable declared with the __constant__ qualifier; this is the symbol
// that bar() passes to cudaMemcpyToSymbol / cudaMemcpyFromSymbol.
__constant__	char * a;

};

using namespace HMM_GPU;

// Round-trips a pointer value through the symbol `a`:
// first copies sizeof(a) bytes from &x into `a`, then copies sizeof(a) bytes
// from `a` back into &x. sizeof(a) is the size of a char*, i.e. the pointer
// value itself is transferred, not any data it points to.
// x: pointer taken by reference; it is both the source and the destination.
// Each call is wrapped in CE, so a reported CUDA error terminates the program.
// NOTE(review): because `a` has type char*, these calls presumably bind to a
// non-template char* overload rather than a template taking const T& -- confirm
// against the cuda_runtime.h of the toolkit in use.
void bar(char * & x){

		CE(cudaMemcpyToSymbol(a, &x, sizeof(a), 0, cudaMemcpyHostToDevice));

		CE(cudaMemcpyFromSymbol(&x, a, sizeof(a), 0, cudaMemcpyDeviceToHost));

}

// Entry point of the reproduction: passes a null char* through bar() and
// prints the resulting pointer value with %p. Returns 0 if bar() did not exit.
int main(){

		char * x=0;

		bar(x);

		printf("%p\n",x);

		return 0;

}

Doing any one of the following gets rid of the message:

  1. do not use a namespace

  2. change the type of a to void *

  3. change the type of a to int

I looked into the header files, and I think the bug lies in the following code in cuda_runtime.h:

// Non-template overload that takes the symbol as a plain char*.
// It casts the pointer value itself to const char* and forwards every argument
// to another cudaMemcpyToSymbol overload (presumably the const char* one, which
// is not part of this excerpt).
// Defaults: offset = 0, kind = cudaMemcpyHostToDevice.
// NOTE(review): the pointer's value, not its address, is what gets forwarded --
// compare with the template overload, which forwards &symbol.
static __inline__ __host__ cudaError_t cudaMemcpyToSymbol(

		char				*symbol,

  const void				*src,

		size_t			   count,

		size_t			   offset = 0,

		enum cudaMemcpyKind  kind   = cudaMemcpyHostToDevice

)

{

  return cudaMemcpyToSymbol((const char*)symbol, src, count, offset, kind);

}

// Template overload that takes the symbol by const reference for any type T.
// It takes the address of the referenced object, casts that address to
// const char*, and forwards every argument to another cudaMemcpyToSymbol
// overload (presumably the const char* one, which is not part of this excerpt).
// Defaults: offset = 0, kind = cudaMemcpyHostToDevice.
template<class T>

__inline__ __host__ cudaError_t cudaMemcpyToSymbol(

  const T				   &symbol,

  const void				*src,

		size_t			   count,

		size_t			   offset = 0,

		enum cudaMemcpyKind  kind   = cudaMemcpyHostToDevice

)

{

  return cudaMemcpyToSymbol((const char*)&symbol, src, count, offset, kind);

}

I commented out the first definition (and the corresponding definition for cudaMemcpyFromSymbol), and the code then runs without the error.

There are some things I don’t understand, though:

  1. Why does the error not occur when I don’t use a namespace?

  2. What is the first definition for?

  3. In the second definition, the address of symbol is taken and converted to const char *. What is that for?