I am trying to use cudaMemcpyToSymbolAsync, but I must be passing the parameters incorrectly. In the function simpleTest, cudaMemcpyToSymbol appears to execute correctly, but cudaMemcpyToSymbolAsync returns error value 11 (invalid argument). What am I doing wrong?
My "big picture" objective is to run calculations in a kernel on stream[1]. After a certain time period has passed, I will use stream[2] to upload a flag to the device. The kernel running in stream[1] will poll this flag periodically; once the flag is set, the kernel will terminate (after performing some finalizing steps). Is there a better way to do this? Which is more efficient for polling: global, constant, or pinned memory?
// Host-side values that simpleTest() fills in and uploads to d_testVar.
// This is an ordinary global array, i.e. pageable (not page-locked) host memory.
unsigned int h_testVar[2];

// Device-side destination symbol for the cudaMemcpyToSymbol* calls.
// The qualifier must be spelled __device__ (with the double underscores);
// a bare `device` is not a CUDA keyword and does not compile.
__device__ unsigned int d_testVar[2];
// Reports a failed CUDA runtime call on stdout.
//
// err      - status returned by the CUDA runtime call being checked.
// myErrLoc - caller-chosen number identifying the call site, printed so the
//            failing call can be located.
//
// Prints nothing when err is cudaSuccess. Does not abort or clear the error;
// the caller decides how to proceed.
void chkCudaReturn(cudaError_t err, unsigned int myErrLoc)
{
    // Compare directly: `!err == cudaSuccess` parses as `(!err) == cudaSuccess`
    // and only gives the right answer because cudaSuccess happens to be 0.
    if (err != cudaSuccess)
    {
        printf("\a\a\n***ERROR CUDA ERROR %u\n", myErrLoc);
        // cudaError_t is an enum; cast so the argument matches %u.
        printf("Error Val %u\n", (unsigned int)err);
        // Never pass a runtime string as the format argument.
        printf("%s\n", cudaGetErrorString(err));
    }
}
// Uploads h_testVar to the device symbol d_testVar twice: once with the
// blocking cudaMemcpyToSymbol and once with cudaMemcpyToSymbolAsync on
// stream[1].
//
// The asynchronous copy is issued from a page-locked (pinned) staging buffer
// instead of directly from h_testVar. h_testVar is a plain global array, i.e.
// pageable memory; older CUDA toolkits reject a pageable source for *Async
// copies with an invalid-argument error, and newer ones accept it but cannot
// overlap the transfer with other work. Pinned memory avoids both problems.
//
// Every runtime call is reported through chkCudaReturn with a distinct
// location number (1-3 keep their original meaning).
void simpleTest()
{
    h_testVar[0] = 100;
    h_testVar[1] = 200;

    // Create the streams, remembering how many succeeded so that only valid
    // handles are used and destroyed later.
    cudaStream_t stream[2];
    int streamsCreated = 0;
    for (int i = 0; i < 2; ++i)
    {
        cudaError_t createErr = cudaStreamCreate(&stream[i]);
        chkCudaReturn(createErr, 10 + i);
        if (createErr != cudaSuccess) break;
        ++streamsCreated;
    }

    // Blocking copy: pageable host memory is fine here.
    chkCudaReturn(cudaMemcpyToSymbol(d_testVar, h_testVar, sizeof(h_testVar), 0,
                                     cudaMemcpyHostToDevice), 1);

    if (streamsCreated == 2)
    {
        // Page-locked staging buffer for the asynchronous copy.
        unsigned int* h_pinned = 0;
        cudaError_t allocErr = cudaMallocHost((void**)&h_pinned, sizeof(h_testVar));
        chkCudaReturn(allocErr, 4);

        if (allocErr == cudaSuccess)
        {
            h_pinned[0] = h_testVar[0];
            h_pinned[1] = h_testVar[1];

            chkCudaReturn(cudaMemcpyToSymbolAsync(d_testVar, h_pinned, sizeof(h_testVar), 0,
                                                  cudaMemcpyHostToDevice, stream[1]), 2);

            // Wait for the copy before releasing the staging buffer; this is
            // also where errors from the asynchronous work surface.
            chkCudaReturn(cudaStreamSynchronize(stream[1]), 5);
            chkCudaReturn(cudaFreeHost(h_pinned), 6);
        }
    }

    chkCudaReturn(cudaGetLastError(), 3);

    // Release the streams that were actually created.
    for (int i = 0; i < streamsCreated; ++i)
        chkCudaReturn(cudaStreamDestroy(stream[i]), 20 + i);
}