The following code breaks at the second assert – after doing some IO. This seems to suggest that the cudaMemcpyFromSymbol call is actually not doing anything at all – the second time. The weird thing is that if I change the initializer value (e.g. from 0 to 1), it works as it should. The code is supposed to allow primitive GPU assert’s – and it worked before, until I changed some seemingly unrelated things.
static __device__ int _failed_line = 0;
static __device__ void gpu_assert(bool value, int line) {
if (!value) {
atomicCAS(&_failed_line, 0, line);
}
}
static void check_gpu_kernel(const char *kernel_name) {
int failed_line;
CU_ASSERT(cudaGetLastError());
// need to sync threads before grabbing the symbol
CU_ASSERT(cudaThreadSynchronize());
CU_ASSERT(cudaMemcpyFromSymbol<int>(&failed_line, _failed_line, 1, 0,
cudaMemcpyDeviceToHost));
if (failed_line != 0) {
fprintf(stderr, "FATAL - GPU assertion in kernel %s, line %d\n",
kernel_name, failed_line);
exit(1);
} else {
fprintf(stderr, "Kernel '%s' ran successfully.\n", kernel_name);
}
}
// main method
// read in some file, do some arithmetic...
check_gpu_kernel("initial_check-2");
std::cout << "hello world" << std::endl;
check_gpu_kernel("initial_check-1.5");
The output is “FATAL - GPU assertion in kernel initial_check-1.5, line -1208601088”.
Thanks in advance,
Nicholas