problems with cudaMemcpyFromSymbol seems to break w/ unrelated changes

The following code breaks at the second assert – after doing some IO. This seems to suggest that the cudaMemcpyFromSymbol call is actually not doing anything at all – the second time. The weird thing is that if I change the initializer value (e.g. from 0 to 1), it works as it should. The code is supposed to allow primitive GPU assert’s – and it worked before, until I changed some seemingly unrelated things.

static __device__ int _failed_line = 0;

static __device__ void gpu_assert(bool value, int line) {

    if (!value) {

        atomicCAS(&_failed_line, 0, line);

    }

}

static void check_gpu_kernel(const char *kernel_name) {

    int failed_line;

    CU_ASSERT(cudaGetLastError());

    // need to sync threads before grabbing the symbol

    CU_ASSERT(cudaThreadSynchronize());

    CU_ASSERT(cudaMemcpyFromSymbol<int>(&failed_line, _failed_line, 1, 0,

              cudaMemcpyDeviceToHost));

    if (failed_line != 0) {

        fprintf(stderr, "FATAL - GPU assertion in kernel %s, line %d\n",

                kernel_name, failed_line);

        exit(1);

    } else {

        fprintf(stderr, "Kernel '%s' ran successfully.\n", kernel_name);

    }

}

// main method

// read in some file, do some arithmetic...

    check_gpu_kernel("initial_check-2");

    std::cout << "hello world" << std::endl;

    check_gpu_kernel("initial_check-1.5");

The output is “FATAL - GPU assertion in kernel initial_check-1.5, line -1208601088”.

Thanks in advance,

Nicholas