Describe the bug
When debugging a simple CUDA program that uses Unified Memory (cudaMallocManaged) with cuda-gdb, the debugger crashes with the internal error "internal-error: create_module: Assertion 'context' failed." immediately after the run command is issued with a breakpoint set.
Environment
- OS: Debian GNU/Linux 13.4 (5.10.16.3-microsoft-standard-WSL2)
- CUDA Toolkit Version: 13.2
- cuda-gdb Version: NVIDIA (R) cuda-gdb 13.2 (Based on GNU gdb 16.3)
- GPU Model: NVIDIA GeForce RTX 3060 Laptop GPU
Steps to Reproduce
- Save the following code as kernel.cu.
- Save the Makefile.
- Compile the code with make:
  make -j
- Start the debugger:
  cuda-gdb main
- Set a breakpoint at line 47 (or on the kernel launch):
  (cuda-gdb) break 47
- Run the program:
  (cuda-gdb) run
# Builds the reproducer executable `main` from kernel.cu.
# -g emits host debug info and -G emits device debug info for cuda-gdb;
# -Xcompiler -Wall forwards -Wall to the host compiler.
NVCC = /usr/local/cuda/bin/nvcc
NVCC_FLAGS = -g -G -Xcompiler -Wall

# `clean` names a command, not a file; keep it runnable even if a file
# called "clean" ever appears in the directory.
.PHONY: clean

main: kernel.cu
	$(NVCC) $(NVCC_FLAGS) $^ -o $@

# Remove the built executable too (the target is `main`, which the old
# `*.o *.exe` patterns never matched), plus any stray objects.
clean:
	rm -f main *.o *.exe
kernel.cu
#include <stdio.h>
#define N 64
#define TPB 32
// Maps index i to the fraction i / (n - 1), so that i = 0 gives 0 and
// i = n - 1 gives 1. The division is carried out in single precision.
float scale(int i, int n)
{
    const int lastIndex = n - 1;
    const float position = static_cast<float>(i);
    return position / lastIndex;
}
// Device-only helper: returns the distance between x1 and x2, computed as
// the square root of their squared difference.
__device__
float distance(float x1, float x2)
{
    const float delta = x2 - x1;
    return sqrt(delta*delta);
}
// Kernel: each thread reads one element of d_in at its flat 1D global
// index, stores that element's distance from ref into d_out at the same
// index, and prints the result.
// There is no bounds check, so the launch must not create more threads
// than d_in and d_out have elements.
__global__
void distanceKernel(float *d_out, float *d_in, float ref)
{
    // Flat global thread index for a 1D grid of 1D blocks.
    const int idx = threadIdx.x + blockDim.x*blockIdx.x;
    const float sample = d_in[idx];

    d_out[idx] = distance(sample, ref);

    // Device-side printf: debugging output only.
    printf("i = %2d: dist from %f to %f is %f.\n", idx, ref, sample, d_out[idx]);
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when status is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
    {
        return true;
    }
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Fills a managed input array with N evenly scaled values, launches
// distanceKernel over it, and waits for the device to finish.
// Returns 0 on success and 1 if any CUDA call reports an error, so a
// runtime failure is visible instead of being silently ignored.
int main()
{
    const float ref = 0.5f;
    float *in = 0;
    float *out = 0;
    int exitCode = 1;

    // Allocate managed memory for input and output arrays
    if (cudaOk(cudaMallocManaged(&in, N*sizeof(float)), "cudaMallocManaged(in)") &&
        cudaOk(cudaMallocManaged(&out, N*sizeof(float)), "cudaMallocManaged(out)"))
    {
        for (int i = 0; i < N; ++i)
        {
            in[i] = scale(i, N);
        }

        distanceKernel<<<N/TPB, TPB>>>(out, in, ref);

        // A launch does not return an error itself: configuration errors
        // surface through cudaGetLastError(), execution errors at the sync.
        if (cudaOk(cudaGetLastError(), "distanceKernel launch") &&
            cudaOk(cudaDeviceSynchronize(), "cudaDeviceSynchronize"))
        {
            exitCode = 0;
        }
    }

    // Pointers that were never allocated are still 0 here.
    cudaFree(in);
    cudaFree(out);
    return exitCode;
}
Expected Behavior
The debugger should properly load the CUDA module, hit the breakpoint without crashing, and allow inspecting variables and the execution context.

