I’m trying to set up VSCode + WSL as my dev environment so that I can use cuda-gdb to debug kernels. I’ve managed to get everything installed and I can build and run CUDA applications, but I can only debug CPU code using cuda-gdb. This isn’t limited to VSCode either: if I run my app through cuda-gdb on the command line and add a breakpoint inside the kernel code, it sets the breakpoint on the closing bracket of the kernel instead of on the line I picked.
tasks.json
{
"version": "2.0.0",
"tasks": [
{
"type": "cmake",
"label": "CMake: configure",
"command": "configure",
"problemMatcher": [],
"detail": "CMake template configure task"
},
{
"type": "shell",
"label": "Build",
"command": "make dbg=1",
"group": {
"kind": "build",
"isDefault": true,
},
"problemMatcher": ["$nvcc"],
"dependsOn": ["CMake: configure"],
"options": {
"cwd": "${workspaceFolder}/build"
}
}
]
}
launch.json
{
"configurations": [
{
"name": "CUDA C++: Launch",
"type": "cuda-gdb",
"request": "launch",
"program": "${workspaceFolder}/build/ISP_AMF",
}
]
}
The output of cuda-gdb when used via the command line:
(cuda-gdb) break main.cu:6
Breakpoint 1 at 0xb0a7: file /mnt/c/Dev/ISP-AMF/main.cu, line 8.
(cuda-gdb)
The default saxpy kernel code
#include <stdio.h>
// SAXPY kernel: each thread updates at most one element, y[idx] = a*x[idx] + y[idx].
//   n - number of elements to process; threads whose index is >= n do nothing
//   a - scalar multiplier applied to x
//   x - input array, read at idx
//   y - input/output array, read and overwritten at idx
// The element index is derived from the x-dimension of the block and thread
// indices only.
__global__
void saxpy(int n, float a, float *x, float *y)
{
    const int idx = blockIdx.x*blockDim.x + threadIdx.x;
    // Guard the tail: the grid may contain more threads than elements.
    if (idx >= n)
        return;
    y[idx] = a*x[idx] + y[idx];
}
// Reports a failed CUDA runtime call on stderr.
//   status - return code of the CUDA runtime call being checked
//   what   - short label for the call, used in the diagnostic
// Returns true when status is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Host driver: fills x with 1.0f and y with 2.0f (1<<20 elements each), runs
// saxpy with a = 2.0f on the device, copies y back and prints the largest
// deviation from the expected value 4.0f.
// Returns 0 on success and 1 if a host allocation or any CUDA call fails.
int main(void)
{
    const int N = 1<<20;
    const size_t bytes = N*sizeof(float);

    float *x = (float*)malloc(bytes);
    float *y = (float*)malloc(bytes);
    float *d_x = NULL, *d_y = NULL;

    bool ok = (x != NULL) && (y != NULL);
    if (!ok)
        fprintf(stderr, "host allocation of %zu bytes failed\n", bytes);

    // Each step runs only if every previous one succeeded (short-circuit &&).
    ok = ok && cudaOk(cudaMalloc(&d_x, bytes), "cudaMalloc(d_x)");
    ok = ok && cudaOk(cudaMalloc(&d_y, bytes), "cudaMalloc(d_y)");

    if (ok) {
        for (int i = 0; i < N; i++) {
            x[i] = 1.0f;
            y[i] = 2.0f;
        }
        ok = cudaOk(cudaMemcpy(d_x, x, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(d_x)")
          && cudaOk(cudaMemcpy(d_y, y, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(d_y)");
    }

    if (ok) {
        // Perform SAXPY on 1M elements
        saxpy<<<(N+255)/256, 256>>>(N, 2.0f, d_x, d_y);
        // A kernel launch returns no status itself: launch failures are read
        // back with cudaGetLastError(), and the blocking copy that follows
        // reports errors raised while the kernel was executing.
        ok = cudaOk(cudaGetLastError(), "saxpy launch")
          && cudaOk(cudaMemcpy(y, d_y, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(y)");
    }

    if (ok) {
        float maxError = 0.0f;
        for (int i = 0; i < N; i++)
            maxError = max(maxError, abs(y[i]-4.0f));
        printf("Max error: %f\n", maxError);
    }

    // Cleanup runs on every path; freeing a null pointer is a no-op for both
    // cudaFree and free.
    cudaFree(d_x);
    cudaFree(d_y);
    free(x);
    free(y);
    return ok ? 0 : 1;
}
Based on other, similar posts, I have already checked the Windows registry (regedit) for the key that is commonly missing or set to false, and it is set to 1. So I am at a bit of a loss as to what to do to get kernel debugging working. Any help would be appreciated.