Cuda-gdb aborted

version

exec: /usr/local/cuda/bin/cuda-gdb-python3.10-tui --version

NVIDIA (R) cuda-gdb 12.6

Portions Copyright (C) 2007-2024 NVIDIA Corporation

Based on GNU gdb 13.2

Copyright (C) 2023 Free Software Foundation, Inc.

License GPLv3+: GNU GPL version 3 or later http://gnu.org/licenses/gpl.html

This is free software: you are free to change and redistribute it.

There is NO WARRANTY, to the extent permitted by law.

code

#include <cuda.h>
#include <stdio.h>

// Compute the element-wise vector sum C = A + B.
// Each thread performs one pair-wise addition.
//
// Parameters:
//   A, B - input vectors, read at index i
//   C    - output vector, written at index i
//   n    - number of elements to process
//
// Only the .x components of threadIdx/blockDim/blockIdx are used, so the
// kernel expects a 1D launch. Threads whose global index is >= n do nothing,
// which lets the host round the grid size up past n.
__global__ void vecAddKernel(float *A, float *B, float *C, int n) {
    // Flat global index of this thread within the 1D grid.
    int i = threadIdx.x + blockDim.x * blockIdx.x;
    if (i < n) {
        C[i] = A[i] + B[i];
    }
}

// Error-checking helper for CUDA runtime calls, taken from the Stack Overflow
// answer "What is the canonical way to check for errors using the CUDA
// runtime API?".
//
// gpuErrchk(ans) forwards the cudaError_t expression `ans` to gpuAssert
// together with the file name and line number of the call site. The
// do { } while (0) wrapper makes the macro behave like a single statement
// (safe inside an unbraced if/else).
#define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)

// Report a failed CUDA call.
//
// Parameters:
//   code  - status returned by the CUDA call being checked
//   file  - source file of the call site (normally __FILE__)
//   line  - source line of the call site (normally __LINE__)
//   abort - when true (the default) the process exits with `code` as its
//           exit status after printing; when false only the message is printed
//
// Does nothing when code == cudaSuccess.
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true) {
    if (code != cudaSuccess) {
        fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
        if (abort) {
            exit(code);
        }
    }
}

// Ceiling division: returns the smallest q such that q * b >= a.
//
// Written as quotient plus a remainder correction rather than
// (a + b - 1) / b, because the latter wraps around for large `a`
// (e.g. a = UINT_MAX, b = 2 would yield 0).
//
// Precondition: b != 0 (division by zero is not guarded here).
inline unsigned int cdiv(unsigned int a, unsigned int b) {
    return a / b + (a % b != 0 ? 1u : 0u);
}

// Host wrapper that computes C = A + B for n floats on the GPU.
//
// Parameters:
//   A, B - host input arrays of at least n floats
//   C    - host output array of at least n floats
//   n    - number of elements; when n <= 0 the function returns without
//          touching the device (a zero-block launch would be rejected)
//
// Steps: allocate three device buffers, copy A and B to the device, launch
// vecAddKernel with 256 threads per block and enough blocks to cover n,
// copy the result back into C, then free the device buffers.
//
// Every CUDA runtime call is checked with gpuErrchk, so any failure prints
// a message and terminates the process instead of being silently ignored.
void vecAdd(float *A, float *B, float *C, int n) {
    if (n <= 0) {
        return;
    }

    float *A_d = nullptr;
    float *B_d = nullptr;
    float *C_d = nullptr;
    // Widen before multiplying so the byte count is computed in size_t.
    const size_t size = static_cast<size_t>(n) * sizeof(float);

    gpuErrchk(cudaMalloc((void **)&A_d, size));
    gpuErrchk(cudaMalloc((void **)&B_d, size));
    gpuErrchk(cudaMalloc((void **)&C_d, size));

    gpuErrchk(cudaMemcpy(A_d, A, size, cudaMemcpyHostToDevice));
    gpuErrchk(cudaMemcpy(B_d, B, size, cudaMemcpyHostToDevice));

    const unsigned int numThreads = 256;
    // Round up so a trailing partial block covers the last elements.
    const unsigned int numBlocks = cdiv(static_cast<unsigned int>(n), numThreads);

    vecAddKernel<<<numBlocks, numThreads>>>(A_d, B_d, C_d, n);
    // Launch-configuration errors show up here ...
    gpuErrchk(cudaPeekAtLastError());
    // ... while errors raised during kernel execution show up at the sync.
    gpuErrchk(cudaDeviceSynchronize());

    gpuErrchk(cudaMemcpy(C, C_d, size, cudaMemcpyDeviceToHost));

    gpuErrchk(cudaFree(A_d));
    gpuErrchk(cudaFree(B_d));
    gpuErrchk(cudaFree(C_d));
}

// Demo driver: builds two 1000-element vectors, adds them with vecAdd, and
// prints the result ten values per line, separated by ", ".
int main() {
    const int n = 1000;
    float A[n];
    float B[n];
    float C[n];

    // generate some dummy vectors to add
    for (int i = 0; i < n; i += 1) {
        A[i] = float(i);
        B[i] = A[i] / 1000.0f;
    }

    vecAdd(A, B, C, n);

    // print result
    for (int i = 0; i < n; i += 1) {
        if (i > 0) {
            // Separator goes before every value except the first ...
            printf(", ");
            // ... and a line break is inserted after every tenth value.
            if (i % 10 == 0) {
                printf("\n");
            }
        }
        printf("%8.3f", C[i]);
    }
    printf("\n");
    return 0;
}

compile

nvcc -g -G vector_add.cu -o vector_add

device

Wed Nov 20 02:18:33 2024
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15 Driver Version: 550.54.15 CUDA Version: 12.6 |
|-----------------------------------------+------------------------+----------------------+
| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|=========================================+========================+======================|
| 0 NVIDIA A100-SXM4-80GB On | 00000000:00:0B.0 Off | 0 |
| N/A 33C P0 62W / 400W | 0MiB / 81920MiB | 0% Default |
| | | Disabled |
+-----------------------------------------+------------------------+----------------------+
| 1 NVIDIA A100-SXM4-80GB On | 00000000:00:0C.0 Off | 0 |
| N/A 31C P0 62W / 400W | 0MiB / 81920MiB | 0% Default |
| | | Disabled |
+-----------------------------------------+------------------------+----------------------+
| 2 NVIDIA A100-SXM4-80GB On | 00000000:00:0D.0 Off | 0 |
| N/A 33C P0 63W / 400W | 0MiB / 81920MiB | 0% Default |
| | | Disabled |
+-----------------------------------------+------------------------+----------------------+
| 3 NVIDIA A100-SXM4-80GB On | 00000000:00:0E.0 Off | 0 |
| N/A 32C P0 61W / 400W | 0MiB / 81920MiB | 0% Default |
| | | Disabled |
+-----------------------------------------+------------------------+----------------------+

+-----------------------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=========================================================================================|
| No running processes found |
+-----------------------------------------------------------------------------------------+

debug

cuda-gdb vector_add

error info

NVIDIA (R) cuda-gdb 12.6

Portions Copyright (C) 2007-2024 NVIDIA Corporation

Based on GNU gdb 13.2

Copyright (C) 2023 Free Software Foundation, Inc.

License GPLv3+: GNU GPL version 3 or later http://gnu.org/licenses/gpl.html

This is free software: you are free to change and redistribute it.

There is NO WARRANTY, to the extent permitted by law.

Type “show copying” and “show warranty” for details.

This CUDA-GDB was configured as “x86_64-pc-linux-gnu”.

Type “show configuration” for configuration details.

For bug reporting instructions, please see:

https://forums.developer.nvidia.com/c/developer-tools/cuda-developer-tools/cuda-gdb.

Find the CUDA-GDB manual and other documentation resources online at:

https://docs.nvidia.com/cuda/cuda-gdb/index.html.

For help, type “help”.

Type “apropos word” to search for commands related to “word”…

Reading symbols from vector_addition…

(cuda-gdb) b main

Breakpoint 1 at 0xae0a: file /root/lectures/lecture_002/vector_addition/vector_addition.cu, line 53.

(cuda-gdb) run

Starting program: /root/lectures/lecture_002/vector_addition/vector_addition

warning: Error disabling address space randomization: Operation not permitted

[Thread debugging using libthread_db enabled]

Using host libthread_db library “/usr/lib/x86_64-linux-gnu/libthread_db.so.1”.

Breakpoint 1, main () at /root/lectures/lecture_002/vector_addition/vector_addition.cu:53

Fatal signal: Segmentation fault

----- Backtrace -----

0x66d167 ???

0x80c28f ???

0x80c2ff ???

0x7ffb9b24251f ???

0x7ffb9b2a5449 ???

0x7ffb9b967439 ???

0xc77759 ???

0xc77909 ???

0x7ffb9bb11ef4 ???

0x7ffb9b8abf62 ???

0x7ffb9b9fd508 ???

0x7ffb9b8b4efd ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b69c7 ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b69c7 ???

0x7ffb9b9fd3ae ???

0x7ffb9b91cbf6 ???

0x7ffb9b91ce0f ???

0x7ffb9b98a0fd ???

0x7ffb9b97e88d ???

0x7ffb9b91aadb ???

0x7ffb9b8b7289 ???

0x7ffb9b9fd3ae ???

0x7ffb9b9f8fea ???

0x7ffb9b963898 ???

0x7ffb9b8b363d ???

0x7ffb9b9fd3ae ???

0x7ffb9b9f83dd ???

0x7ffb9b9f896c ???

0x7ffb9b9637b2 ???

0x7ffb9b8b1775 ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b69c7 ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b4efd ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b363d ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b363d ???

0x7ffb9b9fd3ae ???

0x7ffb9b91c3f3 ???

0x7ffb9b91c75e ???

0x7ffb9ba262a3 ???

0x7ffb9b8b5714 ???

0x7ffb9b9fd3ae ???

0x7ffb9b9f83dd ???

0x7ffb9b9f896c ???

0x7ffb9b9637b2 ???

0x7ffb9b8b1775 ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b69c7 ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b4efd ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b363d ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b363d ???

0x7ffb9b9fd3ae ???

0x7ffb9b91c3f3 ???

0x7ffb9b91c75e ???

0x7ffb9ba262a3 ???

0x7ffb9b8b5714 ???

0x7ffb9b9fd3ae ???

0x7ffb9b9f83dd ???

0x7ffb9b9f896c ???

0x7ffb9b9637b2 ???

0x7ffb9b8b1775 ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b69c7 ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b4efd ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b363d ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b363d ???

0x7ffb9b9fd3ae ???

0x7ffb9b91c3f3 ???

0x7ffb9b91c75e ???

0x7ffb9ba262a3 ???

0x7ffb9b8b5714 ???

0x7ffb9b9fd3ae ???

0x7ffb9b9f83dd ???

0x7ffb9b9f896c ???

0x7ffb9b9637b2 ???

0x7ffb9b8b1775 ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b69c7 ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b4efd ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b363d ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b363d ???

0x7ffb9b9fd3ae ???

0x7ffb9b91c3f3 ???

0x7ffb9b91c75e ???

0x7ffb9ba262a3 ???

0x7ffb9b8b5714 ???

0x7ffb9b9fd3ae ???

0x7ffb9b9f83dd ???

0x7ffb9b9f896c ???

0x7ffb9b9637b2 ???

0x7ffb9b8b1775 ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b69c7 ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b4efd ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b363d ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b363d ???

0x7ffb9b9fd3ae ???

0x7ffb9b91c3f3 ???

0x7ffb9b91c75e ???

0x7ffb9ba262a3 ???

0x7ffb9b9f92fd ???

0x7ffb9b963022 ???

0x7ffb9b91cfeb ???

0x7ffb9b8b1775 ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b363d ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b363d ???

0x7ffb9b9fd3ae ???

0x7ffb9b91c3f3 ???

0x7ffb9b91c75e ???

0x7ffb9ba262a3 ???

0x7ffb9b8b5714 ???

0x7ffb9b9fd3ae ???

0x7ffb9b9f83dd ???

0x7ffb9b9f896c ???

0x7ffb9b9637b2 ???

0x7ffb9b8b1775 ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b69c7 ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b4efd ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b363d ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b363d ???

0x7ffb9b9fd3ae ???

0x7ffb9b91c3f3 ???

0x7ffb9b91c75e ???

0x7ffb9ba262a3 ???

0x7ffb9b8b5714 ???

0x7ffb9b9fd3ae ???

0x7ffb9b9f83dd ???

0x7ffb9b9f896c ???

0x7ffb9b9637b2 ???

0x7ffb9b8b1775 ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b69c7 ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b4efd ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b363d ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b363d ???

0x7ffb9b9fd3ae ???

0x7ffb9b91c3f3 ???

0x7ffb9b91c75e ???

0x7ffb9ba262a3 ???

0x7ffb9b8b5714 ???

0x7ffb9b9fd3ae ???

0x7ffb9b9f83dd ???

0x7ffb9b9f896c ???

0x7ffb9b9637b2 ???

0x7ffb9b8b1775 ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b69c7 ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b4efd ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b363d ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b363d ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b363d ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b1775 ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b363d ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b363d ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b363d ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b1775 ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b363d ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b363d ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b363d ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b69c7 ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b363d ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b4efd ???

0x7ffb9b932a90 ???

0x7ffb9b8adb56 ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b363d ???

0x7ffb9b9fd3ae ???

0x7ffb9b8b3467 ???

0x7ffb9b9fd3ae ???

0x7ffb9b91c3f3 ???

0x7ffb9b91c882 ???

0x9aa8df ???

0x8123c2 ???

0xa07c73 ???

0xa08302 ???

0xa0c2be ???

0xa20cfa ???

0xa21762 ???

0x896931 ???

0x6a02ea ???

0x8a9a33 ???

0x89777e ???

0x8a712b ???

0xc5095c ???

0xc50b4e ???

0x8f001e ???

0x8f1934 ???

0x583894 ???

0x7ffb9b229d8f ???

0x7ffb9b229e3f ???

0x58b5f4 ???

0xffffffffffffffff ???


A fatal error internal to GDB has been detected, further

debugging is not possible. GDB will now terminate.

This is a bug, please report it. For instructions, see:

https://forums.developer.nvidia.com/c/developer-tools/cuda-developer-tools/cuda-gdb.

Segmentation fault (core dumped)

Hi, @1553725576

Have you tried running the sample without cuda-gdb? Does it run to completion successfully?

Hi, @1553725576

We can’t reproduce your issue.

$ /usr/local/cuda-12.6/bin/cuda-gdb-python3.10-tui ./vector_add
NVIDIA (R) cuda-gdb 12.6
Portions Copyright (C) 2007-2024 NVIDIA Corporation
Based on GNU gdb 13.2
Copyright (C) 2023 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later http://gnu.org/licenses/gpl.html
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.
Type “show copying” and “show warranty” for details.
This CUDA-GDB was configured as “x86_64-pc-linux-gnu”.
Type “show configuration” for configuration details.
For bug reporting instructions, please see:
https://forums.developer.nvidia.com/c/developer-tools/cuda-developer-tools/cuda-gdb.
Find the CUDA-GDB manual and other documentation resources online at:
https://docs.nvidia.com/cuda/cuda-gdb/index.html.

For help, type “help”.
Type “apropos word” to search for commands related to “word”…
Reading symbols from ./vector_add…
(cuda-gdb) b main
Breakpoint 1 at 0xae0a: file /home/test/daniel/forum/20241121/vector_add.cu, line 53.
(cuda-gdb) r
Starting program: /home/test/daniel/forum/20241121/vector_add
[Thread debugging using libthread_db enabled]
Using host libthread_db library “/lib/x86_64-linux-gnu/libthread_db.so.1”.

Breakpoint 1, main () at /home/test/daniel/forum/20241121/vector_add.cu:53
53 int main() {
(cuda-gdb) n
54 const int n = 1000;

Yes, it runs successfully without cuda-gdb.

From the warning message

warning: Error disabling address space randomization: Operation not permitted

This seems to be related to your setup. Please check whether plain gdb fails in the same way.
If it does, please try:
$echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope

cuda-gdb works now, thanks.

Good to know!

This topic was automatically closed 2 days after the last reply. New replies are no longer allowed.