I am unable to debug C++ code compiled with nvc++ for the GPU that uses std::for_each algorithms:
There is a deadlock in memory allocation. I’ve tried to disable unnecessary use of CUDA Unified Memory; however, some internal uses of operator new still call __pgi_managed_new in debug mode.
This problem appeared after the GPU driver was updated; the driver now uses CUDA 12.1. I didn’t have this problem when the driver was on CUDA 11; however, the old HPC SDK 22 is no longer supported by the GPU card. I have tried HPC SDK 23.7 and 23.9, and also standalone cuda-gdb 12.3, without success.
Here is a log:
Using python library libpython3.6m.so
Reading symbols from build/cmake/lbsolver-gpuDebug-noMPI/build/lbsolver…
(cuda-gdb)
(cuda-gdb) run
Starting program: /odile/home/rkuate/workspace/prolb/lbsolver/gpu/developLast/build/cmake/lbsolver-gpuDebug-noMPI/build/lbsolver -i test_cases/CT01_pulse_uniform/input.txt -d test_cases
[Thread debugging using libthread_db enabled]
Using host libthread_db library “/lib64/libthread_db.so.1”.
^C
Program received signal SIGINT, Interrupt.
0x00007ffff48d7e0f in nvompAcquireLock (lock=lock@entry=0x7ffff72e2bb8 <__pgi_uacc_data+72>) at nvomp_lock.h:38
38 nvomp_lock.h: Aucun fichier ou dossier de ce type.
(cuda-gdb) bt
#0 0x00007ffff48d7e0f in nvompAcquireLock (lock=lock@entry=0x7ffff72e2bb8 <__pgi_uacc_data+72>) at nvomp_lock.h:38
#1 __nvomp_acquire_lock (lock=lock@entry=0x7ffff72e2bb8 <__pgi_uacc_data+72>) at nvomp_lock.c:17
#2 0x00007ffff70b9a9d in __pgi_uacc_enumerate () at …/…/src/init.c:579
#3 __pgi_uacc_initialize () at …/…/src/init.c:693
#4 0x00007ffff693116c in do_managed_new (n=23, func_name=0x7ffff6932b58 “new”) at …/…/src/cuda_managed_new.cpp:59
#5 0x00007ffff6930e44 in __pgi_managed_new (n=23) at …/…/src/cuda_managed_new.cpp:153
#6 0x0000000000d3f549 in allocate (this=0x7fffffff8b28, __n=23, _T567_39031=0x0)
at /usr/lib/gcc/x86_64-redhat-linux/8/…/…/…/…/include/c++/8/ext/new_allocator.h:111
#7 0x0000000000d10ee9 in allocate (__a=…, __n=23) at /usr/lib/gcc/x86_64-redhat-linux/8/…/…/…/…/include/c++/8/bits/alloc_traits.h:436
#8 0x0000000000d2bb6e in _M_create (this=0x7fffffff8b28, __capacity=0x7fffffff8a88, __old_capacity=0)
at /usr/lib/gcc/x86_64-redhat-linux/8/…/…/…/…/include/c++/8/bits/basic_string.tcc:153
#9 0x0000000000d2d704 in _M_construct (this=0x7fffffff8b28, __beg=0x5353140 “SystemCategories.proto”, __end=0x5353156 “”, _T13_38380=…)
at /usr/lib/gcc/x86_64-redhat-linux/8/…/…/…/…/include/c++/8/bits/basic_string.tcc:219
#10 0x00007ffff0fb6a90 in ?? () from /lib64/libcudadebugger.so.1
#11 0x00007ffff0fb261d in ?? () from /lib64/libcudadebugger.so.1
#12 0x00007ffff0f60160 in ?? () from /lib64/libcudadebugger.so.1
#13 0x00007ffff0fbc46f in ?? () from /lib64/libcudadebugger.so.1
#14 0x00007ffff0fbc45a in ?? () from /lib64/libcudadebugger.so.1
#15 0x00007ffff7de3e0a in call_init (l=, argc=argc@entry=5, argv=argv@entry=0x7fffffffcbd8, env=env@entry=0x5169f60) at dl-init.c:72
#16 0x00007ffff7de3f0a in call_init (env=0x5169f60, argv=0x7fffffffcbd8, argc=5, l=) at dl-init.c:118
#17 _dl_init (main_map=0x5366290, argc=5, argv=0x7fffffffcbd8, env=0x5169f60) at dl-init.c:119
#18 0x00007ffff3b9e1dc in _dl_catch_exception () from /lib64/libc.so.6
#19 0x00007ffff7de7b2e in dl_open_worker (a=0x7fffffff90e0) at dl-open.c:819
#20 dl_open_worker (a=0x7fffffff90e0) at dl-open.c:782
#21 0x00007ffff3b9e184 in _dl_catch_exception () from /lib64/libc.so.6
#22 0x00007ffff7de7d11 in _dl_open (file=0x7ffff1a4c7c0 “libcudadebugger.so.1”, mode=, caller_dlopen=0x7ffff1870e37, nsid=-2, argc=5,
argv=, env=0x5169f60) at dl-open.c:900
#23 0x00007ffff7bcc1ea in dlopen_doit () from /lib64/libdl.so.2
#24 0x00007ffff3b9e184 in _dl_catch_exception () from /lib64/libc.so.6
#25 0x00007ffff3b9e243 in _dl_catch_error () from /lib64/libc.so.6
#26 0x00007ffff7bcc969 in _dlerror_run () from /lib64/libdl.so.2
#27 0x00007ffff7bcc28a in dlopen@@GLIBC_2.2.5 () from /lib64/libdl.so.2
#28 0x00007ffff1870e37 in ?? () from /lib64/libcuda.so
#29 0x00007ffff15a4c7c in ?? () from /lib64/libcuda.so
--Type <RET> for more, q to quit, c to continue without paging--
#30 0x00007ffff1649a86 in ?? () from /lib64/libcuda.so
#31 0x00007ffff16c0df8 in ?? () from /lib64/libcuda.so
#32 0x00007ffff6b56b98 in __pgi_uacc_cuda_init () at …/…/src/cuda_init.c:464
#33 0x00007ffff70ba1ff in __pgi_uacc_enumerate () at …/…/src/init.c:607
#34 __pgi_uacc_initialize () at …/…/src/init.c:693
#35 0x00007ffff693116c in do_managed_new (n=60, func_name=0x7ffff6932b58 “new”) at …/…/src/cuda_managed_new.cpp:59
#36 0x00007ffff6930e44 in __pgi_managed_new (n=60) at …/…/src/cuda_managed_new.cpp:153
#37 0x0000000000d3f549 in allocate (this=0x7fffffff98a0, __n=60, _T567_39031=0x0)
at /usr/lib/gcc/x86_64-redhat-linux/8/…/…/…/…/include/c++/8/ext/new_allocator.h:111
#38 0x0000000000d10ee9 in allocate (__a=…, __n=60) at /usr/lib/gcc/x86_64-redhat-linux/8/…/…/…/…/include/c++/8/bits/alloc_traits.h:436
#39 0x0000000000d2bb6e in _M_create (this=0x7fffffff98a0, __capacity=0x7fffffff9878, __old_capacity=0)
at /usr/lib/gcc/x86_64-redhat-linux/8/…/…/…/…/include/c++/8/bits/basic_string.tcc:153
#40 0x0000000003a8839f in void std::__cxx11::basic_string<char, std::char_traits, std::allocator >::_M_construct<char const*>(char const*, char const*, std::forward_iterator_tag) [clone .constprop.140] ()
#41 0x0000000003a905ce in vtksys::SystemTools::CollapseFullPath(std::__cxx11::basic_string<char, std::char_traits, std::allocator > const&, char const*) ()
#42 0x0000000003a90756 in vtksys::SystemTools::AddKeepPath(std::__cxx11::basic_string<char, std::char_traits, std::allocator > const&) ()
#43 0x0000000003a90824 in vtksys::SystemTools::ClassInitialize() ()
#44 0x0000000000bc8b22 in _GLOBAL__sub_I_vtkGenericEnSightReader.cxx ()
#45 0x0000000003ccabfd in __libc_csu_init ()
#46 0x00007ffff3a6ec7e in __libc_start_main () from /lib64/libc.so.6
(cuda-gdb) q
A debugging session is active.
Inferior 1 [process 584806] will be killed.
Quit anyway? (y or n) y
[rkuate@odile-gpu01 developLast]$ nvidia-smi
Tue Nov 7 15:28:51 2023
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05 Driver Version: 535.104.05 CUDA Version: 12.2 |
|-----------------------------------------+----------------------+----------------------+
| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|=========================================+======================+======================|
| 0 NVIDIA A100 80GB PCIe Off | 00000000:17:00.0 Off | 0 |
| N/A 32C P0 43W / 300W | 4MiB / 81920MiB | 0% Default |
| | | Disabled |
+-----------------------------------------+----------------------+----------------------+
+---------------------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=======================================================================================|
| No running processes found |
+---------------------------------------------------------------------------------------+