ENOMEM when running CUDA sample on host GPU where another GPU is passed through via IOMMU/vfio-pci

Hi there,
I’m after some advice on how to configure a Linux host to pass through one of two GPUs to a guest VM while leaving the remaining GPU available to the host for CUDA applications. So far I have managed to successfully configure passthrough to the guest VM, but it appears that after the GPU is passed through, there is some issue preventing CUDA from initialising on the host. The host is an Ubuntu 18.04 system running kernel 4.15.0-50-generic and the GPUs are both TITAN Xp. While the examples below were generated using the 410.48 driver and CUDA 10.0 samples, the latest NVIDIA driver shows the same behaviour.

Thanks in advance for any assistance!


Below I give (1) an example of the error, (2) an example of the expected output, (3) strace output when the error is encountered, (4) lspci output, (5) nvidia-smi output, and (6) dmesg output. Where possible, I compare with and without vfio-pci passthrough enabled. Examples/logs below are given with the guest VM shut off.

[1] Example of error:

~/NVIDIA_CUDA-10.0_Samples/0_Simple/matrixMul$ ./matrixMul 
[Matrix Multiply Using CUDA] - Starting...
GPU Device 0: "TITAN Xp" with compute capability 6.1

MatrixA(320,320), MatrixB(640,320)

(test hangs)

[2] Expected output:
This was generated by preventing vfio-pci from claiming the GPU to be passed through. This test passes for both GPUs.

~/NVIDIA_CUDA-10.0_Samples/0_Simple/matrixMul$ ./matrixMul 
[Matrix Multiply Using CUDA] - Starting...
GPU Device 0: "TITAN Xp" with compute capability 6.1

MatrixA(320,320), MatrixB(640,320)
Computing result using CUDA Kernel...
done
Performance= 1453.61 GFlop/s, Time= 0.090 msec, Size= 131072000 Ops, WorkgroupSize= 1024 threads/block
Checking computed result for correctness: Result = PASS

NOTE: The CUDA Samples are not meant for performancemeasurements. Results may vary when GPU Boost is enabled.

[3] Output of strace on hung matrixMul process:

strace -p 9280
strace: Process 9280 attached
close(12)                               = 0
mmap(0x15249c000000, 4294967296, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 1103806595072, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 554050781184, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 279172874240, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 141733920768, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 73014444032, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 38654705664, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 21474836480, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 12884901888, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 8589934592, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 6442450944, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 5368709120, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 4831838208, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 4563402752, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 4429185024, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 4362076160, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 4328521728, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 4311744512, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 4303355904, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 4299161600, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 4297064448, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 4296015872, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 4295491584, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 4295229440, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 4295098368, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 4295032832, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 4295000064, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 4294983680, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 4294975488, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 4294971392, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 4294969344, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 4294968320, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 4294967808, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 4294967552, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 4294967424, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 4294967360, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 4294967328, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 4294967312, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 4294967304, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 4294967300, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 4294967298, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(0x15249c000000, 4294967297, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
openat(AT_FDCWD, "/proc/self/maps", O_RDONLY) = 12
fstat(12, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
read(12, "200000000-300200000 ---p 0000000"..., 1024) = 1024
close(12)
...

Additional info from cuda-gdb:

(cuda-gdb) info stack
#0  0x00007ffff77c5a15 in strtoll_l () from /lib/x86_64-linux-gnu/libc.so.6
#1  0x00007ffff77e1b21 in psiginfo () from /lib/x86_64-linux-gnu/libc.so.6
#2  0x00007ffff77deeba in sscanf () from /lib/x86_64-linux-gnu/libc.so.6
#3  0x00007ffff66d3af3 in cuEGLApiInit () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#4  0x00007ffff66d56ae in cuEGLApiInit () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#5  0x00007ffff67d61ef in cuVDPAUCtxCreate () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#6  0x00007ffff66c6580 in cuEGLApiInit () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#7  0x00007ffff66d01c6 in cuEGLApiInit () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#8  0x00007ffff67b55f3 in cuVDPAUCtxCreate () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#9  0x00007ffff67b59a6 in cuVDPAUCtxCreate () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#10 0x00007ffff667c3a9 in cudbgApiDetach () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#11 0x00007ffff667e59f in cudbgApiDetach () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#12 0x00007ffff65af09c in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#13 0x00007ffff6711726 in cuDevicePrimaryCtxRetain () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#14 0x000055555558f2c0 in cudart::contextStateManager::initPrimaryContext(cudart::device*) ()
#15 0x000055555558fdcd in cudart::contextStateManager::initDriverContext() ()
#16 0x00005555555908ec in cudart::contextStateManager::getRuntimeContextState(cudart::contextState**, bool) ()
#17 0x000055555558413c in cudart::doLazyInitContextState() ()
#18 0x0000555555562258 in cudart::cudaApiMalloc(void**, unsigned long) ()
#19 0x00005555555a499f in cudaMalloc ()
#20 0x000055555555abd0 in MatrixMultiply(int, char**, int, dim3 const&, dim3 const&) ()
#21 0x000055555555b6b8 in main ()

[4] Output of lspci for each device:
lspci output generated with vfio-pci claiming one of the GPUs

0a:00.0 VGA compatible controller [0300]: NVIDIA Corporation GP102 [TITAN Xp] [10de:1b02] (rev a1)
	Subsystem: NVIDIA Corporation GP102 [TITAN Xp] [10de:11df]
	Kernel driver in use: vfio-pci
	Kernel modules: nvidiafb, nouveau, nvidia_drm, nvidia
0a:00.1 Audio device [0403]: NVIDIA Corporation GP102 HDMI Audio Controller [10de:10ef] (rev a1)
	Subsystem: NVIDIA Corporation GP102 HDMI Audio Controller [10de:11df]
	Kernel driver in use: vfio-pci
	Kernel modules: snd_hda_intel
...
42:00.0 VGA compatible controller [0300]: NVIDIA Corporation GP102 [TITAN Xp] [10de:1b02] (rev a1)
	Subsystem: NVIDIA Corporation GP102 [TITAN Xp] [10de:11df]
	Kernel modules: nvidiafb, nouveau, nvidia_drm, nvidia
42:00.1 Audio device [0403]: NVIDIA Corporation GP102 HDMI Audio Controller [10de:10ef] (rev a1)
	Subsystem: NVIDIA Corporation GP102 HDMI Audio Controller [10de:11df]
	Kernel driver in use: snd_hda_intel
	Kernel modules: snd_hda_intel

lspci output generated without vfio-pci claiming one of the GPUs

0a:00.0 VGA compatible controller [0300]: NVIDIA Corporation GP102 [TITAN Xp] [10de:1b02] (rev a1)
	Subsystem: NVIDIA Corporation GP102 [TITAN Xp] [10de:11df]
	Kernel driver in use: nvidia
	Kernel modules: nvidiafb, nouveau, nvidia_drm, nvidia
0a:00.1 Audio device [0403]: NVIDIA Corporation GP102 HDMI Audio Controller [10de:10ef] (rev a1)
	Subsystem: NVIDIA Corporation GP102 HDMI Audio Controller [10de:11df]
	Kernel driver in use: snd_hda_intel
	Kernel modules: snd_hda_intel
...
42:00.0 VGA compatible controller [0300]: NVIDIA Corporation GP102 [TITAN Xp] [10de:1b02] (rev a1)
	Subsystem: NVIDIA Corporation GP102 [TITAN Xp] [10de:11df]
	Kernel driver in use: nvidia
	Kernel modules: nvidiafb, nouveau, nvidia_drm, nvidia
42:00.1 Audio device [0403]: NVIDIA Corporation GP102 HDMI Audio Controller [10de:10ef] (rev a1)
	Subsystem: NVIDIA Corporation GP102 HDMI Audio Controller [10de:11df]
	Kernel driver in use: snd_hda_intel
	Kernel modules: snd_hda_intel

[5] Output of nvidia-smi (everything seems fine here):
nvidia-smi with vfio-pci claiming one of the GPUs

Sat May 18 10:46:32 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 410.48                 Driver Version: 410.48                    |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|===============================+======================+======================|
|   0  TITAN Xp            Off  | 00000000:42:00.0 Off |                  N/A |
| 23%   37C    P8    17W / 250W |     10MiB / 12194MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|=============================================================================|
|  No running processes found                                                 |
+-----------------------------------------------------------------------------+

nvidia-smi without vfio-pci claiming one of the GPUs

Sat May 18 11:00:35 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 410.48                 Driver Version: 410.48                    |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|===============================+======================+======================|
|   0  TITAN Xp            Off  | 00000000:0A:00.0 Off |                  N/A |
| 19%   34C    P0    56W / 250W |      0MiB / 12196MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   1  TITAN Xp            Off  | 00000000:42:00.0 Off |                  N/A |
| 26%   41C    P0    58W / 250W |      0MiB / 12194MiB |      6%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|=============================================================================|
|  No running processes found                                                 |
+-----------------------------------------------------------------------------+

[6] Output of dmesg:
dmesg output generated with vfio-pci claiming one of the GPUs

[    0.000000] Command line: BOOT_IMAGE=/boot/vmlinuz-4.15.0-50-generic root=UUID=9b4a0707-5715-4c26-8267-3853d45feb25 ro quiet splash nomdmonddf nomdmonisw intel_iommu=on modprobe.blacklist=nouveau,nvidia,nvidia_uvm,nvidia_drm,nvidia_modeset vt.handoff=1
[    0.000000] Kernel command line: BOOT_IMAGE=/boot/vmlinuz-4.15.0-50-generic root=UUID=9b4a0707-5715-4c26-8267-3853d45feb25 ro quiet splash nomdmonddf nomdmonisw intel_iommu=on modprobe.blacklist=nouveau,nvidia,nvidia_uvm,nvidia_drm,nvidia_modeset vt.handoff=1
...
[  389.145729] nvidia: module verification failed: signature and/or required key missing - tainting kernel
[  389.158074] nvidia-nvlink: Nvlink Core is being initialized, major device number 235
[  389.158879] vfio-pci 0000:0a:00.0: vgaarb: changed VGA decodes: olddecodes=io+mem,decodes=io+mem:owns=none
[  389.158883] nvidia 0000:42:00.0: vgaarb: changed VGA decodes: olddecodes=io+mem,decodes=none:owns=io+mem
[  389.258457] NVRM: The NVIDIA probe routine was not called for 1 device(s).
[  389.258458] NVRM: This can occur when a driver such as: 
               NVRM: nouveau, rivafb, nvidiafb or rivatv 
               NVRM: was loaded and obtained ownership of the NVIDIA device(s).
[  389.258459] NVRM: Try unloading the conflicting kernel module (and/or
               NVRM: reconfigure your kernel without the conflicting
               NVRM: driver(s)), then try loading the NVIDIA kernel module
               NVRM: again.
[  389.258461] NVRM: loading NVIDIA UNIX x86_64 Kernel Module  410.48  Thu Sep  6 06:36:33 CDT 2018 (using threaded interrupts)
[  389.323027] nvidia-uvm: Loaded the UVM driver in 8 mode, major device number 510
[  389.848994] resource sanity check: requesting [mem 0x000c0000-0x000fffff], which spans more than PCI Bus 0000:00 [mem 0x000c0000-0x000dffff window]
[  389.849146] caller os_map_kernel_space.part.7+0xda/0x120 [nvidia] mapping multiple BARs

dmesg output generated without vfio-pci claiming one of the GPUs

[    0.000000] Command line: BOOT_IMAGE=/boot/vmlinuz-4.15.0-50-generic root=UUID=9b4a0707-5715-4c26-8267-3853d45feb25 ro quiet splash nomdmonddf nomdmonisw intel_iommu=on modprobe.blacklist=nouveau,nvidia,nvidia_uvm,nvidia_drm,nvidia_modeset vt.handoff=1
[    0.000000] Kernel command line: BOOT_IMAGE=/boot/vmlinuz-4.15.0-50-generic root=UUID=9b4a0707-5715-4c26-8267-3853d45feb25 ro quiet splash nomdmonddf nomdmonisw intel_iommu=on modprobe.blacklist=nouveau,nvidia,nvidia_uvm,nvidia_drm,nvidia_modeset vt.handoff=1
...
[  159.021388] nvidia: module verification failed: signature and/or required key missing - tainting kernel
[  159.033875] nvidia-nvlink: Nvlink Core is being initialized, major device number 235
[  159.034478] nvidia 0000:0a:00.0: enabling device (0000 -> 0003)
[  159.034627] nvidia 0000:0a:00.0: vgaarb: changed VGA decodes: olddecodes=io+mem,decodes=none:owns=none
[  159.134599] nvidia 0000:42:00.0: vgaarb: changed VGA decodes: olddecodes=io+mem,decodes=none:owns=io+mem
[  159.234361] NVRM: loading NVIDIA UNIX x86_64 Kernel Module  410.48  Thu Sep  6 06:36:33 CDT 2018 (using threaded interrupts)
[  159.405021] nvidia-uvm: Loaded the UVM driver in 8 mode, major device number 510
[  161.077620] resource sanity check: requesting [mem 0x000c0000-0x000fffff], which spans more than PCI Bus 0000:00 [mem 0x000c0000-0x000dffff window]
[  161.077740] caller os_map_kernel_space.part.7+0xda/0x120 [nvidia] mapping multiple BARs
[  360.072376] resource sanity check: requesting [mem 0x000c0000-0x000fffff], which spans more than PCI Bus 0000:00 [mem 0x000c0000-0x000dffff window]
[  360.072521] caller os_map_kernel_space.part.7+0xda/0x120 [nvidia] mapping multiple BARs

Additional info from a toy “hello world” CUDA example:

#include "stdio.h"

__global__ void cuda_hello(){  // Device kernel: every thread it is launched with prints one greeting line.
    printf("Hello World from GPU!\n");  // printf called from device code
}

int main() {  // Host entry point: launches cuda_hello once, then exits with status 0.
    cuda_hello<<<1,1>>>();  // 1 block x 1 thread; the backtraces in this report show the hang inside this launch (primary-context initialisation)
    return 0;  // NOTE(review): no cudaDeviceSynchronize() or error check before returning, so the kernel's output may not appear even on a working setup -- confirm
}

The resulting executable, compiled with nvcc -g -G, hangs:

(cuda-gdb) info stack
#0  0x00007ffff7888613 in __fxstat64 () from /lib/x86_64-linux-gnu/libc.so.6
#1  0x00007ffff77fd82f in _IO_file_doallocate () from /lib/x86_64-linux-gnu/libc.so.6
#2  0x00007ffff780d0b2 in _IO_doallocbuf () from /lib/x86_64-linux-gnu/libc.so.6
#3  0x00007ffff780bf24 in _IO_file_underflow () from /lib/x86_64-linux-gnu/libc.so.6
#4  0x00007ffff780d182 in _IO_default_uflow () from /lib/x86_64-linux-gnu/libc.so.6
#5  0x00007ffff77ff1fa in _IO_getline_info () from /lib/x86_64-linux-gnu/libc.so.6
#6  0x00007ffff77fe1ab in fgets () from /lib/x86_64-linux-gnu/libc.so.6
#7  0x00007ffff66d3ab2 in cuEGLApiInit () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#8  0x00007ffff66d56ae in cuEGLApiInit () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#9  0x00007ffff67d61ef in cuVDPAUCtxCreate () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#10 0x00007ffff66c6580 in cuEGLApiInit () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#11 0x00007ffff66d01c6 in cuEGLApiInit () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#12 0x00007ffff67b55f3 in cuVDPAUCtxCreate () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#13 0x00007ffff67b59a6 in cuVDPAUCtxCreate () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#14 0x00007ffff667c3a9 in cudbgApiDetach () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#15 0x00007ffff667e59f in cudbgApiDetach () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#16 0x00007ffff65af09c in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#17 0x00007ffff6711726 in cuDevicePrimaryCtxRetain () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#18 0x000055555558d950 in cudart::contextStateManager::initPrimaryContext(cudart::device*) ()
#19 0x000055555558e4dd in cudart::contextStateManager::initDriverContext() ()
#20 0x000055555558ef7c in cudart::contextStateManager::getRuntimeContextState(cudart::contextState**, bool) ()
#21 0x0000555555576849 in cudart::cudaApiLaunchKernelCommon(void const*, dim3, dim3, void**, unsigned long, CUstream_st*, bool) ()
#22 0x0000555555576cf7 in cudart::cudaApiLaunchKernel(void const*, dim3, dim3, void**, unsigned long, CUstream_st*) ()
#23 0x00005555555ab0b5 in cudaLaunchKernel ()
#24 0x000055555555ad95 in cudaLaunchKernel<char> (
    func=0x55555555ac97 <cuda_hello()> "UH\211\345\350\326\376\377\377\220]\303UH\211\345H\203\354\020H\211}\370H\213E\370H\211\005&\324\a", gridDim=..., blockDim=..., args=0x7fffffffe870, sharedMem=0, stream=0x0)
    at /usr/local/cuda/bin/..//include/cuda_runtime.h:202
#25 0x000055555555ac3f in __device_stub__Z10cuda_hellov () at /tmp/tmpxft_00004f9a_00000000-5_test.cudafe1.stub.c:13
#26 0x000055555555aca0 in cuda_hello () at test.cu:3
#27 0x000055555555aaff in main () at test.cu:8