$ PGI_ACC_DEBUG=1 optirun ./acc_c2.exe
ACC: detected 1 CUDA devices
ACC: device[1] is NVIDIA CUDA device 0
argument memory for queue 8 device:0x500100000 host:0x200000000
ACC: initialized 1 CUDA devices
ACC: device[2] is PGI native
ACC: device[0] is PGI native
pinitialize for thread 1
curr_devid for thread 1 is 1
pgi_uacc_begin( compute region, file=/opt/pgi/linux86-64/13.10/etc/samples/openacc/acc_c2i.c, function=main, lines=32:76, startline=54, endline=60, devid=0, threadid=1 )
pgi_uacc_begin( file=/opt/pgi/linux86-64/13.10/etc/samples/openacc/acc_c2i.c, function=main, lines=32:76, startline=54, endline=60, devid=1, threadid=1 ) dindex=1
pgi_uacc_enter( devid=1 )
pgi_uacc_dataon( devid=1, threadid=1 )
pgi_uacc_dataon(devptr=0x0,hostptr=0x7f959f894010,poffset=0,offset=0,stride=1,size=1000000,extent=-1,eltsize=4,lineno=54,name=r,flags=0xb00=create+present+copyout,threadid=1)
pgi_uacc_dataon( devid=1, threadid=1 ) dindex=1
NO map for host:0x7f959f894010
pgi_uacc_alloc(size=4000000,devid=1,threadid=1)
allocate device memory 0x500200000(4000256B)
pgi_uacc_alloc(size=4000000,devid=1,threadid=1) returns 0x500200000
map dev:0x500200000 host:0x7f959f894010 size:4000000 offset:0 data[dev:0x500200000 host:0x7f959f894010 size:4000000] (line:54 name:r)
alloc done with devptr at 0x500200000
pgi_uacc_dataon( devid=1, threadid=1 )
pgi_uacc_dataon(devptr=0x0,hostptr=0x7f959fc65010,poffset=0,offset=0,stride=1,size=1000000,extent=-1,eltsize=4,lineno=54,name=a,flags=0x700=create+present+copyin,threadid=1)
pgi_uacc_dataon( devid=1, threadid=1 ) dindex=1
NO map for host:0x7f959fc65010
pgi_uacc_alloc(size=4000000,devid=1,threadid=1)
allocate device memory 0x5005e0000(4000256B)
pgi_uacc_alloc(size=4000000,devid=1,threadid=1) returns 0x5005e0000
map dev:0x5005e0000 host:0x7f959fc65010 size:4000000 offset:0 data[dev:0x5005e0000 host:0x7f959fc65010 size:4000000] (line:54 name:a)
alloc done with devptr at 0x5005e0000
pgi_uacc_dataupx(devptr=0x5005e0000,hostptr=0x7f959fc65010,poffset=0,offset=0,stride=1,size=1000000,extent=-1,eltsize=4,lineno=54,name=a,flags=0x0,threadid=1)
pgi_uacc_cuda_dataup1(devdst=0x5005e0000,hostsrc=0x7f959fc65010,offset=0,stride=1,size=1000000,eltsize=4,lineno=54,name=a,thread=0)
pgi_uacc_datadone( async=-1, devid=1 )
pgi_uacc_cuda_wait(lineno=-1,async=-1,dindex=1)
pgi_uacc_cuda_wait(sync on stream=(nil))
pgi_uacc_cuda_wait done
pgi_uacc_launch funcnum=4202208 argptr=(nil) sizeargs=0x7fffeb0f31c0 async=140737137029560 devid=-1
pgi_uacc_dataoff(devptr=0x5005e0000,hostptr=0x7f959fc65010,poffset=0,offset=0,stride=1,size=1000000,extent=-1,eltsize=4,lineno=60,name=a,flags=0x700=create+present+copyin,threadid=1)
mapped host:0x7f959fc65010 dev:0x5005e0000 offset:0 (host:0x7f959fc65010 dev:0x5005e0000 size:4000000 offset:0 data[host:0x7f959fc65010 dev:0x5005e0000 size:4000000] line:54 name:a)
unmap dev:0x5005e0000 host:0x7f959fc65010 size:4000000 offset:0 data[dev:0x5005e0000 host:0x7f959fc65010 size:4000000] (line:54 name:a)
pgi_uacc_free(ptr=0x5005e0000,devid=1,threadid=1)
save device memory 0x5005e0000(4000256B)
device data 0x5005e0000(4000256B) now available for reuse
pgi_uacc_dataoff(devptr=0x500200000,hostptr=0x7f959f894010,poffset=0,offset=0,stride=1,size=1000000,extent=-1,eltsize=4,lineno=60,name=r,flags=0xb00=create+present+copyout,threadid=1)
mapped host:0x7f959f894010 dev:0x500200000 offset:0 (host:0x7f959f894010 dev:0x500200000 size:4000000 offset:0 data[host:0x7f959f894010 dev:0x500200000 size:4000000] line:54 name:r)
pgi_uacc_datadownx(devptr=0x500200000,hostptr=0x7f959f894010,poffset=0,offset=0,stride=1,size=1000000,extent=-1,eltsize=4,lineno=60,name=r,flags=0x0,threadid=1)
pgi_uacc_cuda_datadown1(devdst=0x500200000,hostsrc=0x7f959f894010,offset=0,stride=1,size=1000000,eltsize=4,lineno=60,name=r,async=-1,dindex=1)
unmap dev:0x500200000 host:0x7f959f894010 size:4000000 offset:0 data[dev:0x500200000 host:0x7f959f894010 size:4000000] (line:54 name:r)
pgi_uacc_free(ptr=0x500200000,devid=1,threadid=1)
save device memory 0x500200000(4000256B)
device data 0x500200000(4000256B) now available for reuse
pgi_uacc_datadone( async=-1, devid=1 )
pgi_uacc_cuda_wait(lineno=-1,async=-1,dindex=1)
pgi_uacc_cuda_wait(sync on stream=(nil))
move (0x7f959f894010 <= 0x202100000, size=4000000)
pgi_uacc_cuda_wait done
acc_c2.exe: acc_c2i.c:71: main: Assertion `__builtin_fabsf(r[i] - e[i]) < 0.000001f' failed.