I am running an iterative tomographic application on a Tesla 1070-1U system. The host runs openSUSE 11.1. Two of the four GPUs in this system are used for the computation, each driven from its own dedicated pthread. At some point during the iterations, cudaMemcpy2D never returns to the caller, which leaves the entire program stuck in a waiting state. Below are the backtraces from gdb, where thread 1 is the main thread and the two GPUs are running in threads 2 and 3, respectively. Any hint as to the source of the problem would be greatly appreciated.
Shawn
(gdb) where
#0 0x00007f00fab08d59 in pthread_cond_wait@@GLIBC_2.3.2 ()
from /lib64/libpthread.so.0
#1 0x00000000004091f5 in CGpuThread::mWaitThreadsFinish ()
at /home/szheng/gpuprojs/CuProjs/CuRecon/Proj/Thread/GpuThread.cpp:229
#2 0x0000000000409745 in CGpuThread::Reconstruct ()
at /home/szheng/gpuprojs/CuProjs/CuRecon/Proj/Thread/GpuThread.cpp:97
#3 0x000000000040312c in CClientMain::mStartRecon (this=0x7fff6c554930)
at /home/szheng/gpuprojs/CuProjs/CuRecon/Proj/Thread/ClientMain.cpp:388
#4 0x0000000000403ae2 in CClientMain::mProcessCmd (this=0x7fff6c554930)
at /home/szheng/gpuprojs/CuProjs/CuRecon/Proj/Thread/ClientMain.cpp:106
#5 0x0000000000403e66 in CClientMain::DoIt (this=0x7fff6c554930)
at /home/szheng/gpuprojs/CuProjs/CuRecon/Proj/Thread/ClientMain.cpp:65
#6 0x000000000040907d in main (argc=1, argv=0x7fff6c554a48)
at /home/szheng/gpuprojs/CuProjs/CuRecon/Proj/Gpurecon.cpp:42
(gdb) info threads
3 Thread 0x7f00f8f8c950 (LWP 13792) 0xffffffffff600132 in ?? ()
2 Thread 0x7f00f878b950 (LWP 13793) 0xffffffffff600132 in ?? ()
- 1 Thread 0x7f00faf15700 (LWP 13790) 0x00007f00fab08d59 in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
(gdb) thread 3
[Switching to thread 3 (Thread 0x7f00f8f8c950 (LWP 13792))]#0 0xffffffffff600132 in ?? ()
(gdb) where
#0 0xffffffffff600132 in ?? ()
#1 0x00007fff6c5ff650 in ?? ()
#2 0x00007fff6c5ff826 in gettimeofday ()
#3 0x00007f00f96426fa in gettimeofday () from /lib64/libc.so.6
#4 0x00007f00fa64a432 in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f00fa12960a in ?? () from /usr/lib64/libcuda.so.1
#6 0x00007f00fa12031e in ?? () from /usr/lib64/libcuda.so.1
#7 0x00007f00fa117f8a in ?? () from /usr/lib64/libcuda.so.1
#8 0x00007f00fa1066be in ?? () from /usr/lib64/libcuda.so.1
#9 0x00007f00fa197d5e in ?? () from /usr/lib64/libcuda.so.1
#10 0x00007f00fa8e33e9 in ?? ()
from /usr/local/cuda/lib64/libcudart.so.2
#11 0x00007f00fa8cf3c6 in cudaMemcpy2D ()
from /usr/local/cuda/lib64/libcudart.so.2
#12 0x0000000000482b45 in CPitchedPtr2D::ToHost(float*) ()
#13 0x000000000042452f in CBackProjImpl::ToHost (this=0x8790d0,
pVolume=0x6e0530, iSliceY=321)
at /home/szheng/gpuprojs/CuProjs/CuSartlib/CuSart/App/BackProjImpl.cpp:77
#14 0x00000000004546f8 in CTomoSartImpl::DoIt (this=0x879060,
uiIterStart=0, uiIterEnd=20, iY=321, pVolume=0x6e0530)
at /home/szheng/gpuprojs/CuProjs/CuSartlib/CuSart/App/TomoSartImpl.cpp:44
#15 0x0000000000422d06 in CTomoSart::DoIt (this=0x6e0f58, iStartY=0,
iSizeY=490) at TomoSart.cpp:60
#16 0x0000000000408176 in CDoSartRecon::DoIt (this=0x6e0e10, iStartY=0,
iSizeY=490)
at /home/szheng/gpuprojs/CuProjs/CuRecon/Proj/Recon/DoSartRecon.cpp:39
#17 0x000000000040be31 in CReconMain::DoIt (this=0x7f00f8f8c090,
iThreadID=0, iDeviceID=0)
at /home/szheng/gpuprojs/CuProjs/CuRecon/Proj/Thread/ReconMain.cpp:92
#18 0x0000000000409345 in CGpuThread::mReconstruct (this=0x6df428)
at /home/szheng/gpuprojs/CuProjs/CuRecon/Proj/Thread/GpuThread.cpp:209
#19 0x00000000004094d0 in CGpuThread::ThreadMain (this=0x6df428)
at /home/szheng/gpuprojs/CuProjs/CuRecon/Proj/Thread/GpuThread.cpp:177
#20 0x0000000000488748 in Util_Thread::ThreadFunc (pParam=0x6df428)
at Util_Thread.cpp:105
#21 0x00007f00fab05070 in start_thread () from /lib64/libpthread.so.0
#22 0x00007f00f968311d in clone () from /lib64/libc.so.6
#23 0x0000000000000000 in ?? ()
(gdb) thread 2
[Switching to thread 2 (Thread 0x7f00f878b950 (LWP 13793))]#0 0xffffffffff600132 in ?? ()
(gdb) where
#0 0xffffffffff600132 in ?? ()
#1 0x00007fff6c5ff650 in ?? ()
#2 0x00007fff6c5ff826 in gettimeofday ()
#3 0x00007f00f96426fa in gettimeofday () from /lib64/libc.so.6
#4 0x00007f00fa64a432 in ?? () from /usr/lib64/libcuda.so.1
#5 0x00007f00fa12960a in ?? () from /usr/lib64/libcuda.so.1
#6 0x00007f00fa12031e in ?? () from /usr/lib64/libcuda.so.1
#7 0x00007f00fa117f8a in ?? () from /usr/lib64/libcuda.so.1
#8 0x00007f00fa1066be in ?? () from /usr/lib64/libcuda.so.1
#9 0x00007f00fa197d5e in ?? () from /usr/lib64/libcuda.so.1
#10 0x00007f00fa8e33e9 in ?? ()
from /usr/local/cuda/lib64/libcudart.so.2
#11 0x00007f00fa8cf3c6 in cudaMemcpy2D ()
from /usr/local/cuda/lib64/libcudart.so.2
#12 0x0000000000482b45 in CPitchedPtr2D::ToHost(float*) ()
#13 0x000000000042452f in CBackProjImpl::ToHost (this=0x7effe408e3f0,
pVolume=0x6e0530, iSliceY=809)
at /home/szheng/gpuprojs/CuProjs/CuSartlib/CuSart/App/BackProjImpl.cpp:77
#14 0x00000000004546f8 in CTomoSartImpl::DoIt (this=0x7effe408be80,
uiIterStart=0, uiIterEnd=20, iY=809, pVolume=0x6e0530)
at /home/szheng/gpuprojs/CuProjs/CuSartlib/CuSart/App/TomoSartImpl.cpp:44
#15 0x0000000000422d06 in CTomoSart::DoIt (this=0x7effe408e798,
iStartY=490, iSizeY=490) at TomoSart.cpp:60
#16 0x0000000000408176 in CDoSartRecon::DoIt (this=0x7effe408e650,
iStartY=490, iSizeY=490)
at /home/szheng/gpuprojs/CuProjs/CuRecon/Proj/Recon/DoSartRecon.cpp:39
#17 0x000000000040be31 in CReconMain::DoIt (this=0x7f00f878b090,
iThreadID=1, iDeviceID=1)
at /home/szheng/gpuprojs/CuProjs/CuRecon/Proj/Thread/ReconMain.cpp:92
#18 0x0000000000409345 in CGpuThread::mReconstruct (this=0x6df4a0)
at /home/szheng/gpuprojs/CuProjs/CuRecon/Proj/Thread/GpuThread.cpp:209
#19 0x00000000004094d0 in CGpuThread::ThreadMain (this=0x6df4a0)
at /home/szheng/gpuprojs/CuProjs/CuRecon/Proj/Thread/GpuThread.cpp:177
#20 0x0000000000488748 in Util_Thread::ThreadFunc (pParam=0x6df4a0)
at Util_Thread.cpp:105
#21 0x00007f00fab05070 in start_thread () from /lib64/libpthread.so.0
#22 0x00007f00f968311d in clone () from /lib64/libc.so.6
#23 0x0000000000000000 in ?? ()
(gdb) thread 1
[Switching to thread 1 (Thread 0x7f00faf15700 (LWP 13790))]#0 0x00007f00fab08d59 in pthread_cond_wait@@GLIBC_2.3.2 ()
from /lib64/libpthread.so.0
(gdb) where
#0 0x00007f00fab08d59 in pthread_cond_wait@@GLIBC_2.3.2 ()
from /lib64/libpthread.so.0
#1 0x00000000004091f5 in CGpuThread::mWaitThreadsFinish ()
at /home/szheng/gpuprojs/CuProjs/CuRecon/Proj/Thread/GpuThread.cpp:229
#2 0x0000000000409745 in CGpuThread::Reconstruct ()
at /home/szheng/gpuprojs/CuProjs/CuRecon/Proj/Thread/GpuThread.cpp:97
#3 0x000000000040312c in CClientMain::mStartRecon (this=0x7fff6c554930)
at /home/szheng/gpuprojs/CuProjs/CuRecon/Proj/Thread/ClientMain.cpp:388
#4 0x0000000000403ae2 in CClientMain::mProcessCmd (this=0x7fff6c554930)
at /home/szheng/gpuprojs/CuProjs/CuRecon/Proj/Thread/ClientMain.cpp:106
#5 0x0000000000403e66 in CClientMain::DoIt (this=0x7fff6c554930)
at /home/szheng/gpuprojs/CuProjs/CuRecon/Proj/Thread/ClientMain.cpp:65
#6 0x000000000040907d in main (argc=1, argv=0x7fff6c554a48)
at /home/szheng/gpuprojs/CuProjs/CuRecon/Proj/Gpurecon.cpp:42