I have a very strange problem. I am running the code below (omptst.cuf) with:
export OMP_NUM_THREADS=3
I have 3 NVIDIA graphics cards, so I am attaching one card to each openMP thread.
I am compiling the code with:
pgfortran omptst.cuf -mp
Now, if I run the code as it stands, the code hangs - it tells me that all three cards have been initialized - but it just sits there.
However, if I comment out the call to curk4 with argument Fdev (see my comments in the code), the code finishes almost instantaneously, as it should, since the code doesn’t actually do anything. Notice, though, that there is a return statement before this call, so commenting out the call should make no difference at all!
Anyone got any idea what’s going on? Is this me, or a compiler bug?
Rob.
! Minimal reproduction module. It holds a routine that takes a device array
! (curk4) and the wrapper that the main program calls from inside its OpenMP
! loop (omptst).
! NOTE(review): the kind parameter `gpu` comes from prec_mod, which is not
! shown alongside this module.
module curk4_mod
use cudafor
implicit none
contains
! Kernel subroutines:
! curk4 only prints a message. It has no attributes(global) prefix, so as
! written it is an ordinary routine whose dummy argument is a 2-element
! device array that it never reads.
subroutine curk4( Fdev )
use prec_mod
implicit none
real( gpu ), device, intent(in) :: Fdev(2)
print*,'Dont even bother to call a kernel function....'
end subroutine curk4
! OMP wrapper:
! omptst takes a 2-element array F, declares a local 2-element device array
! Fdev, and returns immediately. F, iflag and idev are never used.
subroutine omptst( F )
use prec_mod
use cudafor
implicit none
real (gpu) :: F (2)
! Local device array; the unreachable call below is its only reference.
! NOTE(review): presumably storage for a local device array is set up on
! entry to the routine, i.e. before the return statement executes, and may be
! dropped entirely when nothing references it. That would explain why removing
! the call changes runtime behaviour - confirm against the compiler docs.
real (gpu), device :: Fdev(2)
integer :: iflag,idev
! Unconditional early exit: nothing below this line is executed.
return
!-----------------------
! If I comment out this next line, the code finishes.
! If I leave it in, the code hangs - even though there is a return
! statement above!
!-----------------------
call curk4( Fdev )
end subroutine
end module curk4_mod
! Driver for the reproduction case.
! A work-shared OpenMP loop runs over device numbers 0..2. Each iteration
! selects that CUDA device for the executing thread and calls omptst on a
! thread-private copy of F. After the parallel region the initial thread
! synchronizes with its device and reports completion.
program wrapper
use cudafor
use prec_mod
use curk4_mod
implicit none
integer :: i
integer :: iflag
! omptst declares its dummy argument as real(gpu); use the same kind here so
! that actual and dummy arguments agree whatever prec_mod sets gpu to.
real(gpu) :: F(2), F2(2)

! Give F a defined value before each thread copies it into its private F2.
F = 0.0_gpu

!$OMP PARALLEL PRIVATE(i,F2,iflag) SHARED(F)
!$OMP DO
do i=0,2
  iflag = cudaSetDevice(i)
  ! Report a failed device selection instead of silently carrying on; skip
  ! the work for that device (cycle is permitted inside a work-shared loop).
  if (iflag /= cudaSuccess) then
    print*,'cudaSetDevice failed for device ',i,' with error code ',iflag
    cycle
  end if
  print*,'Device ',i,' set'
  F2 = F
  call omptst( F2 )
enddo
!$OMP END DO
!$OMP END PARALLEL
! NOTE(review): this synchronizes only the device current to the calling
! thread. Newer CUDA runtimes mark cudaThreadSynchronize as deprecated in
! favour of cudaDeviceSynchronize; it is kept for toolchain compatibility.
iflag = cudaThreadSynchronize()
if (iflag /= cudaSuccess) then
  print*,'cudaThreadSynchronize failed with error code ',iflag
end if
print*,'Finished.'
end program wrapper