Could you tell me why the CUDA function “cudaEventRecord()” a error message “invalid resource handle” ?
In order to get timing information on echo GPU, I use the CUDA function “cudaEventRecord()”.
However I got a error message "FILE: test.cuf LINE: 72 Error: invalid resource handle ".
When I ran on one GPU, there was no problem.
However when I ran more than two GPUs, the CUDA function “cudaEventRecord()” put the error message.
I suppose it is not possible for me to pass variables with the attribute dimension to the CUDA function “cudaEventRecord()”.
Here is the test program.
#define CHECK(err) _CHECK(err,FILE,LINE)
module EventTest
use cudafor
implicit none
contains
subroutine _CHECK(err, filename, linenum)
integer(kind=4),intent(in) :: err
character(kind=1,len=*), optional :: filename
integer(kind=4), optional :: linenum
if( err /= cudaSuccess ) then
if(present(filename) .and. present(linenum)) then
write(,‘(“FILE:”,1x,a,1x,“LINE:”,1x,i10,1x,“Error:”,1x,a)’),filename,linenum,cudaGetErrorString(err)
else
write(,‘(“Error:”,1x,a)’)cudaGetErrorString(err)
end if
stop
end if
end subroutine _CHECK
attributes(global) subroutine gpu_kernel(n,d_a)
integer,intent(in),value :: n
integer,dimension(n),intent(out),device :: d_a
integer :: idx
idx = threadIdx%x + (blockIdx%x - 1) * blockDim%x
if( ( idx < 1 ) .or. ( idx > n ) ) return
d_a(idx) = idx
end subroutine gpu_kernel
end module EventTest
program EventTestMain
use EventTest
integer,parameter :: n = 10000000
integer,parameter :: tbx = 128
integer :: devnum
integer :: devId
integer,dimension(:,:),pointer,device :: d_a
type(dim3) :: grid,tblock
type(cudaEvent),dimension(:),pointer :: startEvent, stopEvent
tblock = dim3(tbx,1,1)
grid = dim3( (n + tblock%x - 1) / tblock%x ,1,1)
! Number of GPUs
call CHECK ( cudaGetDeviceCount(devnum) )
allocate( d_a(n,0:devnum -1) )
allocate( startEvent(0:devnum -1) )
allocate( stopEvent(0:devnum -1) )
! Create Events
do devId = 0,devnum - 1
call CHECK( cudaEventCreate(startEvent(devId)) )
call CHECK( cudaEventCreate(stopEvent(devId)) )
end do
do devId = 0,devnum - 1
! Start Event
call CHECK( cudaDeviceSynchronize() )
call CHECK( cudaEventRecord(startEvent(devId),0) )
call CHECK( cudaSetDevice(devId) )
call gpu_kernel<<<grid,tblock>>>(n,d_a(:,devId))
! Stop Event
call CHECK( cudaEventRecord(stopEvent(devId),0) )
call CHECK( cudaDeviceSynchronize() )
end do
! Timing information
do devId = 0,devnum - 1
call CHECK( cudaEventElapsedTime(time,startEvent(devId),stopEvent(devId)) )
write(*,‘(“Device Id:”,3x,i2,3x,“Elapsed Time:”,3x,f10.8)’)devId,time
end do
! Destroy Events
do devId = 0,devnum - 1
call CHECK( cudaEventDestroy(startEvent(devId)) )
call CHECK( cudaEventDestroy(stopEvent(devId)) )
end do
deallocate(startEvent)
deallocate(stopEvent)
deallocate(d_a)
end program EventTestMain