Please consider the following simple code, which is a deliberately simplified sketch of the code I’m porting to CUDA Fortran.
module mod_test
  use cudafor
  implicit none

  ! Working precision for every floating-point quantity in this module.
  ! Same kind as the literal 1.0d0, i.e. what the original real*8 declared.
  integer, parameter, private :: dp = kind(1.0d0)

  ! Number of slots in the shared scratch array used by calc; one slot per
  ! thread of a block, so blocks larger than this cannot be served.
  integer, parameter, private :: shared_slots = 32

contains

  !> Evaluate (log(i) + log(j)) / exp(k) / exp(l) in double precision.
  !!
  !! i, j, k, l : integer inputs, passed by value.
  !! Returns    : the expression above as a double-precision value.
  attributes(device) function calc2(i,j,k,l) result(res)
    implicit none
    integer, value :: i,j,k,l
    real(dp) :: res
    real(dp) :: log_sum

    ! Convert to double precision BEFORE calling log/exp. With a default-kind
    ! real(l) the exponential is evaluated in single precision, where
    ! exp(x) overflows to +Inf for x >= 89 (IEEE single), which silently
    ! turns the whole quotient into 0 for most of the l range used by calc.
    log_sum = log(real(i,kind=dp)) + log(real(j,kind=dp))
    res = log_sum/exp(real(k,kind=dp))/exp(real(l,kind=dp))
  end function calc2

  !> Per-thread work routine: sweeps the (i,j,k,l) index space and stores each
  !! calc2 value into this thread's slot of a shared scratch array.
  !!
  !! counter : declared extent of Vettore.
  !! idx     : global 1-based index of the calling thread (only used by the
  !!           commented-out global-memory variant below).
  !! Vettore : device array; not written by the active variant.
  !!
  !! Every iteration overwrites the previous value, so only the last
  !! (i,j,k,l) combination survives in the slot.
  !!
  !! NOTE(review): each thread touches only its own shared slot and never
  !! reads a value written by another thread, so shared memory provides no
  !! data reuse here; the run time is presumably dominated by the log/exp
  !! evaluations in calc2 rather than by the store. The "fast" local-variable
  !! variant is presumably fast because the compiler can discard a
  !! computation whose result is never used -- confirm with a profiler.
  attributes(device) subroutine calc(counter,idx,Vettore)
    implicit none
    integer, value :: counter, idx
    real(dp), dimension(counter), device :: Vettore
    real(dp), dimension(shared_slots), shared :: vettore_shared
    real(dp) :: var   ! only used by the commented-out "fast" variant
    integer :: i,j,k,l, tid

    tid = threadidx%x
    ! The scratch array has shared_slots entries; a thread beyond that would
    ! write out of bounds, so it does no work instead.
    if (tid > shared_slots) return

    do i=1,20
      do j=1,20
        do k=1,12
          do l=1,200
            !var = calc2(i,j,k,l) ! fast solution
            vettore_shared(tid) = calc2(i,j,k,l) ! slow
            !Vettore(idx) = calc2(i,j,k,l) ! slow
          end do
        end do
      end do
    end do
  end subroutine calc

  !> Kernel entry point: derives the global thread index and runs calc for it.
  !!
  !! counter : number of elements in Vettore.
  !! Vettore : device array handed through to calc.
  attributes(global) subroutine kernel_test(counter,Vettore)
    implicit none
    integer, value :: counter
    real(dp), dimension(counter), device :: Vettore
    integer :: idx

    ! 1-based global index: blocks and threads are both numbered from 1.
    idx = (blockidx%x-1)*blockdim%x + threadidx%x
    ! Threads past the end of the array (grid larger than counter) do nothing.
    if (idx > counter) return
    call calc(counter,idx,Vettore)
  end subroutine kernel_test

end module mod_test
!> Host driver: allocates a host/device array pair, launches kernel_test with
!! one thread per element, copies the device array back and prints the
!! elapsed wall-clock time in seconds.
program test
  use cudafor
  use mod_test
  implicit none

  ! Double-precision kind (what the original real*8 declared).
  integer, parameter :: dp = kind(1.0d0)

  integer :: nblocks, nthreads, counter
  integer :: c1, c2, clock_rate, istat
  real(dp), dimension(:), allocatable :: Vettore_host
  real(dp), dimension(:), allocatable, device :: Vettore_dev

  counter = 9216
  nthreads = 32

  ! The grid size below uses integer division, which would silently leave the
  ! tail of the array without a thread if counter were not a multiple of
  ! nthreads. Refuse to run rather than produce a partial result.
  if (mod(counter, nthreads) /= 0) stop 'counter must be a multiple of nthreads'
  nblocks = counter/nthreads

  ! Ask for the tick rate as well: a raw tick difference has no unit.
  ! NOTE(review): the timed region includes allocation, initialisation and
  ! the device-to-host copy, and presumably also the one-time CUDA start-up
  ! cost of the first device operation -- it is not the kernel time alone.
  call system_clock(count=c1, count_rate=clock_rate)

  allocate(Vettore_host(counter),Vettore_dev(counter))
  Vettore_dev = 0.0_dp
  Vettore_host = 0.0_dp

  call kernel_test<<<nblocks,nthreads>>>(counter,Vettore_dev)

  ! A failed launch (bad configuration, no device, ...) is otherwise silent.
  istat = cudaGetLastError()
  if (istat /= cudaSuccess) then
    write(*,*) 'kernel launch failed: ', trim(cudaGetErrorString(istat))
    stop 1
  end if

  ! Wait for the kernel so that errors raised while it runs are reported here.
  istat = cudaDeviceSynchronize()
  if (istat /= cudaSuccess) then
    write(*,*) 'kernel execution failed: ', trim(cudaGetErrorString(istat))
    stop 1
  end if

  Vettore_host = Vettore_dev
  deallocate(Vettore_host,Vettore_dev)

  call system_clock(count=c2)

  if (clock_rate > 0) then
    write(*,*) 'time ', real(c2-c1,kind=dp)/real(clock_rate,kind=dp), ' s'
  else
    ! No usable clock on this system: fall back to the raw tick difference.
    write(*,*) 'time ', c2-c1
  end if
end program test
I have a routine that computes an element of the array Vettore_dev. If I update the device array Vettore_dev directly in the device subroutine calc, the code takes ~4 seconds to execute. What surprises me is that if I use a shared array instead, i.e. vettore_shared, it still takes ~4 seconds to execute.
Why? I expected a much faster program using a shared memory array.
What’s wrong?
Thank you in advance for any comments!