Dear All,
Could you let me know whether the “managed” attribute should be declared on dummy arguments?
I’d like to use “unified memory”, but I am not sure whether my source code is correct. Could you please check it?
Here is my source code.
module gpu_kernel
  use cudafor
  implicit none
contains
  !> Device kernel: c(i) = a(i) * scalar * b(i) for i = 1..n,
  !> where scalar = 0.5 * a(1).
  !>
  !> Each thread starts at its global 1-based index and advances by the
  !> total number of launched threads (blockDim%x * gridDim%x), so any
  !> launch configuration covers all n elements.
  !>
  !> n : number of elements (passed by value)
  !> a : input array of length n
  !> b : input array of length n
  !> c : output array of length n
  !>
  !> NOTE(review): the array dummies keep the `managed` attribute exactly as
  !> originally written. Whether `managed` is required (or `device` / no
  !> attribute is preferable) on kernel dummy arguments should be confirmed
  !> against the CUDA Fortran Programming Guide for the compiler in use.
  attributes(global) subroutine stream_triad(n,a,b,c)
    integer(kind=4),value :: n
    real(kind=8),dimension(n),intent(in),managed :: a,b
    real(kind=8),dimension(n),intent(inout),managed :: c
    integer(kind=4) :: i,idx,stride
    real(kind=8) :: scalar

    ! Nothing to do for an empty problem; also avoids reading a(1) out of bounds.
    if (n < 1) return

    scalar = 0.5d0 * a(1)

    ! Global 1-based index of this thread.
    idx = threadIdx%x + (blockIdx%x - 1) * blockDim%x
    ! Total number of threads in the grid, used as the loop stride.
    stride = blockDim%x * gridDim%x

    ! The loop bounds already guarantee i <= n, so no extra test is needed.
    do i = idx,n,stride
      c(i) = a(i) * scalar * b(i)
    end do
  end subroutine stream_triad
end module gpu_kernel
!> Host driver: allocates three managed arrays, initializes them on the
!> host, launches stream_triad on the device, waits for completion, and
!> releases the arrays.
program unified_memory_test
  use cudafor
  use gpu_kernel
  implicit none
  integer(kind=4),parameter :: n = 55000000
  integer(kind=4),parameter :: nthreads = 128
  integer(kind=4) :: i
  integer(kind=4) :: nblocks
  integer :: istat
  real(kind=8),dimension(:),allocatable,managed :: a,b,c

  ! Round up so that nblocks * nthreads >= n.
  nblocks = (n + nthreads - 1) / nthreads

  allocate(a(n))
  allocate(b(n))
  allocate(c(n))

  ! Element-wise initialization on the host, kept as an explicit loop.
  do i = 1, n
    a(i) = 2.0d0
    b(i) = 0.5d0
    c(i) = 0.0d0
  end do

  call stream_triad<<<nblocks,nthreads>>>(n,a,b,c)

  ! Report launch failures (e.g. an invalid launch configuration).
  istat = cudaGetLastError()
  if (istat /= cudaSuccess) then
    write(*,*) 'stream_triad launch failed: ', trim(cudaGetErrorString(istat))
    error stop 1
  end if

  ! cudaDeviceSynchronize is a function: its status must be captured.
  ! The host waits here for the kernel to finish before the arrays are freed.
  istat = cudaDeviceSynchronize()
  if (istat /= cudaSuccess) then
    write(*,*) 'cudaDeviceSynchronize failed: ', trim(cudaGetErrorString(istat))
    error stop 1
  end if

  deallocate(a)
  deallocate(b)
  deallocate(c)
end program unified_memory_test