Hi,
I have two questions regarding the code below.
The first question is regarding the copyin clause. In each solver routine call new data is generated by the gen_data(…) method on the host side. This data needs to be copied to the device in each solver call. Is my understanding correct that in the first call to the solver routine memory is allocated on the device and the data is copied from the host to the device. But in the second and later calls no new memory is allocated but the new data generated from gen_data(…) is copied to the device?
The second question is regarding the copyout clause. In the code below the solver routine is called several times from the main loop. At the end of the solver routine I want to update the data on the host. Until now I used the update host(…) directive but I want to make it more visible at the beginning of the data region which data is updated so I inserted a copyout clause. But with the copyout clause the printed data is incorrect. Is the use of copyout correct?
Thank you for your help!
main.f90
program main
use openacc
implicit none
integer, parameter :: n = 5
integer, numdevices
integer, i
real*8 vec(n)
numdevices = acc_get_num_devices(acc_device_nvidia)
if(numdevices.ne.0)then
call acc_set_device_num(mod(0,numdevices),acc_device_nvidia)
endif
vec = 0.0d0
! main loop
!$acc data copyin(vec)
do i=1,10
call solver(i, n, vec)
write(*,*)vec
enddo
!$acc end data
end program main
solver.f90
subroutine solver(it, n, vec)
use MyParams
implicit none
integer it, i, j, n
real*8, vec(n)
! allocate the memory
call alloc_data(n,VecData)
! generate new data
call gen_data(it * 1.0d0,VecData)
write(*,*)"Solver call: ",it
!$acc data copyin(VecData, VecData%vec, VecData%vec%vec1),&
!$acc copyout(vec)
do i=1,1
!$acc kernels present(vec, VecData%vec%vec1)
do j=1,10
vec(j) = VecData%vec%vec1(j)
enddo
!$acc end kernels
enddo
!!$acc update host(vec)
!$acc end data
! free all the data
call free_data(VecData)
end subroutine solver
my_type.f90
module MyData
implicit none
type vecs
real*8, allocatable :: vec1(:)
end type vecs
type Dtype
type(vecs) :: vec
end type Dtype
contains
subroutine alloc_data(x, vdata)
integer x
type(Dtype) :: vdata
allocate( vdata%vec%vec1(x) )
end subroutine alloc_data
subroutine gen_data(d, vdata)
real*8 d
type(Dtype) :: vdata
vdata%vec%vec1 = d
end subroutine gen_data
subroutine free_data(vdata)
type(Dtype) :: vdata
deallocate(vdata%vec%vec1)
end subroutine free_data
end module MyData
module MyParams
use MyData
implicit none
type(Dtype) :: VecData
end module MyParams
makefile
all:
mpif90 -acc -Mcuda -Minfo -ta=tesla my_type.f90 main.f90 solver.f90
clean:
rm *.o *.mod