I am trying to run the following Fortran code:
! Multi-GPU driver: transform every column guv(1:lgk,iv) with rlft3i,
! distributing the columns round-robin over ngpus devices.

! Step 1: create an (uninitialised) device copy of guv on every GPU.
do thrdnm = 0, ngpus-1
   ! thrdnm already lies in 0..ngpus-1, so it is the device number as-is.
   call acc_set_device_num(thrdnm, acc_device_nvidia)
   !$acc enter data create(guv)
end do

! Step 2: column iv is processed on device mod(iv-1, ngpus).
! NOTE(review): cuFFT plans are, per the cuFFT documentation, tied to the
! device that was current when the plan was created. If plan_forward /
! plan_backward (used inside rlft3i) are created only once, executing them
! after switching devices would fail -- confirm that one plan is created per
! device, after selecting that device.
do iv = 1, natv
   thrdnm = mod(iv-1, ngpus)
   call acc_set_device_num(thrdnm, acc_device_nvidia)

   ! Push this column to the selected device, transform it there,
   ! then pull the result back to the host.
   !$acc update device(guv(1:lgk,iv))
   ! NOTE(review): rlft3i is declared with three dummy arguments
   ! (fdata, ng3, key) but only two are passed here -- TODO confirm which
   ! key (1 or -1) is intended.
   call rlft3i(guv(1:lgk,iv), ng3)
   !$acc update self(guv(1:lgk,iv))
end do

! Step 3: release the device copy of guv on every GPU.
do thrdnm = 0, ngpus-1
   call acc_set_device_num(thrdnm, acc_device_nvidia)
   !$acc exit data delete(guv)
end do
where the rlft3i subroutine calls the cuFFT library:
!> 3D real <-> complex FFT of device-resident data through cuFFT.
!!
!! The transform is executed out of place into a device scratch buffer
!! (work) and the result is then copied back into fdata on the device.
!!
!! @param fdata  Data to transform, (ng3(1)+2)*ng3(2)*ng3(3) reals. It must
!!               already be present on the current OpenACC device
!!               (present(fdata)); it is overwritten with the result.
!! @param ng3    Grid dimensions.
!! @param key     1 : cufftExecR2C with plan_forward; the result is divided
!!                    by ngr = ng3(1)*ng3(2)*ng3(3).
!!               -1 : cufftExecC2R with plan_backward; no scaling.
!!               Any other value leaves fdata untouched.
!!
!! The routine stops the program if cuFFT reports a non-zero status, because
!! the scratch buffer contents are then not a valid transform.
!!
!! NOTE(review): plan_forward / plan_backward are not declared in this
!! routine and are presumably module variables. Per the cuFFT documentation
!! a plan is tied to the device that was current when it was created, so in
!! a multi-GPU run a plan must exist for the device selected by the caller
!! -- confirm how the plans are created.
subroutine rlft3i (fdata, ng3, key)
  use cufft
  use openacc
  use, intrinsic :: iso_fortran_env, only: error_unit
  implicit none
  integer, intent(in) :: ng3(3)
  integer, intent(in) :: key
  real(4), dimension((ng3(1)+2)*ng3(2)*ng3(3)), intent(inout) :: fdata
  integer :: ngr, ngk
  integer(4) :: ierr
  ! cuFFT reports success as status 0.
  integer(4), parameter :: cufft_ok = 0
  ! Scratch buffer; same kind as fdata so the device copy-back is kind-safe
  ! even when default real is promoted by compiler flags.
  real(4), dimension((ng3(1)+2)*ng3(2)*ng3(3)) :: work

  ngr = ng3(1)*ng3(2)*ng3(3)
  ngk = (ng3(1)+2)*ng3(2)*ng3(3)
  ierr = cufft_ok

  select case (key)
  case (1)
    ! Forward transform, normalised by the number of real grid points.
    !$acc data create(work) present(fdata)
    !$acc host_data use_device(fdata,work)
    ierr = cufftExecR2C(plan_forward,fdata,work)
    !$acc end host_data
    if (ierr == cufft_ok) then
      !$acc kernels present(fdata,work)
      fdata(1:ngk) = work(1:ngk)/ngr
      !$acc end kernels
    end if
    !$acc end data

  case (-1)
    ! Backward transform, unnormalised.
    !$acc data create(work) present(fdata)
    !$acc host_data use_device(fdata,work)
    ierr = cufftExecC2R(plan_backward,fdata,work)
    !$acc end host_data
    if (ierr == cufft_ok) then
      !$acc kernels present(fdata,work)
      fdata(1:ngk) = work(1:ngk)
      !$acc end kernels
    end if
    !$acc end data

  case default
    ! Unknown key: nothing to do (kept for backward compatibility).
  end select

  ! The status is checked outside the data regions so that the program never
  ! terminates from inside an OpenACC structured block.
  if (ierr /= cufft_ok) then
    write (error_unit, '(a,i0,a,i0)') &
      'rlft3i: cuFFT execution failed, key = ', key, ', status = ', ierr
    error stop 'rlft3i: cuFFT error'
  end if
end subroutine rlft3i
The problem is that acc_set_device_num does not seem to switch devices for the cuFFT library, and the program fails. However, when I comment out the calls to acc_set_device_num, the code works. Can I somehow switch devices for the cuFFT library from within the cuFFT Fortran interface? If not, could I switch them using some CUDA function?