Hi All,
I have existing Fortran code (compiled with MPIFORT) which I want to add a CUDA-Fortran extension to (using PGF90). What I’m trying to do is pass a Fortran pointer to CUDA-Fortran, allocate memory on the device, and then pass this back to the original Fortran code.
I’ve used this query to do the same for Fortran/C-CUDA, and now I’m trying it for Fortran-CUDA.
Below is my code, which compiles but errors out with a segmentation fault at h2d
:
memtest.f90
program memtest
use iso_c_binding
integer, allocatable :: a(:)
type(c_ptr) :: pa(:)
integer(c_int) :: n=2
integer i
allocate(a(n))
a=1
write (*,*) "a = "
do i = 1,n
print*,a(i)
enddo
write (*,*) "init:"
call init_device(pa,n)
write (*,*) "h2d:"
call h2d(a,pa,n)
write (*,*) "increment:"
call increment(pa,10,n)
write (*,*) "d2h:"
call d2h(pa,a,n)
write (*,*) "a = "
do i = 1,n
print*,a(i)
enddo
write (*,*) "increment again:"
call increment(pa,10,n)
write (*,*) "d2h:"
call d2h(pa,a,n)
write (*,*) "a = "
do i = 1,n
print*,a(i)
enddo
write (*,*) "free:"
call free_device(pa)
deallocate(a)
end program
fcuda.cuf
module increment_m
contains
attributes(global) subroutine increment_d(a_d, inc, n)
implicit none
integer, value :: inc, n
integer, device :: a_d(n)
integer id
id=threadIdx%x
if (id.le.n) then
a_d(id) = a_d(id) + inc;
endif
end subroutine increment_d
end module increment_m
subroutine increment(pa, inc, n)
use cudafor
use increment_m
implicit none
integer :: inc, n
integer, device :: pa(n)
call increment_d<<<1,n>>>(pa, inc, n);
end subroutine increment
subroutine init_device(pa, n)
use cudafor
use increment_m
implicit none
integer :: n
integer, device, pointer :: pa(:)
allocate(pa(n))
end subroutine init_device
subroutine free_device(pa, n)
use cudafor
use increment_m
implicit none
integer :: n
integer, device, pointer :: pa(:)
deallocate(pa)
end subroutine free_device
subroutine h2d(h, d, n)
use cudafor
use increment_m
integer :: n
integer :: h(n)
integer, device, pointer :: d(:)
d=h
end subroutine h2d
subroutine d2h(d, h, n)
use cudafor
use increment_m
integer :: n
integer :: h(n)
integer, device, pointer :: d(:)
h=d
end subroutine d2h
Makefile
CUDA_INSTALL_PATH=/[NVIDIA_PATH]/20.11/compilers/
MPI_INSTALL_PATH=/[NVIDIA_PATH}/20.11/comm_libs/mpi/
PGF90=$(CUDA_INSTALL_PATH)/bin/pgf90
IFORT90=$(MPI_INSTALL_PATH)/bin/mpifort
CUDALIB=-L/[NVIDIA PATH]/20.11/cuda/10.2/lib64 -lcudart
FFILES=memtest.f90
CUFILES=fcuda.cuf
OBJECTS=memtest.o fcuda.o
EXENAME= memtest
all:
$(IFORT90) -c $(FFILES)
$(PGF90) $(CUDALIB) -c $(CUFILES)
$(IFORT90) $(OBJECTS) -Mcuda $(CUDALIB) -o ${EXENAME}
clean:
rm -f *.o
rm -f *.mod
rm -f ${EXENAME}
Thanks in advance for your help!