We are building a large Fortran code base that is GPU-accelerated with OpenMP target offloading. It works with recent gcc / gfortran. I am in the process of “adapting” the code so that it also compiles with the NVIDIA compiler, but I am struggling with lots of internal compiler errors. Here is a reduced example of code that refuses to compile. The setup is the following:
% cat /etc/redhat-release
Rocky Linux release 9.4 (Blue Onyx)
% nvfortran --version
nvfortran 24.7-0 64-bit target on x86-64 Linux -tp znver2
NVIDIA Compilers and Tools
Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
% nvfortran -mp=gpu -c test.f90
NVFORTRAN-F-0000-Internal compiler error. child tinfo should have been created at outlining function for host 324 (test.f90: 19)
NVFORTRAN/x86-64 Linux 24.7-0: compilation aborted
And the code is:
!> Reduced reproducer: for every i3 in lb..ub, store
!> max(mem(1,1,1,i3,ii), smallr) into prim(1,1,1,i3,ii).
!> Only index 1 of the three leading dimensions is touched.
!>
!> Arguments:
!>   mem  - rank-5 assumed-shape array; only read in this routine
!>   prim - rank-5 assumed-shape array; written at (1,1,1,i3,ii)
!>   lb   - first value of the loop index i3 (4th dimension)
!>   ub   - last value of the loop index i3 (4th dimension)
!>   ii   - fixed index into the 5th dimension
!>
!> The dummies are assumed-shape, so a caller needs an explicit interface
!> for this external subroutine.
!> NOTE(review): there is no `implicit none` and no `intent` on the dummies;
!> every name used below is nevertheless declared explicitly.
subroutine ctoprim(mem,prim,lb,ub,ii)
! Mark this subroutine as an OpenMP device routine (callable from target regions).
!$omp declare target
real, dimension(:,:,:,:,:) :: mem, prim
integer:: i3, ii, lb, ub
! Lower bound applied to the copied value.
real, parameter :: smallr=1e-6
! Iterations write distinct prim(1,1,1,i3,ii) elements; the loop index i3 is
! not listed in the clause and is private by OpenMP's rules for loop indices.
!$omp parallel do shared(mem,prim,ii,lb,ub)
do i3 = lb,ub
prim(1,1,1,i3,ii) = max(mem(1,1,1,i3,ii),smallr) ! density -> density
end do
end subroutine
!> Reduced reproducer: for every i3 in lb..ub, subtract the element at i3-1
!> from the element at i3 along the 4th dimension of prim, in place, at
!> position (1,1,1,:,ii).
!>
!> Arguments:
!>   prim - rank-5 assumed-shape array; read and written at (1,1,1,i3,ii)
!>   ii   - fixed index into the 5th dimension
!>   lb   - first value of the loop index i3 (4th dimension)
!>   ub   - last value of the loop index i3 (4th dimension)
!>
!> The dummy is assumed-shape, so a caller needs an explicit interface for
!> this external subroutine, and its lower bounds are 1 inside this routine.
!> NOTE(review): the read of prim(1,1,1,i3-1,ii) therefore requires lb >= 2;
!> this is not checked here - confirm against the callers.
!> NOTE(review): there is no `implicit none` and no `intent` on the dummies;
!> every name used below is nevertheless declared explicitly.
subroutine slopes(prim,ii,lb,ub)
! Mark this subroutine as an OpenMP device routine (callable from target regions).
!$omp declare target
integer :: i3, ii, lb, ub
real, dimension(:,:,:,:,:) :: prim
! NOTE(review): iteration i3 reads prim(1,1,1,i3-1,ii), which iteration i3-1
! writes whenever i3-1 >= lb. Under `parallel do` the result for ub > lb thus
! depends on thread scheduling. Presumably an artifact of reducing the
! original code (e.g. a separate output array was dropped) - TODO confirm.
!$omp parallel do
do i3=lb,ub
prim(1,1,1,i3,ii) = prim(1,1,1,i3,ii) - prim(1,1,1,i3-1,ii)
enddo
end subroutine