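Here are two reproducing examples. The first (TOY_NO_ERROR.f90) runs correctly; the second (TOY_ERROR.f90) fails at run time with the error shown after the listings.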
TOY_NO_ERROR.f90
!> Toy reproducer, working variant.
!>
!> For every outer index i, sums -mass(j)*sr*dwdx(:,k,i) over the inner index
!> k = 1..npnl(i) (with j = pair(k,i)) and adds the result to at(:,i). The three
!> components are accumulated in three separate scalar reduction variables.
program toy_no_error
  use openacc
  use, intrinsic :: iso_fortran_env, only: dp => real64
  implicit none

  integer, parameter :: &
    NPmax = 100000, &
    NPNmax = 300, &
    NSD = 3

  integer, allocatable :: npnl(:), pair(:,:)
  real(dp), allocatable :: mass(:), at(:,:), p(:), rho(:), dwdx(:,:,:)
  integer :: ii, i, j, k, err
  real(dp) :: sr, vr(NSD), dat_x, dat_y, dat_z

  allocate(npnl(NPmax), pair(NPNmax,NPmax), mass(NPmax), at(NSD,NPmax), &
           p(NPmax), rho(NPmax), dwdx(NSD,NPNmax,NPmax), stat=err)
  if (err /= 0) then
    print '(A)', ' DYNAMIC ALLOCATION ERROR '
    ! Continuing would dereference unallocated arrays, so abort here.
    error stop 1
  end if

  ! Give every input a defined value before the kernel reads it. Without this
  ! the loop bound npnl(i), the index pair(k,i) and the divisor rho(i)*rho(j)
  ! are all undefined.
  npnl = NPNmax                     ! inner trip count, never exceeds dim 1 of pair
  do i = 1, NPmax
    do k = 1, NPNmax
      ! Wrap around so that every stored index lies in 1..NPmax.
      pair(k,i) = mod(i + k - 1, NPmax) + 1
    end do
  end do
  mass = 1.0_dp
  p = 1.0_dp
  rho = 1.0_dp                      ! must be non-zero: used as a divisor
  dwdx = 1.0_dp
  at = 0.0_dp                       ! accumulated into below

  do ii = 1, 10
    ! Outer loop is distributed over gangs; each gang iteration owns its own
    ! scalar accumulators.
    !$acc parallel loop independent gang private(dat_x, dat_y, dat_z)
    do i = 1, NPmax
      dat_x = 0.0_dp
      dat_y = 0.0_dp
      dat_z = 0.0_dp
      ! Inner loop: one scalar sum-reduction per component.
      !$acc loop independent private(j,sr,vr) reduction(+:dat_x,dat_y,dat_z)
      do k = 1, npnl(i)
        j = pair(k,i)
        sr = (p(i) + p(j)) / (rho(i)*rho(j))
        vr = sr*dwdx(:,k,i)
        dat_x = dat_x - mass(j)*vr(1)
        dat_y = dat_y - mass(j)*vr(2)
        dat_z = dat_z - mass(j)*vr(3)
      end do
      at(1,i) = at(1,i) + dat_x
      at(2,i) = at(2,i) + dat_y
      at(3,i) = at(3,i) + dat_z
    end do
  end do
end program toy_no_error
TOY_ERROR.f90
!> Toy reproducer, failing variant.
!>
!> Performs the same accumulation into at(:,i) as the scalar version of this
!> example, but keeps the NSD components in one small array `dat` and asks for
!> an array-valued reduction on the inner loop. The OpenACC directives are left
!> exactly as reported so that the program still reproduces the issue; only the
!> host-side set-up has been made well-defined.
program toy_error
  use openacc
  use, intrinsic :: iso_fortran_env, only: dp => real64
  implicit none

  integer, parameter :: &
    NPmax = 100000, &
    NPNmax = 300, &
    NSD = 3

  integer, allocatable :: npnl(:), pair(:,:)
  real(dp), allocatable :: mass(:), at(:,:), p(:), rho(:), dwdx(:,:,:)
  integer :: ii, i, j, k, err
  real(dp) :: sr, vr(NSD), dat(NSD)

  allocate(npnl(NPmax), pair(NPNmax,NPmax), mass(NPmax), at(NSD,NPmax), &
           p(NPmax), rho(NPmax), dwdx(NSD,NPNmax,NPmax), stat=err)
  if (err /= 0) then
    print '(A)', ' DYNAMIC ALLOCATION ERROR '
    ! Continuing would dereference unallocated arrays, so abort here.
    error stop 1
  end if

  ! Give every input a defined value before the kernel reads it. Without this
  ! the loop bound npnl(i), the index pair(k,i) and the divisor rho(i)*rho(j)
  ! are all undefined.
  npnl = NPNmax                     ! inner trip count, never exceeds dim 1 of pair
  do i = 1, NPmax
    do k = 1, NPNmax
      ! Wrap around so that every stored index lies in 1..NPmax.
      pair(k,i) = mod(i + k - 1, NPmax) + 1
    end do
  end do
  mass = 1.0_dp
  p = 1.0_dp
  rho = 1.0_dp                      ! must be non-zero: used as a divisor
  dwdx = 1.0_dp
  at = 0.0_dp                       ! accumulated into below

  do ii = 1, 10
    ! Outer loop is distributed over gangs; each gang iteration owns its own
    ! copy of the accumulator array.
    !$acc parallel loop independent gang private(dat)
    do i = 1, NPmax
      dat = 0.0_dp
      ! NOTE(review): the array-valued reduction below is the only difference
      ! from the scalar version of this example, so it is presumably what
      ! triggers the reported cuLaunchKernel failure. Do not rewrite it here.
      !$acc loop independent private(j,sr,vr) reduction(+:dat)
      do k = 1, npnl(i)
        j = pair(k,i)
        sr = (p(i) + p(j)) / (rho(i)*rho(j))
        vr = sr*dwdx(:,k,i)
        dat = dat - mass(j)*vr
      end do
      at(:,i) = at(:,i) + dat
    end do
  end do
end program toy_error
yongcho@yongcho-XPS-8960:~$ nvfortran --version
nvfortran 23.9-0 64-bit target on x86-64 Linux -tp alderlake
NVIDIA Compilers and Tools
Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
yongcho@yongcho-XPS-8960:~$ nvfortran -acc TOY_ERROR.f90
yongcho@yongcho-XPS-8960:~$ ./a.out
Failing in Thread:1
Accelerator Fatal Error: call to cuLaunchKernel returned error 1: Invalid value
File: /home/yongcho/reproducing_example_error.f90
Function: toy:1
Line: 23
Thanks,
Yongsuk