Thank you, Mat. I tried to add the scales x, y, z and v to the ‘private’ clause, the code indeed can be parallelized, but the result is wrong and I also obtained
116, Reference argument passing prevents parallelization: gy
Reference argument passing prevents parallelization: gx
Reference argument passing prevents parallelization: dep
Reference argument passing prevents parallelization: z
Reference argument passing prevents parallelization: y
Reference argument passing prevents parallelization: x
Reference argument passing prevents parallelization: v
Reference argument passing prevents parallelization: gz
Reference argument passing prevents parallelization: u2
Reference argument passing prevents parallelization: u1
Reference argument passing prevents parallelization: t7
The full code is attached here.
Main.f90
module para
real*8,parameter :: re=6371.d0, mu=4.41d10, lambda=7.53d10
real*8 :: v, z
real*8 :: pi
integer :: nf, nob
type station
real*8 lon, lat, ux, uy, uz
end type
type fault
real*8 lat, lon, depth, slip, rake, strike, dip, length, width
end type
end module
program main
use para
use openacc
implicit none
integer :: i,j,k,n,m
type(fault),target,allocatable :: fsm(:)
type(station),target,allocatable :: obs(:)
real*8 x,y,dep,u1,u2,u3,gx,gy,gz,sum1,sum2,sum3
real*8 lons,lats,lonf,latf
real*8 t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15
real :: start,finish
!$acc routine(indisp) seq
call CPU_TIME(start)
pi=4.d0*datan(1.d0)
v=(lambda+mu)/(lambda+2.d0*mu)
open(unit=1,file="fault.dat",status='old',err=101)
read(1,*)
read(1,*) nf
allocate(fsm(nf))
do i=1,nf
read(1,*) fsm(i)
enddo
close(1)
open(unit=2,file="observation point.dat",status='old',err=102)
read(2,*)
k=0
30 read(2,*,end=40)
k=k+1
goto 30
40 continue
nob=k
allocate(obs(nob))
rewind(2)
read(2,*)
do i=1,nob
read(2,*) obs(i)%lat, obs(i)%lon
enddo
close(2)
!$acc data copy(obs) copyin(re,fsm)
!$acc kernels loop gang vector private(latf,lonf,lats,lons,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,u1,u2,dep,gx,gy,gz,v,x,y,z) reduction(+:sum1,sum2,sum3)
do i=1,nob
sum1 = 0.d0
sum2 = 0.d0
sum3 = 0.d0
lats = obs(i)%lat*pi/180.d0
lons = obs(i)%lon*pi/180.d0
do j=1,nf
latf = fsm(j)%lat*pi/180.d0
lonf = fsm(j)%lon*pi/180.d0
t6 = dsin(lats)*dsin(latf)+dcos(lats)*dcos(latf)*dcos(lons-lonf)
if(lons.lt.lonf) then
t6=-dacos(t6)
else
t6=dacos(t6)
endif
t3 = t6*re
t12 = dcos(lats)*dsin(lons-lonf)/dsin(t6)
t13 = (dcos(latf)*dsin(lats)-dsin(latf)*dcos(lats)*dcos(lons-lonf))/dsin(t6)
t1 = t3*t12
t2 = t3*t13
t8 = fsm(j)%strike*pi/180.d0
t7 = fsm(j)%dip*pi/180.d0
t4 = t1*dsin(t8)+t2*dcos(t8)
t5 = -t1*dcos(t8)+t2*dsin(t8)
x = t4+0.5d0*fsm(j)%length
y = t5+0.5d0*fsm(j)%width*dcos(t7)
dep = fsm(j)%depth+0.5d0*fsm(j)%width*dsin(t7)
t9 = fsm(j)%rake*pi/180.d0
u1 = fsm(j)%slip*dcos(t9)
u2 = fsm(j)%slip*dsin(t9)
call indisp(u1,u2,0.d0,v,fsm(j)%length,fsm(j)%width,t7,dep,x,y,z,gx,gy,gz)
t10 = gx*dsin(t8)-gy*dcos(t8)
t11 = gx*dcos(t8)+gy*dsin(t8)
sum1 = sum1+t10
sum2 = sum2+t11
sum3 = sum3+gz
enddo
obs(i)%ux = sum1
obs(i)%uy = sum2
obs(i)%uz = sum3
enddo
!$acc end data
open(22,file="displacement_Okada.dat")
write(22,'(A)') " lon. lat. Ue(m), Un(m), Uz(m)"
do k=1,nob
write(22,55) obs(k)
enddo
close(22)
55 format(2f12.7,1x,3E15.5)
deallocate(fsm)
deallocate(obs)
call CPU_TIME(finish)
write(*,*) 'it takes',finish-start,'s'
goto 103
101 write(*,*) "fault model file is not found !"
goto 103
102 write(*,*) "observation point file is not found !"
103 continue
end program
The subroutines in fortran77 are
subroutine indisp(u1,u2,u3,alpha,L,W,dip,c,x,y,z,ux,uy,uz)
!$acc routine seq
implicit none
integer*4 i,j
real*8 u1,u2,u3,alpha,L,W,dip,c,x,y,z,ux,uy,uz,
. uas(3),uas_v(3),ubs(3),ucs(3),
. uad(3),uad_v(3),ubd(3),ucd(3),
. uat(3),uat_v(3),ubt(3),uct(3),
. fas(3,4),fas_v(3,4),fbs(3,4),fcs(3,4),
. fad(3,4),fad_v(3,4),fbd(3,4),fcd(3,4),
. fat(3,4),fat_v(3,4),fbt(3,4),fct(3,4),
. d,p,kesai,ita,pi,
. temp1(3),temp2(3),temp3(3),
. temp4(3),temp5(3),temp6(3),
. temp7(3),temp8(3),temp9(3),
. uxs,uys,uzs,
. uxd,uyd,uzd,
. uxt,uyt,uzt
!$acc routine(defor) seq
d=c-z
p=y*dcos(dip)+d*dsin(dip)
kesai=x
ita=p
call defor(c,kesai,ita,z,y,dip,alpha,temp1,temp2,temp3,temp4,
. temp5,temp6,temp7,temp8,temp9)
do i=1,3
fas(i,1)=temp1(i)
fbs(i,1)=temp2(i)
fcs(i,1)=temp3(i)
fad(i,1)=temp4(i)
fbd(i,1)=temp5(i)
fcd(i,1)=temp6(i)
fat(i,1)=temp7(i)
fbt(i,1)=temp8(i)
fct(i,1)=temp9(i)
enddo
kesai=x
ita=p-W
call defor(c,kesai,ita,z,y,dip,alpha,temp1,temp2,temp3,temp4,
. temp5,temp6,temp7,temp8,temp9)
do i=1,3
fas(i,2)=temp1(i)
fbs(i,2)=temp2(i)
fcs(i,2)=temp3(i)
fad(i,2)=temp4(i)
fbd(i,2)=temp5(i)
fcd(i,2)=temp6(i)
fat(i,2)=temp7(i)
fbt(i,2)=temp8(i)
fct(i,2)=temp9(i)
enddo
kesai=x-L
ita=p
call defor(c,kesai,ita,z,y,dip,alpha,temp1,temp2,temp3,temp4,
. temp5,temp6,temp7,temp8,temp9)
do i=1,3
fas(i,3)=temp1(i)
fbs(i,3)=temp2(i)
fcs(i,3)=temp3(i)
fad(i,3)=temp4(i)
fbd(i,3)=temp5(i)
fcd(i,3)=temp6(i)
fat(i,3)=temp7(i)
fbt(i,3)=temp8(i)
fct(i,3)=temp9(i)
enddo
kesai=x-L
ita=p-W
call defor(c,kesai,ita,z,y,dip,alpha,temp1,temp2,temp3,temp4,
. temp5,temp6,temp7,temp8,temp9)
do i=1,3
fas(i,4)=temp1(i)
fbs(i,4)=temp2(i)
fcs(i,4)=temp3(i)
fad(i,4)=temp4(i)
fbd(i,4)=temp5(i)
fcd(i,4)=temp6(i)
fat(i,4)=temp7(i)
fbt(i,4)=temp8(i)
fct(i,4)=temp9(i)
enddo
d=c+z
p=y*dcos(dip)+d*dsin(dip)
kesai=x
ita=p
call defor(c,kesai,ita,-z,y,dip,alpha,temp1,temp2,temp3,temp4,
. temp5,temp6,temp7,temp8,temp9)
do i=1,3
fas_v(i,1)=temp1(i)
fad_v(i,1)=temp4(i)
fat_v(i,1)=temp7(i)
enddo
kesai=x
ita=p-W
call defor(c,kesai,ita,-z,y,dip,alpha,temp1,temp2,temp3,temp4,
. temp5,temp6,temp7,temp8,temp9)
do i=1,3
fas_v(i,2)=temp1(i)
fad_v(i,2)=temp4(i)
fat_v(i,2)=temp7(i)
enddo
kesai=x-L
ita=p
call defor(c,kesai,ita,-z,y,dip,alpha,temp1,temp2,temp3,temp4,
. temp5,temp6,temp7,temp8,temp9)
do i=1,3
fas_v(i,3)=temp1(i)
fad_v(i,3)=temp4(i)
fat_v(i,3)=temp7(i)
enddo
kesai=x-L
ita=p-W
call defor(c,kesai,ita,-z,y,dip,alpha,temp1,temp2,temp3,temp4,
. temp5,temp6,temp7,temp8,temp9)
do i=1,3
fas_v(i,4)=temp1(i)
fad_v(i,4)=temp4(i)
fat_v(i,4)=temp7(i)
enddo
do i=1,3
uas(i)=0.d0
uas_v(i)=0.d0
ubs(i)=0.d0
ucs(i)=0.d0
uad(i)=0.d0
uad_v(i)=0.d0
ubd(i)=0.d0
ucd(i)=0.d0
uat(i)=0.d0
uat_v(i)=0.d0
ubt(i)=0.d0
uct(i)=0.d0
enddo
do i=1,3
uas(i)=fas(i,1)-fas(i,2)-fas(i,3)+fas(i,4)
uas_v(i)=fas_v(i,1)-fas_v(i,2)-fas_v(i,3)+fas_v(i,4)
ubs(i)=fbs(i,1)-fbs(i,2)-fbs(i,3)+fbs(i,4)
ucs(i)=fcs(i,1)-fcs(i,2)-fcs(i,3)+fcs(i,4)
uad(i)=fad(i,1)-fad(i,2)-fad(i,3)+fad(i,4)
uad_v(i)=fad_v(i,1)-fad_v(i,2)-fad_v(i,3)+fad_v(i,4)
ubd(i)=fbd(i,1)-fbd(i,2)-fbd(i,3)+fbd(i,4)
ucd(i)=fcd(i,1)-fcd(i,2)-fcd(i,3)+fcd(i,4)
uat(i)=fat(i,1)-fat(i,2)-fat(i,3)+fat(i,4)
uat_v(i)=fat_v(i,1)-fat_v(i,2)-fat_v(i,3)+fat_v(i,4)
ubt(i)=fbt(i,1)-fbt(i,2)-fbt(i,3)+fbt(i,4)
uct(i)=fct(i,1)-fct(i,2)-fct(i,3)+fct(i,4)
enddo
pi=4.d0*datan(1.d0)
uxs=u1*(uas(1)-uas_v(1)+ubs(1)+z*ucs(1))/2.d0/pi
uys=u1*((uas(2)-uas_v(2)+ubs(2)+z*ucs(2))*dcos(dip)-
. (uas(3)-uas_v(3)+ubs(3)+z*ucs(3))*dsin(dip))/2.d0/pi
uzs=u1*((uas(2)-uas_v(2)+ubs(2)-z*ucs(2))*dsin(dip)+
. (uas(3)-uas_v(3)+ubs(3)-z*ucs(3))*dcos(dip))/2.d0/pi
uxd=u2*(uad(1)-uad_v(1)+ubd(1)+z*ucd(1))/2.d0/pi
uyd=u2*((uad(2)-uad_v(2)+ubd(2)+z*ucd(2))*dcos(dip)-
. (uad(3)-uad_v(3)+ubd(3)+z*ucd(3))*dsin(dip))/2.d0/pi
uzd=u2*((uad(2)-uad_v(2)+ubd(2)-z*ucd(2))*dsin(dip)+
. (uad(3)-uad_v(3)+ubd(3)-z*ucd(3))*dcos(dip))/2.d0/pi
uxt=u3*(uat(1)-uat_v(1)+ubt(1)+z*uct(1))/2.d0/pi
uyt=u3*((uat(2)-uat_v(2)+ubt(2)+z*uct(2))*dcos(dip)-
. (uat(3)-uat_v(3)+ubt(3)+z*uct(3))*dsin(dip))/2.d0/pi
uzt=u3*((uat(2)-uat_v(2)+ubt(2)-z*uct(2))*dsin(dip)+
. (uat(3)-uat_v(3)+ubt(3)-z*uct(3))*dcos(dip))/2.d0/pi
ux=uxs+uxd+uxt
uy=uys+uyd+uyt
uz=uzs+uzd+uzt
end subroutine
subroutine defor(c,kesai,ita,z,y,dip,alpha,fas,fbs,fcs,
. fad,fbd,fcd,fat,fbt,fct)
!$acc routine seq
implicit none
real*8 c,kesai,ita,z,y,dip,alpha,fas(3),fas_v(3),fbs(3),fcs(3),
. fad(3),fad_v(3),fbd(3),fcd(3),
. fat(3),fat_v(3),fbt(3),fct(3),
. d,p,q,r,yy,dd,cc,theta,
. x,xx,I1,I2,I3,I4,y11,y32,z32,x11,x32,h,temp1,temp2,temp3
real*8 sheld
parameter(sheld=1.d-25)
d=c-z
p=y*dcos(dip)+d*dsin(dip)
q=y*dsin(dip)-d*dcos(dip)
r=dsqrt(kesai**2+ita**2+q**2)
yy=ita*dcos(dip)+q*dsin(dip)
dd=ita*dsin(dip)-q*dcos(dip)
cc=dd+z
if(dabs(q).lt.sheld) then
theta=0.d0
else
theta=datan(kesai*ita/q/r)
endif
xx=dsqrt(kesai**2+q**2)
if(dabs(dcos(dip)).lt.sheld) then
if(dabs(r+ita).lt.sheld) then
I3=0.5d0*(ita/(r+dd)+yy*q/((r+dd)**2)+dlog(r-ita))
else
I3=0.5d0*(ita/(r+dd)+yy*q/((r+dd)**2)-dlog(r+ita))
endif
if(abs(kesai).lt.(10.d0**(-16))) then
I4=0.d0
else
I4=0.5d0*kesai*yy/((r+dd)**2)
endif
else
if(dabs(r+ita).lt.sheld) then
I3=yy/dcos(dip)/(r+dd)-(-dlog(r-ita)-dsin(dip)*dlog(r+dd))
. /dcos(dip)/dcos(dip)
else
I3=yy/dcos(dip)/(r+dd)-(dlog(r+ita)-dsin(dip)*dlog(r+dd))
. /dcos(dip)/dcos(dip)
endif
if(abs(kesai).lt.(10.d0**(-16))) then
I4=0.d0
else
I4=dsin(dip)*kesai/dcos(dip)/(r+dd)+2.d0*
. datan((ita*(xx+q*dcos(dip))+xx*(r+xx)*dsin(dip))/kesai/(r+xx)
. /dcos(dip))/dcos(dip)/dcos(dip)
endif
endif
I1=-kesai/(r+dd)*dcos(dip)-I4*dsin(dip)
I2=dlog(r+dd)+I3*dsin(dip)
if(dabs(r+kesai).lt.sheld) then
x11=0.d0
x32=0.d0
else
x11=1.d0/r/(r+kesai)
x32=(2.d0*r+kesai)/(r**3)/((r+kesai)**2)
endif
if(dabs(r+ita).lt.sheld) then
y11=0.d0
y32=0.d0
else
y11=1.d0/r/(r+ita)
y32=(2.d0*r+ita)/(r**3)/((r+ita)**2)
endif
h=q*dcos(dip)-z
z32=dsin(dip)/(r**3)-h*y32
fas(1)=(theta+alpha*kesai*q*y11)/2.d0
fas(2)=alpha*q/2.d0/r
if(dabs(r+ita).lt.sheld) then
fas(3)=-(1.d0-alpha)*dlog(r-ita)/2.d0-alpha*q*q*y11/2.d0
else
fas(3)=(1.d0-alpha)*dlog(r+ita)/2.d0-alpha*q*q*y11/2.d0
endif
fbs(1)=-kesai*q*y11-theta-(1.d0-alpha)/alpha*I1*dsin(dip)
fbs(2)=-q/r+(1.d0-alpha)/alpha*yy*dsin(dip)/(r+dd)
fbs(3)=q*q*y11-(1.d0-alpha)/alpha*I2*dsin(dip)
fcs(1)=(1.d0-alpha)*kesai*y11*dcos(dip)-alpha*kesai*q*z32
fcs(2)=(1.d0-alpha)*(dcos(dip)/r+2.d0*q*y11*dsin(dip))
. -alpha*cc*q/(r**3)
fcs(3)=(1.d0-alpha)*q*y11*dcos(dip)
. -alpha*(cc*ita/(r**3)-z*y11+kesai**2*z32)
fad(1)=alpha*q/2.d0/r
fad(2)=(theta+alpha*ita*q*x11)/2.d0
if(dabs(r+kesai).lt.sheld) then
fad(3)=-(1.d0-alpha)*dlog(r-kesai)/2.d0-alpha*q*q*x11/2.d0
else
fad(3)=(1.d0-alpha)*dlog(r+kesai)/2.d0-alpha*q*q*x11/2.d0
endif
fbd(1)=-q/r+(1.d0-alpha)/alpha*I3*dsin(dip)*dcos(dip)
fbd(2)=-ita*q*x11-theta-(1.d0-alpha)/alpha*kesai*dsin(dip)*
. dcos(dip)/(r+dd)
fbd(3)=q*q*x11+(1.d0-alpha)/alpha*I4*dsin(dip)*dcos(dip)
fcd(1)=(1.d0-alpha)*dcos(dip)/r-q*y11*dsin(dip)-alpha*cc*q/(r**3)
fcd(2)=(1.d0-alpha)*yy*x11-alpha*cc*ita*q*x32
fcd(3)=-dd*x11-kesai*y11*dsin(dip)-alpha*cc*(x11-q*q*x32)
if(dabs(r+ita).lt.sheld) then
fat(1)=(1.d0-alpha)*dlog(r-ita)/2.d0-alpha*q*q*y11/2.d0
else
fat(1)=-(1.d0-alpha)*dlog(r+ita)/2.d0-alpha*q*q*y11/2.d0
endif
if(dabs(r+kesai).lt.sheld) then
fat(2)=(1.d0-alpha)*dlog(r-kesai)/2.d0-alpha*q*q*x11/2.d0
else
fat(2)=-(1.d0-alpha)*dlog(r+kesai)/2.d0-alpha*q*q*x11/2.d0
endif
fat(3)=(theta-alpha*q*(ita*x11+kesai*y11))/2.d0
fbt(1)=q*q*y11-(1.d0-alpha)/alpha*I3*dsin(dip)*dsin(dip)
fbt(2)=q*q*x11+(1.d0-alpha)/alpha*kesai*dsin(dip)*
. dsin(dip)/(r+dd)
fbt(3)=q*(ita*x11+kesai*y11)-theta-(1.d0-alpha)/alpha*I4*
. dsin(dip)*dsin(dip)
fct(1)=-(1.d0-alpha)*(dsin(dip)/r+q*Y11*dcos(dip))
. -alpha*(z*y11-q*q*z32)
fct(2)=(1.d0-alpha)*2.d0*kesai*y11*dsin(dip)+dd*x11
. -alpha*cc*(x11-q*q*x32)
fct(3)=(1.d0-alpha)*(yy*x11+kesai*y11*dcos(dip))
. +alpha*q*(cc*ita*x32+kesai*z32)
end subroutine