I would like to combine the OpenMP and CUDA in one program, and try to use the OpenMP to deal with the optimization level. The CUDA was applied to deal with the kernel of the objective function. However, after I compile the code by using commend “pgfortran -mp=allcores test.cuf”. The error occur
PGF90-F-0155-Compiler failed to translate accelerator region (see -Minfo message
s): Unsupported procedure (test.cuf: 1)
PGF90/x86-64 Windows 14.3-0: compilation aborted
Is anyone can kindly help me to troubleshoot? The source code is shown as following:
!------------------------------------------------------------
module parameters
real(kind=8):: a1,a2,a3,a4,a5,a6,b1,b2,b3,b4,b5,b6
real(kind=8):: rpm,u,vis,diam,b,pi,c,err1,err2,p_norm,norm,omega,delta
real(kind=8):: h2,h3,dx,d2x,dy,d2y,dx2,dy2,lr,br
real(kind=8):: pa,prr
integer:: x1,x2,x3,x4,x5,x6,x7,x8,y1,y2 ! recess coord.
integer:: n_x,n_y,n_x1,n_x2 ! recess coord. parameters
real(kind=8)::alpha,dxr,dyr ! recess coord. parameters
integer::nx,ny,ip,im,jp,jm,iter_in,pal_i
!$omp threadprivate(a1,a2,a3,a4,a5,a6,b1,b2,b3,b4,b5,b6)
!$omp threadprivate(rpm,u,vis,diam,b,pi,c,err1,err2,p_norm,norm,omega,delta)
!$omp threadprivate(h2,h3,dx,d2x,dy,d2y,dx2,dy2,lr,br,pa,prr)
!$omp threadprivate(x1,x2,x3,x4,x5,x6,x7,x8,y1,y2)
!$omp threadprivate(n_x,n_y,n_x1,n_x2)
!$omp threadprivate(alpha,dxr,dyr)
!$omp threadprivate(nx,ny,ip,im,jp,jm,iter_in)
!$omp threadprivate(n_x,n_y,n_x1,n_x2)
!$omp threadprivate(alpha,dxr,dyr)
!$omp threadprivate(nx,ny,ip,im,jp,jm,iter_in)
end module parameters
!------------------------------------------------------------
module sor_rb_mod
use parameters
use cudafor
contains
!-------------------------- SOR for GPU --------------------------------
attributes(device) subroutine sor_kernel(i,j,nx,ny,pdev,hdev,dx,dy,omega,&
pddev,prr,x1,x2,x3,x4,x5,x6,x7,x8,y1,y2)
implicit none
real(kind=8) :: pdev(0:nx,0:ny),hdev(0:nx,0:ny), pddev(0:nx,0:ny)
real(kind=8):: a1,a2,a3,a4,a5,a6,b1,b2,b3,b4,b5,b6,h2,h3,prr
real(kind=8), value:: omega,delta,dx,d2x,dy,d2y,dx2,dy2
integer, value :: i,j,nx, ny,ip,im,jp,jm,np,x1,x2,x3,x4,x5,x6,x7,x8,y1,y2
ip=i+1; im=i-1
if (i==nx) then; ip=1; end if
jp=j+1; jm=j-1
dx2=dx**2; dy2=dy**2; d2x=2.0d0*dx; d2y=2.0d0*dy
h2=hdev(i,j)**2; h3=hdev(i,j)**3
b1=(hdev(ip,j)-hdev(im,j))/d2x ! dh/dx
b2=(hdev(i,jp)-hdev(i,jm))/d2y ! dh/dy
b3=(pdev(ip,j)-pdev(im,j))/d2x ! dp/dx
b4=(pdev(i,jp)-pdev(i,jm))/d2y ! dp/dy
b5=(pdev(ip,j)+pdev(im,j)-2.d0*pdev(i,j))/dx2 ! dp2/dx2
b6=(pdev(i,jp)+pdev(i,jm)-2.d0*pdev(i,j))/dy2 ! dp2/dy2
a1=(pdev(i,j)*h3)/(dx2)+(2.0d0*h3*b3+3.0d0*h2*pdev(i,j)*b1)/d2x-(6.0d0*hdev(i,j))/d2x
a2=(pdev(i,j)*h3)/(dx2)-(2.0d0*h3*b3+3.0d0*h2*pdev(i,j)*b1)/d2x+(6.0d0*hdev(i,j))/d2x
a3=(pdev(i,j)*h3)/(dy2)+(2.0d0*h3*b4+3.0d0*h2*pdev(i,j)*b2)/d2y
a4=(pdev(i,j)*h3)/(dy2)-(2.0d0*h3*b4+3.0d0*h2*pdev(i,j)*b2)/d2y
a5=(-2.0d0*pdev(i,j)*h3)/dx2+h3*b5+3.0d0*h2*b1*b3-(2.0d0*pdev(i,j)*h3)/dy2+h3*b6+3.0d0*h2*b4*b2-6.0d0*b1
a6=pdev(i,j)*h3*(b5+b6)+h3*(b3**2+b4**2)+3.0d0*h2*pdev(i,j)*(b1*b3+b2*b4)-6.0d0*hdev(i,j)*b3-6.0d0*pdev(i,j)*b1
delta=-(a1*pddev(ip,j)+a2*pddev(im,j)+a3*pddev(i,jp)+a4*pddev(i,jm)+a6)/a5-pddev(i,j)
pddev(i,j)=pddev(i,j)+omega*delta
if (j>=y1 .and. j<=y2) then
if (x1>x2) then
if (i>=x1) pddev(i,j)=prr
if (i<=x2) pddev(i,j)=prr
else
if (i>=x1 .and. i<=x2) pddev(i,j)=prr
end if
if (i>=x3 .and. i<=x4) pddev(i,j)=prr
if (i>=x5 .and. i<=x6) pddev(i,j)=prr
if (x8<x7) then
if (i>=x7) pddev(i,j)=prr
if (i<=x8) pddev(i,j)=prr
else
if (i>=x7 .and. i<=x8) pddev(i,j)=prr
end if
end if
end subroutine sor_kernel
!-------------------------- Red block --------------------------------
attributes(global) subroutine red_kernel(nx,ny,pdev,hdev,dx,dy,omega,&
pddev,prr,x1,x2,x3,x4,x5,x6,x7,x8,y1,y2)
implicit none
real(kind=8):: pdev(0:nx,0:ny), hdev(0:nx,0:ny), pddev(0:nx,0:ny)
real(kind=8), value:: dx,dy,omega,prr
integer, value :: nx, ny,x1,x2,x3,x4,x5,x6,x7,x8,y1,y2
integer :: i, j
i = (blockidx%x-1) * 16 + threadidx%x
j = (blockidx%y-1) * 16 + threadidx%y
if (mod((i+j),2)==0) then
call sor_kernel(i,j,nx,ny,pdev,hdev,dx,dy,omega,&
pddev,prr,x1,x2,x3,x4,x5,x6,x7,x8,y1,y2)
end if
end subroutine red_kernel
!-------------------------- Black block -------------------------------
attributes(global) subroutine black_kernel(nx,ny,pdev,hdev,dx,dy,omega,&
pddev,prr,x1,x2,x3,x4,x5,x6,x7,x8,y1,y2)
implicit none
real(kind=8):: pdev(0:nx,0:ny), hdev(0:nx,0:ny), pddev(0:nx,0:ny)
real(kind=8), value:: dx,dy,omega,prr
integer, value :: nx, ny,x1,x2,x3,x4,x5,x6,x7,x8,y1,y2
integer :: i, j
i = (blockidx%x-1) * 16 + threadidx%x
j = (blockidx%y-1) * 16 + threadidx%y
if (mod((i+j),2)==1) then
call sor_kernel(i,j,nx,ny,pdev,hdev,dx,dy,omega,&
pddev,prr,x1,x2,x3,x4,x5,x6,x7,x8,y1,y2)
end if
end subroutine black_kernel
!-------------------------- pbound block -----------------------------
attributes(global) subroutine pbound_kernel(nx,ny,pdev,x1,x2,x3,x4,x5,x6,x7,x8,y1,y2)
implicit none
real(kind=8):: pdev(0:nx,0:ny)
integer, value :: nx, ny,x1,x2,x3,x4,x5,x6,x7,x8,y1,y2
integer :: i, j
i = (blockidx%x-1) * 16 + threadidx%x
j = (blockidx%y-1) * 16 + threadidx%y
if (j>=y1 .and. j<=y2) then
if (x1>x2) then
if (i>=x1) pdev(i,j)=0
if (i<=x2) pdev(i,j)=0
else
if (i>=x1 .and. i<=x2) pdev(i,j)=0
end if
if (i>=x3 .and. i<=x4) pdev(i,j)=0
if (i>=x5 .and. i<=x6) pdev(i,j)=0
if (x8<x7) then
if (i>=x7) pdev(i,j)=0
if (i<=x8) pdev(i,j)=0
else
if (i>=x7 .and. i<=x8) pdev(i,j)=0
end if
end if
end subroutine pbound_kernel
!-------------------------- Sor GPU ------------------------------------
subroutine sor2(p,eps)
use cudafor
use parameters
implicit none
real(kind=8), device, allocatable, dimension(:,:) :: pdev,hdev,pddev
type(dim3) :: dimGrid, dimBlock
integer :: i,j,r,iter,t3,t4,tr
real(kind=8), intent(in)::eps
real(kind=8), intent(inout)::p(0:nx,0:ny)
real(kind=8)::de(0:nx,0:ny),de_old(0:nx,0:ny),p_old(0:nx,0:ny),h(0:nx,0:ny)
real(kind=8)::h0,rjac,theta,tim
integer::iter1,iter2,nt,thread,i1,j1,k
dx=(diam*pi/real(nx))
dy=(b/real(ny))
rjac=(cos(pi/nx)+(dx/dy)**2*cos(pi/ny))/(1.0d0+(dx/dy)**2)
!-------------- film thickness -------------------------------
do i=0,nx ; theta=360.0/real(nx)*real(i)
do j=0,ny
h(i,j)=c*(1.0d0+eps*cos(pi/180.0d0*theta))
end do
end do
!---------------------------------------------------------------
h0=minval(h)
!-----------normalization-----------
dx=dx/b
dy=dy/b
h=h/h0
p=p*(h0**2)/(vis*b*u) ! ambient pressure, kg/m**2
prr=4.5*pa*(h0**2)/(vis*b*u) ! recess pressure (atm)
!-------------------------------------
iter_in=0
iter2=0
p_norm=1.0d0
iter=0
allocate( pdev(0:nx,0:ny),hdev(0:nx,0:ny),pddev(0:nx,0:ny))
hdev=h
pdev=p
dimGrid = dim3( nx/16, ny/16, 1 )
dimBlock = dim3( 16, 16, 1 )
!------------------------ GPU --------------------------------------
call system_clock(t3,tr)
do while(p_norm > err2) ! outer-loop stopping criteria (pressure)
iter2=iter2+1
de=0.0d0
pddev=0.0d0
norm=1.0d0
p_old=p
pdev=p
call pbound_kernel<<<dimGrid,dimBlock>>>(nx,ny,pdev,x1,x2,x3,x4,x5,x6,x7,x8,y1,y2)
r = cudathreadsynchronize()
p=pdev
iter1=0
do while (norm> err1 .and. iter1<5000) ! inner-loop stopping criteria (delta)
de_old=de
call red_kernel<<<dimGrid,dimBlock>>>(nx,ny,pdev,hdev,dx,dy,omega,&
pddev,prr,x1,x2,x3,x4,x5,x6,x7,x8,y1,y2)
r = cudathreadsynchronize()
call black_kernel<<<dimGrid,dimBlock>>>(nx,ny,pdev,hdev,dx,dy,omega,&
pddev,prr,x1,x2,x3,x4,x5,x6,x7,x8,y1,y2)
r = cudathreadsynchronize()
de=pddev
iter1=iter1+1
norm=sqrt(dx*dy*sum((de-de_old)**2))
end do
iter_in=iter_in+iter1
p=p+de
p(0,:)=p(nx,:)
p_norm=sqrt(dx*dy*sum((p-p_old)**2))
end do
r = cudathreadsynchronize()
!---------------------- End GPU ------------------------------------
p=p*(vis*b*u)/h0**2
dx=dx*b
dy=dy*b
deallocate( pdev, hdev,pddev)
end subroutine sor2
end module sor_rb_mod
!------------------------------------------------------------
Program pso_abrg ! Particle Swarm Algorithm
use parameters
implicit none
!A program for hydrodynmic lubrication recessed slider bearing optimization by using PSO
! Nv: No. of variables
! Np: Population size (number of particles)
! v: pseudovelocity
! c1: Cognitive trust parameter
! c2: Social trust parameter
! w0: Inertia
! wd:Inertia Reduction parameter
! k: Bound on velocity fraction
! vd: Velocity reduction parameter
! d: Dynamic inertia/velocity reduction delay (function evaluations)
! r1,r2: Random factors in the [0,1] interval
! f: Function value
! ik: iteration times
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
integer::i,j,Nv,Np,d,ik,ikmax,it,ic,n,icmax,thn
real(kind=8)::c1,c2,w0,wd,k,vd,g_f,g_f_old,tpso1,tpso2,tpsor
real(kind=8), allocatable::x_UB(:),x_LB(:),v_x_max(:),g_x(:),g_x_new(:),r1(:),r2(:)
real(kind=8), allocatable::v_x(:,:),x(:,:),x_best(:,:),f(:),f_best(:),p_x(:,:)
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!$ integer:: omp_get_num_threads
!$ integer:: omp_get_num_procs
!$ thn=omp_get_num_procs()
!$ write(*,*) "The number of available processors/threads in the system: ",thn
thn=1 ! when OpenMP is not used
!$ write(*,*) "Enter the number of threads"
!$ read(*,*) thn
!$ call omp_set_num_threads(thn) ! set the number of threads
!write(*,*)'Np= (e.g. 100)'
!read(*,*)Np
!write(*,*)'icmax= (e.g. 20)'
!read(*,*)icmax
write(*,*) "Parallel by applying GPU? (YES: 1, NO: 2)"
read(*,*) pal_i
Np=40
icmax=60
Nv=3
write(*,*)'nx= (e.g. 256)'
read(*,*)nx
c1=0.8d0 !2.0d0
c2=0.8d0 !2.0d0
w0=0.5d0
wd=0.05d0
k=0.2d0 !*******************
vd=0.05d0
d=5
ikmax=15 !****************************
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
allocate(x_UB(1:Nv),x_LB(1:Nv),g_x(1:Nv),g_x_new(1:Nv),v_x(1:Nv,1:Np),v_x_max(1:Nv))
allocate(x(1:Nv,1:Np),x_best(1:Nv,1:Np),p_x(1:Nv,1:Np))
allocate(f(1:Np),f_best(1:Np),r1(1:Np),r2(1:Np))
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
g_f=0.0d0
x_UB(1)=60.0d-6
x_LB(1)=40.0d-6
x_UB(2)=30.0d-3
x_LB(2)=10.0d-3
x_UB(3)=60.0d-3
x_LB(3)=40.0d-3
do i=1,Nv
v_x_max(i)=k*(x_UB(i)-x_LB(i))
end do
ik=0
it=0
ic=0
do i=1,5
call random_number(x)
call random_number(v_x)
end do
g_f=1000000
do i=1,Nv
x(i,:)=x(i,:)*(x_UB(i)-x_LB(i))+x_LB(i)
end do
do i=1,Nv
v_x(i,:)=v_x(i,:)*v_x_max(i)
end do
call obfun(Nv,Np,x,f,thn)
f_best=f
x_best=x
p_x=x_best
do j=1,Np
if (f_best(j)<=g_f) then
g_x(:)=x_best(:,j)
g_f=f_best(j)
end if
end do
open(1,file='track_x.dat')
open(2,file='d_f.dat')
call cpu_time(tpso1)
do n=1,ikmax
g_f_old=g_f
write(2,'(1000(1x,f20.4))') g_x,-g_f
ik=ik+1
do i=1,Nv
call random_number(r1)
call random_number(r2)
v_x(i,:)=w0*v_x(i,:)+c1*r1(:)*(p_x(i,:)-x(i,:))+c2*r2(:)*(g_x(i)-x(i,:))
end do
do i=1,Nv
do j=1,Np
if (v_x(i,j)>v_x_max(i)) then
v_x(i,j)=v_x_max(i)
end if
x(i,j)=x(i,j)+v_x(i,j)
if (x(i,j)<=x_LB(i)) then; x(i,j)=x_LB(i); end if
if (x(i,j)>=x_UB(i)) then; x(i,j)=x_UB(i); end if
end do
end do
write(1,'(1000(1x,f10.4))') x
call obfun(Nv,Np,x,f,thn)
do j=1,Np
if (f(j)<=f_best(j)) then
f_best(j)=f(j)
x_best(:,j)=x(:,j)
p_x(:,j)=x_best(:,j)
end if
end do
do j=1,Np
if (f_best(j)<=g_f) then
g_f=f_best(j)
g_x_new(:)=x_best(:,j)
end if
end do
if (abs((g_f-g_f_old)/g_f)>1d-5) then
it=0
end if
it=it+1
g_x=g_x_new
!write(*,*) 'g_x: ',g_x
if (it==d) then
w0=w0*(1.0d0-wd)
do i=1,Nv
v_x_max(i)=v_x_max(i)*(1.0d0-vd)
end do
it=0
ic=ic+1
end if
if (ic==icmax) exit
write(*,*) 'ik/g_x/g_f: ', ik, g_x, g_f
end do
call cpu_time(tpso2)
close(1)
close(2)
write(*,*) tpso1, tpso2
write(*,*)'elapsed_time (s)=',(tpso2-tpso1)
write(*,*)'g_x, g_f=',g_x, g_f
write(*,*)'ik=',ik
stop
end program pso_abrg
!------------------------------------------------------
subroutine obfun(Nv,Np,x,f,thn)
use cudafor
use parameters
implicit none
integer,intent(in)::Nv,Np,thn
real(kind=8), intent(in)::x(1:Nv,1:Np)
real(kind=8), intent(out)::f(1:Np)
integer:: psoi
real(kind=8)::op,xv(Nv,1)
!$omp parallel do &
!$omp private (xv,op)
do psoi=1,Np
xv(:,1)=x(:,psoi)
call airbear(Nv,xv,op)
f(psoi)=-op
write(*,*) 'pi,x,f: ', psoi,x(:,psoi),f(psoi)
end do
!$omp end parallel do
return
end subroutine obfun
!------------------------------------------------------
subroutine airbear(Nv,xv,op)
use cudafor
use parameters
use sor_rb_mod
implicit none
integer,intent(in)::Nv
real(kind=8),intent(in)::xv(Nv,1)
real(kind=8), intent(out)::op
real(kind=8),allocatable::p(:,:)
real(kind=8)::eps,load,phi
c=xv(1,1)
lr=xv(2,1)
diam=xv(3,1)
call constant
call recess_coord ! define the recess coordinates
allocate(p(0:nx,0:ny))
p=pa
eps=0.7d0
if (pal_i==1) then
call sor2(p,eps)
else
call sor(p,eps)
end if
call load_brg(p,load,phi)
!pave=load/(diam*b)
op=load
end subroutine airbear
!------------------------------------------------------
subroutine constant
use parameters
implicit none
pi=4.0d0*atan(1.0d0)
!nx=256
ny=nint(nx/pi)
if (mod(ny,2)/=0) then; ny=ny+1; end if
vis=179.0d-7 ! air viscosity (ns/m^2)
!c=50.0d-6 ! clearance (m)
!diam=50.0d-3 ! bearing diameter
b=50.0d-3 ! bearing width (m)
err1=1.0d-5 ! allowable errors (Inner iteration)
err2=1.0d-4 ! allowable errors (Outer iteration)
pa=101347.0d0 ! atmosphere pressure (N/m^2)
rpm=6.0d4 ! rotation speed (rpm)
u=rpm*diam*pi/60.d0 ! bearing velocity
omega=1.6 ! relaxation no.
alpha=50.0d0 ! bearing orientation angle (0<alpha<90, deg)
dxr=pi*diam/real(nx) ! x-grid size
dyr=b/real(ny) ! y-grid size
!lr=10d-3 ! recess length (m)
br=40.0d-3 ! recess width (m)
prr=4.5 ! recess pressure (atm)
return;end subroutine constant
!------------------------------------------------------
subroutine sor(p,eps)
use parameters
implicit none
real(kind=8), intent(in)::eps
real(kind=8), intent(inout)::p(0:nx,0:ny)
real(kind=8)::de(0:nx,0:ny),de_old(0:nx,0:ny),p_old(0:nx,0:ny),h(0:nx,0:ny)
real(kind=8)::h0,rjac,theta
integer::i,j,iter1,iter2,nt,thread,i1,j1,k
dx=(diam*pi/real(nx))
dy=(b/real(ny))
rjac=(cos(pi/nx)+(dx/dy)**2*cos(pi/ny))/(1.0d0+(dx/dy)**2)
!-------------- film thickness -------------------------------
do i=0,nx ; theta=360.0/real(nx)*real(i)
do j=0,ny
h(i,j)=c*(1.0d0+eps*cos(pi/180.0d0*theta))
end do
end do
!---------------------------------------------------------------
h0=minval(h)
!-----------normalization-----------
dx=dx/b
dy=dy/b
h=h/h0
p=p*(h0**2)/(vis*b*u) ! ambient pressure, kg/m**2
prr=4.5*pa*(h0**2)/(vis*b*u) ! recess pressure (atm)
dx2=dx**2; dy2=dy**2; d2x=2.0d0*dx; d2y=2.0d0*dy
!-------------------------------------
iter_in=0
iter2=0
p_norm=1.0d0
do while(p_norm > err2 .and. iter2<200) ! outer-loop stopping criteria (pressure)
iter2=iter2+1
de=0.0d0
norm=1.0d0
p_old=p
iter1=0
do i=1, nx
do j=1,ny
if (j>=y1 .and. j<=y2) then
if (x1>x2) then
if (i>=x1) p(i,j)=0
if (i<=x2) p(i,j)=0
else
if (i>=x1 .and. i<=x2) p(i,j)=0
end if
if (i>=x3 .and. i<=x4) p(i,j)=0
if (i>=x5 .and. i<=x6) p(i,j)=0
if (x8<x7) then
if (i>=x7) p(i,j)=0
if (i<=x8) p(i,j)=0
else
if (i>=x7 .and. i<=x8) p(i,j)=0
end if
end if
end do
end do
do while (norm> err1 .and. iter1<5000) ! inner-loop stopping criteria (delta)
de_old=de
do i1=1,2
do j1=1,2
do i=i1,nx,2; ip=i+1; im=i-1
if (i==nx) then; ip=1; end if
do j=j1,ny-1,2; jp=j+1; jm=j-1
h2=h(i,j)**2; h3=h(i,j)**3
b1=(h(ip,j)-h(im,j))/d2x ! dh/dx
b2=(h(i,jp)-h(i,jm))/d2y ! dh/dy
b3=(p(ip,j)-p(im,j))/d2x ! dp/dx
b4=(p(i,jp)-p(i,jm))/d2y ! dp/dy
b5=(p(ip,j)+p(im,j)-2.d0*p(i,j))/dx2 ! dp2/dx2
b6=(p(i,jp)+p(i,jm)-2.d0*p(i,j))/dy2 ! dp2/dy2
a1=(p(i,j)*h3)/(dx2)+(2.0d0*h3*b3+3.0d0*h2*p(i,j)*b1)/d2x-(6.0d0*h(i,j))/d2x
a2=(p(i,j)*h3)/(dx2)-(2.0d0*h3*b3+3.0d0*h2*p(i,j)*b1)/d2x+(6.0d0*h(i,j))/d2x
a3=(p(i,j)*h3)/(dy2)+(2.0d0*h3*b4+3.0d0*h2*p(i,j)*b2)/d2y
a4=(p(i,j)*h3)/(dy2)-(2.0d0*h3*b4+3.0d0*h2*p(i,j)*b2)/d2y
a5=(-2.0d0*p(i,j)*h3)/dx2+h3*b5+3.0d0*h2*b1*b3-(2.0d0*p(i,j)*h3)/dy2+h3*b6+3.0d0*h2*b4*b2-6.0d0*b1
a6=p(i,j)*h3*(b5+b6)+h3*(b3**2+b4**2)+3.0d0*h2*p(i,j)*(b1*b3+b2*b4)-6.0d0*h(i,j)*b3-6.0d0*p(i,j)*b1
delta=-(a1*de(ip,j)+a2*de(im,j)+a3*de(i,jp)+a4*de(i,jm)+a6)/a5-de(i,j)
de(i,j)=de(i,j)+omega*delta
if (j>=y1 .and. j<=y2) then
if (x1>x2) then
if (i>=x1) de(i,j)=prr
if (i<=x2) de(i,j)=prr
else
if (i>=x1 .and. i<=x2) de(i,j)=prr
end if
if (i>=x3 .and. i<=x4) de(i,j)=prr
if (i>=x5 .and. i<=x6) de(i,j)=prr
if (x8<x7) then
if (i>=x7) de(i,j)=prr
if (i<=x8) de(i,j)=prr
else
if (i>=x7 .and. i<=x8) de(i,j)=prr
end if
end if
end do
end do
end do
end do
iter1=iter1+1
norm=sqrt(dx*dy*sum((de-de_old)**2))
end do
iter_in=iter_in+iter1
p=p+de
p(0,:)=p(nx,:)
p_norm=sqrt(dx*dy*sum((p-p_old)**2))
end do
p=p*(vis*b*u)/h0**2
dx=dx*b
dy=dy*b
return;end subroutine sor
!---------------------------
subroutine load_brg(p,load,phi) ! get the horizontal/vertical forces (fx,fy)
use parameters
implicit none
real(kind=8), intent(in):: p(0:nx,0:ny)
real(kind=8), intent(out):: load,phi
real(kind=8):: theta,px(0:nx,0:ny),py(0:nx,0:ny),fx,fy
integer:: i,j
do j=0,ny
do i=0,nx
theta=real(i)*360.0d0/real(nx)
px(i,j)=p(i,j)*sin(pi/180.0d0*theta)
py(i,j)=-p(i,j)*cos(pi/180.0d0*theta)
end do
end do
call simps(px,fx)
call simps(py,fy)
load=sqrt(fx**2+fy**2)
phi=atan(fx/fy)*180.0d0/pi
return; end subroutine load_brg
!----------------------
subroutine simps(pn,fn) ! Simpson's integration formula
use parameters
implicit none
real(kind=8), intent(in):: pn(0:nx,0:ny)
real(kind=8), intent(out):: fn
real(kind=8):: f1,f2,s1(0:ny),sx,sy
integer:: i,j
sx=dx/3.0d0
sy=dy/3.0d0
do j=0,ny
f1=0.0d0;f2=0.0d0
do i=1,nx-1,2; f1=f1+pn(i,j); end do
do i=2,nx-1,2; f2=f2+pn(i,j); end do
s1(j)=sx*(pn(0,j)+pn(nx,j)+4.0d0*f1+2.0d0*f2)
end do
f1=0.0d0
f2=0.0d0
do j=1,ny-1,2; f1=f1+s1(j); end do
do j=2,ny-1,2; f2=f2+s1(j); end do
fn=sy*(s1(0)+s1(ny)+4.0d0*f1+2.0d0*f2)
return; end subroutine simps
!-----------------------------------------------
subroutine recess_coord
! calculate the locations of the four recesses
! alpha: the orientation angle of the hybrid journal bearing
use parameters
implicit none
n_x=nint(lr/2.0d0/dxr)
n_x1=nint(alpha*real(nx)/360.0d0)
n_x2=nint(90.0d0*real(nx)/360.0d0)
x1=n_x1-n_x
x3=x1+n_x2
if (x1 < 0) then
x1=n_x1-n_x+nx
x3=x1+n_x2-nx
end if
x2=n_x1+n_x
x4=x2+n_x2
x5=x3+n_x2
x6=x4+n_x2
x7=x5+n_x2
x8=x6+n_x2
if (x8 > nx) then
x8=x6+n_x2-nx
end if
n_y=nint(br/2.0d0/dyr)
y1=ny/2-n_y
y2=ny/2+n_y
return; end subroutine recess_coord
[/code]