Hi PGroup,
what is the best way to correctly run this code (two do loops) into a GPU kernel??
do j=2,nym1
jm1=j-1
jp1=j+1
do i=2,nxm1
ip1=i+1
im1=i-1
ai(im1)=-aw(i,j)
bi(im1)=ap(i,j)
ci(im1)=-ae(i,j)
if(1.eq.nxm2)then
di(im1) = su(i,j) + as(i,j)*p(i,jm1) + an(i,j)*p(i,jp1) + aw(i,j)*p(im1,j) + ae(i,j)*p(ip1,j)
else if(im1.eq.1)then
di(im1) = su(i,j) + as(i,j)*p(i,jm1) + an(i,j)*p(i,jp1) + aw(i,j)*p(im1,j)
else if(im1.eq.nxm2) then
di(im1) = su(i,j) + as(i,j)*p(i,jm1) + an(i,j)*p(i,jp1) + ae(i,j)*p(ip1,j)
else
di(im1) = su(i,j) + as(i,j)*p(i,jm1) + an(i,j)*p(i,jp1)
endif
enddo
call tridag(nxm2,ai,bi,ci,di)
do i=2,nxm1
p(i,j)=di(i-1)
enddo
enddo
here is the tridag subroutine:
c===============================================
subroutine tridag(n,a,b,c,d)
c===============================================
use Q4_globals
implicit none
integer :: n, i, im1
real(8) :: a,b,c,d
real(8) :: beta, gamma
dimension a(n),b(n),c(n),d(n)
dimension beta(nmx),gamma(nmx)
beta(1)=b(1)
gamma(1)=d(1)/beta(1)
do i=2,n
im1=i-1
beta(i)=b(i)-a(i)*c(im1)/beta(im1)
gamma(i)=(d(i)-a(i)*gamma(im1))/beta(i)
enddo
d(n)=gamma(n)
do i=n-1,1,-1
d(i)=gamma(i)-c(i)*d(i+1)/beta(i)
enddo
return
end
many thanks.
Dolf