Hi Mat,
here is another strange error I have encountered when calling device subroutine.
C:\Users\Dolf\Desktop\test3>cuda-memcheck.exe --log-file err.dat test3.exe
grid 26 26 1
thread 16 16 0
gethxp1 kernel error
unspecified launch failure
4
0: DEALLOCATE: unspecified launch failure
! test3.f90
program test3
use kernels
use reyneq_Dev
use Q4_globals_Dev
use flows_Dev
use average_Dev
use filmtk_Dev
use sl_fthk_Dev
use bearing_Dev
nxmax = 402
nymax = 402
nx = nxmax
ny = nymax
threads%x = 16
threads%y = 16
allocate(hxlnewDev(nxmax,nymax),hxlDev(nxmax,nymax),hxrnewDev(nxmax,nymax),hxrDev(nxmax,nymax), &
hydnewDev(nxmax,nymax),hyunewDev(nxmax,nymax),hydDev(nxmax,nymax),hyuDev(nxmax,nymax))
allocate(xrefDev(nxmax),yrefDev(nxmax),hDev(nxmax,nymax),hnewDev(nxmax,nymax),hxyDev(nxmax,nymax))
allocate(zdatDev(1001),qndatDev(1001))
allocate(ztaDev(nxmax,nymax),etaDev(nxmax,nymax),bearxDev(nxmax,nymax),&
bearyDev(nxmax,nymax),pDev(nxmax,nymax),poldDev(nxmax,nymax))
allocate(awDev(nxmax,nymax),apDev(nxmax,nymax),aeDev(nxmax,nymax), &
arDev(nxmax,nymax),asDev(nxmax,nymax),anDev(nxmax,nymax))
allocate(ajDev(nxmax),bjDev(nxmax),cjDev(nxmax),djDev(nxmax),betaDev(nxmax),gammaDev(nxmax))
grid = dim3(ceiling(real(nx-1)/threads%x), & !nx-1
ceiling(real(ny-1)/threads%y),1) !ny-1
write(*,*)'grid',grid
write(*,*)'thread',threads
call gethxp1_kernel<<<grid,threads>>>(nx,ny,nxmax,nymax,hminDev,hx0Dev,hyDev,ylDev,hxyDev,hnewDev, &
hxlnewDev,hxrnewDev,hxlDev,hxrDev,hydnewDev,hyunewDev,hydDev,hyuDev,xrefDev,yrefDev,zdatDev,qndatDev)
istat = cudaThreadSynchronize()
if (istat .ne. 0) write(*,*) ' gethxp1 kernel error ',cudaGetErrorString(istat)
deallocate(hxlnewDev,hxlDev,hxrnewDev,hxrDev,hydnewDev,hyunewDev,hydDev,hyuDev)
deallocate(xrefDev,yrefDev,hDev,hnewDev,hxyDev,ztaDev,etaDev,bearxDev,bearyDev,pDev,poldDev)
deallocate(awDev,apDev,aeDev,arDev,asDev,anDev,ajDev,bjDev,cjDev,djDev)
istat = cudaDeviceReset()
end program test3
! kernels.f90
module kernels
contains
!======================================================
attributes (global) subroutine gethxp1_kernel(nx,ny,nxmax,nymax,hmin,hx0,hy,yl,hxy,hnew, &
hxlnew,hxrnew,hxl,hxr,hydnew,hyunew,hyd,hyu,xref,yref,zdat,qndat)
!======================================================
implicit none
integer, value :: nx,ny,nxmax,nymax
integer :: i,j
real(8) :: hnew(nxmax,nymax),hxy(nxmax,nymax),xref(nxmax),yref(nymax)
real(8) :: hyd(nxmax,nymax),hyu(nxmax,nymax),hydnew(nxmax,nymax),hyunew(nxmax,nymax)
real(8) :: hxl(nxmax,nymax),hxr(nxmax,nymax),hxlnew(nxmax,nymax),hxrnew(nxmax,nymax)
real(8) :: zdat(:),qndat(:)
real(8) :: hmin,hx0,hy,yl
real(8) :: YV1,YV2,XV1,XV2,ZX1,ZY2,ZX2,ZX,ZY,ZY1,DELY,DELX,XV,YV
i = (blockidx%x - 1) * blockDim%x + threadidx%x
j = (blockidx%y - 1) * blockDim%y + threadidx%y
!if(i <= nx) then
! if(j <= ny) then
!do i = 2,nx-1
if(i >= 2 .and. i <= nx) then
delx = (xref(i+1)-xref(i-1)) / 2.0
!do j = 2,ny-1
if(j >= 2 .and. j <= ny) then
dely=(yref(j+1)-yref(j-1)) / 2.0
xv = (xref(i-1)+xref(i))/2.0 + 0.5*delx
yv = (yref(j-1)+yref(j))/2.0
zx = hmin - hx0 * (1.0 - xv) !height due to nominal FH and pitch angle
zy = hy * (yv - 0.5*yl) !height due to roll angle
hxlnew(i,j) = hxl(i,j)+zx+zy !what are hxl and hxr???
hxrnew(i,j) = hxr(i,j)+zx+zy
if(j.eq.(ny-1)) then
!grab edge cases as well
xv=(xref(i-1)+xref(i))/2.0+0.50*delx
yv=(yref(j)+yref(j+1))/2.0
zx=hmin-hx0*(1.0-xv)
zy=hy*(yv-0.50*yl)
hxlnew(i,j+1)=hxl(i,j+1)+zx+zy
hxrnew(i,j+1)=hxr(i,j+1)+zx+zy
endif
xv=(xref(i-1)+xref(i))/2.d0
yv=(yref(j-1)+yref(j))/2.d0+0.5d0*dely
zx=hmin-hx0*(1.d0-xv)
zy=hy*(yv-0.5d0*yl)
hydnew(i,j)=hyd(i,j)+zx+zy
hyunew(i,j)=hyu(i,j)+zx+zy
if(i.eq.(nx-1)) then
!grab edge cases as well
xv=(xref(i)+xref(i+1))/2.d0
yv=(yref(j-1)+yref(j))/2.d0+0.5d0*dely
zx=hmin-hx0*(1.d0-xv)
zy=hy*(yv-0.5d0*yl)
hydnew(i+1,j)=hyd(i+1,j)+zx+zy
hyunew(i+1,j)=hyu(i+1,j)+zx+zy
endif
xv=xref(i)
yv=yref(j)
zx = hmin - hx0*(1.d0-xv)
zy = hy*(yv-0.5d0*yl)
hnew(i,j)=hxy(i,j)+zx+zy
!enddo
!enddo
endif
endif
return
end subroutine gethxp1_kernel
end module kernels
!common.f90
!****************************************
module Q4_globals_Dev
!****************************************
use cudafor
!use Q4_sizes
!use Q4_globals
implicit none
!cuda device variables
integer :: istat
type(dim3) :: grid, threads
type(cudaDeviceProp) :: prop
!slider info
!xl = x length, yl = y length, zl=**, xg = **, xt=taper Length, ht = taper angle, rebase = base recess
real(8), device :: xlDev, ylDev, zlDev, xgDev, xtDev, htDev, rebaseDev, crownDev, camberDev, twistDev
!Grid
!==========
!integer, device :: nxDev,nx1Dev,nx2Dev,nx3Dev,nx4Dev
real(8), device, allocatable, dimension(:) :: xrefDev,xref1Dev,xref2Dev,xref3Dev,xref4Dev, &
yrefDev,yref1Dev,yref2Dev,yref3Dev,yref4Dev
!real(8), device, allocatable, dimension(:,:) :: bearxDev, bearx1Dev, bearx2Dev,bearx3Dev,&
! bearx4Dev, bearyDev, beary1Dev, beary2Dev, beary3Dev, beary4Dev
!Run Setup and Initial attitude
real(8), device :: skeDev, raDev, rpmDev, u0Dev, hmDev, hx0Dev, h0Dev, hsDev, hyDev, p0xlDev
!height data (say what hnew and h are)
real(8), device, allocatable, dimension(:,:) :: hDev, h1Dev, h2Dev, h3Dev, h4Dev, hnewDev, hnew1Dev,&
hnew2Dev, hnew3Dev, hnew4Dev
!pressure
!========
real(8), device, allocatable :: pDev(:,:), p1Dev(:,:), p2Dev(:,:), p3Dev(:,:), p4Dev(:,:)
!constants
real(8), device :: corCoefDev !d0, gama, pir, pit, t1, t2, t3, t4, slip_beta, slip_gamma, accom,
!reynolds equation
!=================
real(8), device :: akmaxDev, akDev, ak0Dev, ak1Dev, ak2Dev, ak3Dev, ak4Dev
!reynolds eqn residuals
!=====================
real(8), device, allocatable, dimension(:,:) :: resDev, res1Dev, res2Dev, res3Dev, res4Dev !, &
! su01Dev, su02Dev, su03Dev, su04Dev
!various parameters needed by the reynolds equation
!many of these are set in ave_height()
real(8), device, allocatable, dimension(:,:) :: cohimxDev,cohimx1Dev,cohimx2Dev, &
cohimx3Dev,cohimx4Dev,cohjmxDev,cohjmx1Dev,cohjmx2Dev,cohjmx3Dev,cohjmx4Dev, &
himaxDev,himax1Dev,himax2Dev,himax3Dev,himax4Dev, &
himinDev,himin1Dev,himin2Dev,himin3Dev,himin4Dev, &
hjmaxDev,hjmax1Dev,hjmax2Dev,hjmax3Dev,hjmax4Dev, &
hjminDev,hjmin1Dev,hjmin2Dev,hjmin3Dev,hjmin4Dev, &
recssiDev,recssi1Dev,recssi2Dev,recssi3Dev,recssi4Dev, &
recssjDev,recssj1Dev,recssj2Dev,recssj3Dev,recssj4Dev
!FORCES
!================
real(8), device :: emaxDev, errDev, fDev, fnegDev, fposDev, fspDev, fsrDev, xfDev, yfDev, &
!xint(4), yint(4), hint(4), &
hminDev, MinFHDev, MinFHLocXDev, MinFHLocYDev, &
jacDev(3,3), rintDev(4), hintgeDev(4),hgapDev(4), &
XmomDev, YmomDev, xPosLocDev, yPosLocDev, xNegLocDev, yNegLocDev, &
fvdw_outputDev !,ZmomDev
real(8), device, allocatable, dimension (:,:) :: vdwMolecularForceMapDev
real(8), device, allocatable, dimension (:) :: xintNewDev, yintNewDev, hintNewDev
!contact and surface roughness
!=============================
real(8), device :: cpDev !(nxx,nyx)
real(8), device :: rsikDev,ctaDev,raspDev,fcrDev,txrDev,tyrDev, &
aratioDev,eyoungDev,ydstDev,ydcoeDev,pratioDev,frcoeDev,ahcDev,bhcDev,elecpotDev
!forces on the slider
real(8), device :: f0Dev, xf0Dev, yf0Dev, xfsDev, yfsDev, Pitch_StiffnessDev, Roll_StiffnessDev,&
PSADev, RSADev
logical, device :: crashDev
end module Q4_globals_Dev
!***************************************************************
module filmtk_Dev
!***************************************************************
!use Dyn_sizes_module
implicit none
real(8), device, allocatable :: hxlnewDev(:,:), hxrnewDev(:,:), hydnewDev(:,:), hyunewDev(:,:)
real(8), device :: zx1Dev,zx2Dev,zy1Dev,zy2Dev,zxDev
end module filmtk_Dev
!***************************************************************
module sl_fthk_Dev
!***************************************************************
implicit none
real(8), device, allocatable :: hxlDev(:,:), hxrDev(:,:), hydDev(:,:), hyuDev(:,:), hxyDev(:,:), &
hsadDev(:,:)
end module sl_fthk_Dev
!***************************************************************
module flows_Dev
!***************************************************************
!use Dyn_sizes_module
implicit none
real(8), device :: t1Dev, t2Dev, t3Dev, t4Dev, gamaDev, pirDev, pitDev, d0Dev
real(8), device, allocatable :: zdatDev(:), qndatDev(:)
!integer, device :: nterDev, icoeDev, iqpoDev
end module flows_Dev
!***************************************************************
module average_Dev
!***************************************************************
!use Dyn_sizes_module
implicit none
real(8), device, allocatable :: ztaDev(:,:), etaDev(:,:)
end module average_Dev
!***************************************************************
module bearing_Dev
!***************************************************************
!use Dyn_sizes_module
implicit none
real(8), device, allocatable :: bearxDev(:,:), bearyDev(:, :)
end module bearing_Dev
! added by Dolf 06/04/15
!****************************************
module reyneq_Dev
!****************************************
!use Dyn_sizes_module
implicit none
real(8), device, allocatable :: awDev(:,:),apDev(:,:),aeDev(:,:),arDev(:,:),asDev(:,:),anDev(:,:),&
aw1Dev(:,:),ap1Dev(:,:),ae1Dev(:,:),as1Dev(:,:),an1Dev(:,:),ar1Dev(:,:),dp1Dev(:,:), &
aw2Dev(:,:),ap2Dev(:,:),ae2Dev(:,:),as2Dev(:,:),an2Dev(:,:),ar2Dev(:,:),dp2Dev(:,:), &
aw3Dev(:,:),ap3Dev(:,:),ae3Dev(:,:),as3Dev(:,:),an3Dev(:,:),ar3Dev(:,:),dp3Dev(:,:), &
aw4Dev(:,:),ap4Dev(:,:),ae4Dev(:,:),as4Dev(:,:),an4Dev(:,:),ar4Dev(:,:),dp4Dev(:,:), &
poldDev(:,:),ajDev(:),bjDev(:),cjDev(:),djDev(:),betaDev(:),gammaDev(:)
real(8), device :: qnDev(1001),pnDev(1001)
real(8), device :: siDev,dtDev,omega0Dev,akdDev,aknDev
end module reyneq_Dev