Internal compiler error. unsupported operation 309

Hey all,

I have been producing a CUDA Fortran program that calls a device function from a global kernel. I have placed the relevant sections of code within the same fortran module, but when compiling I get the following error:


pgfortran -c -o cudacompute.o -Mcuda -r8 -i8 -pg cudacompute.f

PGF90-F-0000-Internal compiler error. unsupported operation     309 (cudacompute.f: 827)
PGF90/x86-64 Linux 10.1-0: compilation aborted

make: *** [cudacompute.o] Error 1



The relevant section of code is below. Line 827 is marked with :

c LINE 827:           #################################

Any help or suggestions would be great!

Thank you for your time,

—Chris


module inteuler_intvar
	     implicit none

      contains




c=======================================================================
c///////////////////////  SUBROUTINE INTEULER_D  \\\\\\\\\\\\\\\\\\\\\\\\\
c
      attributes(global) subroutine inteuler_d(
     &            dslice, pslice, gravity, grslice, geslice,
     &            uslice, vslice, wslice, dxi, flatten,
     &            idim, jdim, i1, i2, j1, j2, idual, eta1, eta2,
     &            isteep, iflatten, dt, gamma, ipresfree,
     &            dls, drs, pls, prs, gels, gers,
     &            uls, urs, vls, vrs, wls, wrs,
     &            ncolor, colslice, colls, colrs
     &                    )
c
c  COMPUTES LEFT AND RIGHT EULERIAN INTERFACE VALUES FOR RIEMANN SOLVER
c
c  written by: Jim Stone
c  date:       January,1991
c  modified1:  June, 1993 by Greg Bryan (changed to eulerian)
c  modified2:  July, 1994 by Greg Bryan (changed to slicewise)
c  modified3:  April, 1995 by GB (added gravity)
c  modified?:  August, 2010 by Chris Heuser (altered for use with CUDA)
c
c  PURPOSE:  Uses piecewise parabolic interpolation to compute left-
c    and right interface values to be fed into Riemann solver during a
c    one dimensional sweeps.  This version computes the Eulerian corrections
c    to the left and right states described in section three of Colella &
c    Woodward (1984), JCP.  The routine works on one two dimensional
c    slice at a time.
c
c  INPUT:
c    dslice - extracted 2d slice of the density, d
c    dt     - timestep in problem time
c    dxi    - distance between Eulerian zone edges in sweep direction
c    eta1   - (dual) selection parameter for gas energy (typically ~0.001)
c    flatten - ammount of flattening (calculated in calcdiss)
c    gamma  - ideal gas law constant
c    gravity - gravity flag (0 = off)
c    grslice - acceleration in this direction in this slice
c    i1,i2  - starting and ending addresses for dimension 1
c    idim   - declared leading dimension of slices
c    idual  - dual energy formalism flag (0 = off)
c    iflatten - integer flag for flattener (eq. A1, A2) (0 = off)
c    isteep - integer flag for steepener (eq. 1.14,1.17,3.2) (0 = off)
c    ipresfree - pressure free flag (0 = off, 1 = on, i.e. p=0)
c    j1,j2  - starting and ending addresses for dimension 2
c    jdim   - declared second dimension of slices
c    pslice - extracted 2d slice of the pressure, p
c    uslice - extracted 2d slice of the 1-velocity, u
c    vslice - extracted 2d slice of the 2-velocity, v
c    wslice - extracted 2d slice of the 3-velocity, w
c    
c  OUTPUT:
c    dl,rs  - density at left and right edges of each cell
c    pl,rs  - pressure at left and right edges of each cell
c    ul,rs  - 1-velocity at left and right edges of each cell
c    vl,rs  - 2-velocity at left and right edges of each cell
c    wl,rs  - 3-velocity at left and right edges of each cell
c
c  EXTERNALS:
c
c  LOCALS:
c
c  PARAMETERS:
c    ft     - a constant used in eq. 1.124 (=2*2/3)
c
c-----------------------------------------------------------------------
c
      implicit NONE
c
      integer ijkn
      parameter (ijkn=MAX_ANY_SINGLE_DIRECTION)
c
c-----------------------------------------------------------------------
c
c  argument declarations
c
      integer,device :: gravity, i1, i2, idim, idual, iflatten, 
     &	      ipresfree, isteep,
     &        j1, j2, jdim, ncolor

      real,device :: dt, eta1, eta2, gamma

      real,dimension(idim,jdim),device :: dslice, pslice,
     &       uslice,  vslice,  wslice,
     &      grslice, geslice

      real dxi(idim)

      real,dimension(idim,jdim),device :: dls,     drs, 
     &          flatten,
     &          pls,
     &          prs,    gels,    gers,
     &          uls,     urs,     vls,
     &          vrs,     wls,     wrs

      real,dimension(idim,jdim,ncolor),device :: colslice,   
     &        colls,
     &        colrs

c
c  local declarations
c
      integer tx, ty
      integer,device :: i, j, ic

      real,dimension(ijkn),device :: steepen,tmp1,tmp2,tmp3,tmp4
      real qa,qb,qc,qd,qe,s1,s2,f1
      real,dimension(ijkn),device :: c1, c2, c3, c4, c5, c6
     &    ,dp,pl,pr,p6
     &    ,du,ul,ur,u6
     &    ,dla,dra,pla,pra,ula,ura
     &    ,vla,vra,wla,wra
     &    ,plm,prm,ulm,urm
     &    ,dl0,dr0,pl0,pr0
     &    ,plp,prp,ulp,urp,ul0,ur0
     &    ,vl0,vr0,wl0,wr0
     &    , cs,d2d,dxb, dx2i
     &    , cm, c0, cp,char1,char2
     &    ,betalp,betalm,betal0,cla
     &    ,betarp,betarm,betar0,cra
     &    ,gela,gera,gel0,ger0
     &    ,colla(ijkn,MAX_COLOR),colra(ijkn,MAX_COLOR)
     &    ,coll0(ijkn,MAX_COLOR),colr0(ijkn,MAX_COLOR)
c
c  parameters
c
      real ft
      parameter(ft = 4.0/3.0)
c
c\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\/////////////////////////////////////
c=======================================================================
c
c     write(6,*) 'INTEULER: dt =',dt,' isteep =',isteep
c     write(6,*) 'INTEULER: iflatten =',iflatten
c     write(6,*) 'INTEULER: idim =',idim,' jdim =',jdim
c     write(6,*) 'INTEULER: i1   =',i1,  ' i2   =',i2
c     write(6,*) 'INTEULER: j1   =',j1,  ' j2   =',j2
c
c     set these details:

      tx = threadidx%x
      ty = threadidx%y
      i = blockIdx%x * 16 + tx      
      j = blockIdx%y * 16 + ty



c     Compute coefficients used in interpolation formulae (from eq. 1.6)
c
      if (i .ge. i1-2) then
          if (i .le. i2+2) then
              qa    = dxi(i)/(dxi(i-1) + dxi(i) + dxi(i+1))
              c1(i) = qa*(2.0*dxi(i-1) + dxi(i))/(dxi(i+1) + dxi(i))
              c2(i) = qa*(2.0*dxi(i+1) + dxi(i))/(dxi(i-1) + dxi(i))
          endif
      endif

c
      if (i .ge. i1-1) then
          if (i .le. i2+2) then
              qa    = dxi(i-2) + dxi(i-1) + dxi(i) + dxi(i+1)
              qb    = dxi(i-1)/(dxi(i-1) + dxi(i))
              qc    = (dxi(i-2) + dxi(i-1))/(2.0*dxi(i-1) + dxi(i  ))
              qd    = (dxi(i+1) + dxi(i  ))/(2.0*dxi(i  ) + dxi(i-1))
              qb    = qb + 2.0*dxi(i)*qb/qa*(qc-qd)
              c3(i) = 1.0 - qb
              c4(i) = qb
              c5(i) =  dxi(i  )/qa*qd
              c6(i) = -dxi(i-1)/qa*qc
              dx2i(i) = 0.5/dxi(i)
          endif
      endif
c
c    Loop over sweep lines (in this slice)
c
      if (j .ge. j1) then
      if (j .le. j2) then
c
c     Precompute steepening coefficients if needed (eqns 1.14-1.17, plus 3.2)
c
      if (isteep .ne. 0) then
         if ((i .ge. i1-2) .and. (i .le. i2+2)) then
            qa     = dxi(i-1) + dxi(i) + dxi(i+1)
            d2d(i) = (dslice(i+1,j) - dslice(i,j))/(dxi(i+1) + dxi(i))
            d2d(i) = (d2d(i) - (dslice(i,j)-dslice(i-1,j))
     &               /(dxi(i)+dxi(i-1)))/qa
            dxb(i) = 0.5*(dxi(i) + dxi(i+1))
         endif
	 if ((i .ge. i1-1) .and. (i .le. i2+1)) then
            qc = abs(dslice(i+1,j) - dslice(i-1,j))
     &           - 0.01*min(abs(dslice(i+1,j)),abs(dslice(i-1,j)))
            s1 = (d2d(i-1) - d2d(i+1))*(dxb(i-1)**3 + dxb(i)**3)
     &           /((dxb(i) + dxb(i-1))*
     &           (dslice(i+1,j) - dslice(i-1,j) + tiny))
            if (d2d(i+1)*d2d(i-1) .gt. 0.0) s1 = 0.0
            if (qc .le. 0.0) s1 = 0.0
            s2 = max(0.0, min(20.0*(s1-0.05), 1.0))
            qa = abs(dslice(i+1,j) - dslice(i-1,j))/
     &           min(dslice(i+1,j),  dslice(i-1,j))
            qb = abs(pslice(i+1,j) - pslice(i-1,j))/
     &           min(pslice(i+1,j),  pslice(i-1,j))
            if (gamma*0.1*qa .ge. qb) then
               steepen(i) = s2
            else
               steepen(i) = 0.0
            endif
         endif
      endif
      endif
      endif
c
c     Precompute left and right characteristic distances
c
      if ((i .ge. i1-1) .and. (i .le. i2+1)) then
         cs(i) = sqrt(gamma*pslice(i,j)/dslice(i,j))
         if (ipresfree .eq. 1) cs(i) = tiny
         char1(i) = max(0.0, dt*(uslice(i,j)+cs(i)))*dx2i(i)
         char2(i) = max(0.0,-dt*(uslice(i,j)-cs(i)))*dx2i(i)
      endif
c
      if ((i .ge. i1-1) .and. (i .le. i2+1)) then
        cm(i) = dt*(uslice(i,j)-cs(i))*dx2i(i)
        c0(i) = dt*(uslice(i,j)      )*dx2i(i)
        cp(i) = dt*(uslice(i,j)+cs(i))*dx2i(i)
      endif
c
c     Compute left and right states for each variable
c       (steepening, if requested, is only applied to density)
c
      call intvar_d(dslice(1,j),idim,i1,i2,isteep,steepen,iflatten, 
     &      flatten,c1,c2,c3,c4,c5,c6,char1,char2,c0,
     &      tmp1,tmp2,tmp3,tmp4,dla,dra,dl0,dr0,i)
c
c      call intvar_d(pslice(1,j), idim, i1, i2, 0     , steepen, iflatten, 
c     &            flatten, c1, c2, c3, c4, c5, c6, char1, char2, c0,
c     &            dp, pl, pr, p6, pla, pra, pl0, pr0, i)
cc
cc      call intvar_d(uslice(1,j), idim, i1, i2, 0     , steepen, iflatten, 
c     &            flatten, c1, c2, c3, c4, c5, c6, char1, char2, c0,
c     &            du, ul, ur, u6, ula, ura, ul0, ur0, i)
cc
cc      call intvar_d(vslice(1,j), idim, i1, i2, 0     , steepen, iflatten, 
c     &            flatten, c1, c2, c3, c4, c5, c6, char1, char2, c0,
c     &            tmp1, tmp2, tmp3, tmp4, vla, vra, vl0, vr0, i)
cc
cc      call intvar_d(wslice(1,j), idim, i1, i2, 0     , steepen, iflatten, 
c     &            flatten, c1, c2, c3, c4, c5, c6, char1, char2, c0,
c     &            tmp1, tmp2, tmp3, tmp4, wla, wra, wl0, wr0, i)
cc
cc      if (idual .eq. 1) then
cc        call intvar_d(geslice(1,j), idim, i1, i2, 0   , steepen, iflatten, 
c     &            flatten, c1, c2, c3, c4, c5, c6, char1, char2, c0,
c     &            tmp1, tmp2, tmp3, tmp4, gela, gera, gel0, ger0, i)
cc
cc      do ic=1,ncolor
cc        call intvar_d(colslice(1,j,ic), idim, i1,i2, 0, steepen, iflatten, 
c     &            flatten, c1, c2, c3, c4, c5, c6, char1, char2, c0,
c     &            tmp1, tmp2, tmp3, tmp4, colla(1,ic), colra(1,ic),
c     &            coll0(1,ic), colr0(1,ic), i)
cc      enddo
c
c#ifdef UNUSED
c      if ((i .ge. i1) .and. (i .le. i2+1)) then
c         if (dla(i)/dslice(i-1,j) .gt. 4.0) then
c            write(6,*) 'inteuler left:',i,j
c            write(6,*) dla(i-1),dla(i),dla(i+1)
c            write(6,*) dslice(i-1,j),dslice(i,j),dslice(i+1,j)
c            write(6,*) dra(i-1),dra(i),dra(i+1)
c            write(6,*) ula(i-1),ula(i),ula(i+1)
c            write(6,*) pla(i-1),pla(i),pla(i+1)
c            write(6,*) uslice(i-1,j),uslice(i,j),uslice(i+1,j)
c         endif
c      endif
c#endif /* UNUSED */
c
c
c Correct the initial guess from the linearized gas equations
c
c     First, compute averge over characteristic domain of dependance (3.5)
c
c
      if ((i .ge. i1) .and. (i .le. i2+1)) then
        plm(i)= pr(i-1)-cm(i-1)*(dp(i-1)-(1.0-ft*cm(i-1))*p6(i-1))
        prm(i)= pl(i  )-cm(i  )*(dp(i  )+(1.0+ft*cm(i  ))*p6(i  ))
        plp(i)= pr(i-1)-cp(i-1)*(dp(i-1)-(1.0-ft*cp(i-1))*p6(i-1))
        prp(i)= pl(i  )-cp(i  )*(dp(i  )+(1.0+ft*cp(i  ))*p6(i  ))
      endif
c
      if ((i .ge. i1) .and. (i .le. i2+1)) then
        ulm(i)= ur(i-1)-cm(i-1)*(du(i-1)-(1.0-ft*cm(i-1))*u6(i-1))
        urm(i)= ul(i  )-cm(i  )*(du(i  )+(1.0+ft*cm(i  ))*u6(i  ))
        ulp(i)= ur(i-1)-cp(i-1)*(du(i-1)-(1.0-ft*cp(i-1))*u6(i-1))
        urp(i)= ul(i  )-cp(i  )*(du(i  )+(1.0+ft*cp(i  ))*u6(i  ))
      endif
c
c     Compute correction terms (3.7)
c
      if ((i .ge. i1) .and. (i .le. i2+1)) then
         cla(i) = sqrt(max(gamma*pla(i)*dla(i), 0.0))
         cra(i) = sqrt(max(gamma*pra(i)*dra(i), 0.0))
      endif
c
c     a) left side
c
      if ((i .ge. i1) .and. (i .le. i2+1)) then
         f1 = 1.0/cla(i)
         betalp(i) = (ula(i)-ulp(i)) + (pla(i)-plp(i))*f1
         betalm(i) = (ula(i)-ulm(i)) - (pla(i)-plm(i))*f1
         betal0(i) = (pla(i)-pl0(i))*f1**2 + 1.0/dla(i) - 1.0/dl0(i)
      endif
c
c     Add gravity component
c      
      if (gravity .eq. 1) then
        if ((i .ge. i1) .and. (i .le. i2+1)) then
c          if (cla(i) .gt. 0.3*ula(i)) then
c          if (gamma*pla(i)/dla(i) .gt. eta2*ula(i)**2) then
           betalp(i) = betalp(i) - 0.25*dt*(grslice(i-1,j)+grslice(i,j))
           betalm(i) = betalm(i) - 0.25*dt*(grslice(i-1,j)+grslice(i,j))
c          endif
        endif
      endif
c
      if ((i .ge. i1) .and. (i .le. i2+1)) then
         f1 = 0.5/cla(i)
         betalp(i) = -betalp(i)*f1
         betalm(i) = +betalm(i)*f1
      endif
c
      if ((i .ge. i1) .and. (i .le. i2+1)) then
         if (cp(i-1) .le. 0) betalp(i) = 0
         if (cm(i-1) .le. 0) betalm(i) = 0
         if (c0(i-1) .le. 0) betal0(i) = 0
      endif
c
c     b) right side
c
      if ((i .ge. i1) .and. (i .le. i2+1)) then
         f1 = 1.0/cra(i)
         betarp(i) = (ura(i)-urp(i)) + (pra(i)-prp(i))*f1
         betarm(i) = (ura(i)-urm(i)) - (pra(i)-prm(i))*f1
         betar0(i) = (pra(i)-pr0(i))*f1**2 + 1.0/dra(i) - 1.0/dr0(i)
      endif
c
c     Add gravity component
c      
      if (gravity .eq. 1) then
        if ((i .ge. i1) .and. (i .le. i2+1)) then
c          if (cra(i) .gt. 0.3*ura(i)) then
c           if (gamma*pra(i)/dra(i) .gt. eta2*ura(i)**2) then
           betarp(i) = betarp(i) - 0.25*dt*(grslice(i-1,j)+grslice(i,j))
           betarm(i) = betarm(i) - 0.25*dt*(grslice(i-1,j)+grslice(i,j))
c          endif
        endif
      endif
c
      if ((i .ge. i1) .and. (i .le. i2+1)) then
         f1 = 0.5/cra(i)
         betarp(i) = -betarp(i)*f1
         betarm(i) = +betarm(i)*f1
      endif
c
      if ((i .ge. i1) .and. (i .le. i2+1)) then
         if (cp(i) .ge. 0) betarp(i) = 0
         if (cm(i) .ge. 0) betarm(i) = 0
         if (c0(i) .ge. 0) betar0(i) = 0
      endif
c
c     Finally, combine to create corrected left/right states (eq. 3.6)
c
      if ((i .ge. i1) .and. (i .le. i2+1)) then
         pls(i,j) = pla(i) + (betalp(i)+betalm(i))*cla(i)**2
         prs(i,j) = pra(i) + (betarp(i)+betarm(i))*cra(i)**2
c
         uls(i,j) = ula(i) + (betalp(i)-betalm(i))*cla(i)
         urs(i,j) = ura(i) + (betarp(i)-betarm(i))*cra(i)
c
         dls(i,j) = 1.0/(1.0/dla(i) - (betal0(i)+betalp(i)+betalm(i)))
         drs(i,j) = 1.0/(1.0/dra(i) - (betar0(i)+betarp(i)+betarm(i)))
      endif
c
c     Take the appropriate state from the advected variables
c
      if ((i .ge. i1) .and. (i .le. i2+1)) then
         if (uslice(i-1,j) .le. 0.0) then
            vls(i,j)  = vla(i)
            wls(i,j)  = wla(i)
            gels(i,j) = gela(i)
         else
            vls(i,j)  = vl0(i)
            wls(i,j)  = wl0(i)
            gels(i,j) = gel0(i)
         endif
c
         if (uslice(i  ,j) .ge. 0.0) then
            vrs(i,j)  = vra(i)
            wrs(i,j)  = wra(i)
            gers(i,j) = gera(i)
         else
            vrs(i,j)  = vr0(i)
            wrs(i,j)  = wr0(i)
            gers(i,j) = ger0(i)
         endif
      endif
c
      do ic=1,ncolor
         if ((i .ge. i1) .and. (i .le. i2+1)) then
            if (uslice(i-1,j) .le. 0.0) then
               colls(i,j,ic) = colla(i,ic)
            else
               colls(i,j,ic) = coll0(i,ic)
            endif
c
            if (uslice(i  ,j) .ge. 0.0) then
               colrs(i,j,ic) = colra(i,ic)
            else
               colrs(i,j,ic) = colr0(i,ic)
            endif
         endif
      enddo
c
c  Dual energy formalism: if sound speed squared is less than eta1*v^2 
c    then discard the corrections and use pla, ula, dla.  This amounts
c     to assuming that we are outside the shocked region but the flow is
c     hypersonic so this should be true.  This is inserted because the
c     corrections are inaccurate for hypersonic flows.
c
      if (idual .eq. 1) then
         if ((i .ge. i1) .and. (i .le. i2+1)) then
c            if (gamma*pla(i)/dla(i) .lt. eta1*ula(i)**2) then
            if (gamma*pla(i)/dla(i) .lt. eta2*ula(i)**2 .or.
     &          max(abs(cm(i-1)),abs(c0(i-1)),abs(cp(i-1))) .lt. 
     &          1.0e-3
     &          .or. dls(i,j)/dla(i) .gt. 5.0) then
               pls(i,j) = pla(i)
               uls(i,j) = ula(i)
               dls(i,j) = dla(i)
            endif
c            if (gamma*pra(i)/dra(i) .lt. eta1*ura(i)**2) then
            if (gamma*pra(i)/dra(i) .lt. eta2*ura(i)**2 .or.
     &          max(abs(cm(i)),abs(c0(i)),abs(cp(i))) .lt. 1.0e-3
     &          .or. drs(i,j)/dra(i) .gt. 5.0) then
               prs(i,j) = pra(i)
               urs(i,j) = ura(i)
               drs(i,j) = dra(i)
            endif
         endif
      endif
c
c     Testing code
c
#define CORRECTION
c
#ifdef NO_CORRECTION
      if ((i .ge. i1) .and. (i .le. i2+1)) then
         pls(i,j) = pla(i)
         uls(i,j) = ula(i)
         dls(i,j) = dla(i)
         prs(i,j) = pra(i)
         urs(i,j) = ura(i)
         drs(i,j) = dra(i)
      endif
#endif /* NO_CORRECTION */
c
c     Enforce minimum values.
c
      if ((i .ge. i1) .and. (i .le. i2+1)) then
         pls(i,j) = max(pls(i,j), tiny)
         prs(i,j) = max(prs(i,j), tiny)
         dls(i,j) = max(dls(i,j), tiny)
         drs(i,j) = max(drs(i,j), tiny)
      endif
c
c     If approximating pressure free conditions, then the density should be
c       reset to the pre-corrected state.
c
      if (ipresfree .eq. 1) then
         if ((i .ge. i1) .and. (i .le. i2+1)) then
            dls(i,j) = dla(i)
            drs(i,j) = dra(i)
         endif
      endif
c
c      endif REMOVE THIS COMMENT
c      endif
c
      return
c      end subroutine inteuler_d
c      end subroutine 
c LINE 827:           #################################
      end

c=======================================================================
c///////////////////////  SUBROUTINE INTVAR_D  \\\\\\\\\\\\\\\\\\\\\\\\\\\
c
      attributes(device) subroutine intvar_d(qslice, idim, 
     &    i1, i2, isteep, 
     &	  steepen, iflatten, 
     &    flatten, c1, c2, c3, c4, c5, c6, char1, char2,
     &    c0, dq, ql, qr, q6, qla, qra, ql0, qr0, cudaI)
c
c  COMPUTES LEFT AND RIGHT EULERIAN INTERFACE VALUES FOR RIEMANN SOLVER
c
c  written by: Greg Bryan
c  date:       March, 1996
c  modified1:  August 2010 by Chris Heuser; altered for CUDA
c
c  PURPOSE:  Uses piecewise parabolic interpolation to compute left-
c    and right interface values to be fed into Riemann solver during a
c    one dimensional sweeps.  This version computes the Eulerian corrections
c    to the left and right states described in section three of Colella &
c    Woodward (1984), JCP.  The routine works on a single variable in
c    one dimension.
c
c  INPUT:
c    qslice   - one dimensional field of quantity q (one of d,e,u,v...)
c    idim     - declared dimension of 1D fields
c    i1, i2   - start and end indexes of active region
c    isteep   - steepening flag (1 = on, 0 = off); only apply to density!
c    steepen    - steepening coefficients
c    iflatten - flattening flag (1 = on, 0 = off)
c    flatten  - flattening coefficients
c    c1-6     - precomputed grid coefficients
c    char1,2  - characteristic distances for +/- waves (for average)
c    c0       - characteristic distance (for lagrangean cell face)
c    dq, ql, qr, q6 - 1D field temporaries
c    cudaI    - i value to be used (for cuda)
c    
c  OUTPUT:
c    qla, qra - left and right state values (from char1,2)
c    ql0, qr0 - left and right state values (from c0)
c
c  EXTERNALS:
c
c  LOCALS:
c
c  PARAMETERS:
c    ft     - a constant used in eq. 1.124 (=2*2/3)
c
c-----------------------------------------------------------------------
c


      integer ijkn
      parameter (ijkn=MAX_ANY_SINGLE_DIRECTION)
c
c  argument declarations
c
      integer,device :: idim, i1, i2, iflatten, isteep
      integer,device :: cudaI
      real,dimension(idim),device :: c1, c2, c3, c4, c5, c6,
     &     char1, char2, c0,
     &     qla, qra, ql0, qr0
      real,dimension(ijkn),device :: qslice, steepen, flatten
c
c  parameters
c
      real ft
      parameter(ft = 4.0/3.0)
c
c  local declarations (arrays passed as temps)
c
      integer i

      real qplus, qmnus, temp1, temp2, temp3, temp4, temp22, temp23
      real dq(idim), ql(idim), qr(idim), q6(idim)
c
c\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\/////////////////////////////////////
c=======================================================================
c
c     Compute average linear slopes (eqn 1.7)
c      Monotonize (eqn 1.8)
c
      i = cudaI

      if ((i .ge. i1-2) .and. (i .le. i2+2)) then
         qplus = qslice(i+1)-qslice(i  )
         qmnus = qslice(i  )-qslice(i-1)
         dq(i) = c1(i)*qplus + c2(i)*qmnus
         temp1 = min(abs(dq(i)), 2.0*abs(qmnus), 2.0*abs(qplus))
         if (qplus*qmnus .gt. 0) then
            dq(i) = temp1*sign(1.0, dq(i))
         else
            dq(i) = 0.0
         endif
      endif
c     
c     Construct left and right values (eqn 1.6)
c
      if ((i .ge. i1-1) .and. (i .le. i2+2)) then
         ql(i) = c3(i)*qslice(i-1) + c4(i)*qslice(i) +
     &           c5(i)*    dq(i-1)   + c6(i)*dq(i)
         qr(i-1) = ql(i)
      endif
c
c     Steepen if asked for (use precomputed steepening parameter)
c
      if (isteep .ne. 0) then
	 if ((i .ge. i1-1) .and. (i .le. i2+1)) then
            ql(i) = (1.0-steepen(i))*ql(i) + 
     &              steepen(i)*(qslice(i-1)+0.5*dq(i-1))
            qr(i) = (1.0-steepen(i))*qr(i) + 
     &              steepen(i)*(qslice(i+1)-0.5*dq(i+1))
         endif
      endif
c
c     Monotonize again (eqn 1.10)
c
      if ((i .ge. i1-1) .and. (i .le. i2+1)) then
         temp1 = (qr(i)-qslice(i))*(qslice(i)-ql(i))
         temp2 = qr(i)-ql(i)
         temp3 = 6.0*(qslice(i)-0.5*(qr(i)+ql(i)))
         if (temp1 .le. 0.0) then
            ql(i) = qslice(i)
            qr(i) = qslice(i)
         endif
         temp22 = temp2**2
         temp23 = temp2*temp3
         if (temp22 .lt. temp23)
     &        ql(i) = 3.0*qslice(i) - 2.0*qr(i) 
         if (temp22 .lt. -temp23)
     &        qr(i) = 3.0*qslice(i) - 2.0*ql(i) 
      endif
c
c     If requested, flatten slopes with flatteners calculated in calcdiss (4.1)
c
      if (iflatten .ne. 0) then
	 if ((i .ge. i1-1) .and. (i .le. i2+1)) then
            ql(i) = qslice(i)*flatten(i) + ql(i)*(1.0-flatten(i))
            qr(i) = qslice(i)*flatten(i) + qr(i)*(1.0-flatten(i))
         endif
      endif
c
c    Now construct left and right interface values (eqn 1.12 and 3.3)
c
      if ((i .ge. i1-1) .and. (i .le. i2+1)) then
         q6(i) = 6.0*(qslice(i)-0.5*(ql(i)+qr(i)))
         dq(i) = qr(i) - ql(i)
      endif
c
      if ((i .ge. i1) .and. (i .le. i2+1)) then
        qla(i)= qr(i-1)-char1(i-1)*(dq(i-1)-(1.0-ft*char1(i-1))*q6(i-1))
        qra(i)= ql(i  )+char2(i  )*(dq(i  )+(1.0-ft*char2(i  ))*q6(i  ))
      endif
c
      if ((i .ge. i1) .and. (i .le. i2+1)) then
         ql0(i) = qr(i-1)-c0(i-1)*(dq(i-1)-(1.0-ft*c0(i-1))*q6(i-1))
         qr0(i) = ql(i  )-c0(i  )*(dq(i  )+(1.0+ft*c0(i  ))*q6(i  ))
      endif
c
c
      return
      end subroutine intvar_d

      


      end module inteuler_intvar

Hi Rob,

The early CUDA Fortran compilers has a number of issues like this where some non-GPU code would be added to the kernel or we weren’t catching errors. While I don’t know the exact cause of this error, it does appear to have been fixed in the 10.3 release.

% pgf90 -c rob.f -Mcuda -Mpreprocess -V10.1 -DMAX_COLOR=8 -DMAX_ANY_SINGLE_DIRECTION=8 -i8 -r8 -pg
PGF90-F-0000-Internal compiler error. unsupported operation 309 (rob.f: 503)
PGF90/x86-64 Linux 10.1-0: compilation aborte
% pgf90 -c rob.f -Mcuda -Mpreprocess -V10.2 -DMAX_COLOR=8 -DMAX_ANY_SINGLE_DIRECTION=8 -i8 -r8 -pg
PGF90-F-0000-Internal compiler error. unsupported operation 309 (rob.f: 503)
PGF90/x86-64 Linux 10.2-1: compilation aborted
% pgf90 -c rob.f -Mcuda -Mpreprocess -V10.3 -DMAX_COLOR=8 -DMAX_ANY_SINGLE_DIRECTION=8 -i8 -r8 -pg
%

  • Mat