I am gettting the following error and naot able to figure out the why this is happening.

Lowering Error: bad ast optype in expression [ast=7371,asttype=12,datatype=0]

Lowering Error: array lower bound is not a symbol for datatype 4585

PGF90-F-0000-Internal compiler error. Errors in Lowering 2 (…/SOURCES/create_schur.f90: 395)

PGF90/x86-64 Linux 16.10-0: compilation aborted

A part of the code is:

```
attributes(global) SUBROUTINE cu_solve_seq_one(mat,rhs,nb_arrows,sch_rhs_loc,nint,current_dir)
USE types
use cudafor
USE tri_diag_solve_mod
IMPLICIT NONE
INTEGER, value :: nb_arrows, current_dir,nint,temp1,temp2,temp3
TYPE(matrix_bloc), managed, INTENT(IN) :: mat
TYPE(vector_bloc), managed, INTENT(IN) :: rhs
REAL(KIND=8), DIMENSION(:),device ,INTENT(OUT) :: sch_rhs_loc
INTEGER ,value :: code, nb_intfce, Ndom, n, nmat, i, j,start, finish
integer :: threadId, blockId, temp4
!type(dim3) :: dimGrid2, dimBlock2
REAL(KIND=8), dimension(:),allocatable :: s
nb_intfce = Ndom + 1
blockId = blockIdx%x + blockIdx%y * gridDim%x + gridDim%x * gridDim%y * blockIdx%z
threadId = blockId * (blockDim%x * blockDim%y * blockDim%z) + (threadIdx%z * (blockDim%x * blockDim%y)) &
+ (threadIdx%y * blockDim%x) + threadIdx%x
ALLOCATE(s(nint))
!DO i = 1, nb_arrows
start = (threadId-1)*(nint+2)+2
finish = threadId*(nint+2)-1
temp1 = finish-start+1
temp2 = start-1
temp3 = finish+1
temp4 = threadId + nb_arrows
CALL cu_tri_diag_solution<<<1,temp1>>>(mat%a11,rhs%f1(start:finish),s)
sch_rhs_loc(threadId) = mat%a21(1)*s(1)-rhs%f1(temp2)
sch_rhs_loc(temp4) = mat%a21(2)*s(nint)-rhs%f1(temp3)
!END DO
DEALLOCATE(s)
end subroutine cu_solve_seq_one !THIS IS LINE 395 that the error is shown
```

and the subroutine cu_tri_diag_solution is

```
attributes(global) SUBROUTINE cu_tri_diag_solution(mat,d,x)
USE types !launch with one block, probably enough (1024 threads)
IMPLICIT NONE
TYPE(tri_diag_mat), managed, INTENT(IN) :: mat
REAL(KIND=8), DIMENSION(:),managed, INTENT(IN) :: d
REAL(KIND=8), DIMENSION(:),device, INTENT(OUT) :: x
REAL(KIND=8), DIMENSION(SIZE(mat%b)),device :: cp, dp
REAL(KIND=8) :: id
INTEGER :: n, i,ix
n = SIZE(mat%b)
IF (n==1) THEN
x(1) = d(1)/mat%b(1)
RETURN
END IF
ix = (blockIdx%x)*blockDim%x + threadIdx%x
cp(1) = mat%c(1)/mat%b(1)
dp(1) = d(1)/mat%b(1)
!DO i = 2, n-1
id = (mat%b(ix) - cp(ix-1) * mat%a(ix))
cp(ix) = mat%c(ix)/id
dp(ix) = (d(ix) - dp(ix-1) * mat%a(i))/id
!END DO
dp(n) = (d(n) - dp(n-1) * mat%a(n))/ (mat%b(n) - cp(n-1) * mat%a(n))
!= Now back substitute. =!
x(n) = dp(n)
!DO i = n - 1, 1, -1
x(ix) = dp(ix) - cp(ix) * x(ix + 1)
!END DO
END SUBROUTINE cu_tri_diag_solution
```

I am using CUDA Fortran and the PGI version is 16.10-0. Anyone has any idea how I can resolve this ? Please feel free to point out any errors you see, because I am relatively new to cuda and fortran and this is quite confusing to me.

Thanks!