CUDA Fortran

Hi,
I just started learning CUDA Fortran. I have written my first code, but I am not getting the correct values.
Here is the source code:

! Kernel as posted in the question: intended to add a and b element-wise into c.
! NOTE(review): a, b and c are declared as rank-2 (2,2) arrays but are indexed
! below with a single subscript; this mismatch is what produces the reported
! "number of subscripts is less than the rank" warning.
attributes (global)subroutine matadd(a,b,c)

integer ,dimension(2,2) :: a,b,c
integer i,j
! i is taken from the thread index, j from the block index (x component of each).
i = threadIdx%x
j = blockIdx%x

! One flattened offset (thread index + block index * block size) is used as the
! only subscript for all three arrays.
! NOTE(review): threadIdx/blockIdx are presumably 1-based in CUDA Fortran, so
! this offset would not start at 1 -- confirm.
c(i+j*blockDim%x) = a(i+j*blockDim%x) + b(i+j*blockDim%x)

end subroutine matadd

! Host driver as posted in the question: fills two 2x2 host arrays with ones,
! assigns them to device arrays, launches matadd and prints the result array.
program matrix
use cudafor
integer ,dimension(2,2):: m,n,p
integer, device,allocatable,dimension(:,:) :: mdev,ndev,pdev
integer i,j
type(dim3) :: dimBlock
allocate (mdev(2,2),ndev(2,2),pdev(2,2))
! Set every element of both host operands to 1.
do i=1,2
do j=1,2
m(i,j)=1
n(i,j)=1
end do
end do

! Assign the host arrays to their device-resident counterparts.
mdev=m
ndev=n

! NOTE(review): dimBlock is declared type(dim3) but is assigned dim(1,1)
! rather than a dim3(...) value -- looks like a typo; confirm.
dimBlock =dim(1,1)
! Launch configuration: 4 as the first chevron argument, dimBlock as the second.
call matadd <<<4,dimBlock>>> (mdev, ndev, pdev)

! Assign the device result to the host array p.
p=pdev

! Print the result one element per line.
do i=1,2
do j=1,2
print*,p(i,j)

!print*,n(i,j)

!print*,p(i,j)
end do
end do

! NOTE(review): the names passed to deallocate carry (2,2) subscripts here;
! deallocate normally takes the bare allocatable names -- confirm.
deallocate (mdev(2,2),ndev(2,2),pdev(2,2))
end program matrix

I am getting these warnings: "The number of subscripts is less than the rank of c, b, a".

please help me

These lines are not consistent:

integer ,dimension(2,2) :: a,b,c

c(i+j*blockDim%x) = a(i+j*blockDim%x) + b(i+j*blockDim%x)

You defined a,b,c as 2x2 matrices, but you access them with only 1 index.

This will work.

!> Element-wise addition kernel for 2x2 integer matrices: c = a + b.
!!
!! Each thread handles the single element selected by the x and y components
!! of its thread index, so the kernel is meant to be launched with one block
!! of at least 2x2 threads, e.g. <<<1, dim3(2,2,1)>>>.
!!
!! a : first operand, 2x2 (read only)
!! b : second operand, 2x2 (read only)
!! c : result, 2x2; element (i,j) receives a(i,j) + b(i,j)
attributes(global) subroutine matadd(a, b, c)
  implicit none
  integer, dimension(2,2), intent(in)  :: a, b
  integer, dimension(2,2), intent(out) :: c
  integer :: i, j

  ! The thread index components are used directly as the row/column subscripts.
  i = threadIdx%x
  j = threadIdx%y

  ! Threads outside the 2x2 extent (launch with a larger block) must not
  ! touch memory beyond the arrays.
  if (i <= size(c, 1) .and. j <= size(c, 2)) then
    c(i, j) = a(i, j) + b(i, j)
  end if
end subroutine matadd

!> Host driver: adds two 2x2 integer matrices of ones on the GPU with the
!! matadd kernel and prints the four result elements, one per line, with the
!! first index varying fastest.
program matrix
  use cudafor
  implicit none

  ! Matrix extent; must match the 2x2 shape hard-coded in the matadd kernel.
  integer, parameter :: n_dim = 2

  integer, dimension(n_dim, n_dim) :: m, n, p
  integer, device, allocatable, dimension(:,:) :: mdev, ndev, pdev
  integer :: i, j, istat
  type(dim3) :: dimBlock

  allocate (mdev(n_dim, n_dim), ndev(n_dim, n_dim), pdev(n_dim, n_dim))

  ! Both operands are all ones.
  m = 1
  n = 1

  ! Host -> device transfers via assignment to the device arrays.
  mdev = m
  ndev = n

  ! One block of n_dim x n_dim threads: one thread per matrix element.
  dimBlock = dim3(n_dim, n_dim, 1)
  call matadd<<<1, dimBlock>>>(mdev, ndev, pdev)

  ! A failed launch would otherwise go unnoticed and p would hold garbage.
  istat = cudaGetLastError()
  if (istat /= cudaSuccess) then
    print *, 'matadd launch failed: ', trim(cudaGetErrorString(istat))
    stop 1
  end if

  ! Device -> host transfer of the result.
  p = pdev

  do j = 1, n_dim
    do i = 1, n_dim
      print *, p(i, j)
    end do
  end do

  deallocate (mdev, ndev, pdev)
end program matrix