Interface block is needed for device routines

Please let me know what is wrong in the following simple code:

  attributes(global) subroutine saxpy(x,y,a)
    implicit none
    real,device :: x(:), y(:)
    real,value :: a
    integer :: i, n

    n = size(x)
    i = blockDim%x * (blockIdx%x - 1) + threadIdx%x
    if (i <= n) y(i) = y(i) + a*x(i)

  end subroutine saxpy

program testSaxpy
  use cudafor
  implicit none
  integer, parameter :: N = 40000
  real :: x(N), y(N), result(N), a
  real, device :: x_d(N), y_d(N)
  type(dim3) :: grid, tBlock
  integer :: i

  tBlock = dim3(256,1,1)
  grid = dim3(ceiling(real(N)/tBlock%x),1,1)

  a = 2.0

  do i = 1,N
    x(i) = float(i)
    y(i) = 3.0*float(i)
  enddo

  result = a*x + y

  x_d = x
  y_d = y
  call saxpy<<<grid, tBlock>>>(x_d,y_d,a)
  y = y_d
  write(*,*) 'Max error: ', maxval(abs(y-result))
end program testSaxpy

The error message is as follows:
0: copyout Memcpy (host=0x0x62b880, dev=0x0xf03d47200, size=160000) FAILED: 77(an illegal memory access was encountered)

However, if the global subroutine is inside a module, the code works. The working code is as follows:

module test
contains
  attributes(global) subroutine saxpy(x,y,a)
    implicit none
    real,device :: x(:), y(:)
    real,value :: a
    integer :: i, n

    n = size(x)
    i = blockDim%x * (blockIdx%x - 1) + threadIdx%x
    if (i <= n) y(i) = y(i) + a*x(i)

  end subroutine saxpy
end module test

program testSaxpy
  use test
  use cudafor
  implicit none
  integer, parameter :: N = 40000
  real :: x(N), y(N), result(N), a
  real, device :: x_d(N), y_d(N)
  type(dim3) :: grid, tBlock
  integer :: i

  tBlock = dim3(256,1,1)
  grid = dim3(ceiling(real(N)/tBlock%x),1,1)

  a = 2.0

  do i = 1,N
    x(i) = float(i)
    y(i) = 3.0*float(i)
  enddo

  result = a*x + y

  x_d = x
  y_d = y
  call saxpy<<<grid, tBlock>>>(x_d,y_d,a)
  y = y_d
  write(*,*) 'Max error: ', maxval(abs(y-result))
end program testSaxpy

Thanks for your attention!

Hi XZHU,

CUDA Fortran device routines require an F90 interface when passing assumed shape arrays. This is why it works when you have it in a module since modules create an implicit interface. Alternatively, you can add an interface block.

Hope this helps,
Mat

Thanks for the information Matt!
It is working after I add an interface block. Have a great weekend!

  attributes(global) subroutine saxpy(x,y,a)
    implicit none
    real,device :: x(:), y(:)
    real,value :: a
    integer :: i, n

    n = size(x)
    i = blockDim%x * (blockIdx%x - 1) + threadIdx%x
    if (i <= n) y(i) = y(i) + a*x(i)

  end subroutine saxpy

program testSaxpy
  use cudafor
  implicit none

  interface
    attributes(global) subroutine saxpy(x,y,a)
    real, device :: x(:), y(:)
    real, value :: a
    end subroutine saxpy
  end interface

  integer, parameter :: N = 40000
  real :: x(N), y(N), result(N), a
  real, device :: x_d(N), y_d(N)
  type(dim3) :: grid, tBlock
  integer :: i

  tBlock = dim3(256,1,1)
  grid = dim3(ceiling(real(N)/tBlock%x),1,1)

  a = 2.0

  do i = 1,N
    x(i) = float(i)
    y(i) = 3.0*float(i)
  enddo

  result = a*x + y

  x_d = x
  y_d = y
  call saxpy<<<grid, tBlock>>>(x_d,y_d,a)
  y = y_d
  write(*,*) 'Max error: ', maxval(abs(y-result))
end program testSaxpy

thank you, i also have the same problem. Now it’s working well!