Please let me know what is wrong in the following simple code:
attributes(global) subroutine saxpy(x,y,a)
implicit none
real,device :: x(:), y(:)
real,value :: a
integer :: i, n
n = size(x)
i = blockDim%x * (blockIdx%x - 1) + threadIdx%x
if (i <= n) y(i) = y(i) + a*x(i)
end subroutine saxpy
program testSaxpy
use cudafor
implicit none
integer, parameter :: N = 40000
real :: x(N), y(N), result(N), a
real, device :: x_d(N), y_d(N)
type(dim3) :: grid, tBlock
integer :: i
tBlock = dim3(256,1,1)
grid = dim3(ceiling(real(N)/tBlock%x),1,1)
a = 2.0
do i = 1,N
x(i) = float(i)
y(i) = 3.0*float(i)
enddo
result = a*x + y
x_d = x
y_d = y
call saxpy<<<grid, tBlock>>>(x_d,y_d,a)
y = y_d
write(*,*) 'Max error: ', maxval(abs(y-result))
end program testSaxpy
The error message is as follows:
0: copyout Memcpy (host=0x0x62b880, dev=0x0xf03d47200, size=160000) FAILED: 77(an illegal memory access was encountered)
However, if the global subroutine is inside a module, the code works. The working code is as follows:
module test
contains
attributes(global) subroutine saxpy(x,y,a)
implicit none
real,device :: x(:), y(:)
real,value :: a
integer :: i, n
n = size(x)
i = blockDim%x * (blockIdx%x - 1) + threadIdx%x
if (i <= n) y(i) = y(i) + a*x(i)
end subroutine saxpy
end module test
program testSaxpy
use test
use cudafor
implicit none
integer, parameter :: N = 40000
real :: x(N), y(N), result(N), a
real, device :: x_d(N), y_d(N)
type(dim3) :: grid, tBlock
integer :: i
tBlock = dim3(256,1,1)
grid = dim3(ceiling(real(N)/tBlock%x),1,1)
a = 2.0
do i = 1,N
x(i) = float(i)
y(i) = 3.0*float(i)
enddo
result = a*x + y
x_d = x
y_d = y
call saxpy<<<grid, tBlock>>>(x_d,y_d,a)
y = y_d
write(*,*) 'Max error: ', maxval(abs(y-result))
end program testSaxpy
Thanks for your attention!