When I was trying to compute 1D stencil with cuda fortran(using share memory), I got a illegal memory error. I have checked the program for a long time, but can not find the error.
`module reverse_m
implicit none
integer, device :: n_d
contains
attributes(global) subroutine staticReverse(a, d)
real :: d(:), a(:), result
integer :: t, tr, t1, ra = 3, j
real, shared :: s(-ra+1:64 + ra)
!real, shared :: s(64 + 2*ra)
t = (blockIdx%x-1)*blockDim%x + threadIdx%x
!write(*,*) 't:', t, ra
if (t > ra .and. t < size(a) - ra) then
t1 = threadIdx%x
!write(*,*) 't:', t1, a(t)
s(t1) = a(t)
write(*,*) 's(t1)', t1, s(t1)
if (t1 <= ra) then
s(t1 - ra) = a(t - ra)
endif
if (t1 >= 64 - ra) then
s(t1 + ra) = a(t + ra)
endif
endif
call syncthreads()
if (t > ra .and. t < size(a) - ra) then
write(*,*) t,t1
result = 0
do j = -ra, ra
result = result + s(t1 + j)
enddo
d(t) = result
write(*,*) 'size(d):', size(d)
endif
end subroutine staticReverse
end module reverse_m
program sharedExample
use cudafor
use reverse_m
implicit none
integer, parameter :: n = 64, ra = 3
real :: a(n), r(n), d(n)
real, device :: d_a(n), d_d(n)
type(dim3) :: grid, tBlock
integer :: i, sizeInBytes, j
tBlock = dim3(64,1,1)
grid = dim3(1,1,1)
do i = 1, n
a(i) = i
enddo
do i = 1 + ra, n - ra
do j = -ra, ra
d(i) = d(i) + a(i - j)
enddo
enddo
sizeInBytes = sizeof(a(1))*tBlock%x
! run version with static shared memory
d_a = a
write(,) ‘Size(a):’, size(a)
call staticReverse<<<grid,tBlock>>>(d_a, d_d)
r = d_d
write(,) ‘Static case max error:’, maxval(abs(r-d))
if (maxval(abs(r-d)) .gt. 1.0e-7) then
write(,) “Test Failed”
else
write(,) “Test Passed”
endif
end program sharedExample
`
When I was trying to print ‘s’ in the kernel function, It print nothing. It is weird. Can you help me find the error? thank you!