Hi,
This is my code :
ATTRIBUTES(global) SUBROUTINE collapse3Dto2D ( &
kpt, ipointerStart, &
igall_size, igall, &
csm_row, csm_col, csm, &
cx_size, cx, &
cvs1_row, cvs1_col, cvs1_layer, cvs1 )
USE cudafor
IMPLICIT NONE
INTEGER :: kpt, ipointerStart
INTEGER :: igall_size, csm_row, csm_col, cx_size, cvs1_row, cvs1_col, cvs1_layer
INTEGER :: igall(igall_size)
COMPLEX*16 :: csm(csm_row, csm_col), cx(cx_size), cvs1(cvs1_row, cvs1_col, cvs1_layer)
!Working variable
INTEGER :: ipointer, ns, ipindexStart, ipindexEnd, ipindex
ipointer = ipointerStart - 1 + (blockidx%x - 1) * blockdim%x + threadidx%x
ns = (ipointer - 1) * kpt
ipindexStart = igall(ipointer)
ipindexEnd = igall(ipointer + 1) - 1
DO ipindex = ipindexStart, ipindexEnd
csm(1:2, ns+1:ns+kpt) = csm(1:2, ns+1:ns+kpt) + cvs1(1:2, 1:kpt, ipindex) * cx(ipindex) !<--- PROBLEM
ENDDO
print *, "Done update"
END SUBROUTINE collapse3Dto2D
I compiled with this: pgfortran -mp -Mcuda=cc60 -Mlarge_arrays .f
Lastly, I get this: 0: copyin Memcpy (dev=0x0x7f0732401a00, host=0x0x7ffc96b0a7cc, size=4) FAILED: 77(an illegal memory access was encountered)
I narrowed down that the problem lies in this line:
csm(1:2, ns+1:ns+kpt) = csm(1:2, ns+1:ns+kpt) + cvs1(1:2, 1:kpt, ipindex) * cx(ipindex)
All variables and array are declared as managed in the host part.
Any explanation ?
Thanks,
Phoon