How to debug this error?
when I use openacc for speed up and I only added some simple directed sentence,it occured this mistack. I set the PGI_ACC_DEBUG=1, it shows the following information.
pgi_uacc_dataenterstart( file=/home/0401/cfl3dv6/build/cfl/libs/diagj.F, function=diagj, line=1:1, line=78, devid=0 )
pgi_uacc_dataon(hostptr=0x7f6dad10a9b0,stride=1,121,2178,size=121x1x1,extent=121x18x30,eltsize=8,lineno=78,name=dtj,flags=0x2700=present+create+copyin+inexact,async=-1,threadid=1)
pgi_uacc_alloc(size=968,devid=1,threadid=1)
allocate device memory 0x7f6d8ae00000(1024B)
pgi_uacc_alloc(size=968,devid=1,threadid=1) returns 0x7f6d8ae00000
map dev:0x7f6d8ae00000 host:0x7f6dad10a9b0 dindex:1 size:968 offset:0 (line:78 name:dtj) thread:1
alloc done with devptr at 0x7f6d8ae00000
pgi_uacc_dataupx(devptr=0x7f6d8ae00000,hostptr=0x7f6dad10a9b0,stride=1,size=121,extent=65340,eltsize=8,lineno=78,name=dtj,async=-1,threadid=1)
pgi_uacc_cuda_dataup1(devdst=0x7f6d8ae00000,hostsrc=0x7f6dad10a9b0,offset=0,stride=1,size=121,eltsize=8,lineno=78,name=dtj,thread=1)
pgi_uacc_dataon(hostptr=0x7f6dac570230,stride=1,121,2178,67518,size=121x1x1x5,extent=121x18x31x5,eltsize=8,lineno=78,name=q,flags=0x2700=present+create+copyin+inexact,async=-1,threadid=1)
pgi_uacc_alloc(size=2161544,devid=1,threadid=1)
allocate device memory 0x7f6d8b000000(2161664B)
pgi_uacc_alloc(size=2161544,devid=1,threadid=1) returns 0x7f6d8b000000
map dev:0x7f6d8b000000 host:0x7f6dac570230 dindex:1 size:2161544 offset:0 (line:78 name:q) thread:1
alloc done with devptr at 0x7f6d8b000000
pgi_uacc_dataupx(devptr=0x7f6d8b000000,hostptr=0x7f6dac570230,stride=1,67518,size=121x5,extent=67518x5,eltsize=8,lineno=78,name=q,async=-1,threadid=1)
pgi_uacc_cuda_dataup2(devdst=0x7f6d8b000000,hostsrc=0x7f6dac570230,offset=0,0,stride=1,67518,size=121,5,eltsize=8,lineno=78,name=q)
pgi_uacc_dataon(hostptr=0x7f6db2f09e78,offset=0,5,stride=1,63162,size=63162x25,extent=63162x35,eltsize=8,lineno=78,name=t,flags=0x2f00=present+create+copyin+copyout+inexact,async=-1,threadid=1)
pgi_uacc_alloc(size=12632400,devid=1,threadid=1)
allocate device memory 0x7f6d8e400000(12632576B)
pgi_uacc_alloc(size=12632400,devid=1,threadid=1) returns 0x7f6d8e400000
map dev:0x7f6d8e400000 host:0x7f6db3172b88 dindex:1 size:12632400 offset:0 (line:78 name:t) thread:1
alloc done with devptr at 0x7f6d8e1972f0
pgi_uacc_dataupx(devptr=0x7f6d8e1972f0,hostptr=0x7f6db2f09e78,offset=315810,stride=1,size=1579050,extent=2210670,eltsize=8,lineno=78,name=t,async=-1,threadid=1)
pgi_uacc_cuda_dataup1(devdst=0x7f6d8e1972f0,hostsrc=0x7f6db2f09e78,offset=315810,stride=1,size=1579050,eltsize=8,lineno=78,name=t,thread=1)
pgi_uacc_dataon(hostptr=0x7f6db2b5d0e8,stride=1,121,2178,65340,size=121x1x1x5,extent=121x18x30x5,eltsize=8,lineno=78,name=res,flags=0x2700=present+create+copyin+inexact,async=-1,threadid=1)
pgi_uacc_alloc(size=2091848,devid=1,threadid=1)
allocate device memory 0x7f6d8b400000(2092032B)
pgi_uacc_alloc(size=2091848,devid=1,threadid=1) returns 0x7f6d8b400000
map dev:0x7f6d8b400000 host:0x7f6db2b5d0e8 dindex:1 size:2091848 offset:0 (line:78 name:res) thread:1
alloc done with devptr at 0x7f6d8b400000
pgi_uacc_dataupx(devptr=0x7f6d8b400000,hostptr=0x7f6db2b5d0e8,stride=1,65340,size=121x5,extent=65340x5,eltsize=8,lineno=78,name=res,async=-1,threadid=1)
pgi_uacc_cuda_dataup2(devdst=0x7f6d8b400000,hostsrc=0x7f6db2b5d0e8,offset=0,0,stride=1,65340,size=121,5,eltsize=8,lineno=78,name=res)
pgi_uacc_dataon(hostptr=0x7f6dad08afd0,stride=1,121,2178,size=121x1x1,extent=121x18x30,eltsize=8,lineno=78,name=vol,flags=0x2700=present+create+copyin+inexact,async=-1,threadid=1)
pgi_uacc_alloc(size=968,devid=1,threadid=1)
allocate device memory 0x7f6d8ae00400(1024B)
pgi_uacc_alloc(size=968,devid=1,threadid=1) returns 0x7f6d8ae00400
map dev:0x7f6d8ae00400 host:0x7f6dad08afd0 dindex:1 size:968 offset:0 (line:78 name:vol) thread:1
alloc done with devptr at 0x7f6d8ae00400
pgi_uacc_dataupx(devptr=0x7f6d8ae00400,hostptr=0x7f6dad08afd0,stride=1,size=121,extent=65340,eltsize=8,lineno=78,name=vol,async=-1,threadid=1)
pgi_uacc_cuda_dataup1(devdst=0x7f6d8ae00400,hostsrc=0x7f6dad08afd0,offset=0,stride=1,size=121,eltsize=8,lineno=78,name=vol,thread=1)
pgi_uacc_dataenterdone( devid=1 )
pgi_uacc_cuda_wait(lineno=-99,async=-1,dindex=1)
pgi_uacc_cuda_wait(sync on stream=0x29b4290)
pgi_uacc_cuda_wait done
pgi_uacc_computestart( file=/home/xll/take_test/cfl3d_xll_0401/cfl3dv6/build/cfl/libs/diagj.F, function=diagj, line=1:1, line=78, devid=0, computeconstruct=9999 )
pgi_uacc_launch funcnum=0 argptr=0x7ffe8733df20 sizeargs=(nil) async=-1 devid=1
Arguments to function 0 diagj_79_gpu dindex=1 threadid=1 device=0:
522 522 522 522 120 -121 -121 -121
-121 121 121 121 121 32621 -1958739968 32621
0x0000020a 0x0000020a 0x0000020a 0x0000020a 0x00000078 0xffffff87 0xffffff87 0xffffff87
0xffffff87 0x00000079 0x00000079 0x00000079 0x00000079 0x00007f6d 0x8b400000 0x00007f6d
Launch configuration for function=0=diagj_79_gpu line=79 dindex=1 threadid=1 device=0 <<<(1,1,1),(32,1,1),0>>> async=-1
pgi_uacc_computedone( devid=0, computeconstruct=9999 )
pgi_uacc_cuda_wait(lineno=-99,async=-1,dindex=1)
pgi_uacc_cuda_wait(sync on stream=0x29b4290)
call to cuStreamSynchronize returned error 700: Illegal address during kernel execution
call to cuMemFreeHost returned error 700: Illegal address during kernel execution
the code I changes is as follows:
kv = npl*kdim
!$acc kernels loop
do 1009 j=1,jdim1
kj = (j-1)*kv+1
do 1004 l=1,5
c
jj = 1-jdim
do 8466 ii=1,kv
jj = jj+jdim
8466 t(kj+ii-1,25+l) = -res(j+jj-1,1,i,l)
c call q8vgathp(kv,res(j,1,i,l),jdim,kv,kv,t(kj,25+l))
c
jj = 1-jdim
do 8467 ii=1,kv
jj = jj+jdim
8467 t(kj+ii-1,l+5) = q(j+jj-1,1,i,l)
c call q8vgathp(kv,q(j,1,i,l),jdim,kv,kv,t(kj,l+15))
1004 continue
c
jj = 1-jdim
do 8458 ii=1,kv
jj = jj+jdim
8458 t(kj+ii-1,21) = tfacp1*dtj(j+jj-1,1,i)
c call q8vgathp(kv,dtj(j,1,i),jdim,kv,kv,t(kj,21))
if(ivisc(2) .gt. 0) then
jj=1-jdim
do 9458 ii=1,kv
jj=jj+jdim
9458 t(kj+ii-1,12)=vol(j+jj-1,1,i)
end if
1009 continue
!$acc end kernels
c
!$acc kernels loop
do 1119 j=1,jdim
kj = (j-1)*kv+1
do 1119 l=1,5
c
jj = 1-jdim
do 8459 ii=1,kv
jj = jj+jdim
8459 t(kj+ii-1,15+l) = sj(j+jj-1,1,i,l)
c call q8vgathp(kv,sj(j,1,i,l),jdim,kv,kv,t(kj,5+l))
1119 continue
!$acc end kernels
if(ivisc(2) .gt. 1) then
ic=0
!$acc kernels loop
do 8558 ipl=1,npl
ii=i+ipl-1
do 8558 k=1,kdim
ic=ic+1
if(k .ne. kdim) then
do 1118 j=1,jdim1
kj=(j-1)*kv
t(kj+ic,31)=vist3d(j,k,ii)
1118 continue
else
do 1120 j=1,jdim1
kj=(j-1)*kv
t(kj+ic,31)=vist3d(j,kdim1,ii)
1120 continue
end if
8558 continue
!$acc end kernels
end if
It is very confusing