Hi,
I need to create a copy of an array on the device. The following program aborts with an error in the acc_memcpy_device call:
! Reproducer for a failing device-to-device copy with acc_memcpy_device.
! The program fills A on the host, prints sizeof(A), then inside an OpenACC
! data region asks the runtime to copy Asize bytes from the device copy of A
! to the device copy of Anew, and finally updates Anew in a kernels region.
program Bug
use openacc
implicit none
Integer N, i, Asize
Parameter(N=4096)
! Intended byte count for the copy: N elements of Real*8, i.e. 4096*8 = 32768.
Parameter(Asize=N*8)
Real*8 A(N)
Real*8 Anew(N)
! Initialise the source array on the host.
do i=1,N
A(i) = 17
enddo
666 format(I)
! Print the byte size of A; the run log in this report shows 32768 here,
! which matches Asize.
write(*, 666) sizeof(A)
! A is transferred to the device on entry (copyin); Anew is only allocated
! there (create), so its device contents come solely from the copy below.
!$acc data copyin(A) create(Anew)
c call acc_memcpy_device(Anew, A, sizeof(A))
! Copy Asize bytes from the device address of A to the device address of Anew.
! NOTE(review): the run log shows the runtime receiving bytes=6306356
! (0x603A34) instead of 32768, while both device pointers are as expected.
! That value sits just below the host addresses of the arrays (0x603ce0,
! 0x60bce0), so presumably the address of the byte count is being passed
! rather than its value -- confirm against the compiler's Fortran interface
! for acc_memcpy_device (by-value vs. by-reference for the third argument).
CALL acc_memcpy_device(acc_deviceptr(Anew), acc_deviceptr(A),
& Asize)
! Interior points only (i = 2 .. N-1) because the expression reads A(i-1)
! and A(i+1). The update reads Anew(i), so it relies on the copy above
! having filled Anew on the device.
!$acc kernels
do i=2,N-1
Anew(i) = Anew(i) - 0.5 * (A(i-1) - 2*A(i) + A(i+1))
enddo
!$acc end kernels
!$acc end data
end program
$ pgfortran --version
pgfortran 18.10-1 64-bit target on x86-64 Linux -tp haswell
PGI Compilers and Tools
Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
$pgfortran -acc -ta=tesla,cc50 Bug.F
$PGI_ACC_DEBUG=1 ./a.out
32768
ACC: detected 1 CUDA devices
cuda_initdev thread:0 data.default_device_num:0 pdata.cuda.default_device_num:0
ACC: device[1] is NVIDIA CUDA device 0 compute capability 5.0
ACC: initialized 1 CUDA devices
ACC: device[2] is PGI native
pinitialize (threadid=1)
cuda_init_device thread:1 data.default_device_num:1 pdata.cuda.default_device_num:1
cuda_init_device(threadid=1, device 0) dindex=1, api_context=(nil)
cuda_init_device(threadid=1, device 0) dindex=1, setting api_context=(nil)
cuda_init_device(threadid=1, device 0) dindex=1, new api_context=0x1f39380
argument memory for queue 16 device:0x701b40000 host:0x203860000
curr_devid for threadid=1 is 1
pgi_uacc_dataenterstart( file=/home/alexander/Bachelorarbeit/Bug.F, function=bug, line=1:1, line=17, devid=0,threadid=1 )
cuda_init_device thread:1 data.default_device_num:1 pdata.cuda.default_device_num:1
cuda_init_device(threadid=1, device 0) dindex=1, api_context=0x1f39380
pgi_uacc_dataon(hostptr=0x60bce0,stride=1,size=4096,eltsize=8,lineno=17,name=a,flags=0x700=present+create+copyin,async=-1,threadid=1)
cuda_shared( 0x60bce0 ) is not-managed
pgi_uacc_alloc(size=32768,devid=1,threadid=1)
allocate device memory 0x701c40000(32768B,threadid=1)
pgi_uacc_alloc(size=32768,devid=1,threadid=1) returns 0x701c40000
map dev:0x701c40000 host:0x60bce0 dindex:1 size:32768 offset:0 (line:17 name:a) thread:1
alloc done with devptr at 0x701c40000 (threadid=1)
pgi_uacc_dataupx(devptr=0x701c40000,hostptr=0x60bce0,stride=1,size=4096,eltsize=8,lineno=17,name=a,async=-1,threadid=1)
pgi_uacc_cuda_dataup1(devdst=0x701c40000,hostsrc=0x60bce0,offset=0,stride=1,size=4096,eltsize=8,lineno=17,name=a,threadid=1)
pgi_uacc_dataon(hostptr=0x603ce0,stride=1,size=4096,eltsize=8,lineno=17,name=anew,flags=0x300=present+create,async=-1,threadid=1)
cuda_shared( 0x603ce0 ) is not-managed
pgi_uacc_alloc(size=32768,devid=1,threadid=1)
allocate device memory 0x701c48000(32768B,threadid=1)
pgi_uacc_alloc(size=32768,devid=1,threadid=1) returns 0x701c48000
map dev:0x701c48000 host:0x603ce0 dindex:1 size:32768 offset:0 (line:17 name:anew) thread:1
alloc done with devptr at 0x701c48000 (threadid=1)
pgi_uacc_dataenterdone(devid=1,threadid=1)
pgi_uacc_cuda_wait(lineno=-99,async=-1,dindex=1,threadid=1)
pgi_uacc_cuda_wait(sync on stream=0x228c9d0,threadid=1)
pgi_uacc_cuda_wait done (threadid=1)
cuda_shared( 0x603ce0 ) is not-managed (cached)
cuda_shared( 0x60bce0 ) is not-managed (cached)
pgi_uacc_cuda_memcpy(devdst=0x701c48000,devsrc=0x701c40000,bytes=6306356,async=-1,dindex=1)
call to cuMemcpyDtoDAsync returned error 1: Invalid value
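The device pointers passed to the copy are correct (they match the allocations for a and anew shown above), but the byte count is wrong: the runtime receives bytes=6306356 instead of the expected 32768.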
-Alex