Openacc fortran pointer multi-dimension array

Hi Mat.
I meet some errors

FATAL ERROR: variable in data clause is partially present on the device: name=bk
file:/home/jsera.lee/lica/LICA/apply_openACC/src/mod_flowarray.f90 lhsu line:1805

this is my code

    subroutine LHSU
        USE MOD_COMMON
        USE mpi_subdomain
        USE mpi_topology
        USE mod_les
        use PaScaL_TDMA

        implicit none
        INTEGER*8     :: I,J,K
        REAL*8        :: CRE,CRE2,PRIK,PRIJ,PRII
        REAL*8, DIMENSION (:), ALLOCATABLE, target  :: Alloc1,Alloc2,Alloc3,Alloc4
        REAL*8, dimension(:,:,:), pointer :: AI,BI,CI,GI
        REAL*8, dimension(:,:,:), pointer :: AJ,BJ,CJ,GJ
        REAL*8, dimension(:,:,:), pointer :: AK,BK,CK,GK

        integer :: Ibc,Jbc,Kbc

        allocate(Alloc1((n1sub-Ibc)*n2msub*n3msub))
        allocate(Alloc2((n1sub-Ibc)*n2msub*n3msub))
        allocate(Alloc3((n1sub-Ibc)*n2msub*n3msub))
        allocate(Alloc4((n1sub-Ibc)*n2msub*n3msub))

        !$acc enter data create(Alloc1((n1sub-Ibc)*n2msub*n3msub))
        !$acc enter data create(Alloc2((n1sub-Ibc)*n2msub*n3msub))
        !$acc enter data create(Alloc3((n1sub-Ibc)*n2msub*n3msub))
        !$acc enter data create(Alloc4((n1sub-Ibc)*n2msub*n3msub))

        !$acc enter data create(AK,BK,CK,GK)

        !!Z-direction
        AK(1:n1sub-Ibc,1:n2msub,1:n3msub)=>Alloc1
        BK(1:n1sub-Ibc,1:n2msub,1:n3msub)=>Alloc2
        CK(1:n1sub-Ibc,1:n2msub,1:n3msub)=>Alloc3
        GK(1:n1sub-Ibc,1:n2msub,1:n3msub)=>Alloc4
        !!Y-direction
        AJ(1:n3msub,1:n1sub-Ibc,1:n2msub)=>Alloc4
        BJ(1:n3msub,1:n1sub-Ibc,1:n2msub)=>Alloc1
        CJ(1:n3msub,1:n1sub-Ibc,1:n2msub)=>Alloc2
        GJ(1:n3msub,1:n1sub-Ibc,1:n2msub)=>Alloc3
        !!X-direction
        AI(1:n2msub,1:n3msub,1:n1sub-Ibc)=>Alloc3
        BI(1:n2msub,1:n3msub,1:n1sub-Ibc)=>Alloc4
        CI(1:n2msub,1:n3msub,1:n1sub-Ibc)=>Alloc1
        GI(1:n2msub,1:n3msub,1:n1sub-Ibc)=>Alloc2


        !!Z-direction
        !!$acc parallel loop collapse(3) present(AK(:),BK(:),CK(:),GK(:), &
        !!$acc parallel loop collapse(3) present(AK(1:(n1sub-Ibc)*n2msub*n3msub),BK(1:(n1sub-Ibc)*n2msub*n3msub),CK(1:(n1sub-Ibc)*n2msub*n3msub),GK(1:(n1sub-Ibc)*n2msub*n3msub), &
        !!$acc parallel loop collapse(3) present(AK(1:n1sub-Ibc,1:n2msub,1:n3msub),BK(1:n1sub-Ibc,1:n2msub,1:n3msub),CK(1:n1sub-Ibc,1:n2msub,1:n3msub),GK(1:n1sub-Ibc,1:n2msub,1:n3msub), &
        !!$acc parallel loop collapse(3) present(AK(:,:,:),BK(:,:,:),CK(:,:,:),GK(:,:,:), &
        
        !$acc parallel loop collapse(3) present( &
        !$acc FIXKL_sub(:),FIXKU_sub(:),TNUY(:,:,:),RHS1(:,:,:,:),CKUV_sub(:),AKUV_sub(:))
        DO K=1,N3Msub
        DO J=1,N2Msub
        DO I=Ibc,N1Msub
            AK(I-Ibc+1,J,K)=AKUV_sub(K)*(1.+CRE*TNUY(I,J,K  )) *(1.-FIXKL_sub(K)*FLOAT(KUB))
            CK(I-Ibc+1,J,K)=CKUV_sub(K)*(1.+CRE*TNUY(I,J,K+1)) *(1.-FIXKU_sub(K)*FLOAT(KUT))
            IF (IVELSRC .EQ. 1) THEN
                BK(I-Ibc+1,J,K)=ACOEFI*(1+ACOEF*PRIK)-AK(I-Ibc+1,J,K)-CK(I-Ibc+1,J,K)
                GK(I-Ibc+1,J,K)=ACOEFI*(1+ACOEF*PRIK)*RHS1(I,J,K,1)
            ELSE
                BK(I-Ibc+1,J,K)=ACOEFI-AK(I-Ibc+1,J,K)-CK(I-Ibc+1,J,K)
                GK(I-Ibc+1,J,K)=ACOEFI*RHS1(I,J,K,1)
            ENDIF
        ENDDO
        ENDDO
        ENDDO
        !$acc end loop

        !! Y-direction

        !!! something !!!

        !! X-direction

        !!! something !!!

        !$acc exit data delete(Alloc1)
        !$acc exit data delete(Alloc2)
        !$acc exit data delete(Alloc3)
        !$acc exit data delete(Alloc4)


end subroutine LHSU

compile message:

lhsu:
   1767, Generating enter data create(alloc1(n3msub*(n2msub*(n1sub-ibc))))
   1768, Generating enter data create(alloc2(n3msub*(n2msub*(n1sub-ibc))))
   1769, Generating enter data create(alloc3(n3msub*(n2msub*(n1sub-ibc))))
   1770, Generating enter data create(alloc4(n3msub*(n2msub*(n1sub-ibc))))
   1772, Generating enter data create(ck(:,:,:),gk(:,:,:),bk(:,:,:),ak(:,:,:))
   1805, Generating present(akuv_sub(:),bk(:,:,:),fixku_sub(:),ckuv_sub(:),fixkl_sub(:),rhs1(:,:,:,:),tnuy(:,:,:),gk(:,:,:),ck(:,:,:),ak(:,:,:))
         Generating NVIDIA GPU code
       1807, !$acc loop gang, vector(128) collapse(3) ! blockidx%x threadidx%x
       1808,   ! blockidx%x threadidx%x collapsed
       1809,   ! blockidx%x threadidx%x collapsed

   1955, Generating exit data delete(alloc1(:))
   1956, Generating exit data delete(alloc2(:))
   1957, Generating exit data delete(alloc3(:))
   1958, Generating exit data delete(alloc4(:))

this is my runtime error message

   bk lives at 0xe3a8c00 size 12582912 partially present
Present table dump for device[2]: NVIDIA Tesla GPU 1, compute capability 8.0, threadid=1
host:0x899d30 device:0x7f3e5b2ffe00 size:128 presentcount:0+1 line:150 name:descriptor
host:0x899dc0 device:0x7f3e5b302c00 size:128 presentcount:0+1 line:151 name:descriptor
host:0x899e50 device:0x7f3e5b305200 size:128 presentcount:0+1 line:152 name:descriptor
host:0x899ee0 device:0x7f3e5b300600 size:128 presentcount:0+1 line:150 name:descriptor
host:0x899f70 device:0x7f3e5b303200 size:128 presentcount:0+1 line:151 name:descriptor
host:0x89a000 device:0x7f3e5b305a00 size:128 presentcount:0+1 line:152 name:descriptor


host:0x2f419b0 device:0x7f3e5b309a00 size:1024 presentcount:0+1 line:211 name:fixku_sub
host:0x2f4ddb0 device:0x7f3e5b3fa000 size:8 presentcount:0+1 line:182 name:nfc_inner_ps_sub
host:0x2f51b10 device:0x7f3e38600000 size:12582912 presentcount:0+1 line:118 name:rk3xo
host:0x3b52dd0 device:0x7f3e39200000 size:12582912 presentcount:0+1 line:119 name:rk3yo
host:0x47542a0 device:0x7f3e34000000 size:12582912 presentcount:0+1 line:120 name:rk3zo
host:0x5355980 device:0x7f3e34c00000 size:12582912 presentcount:0+1 line:121 name:rk3xoo
host:0x5f57270 device:0x7f3e32000000 size:12582912 presentcount:0+1 line:122 name:rk3yoo
host:0x6b58d70 device:0x7f3e32c00000 size:12582912 presentcount:0+1 line:123 name:rk3zoo
host:0x7758da0 device:0x7f3e5b30b600 size:101920 presentcount:0+1 line:125 name:duout
host:0x7771bf0 device:0x7f3e5b324800 size:101920 presentcount:0+1 line:125 name:dvout
host:0x778aa40 device:0x7f3e5b33da00 size:101920 presentcount:0+1 line:125 name:dwout
host:0x77a3890 device:0x7f3e5b356c00 size:98304 presentcount:0+1 line:126 name:uout
host:0x77bb8c0 device:0x7f3e5b36ee00 size:98304 presentcount:0+1 line:126 name:vout
host:0x77d38f0 device:0x7f3e5b387000 size:98304 presentcount:0+1 line:126 name:wout
host:0x77eb920 device:0x7f3e5b39f200 size:131072 presentcount:0+1 line:127 name:vtop
host:0x780b950 device:0x7f3e5b3bf400 size:131072 presentcount:0+1 line:127 name:dvtop
host:0x783da90 device:0x7f3e30e00000 size:12582912 presentcount:0+1 line:44 name:rhsps
host:0x843fbc0 device:0x7f3e2e000000 size:12582912 presentcount:0+1 line:45 name:anpso
host:0x9041f00 device:0x7f3e2ec00000 size:12582912 presentcount:0+1 line:46 name:anpsoo
host:0x9c41f30 device:0x7f3e5b3dfe00 size:98304 presentcount:0+1 line:47 name:tout
host:0x9c59f60 device:0x7f3e2f800000 size:101920 presentcount:0+1 line:47 name:dtout
host:0xa75c4a0 device:0x7f3e1c000000 size:12582912 presentcount:0+1 line:125 name:nwall_dvm_sub
host:0xb40bae0 device:0x7f3e1f106000 size:536576 presentcount:0+1 line:217 name:ifc_ps_sub
host:0xb4943c0 device:0x7f3e1f200000 size:536576 presentcount:0+1 line:217 name:jfc_ps_sub

host:0xfbb0e18 device:0x7f3e5b3ff400 size:8 presentcount:0+1 line:1769 name:alloc3
host:0x107b9248 device:0x7f3e5b3ff200 size:8 presentcount:0+1 line:1770 name:alloc4
host:0x7f3e202e2850 device:0x7f3e1ae00000 size:13249600 presentcount:0+1 line:766 name:qmass_sub
host:0x7f3e225617c0 device:0x7f3e12e00000 size:13939200 presentcount:0+1 line:1257 name:rhstmp
host:0x7f3e232b45b0 device:0x7f3e12000000 size:13939200 presentcount:0+1 line:1256 name:valuetmp1
host:0x7f3e2a0c4d80 device:0x7f3e24000000 size:13249600 presentcount:0+1 line:36 name:talph
host:0x7f3e2ad6ab70 device:0x7f3e26e00000 size:13249600 presentcount:0+1 line:32 name:tnuz
host:0x7f3e2ba10960 device:0x7f3e26000000 size:13249600 presentcount:0+1 line:31 name:tnuy
host:0x7f3e2c6b6750 device:0x7f3e28e00000 size:13249600 presentcount:0+1 line:30 name:tnux
host:0x7f3e2d35c540 device:0x7f3e28000000 size:13249600 presentcount:0+1 line:29 name:tnu
host:0x7f3e3e0ced00 device:0x7f3e30000000 size:13249600 presentcount:0+1 line:43 name:t
host:0x7f3e3ed72e90 device:0x7f3e36000000 size:39748800 presentcount:0+1 line:117 name:rhs1
host:0x7f3e4135cc80 device:0x7f3e3ae00000 size:13249600 presentcount:0+1 line:116 name:p
host:0x7f3e746b8a70 device:0x7f3e3a000000 size:13249600 presentcount:0+1 line:115 name:w
host:0x7f3e7535c860 device:0x7f3e3ce00000 size:13249600 presentcount:0+1 line:114 name:v
host:0x7f3e7c23c650 device:0x7f3e3c000000 size:13249600 presentcount:0+1 line:113 name:u
allocated block device:0x7f3e0e000000 size:13939200 thread:1
allocated block device:0x7f3e0ee00000 size:13939200 thread:1
allocated block device:0x7f3e12000000 size:13939200 thread:1
allocated block device:0x7f3e12e00000 size:13939200 thread:1
allocated block device:0x7f3e1ae00000 size:13250048 thread:1
allocated block device:0x7f3e1c000000 size:12582912 thread:1
allocated block device:0x7f3e1e000000 size:12703744 thread:1
allocated block device:0x7f3e1ee00000 size:1710592 thread:1

allocated block device:0x7f3e28e00000 size:13250048 thread:1
allocated block device:0x7f3e2e000000 size:12582912 thread:1
allocated block device:0x7f3e2ec00000 size:12582912 thread:1
allocated block device:0x7f3e2f800000 size:102400 thread:1
allocated block device:0x7f3e2f819000 size:1710592 thread:1
allocated block device:0x7f3e30000000 size:13250048 thread:1
allocated block device:0x7f3e30e00000 size:12582912 thread:1
allocated block device:0x7f3e32000000 size:12582912 thread:1
allocated block device:0x7f3e32c00000 size:12582912 thread:1
allocated block device:0x7f3e34000000 size:12582912 thread:1
allocated block device:0x7f3e34c00000 size:12582912 thread:1
allocated block device:0x7f3e36000000 size:39749120 thread:1
allocated block device:0x7f3e38600000 size:12582912 thread:1
allocated block device:0x7f3e39200000 size:12582912 thread:1
allocated block device:0x7f3e3a000000 size:13250048 thread:1
allocated block device:0x7f3e3ae00000 size:13250048 thread:1
allocated block device:0x7f3e3c000000 size:13250048 thread:1
allocated block device:0x7f3e3ce00000 size:13250048 thread:1
allocated block device:0x7f3e5b2fa000 size:1536 thread:1
allocated block device:0x7f3e5b2fa600 size:512 thread:1
allocated block device:0x7f3e5b2fa800 size:1536 thread:1

allocated block device:0x7f3e5b3ff200 size:512 thread:1
allocated block device:0x7f3e5b3ff400 size:512 thread:1
deleted block   device:0x7f3e5b3ff800 size:512 threadid=1 
deleted block   device:0x7f3e5b3ff600 size:512 threadid=1 
deleted block   device:0x7f3e5b3ffa00 size:512 threadid=1 
FATAL ERROR: variable in data clause is partially present on the device: name=bk
 file:/home/jsera.lee/lica/LICA/apply_openACC/src/mod_flowarray.f90 lhsu line:1805

--------------------------------------------------------------------------
Primary job  terminated normally, but 1 process returned
a non-zero exit code. Per user-direction, the job has been aborted.
--------------------------------------------------------------------------
--------------------------------------------------------------------------
mpirun detected that one or more processes exited with non-zero status, thus causing
the job to be terminated. The first process to do so was:

  Process name: [[10318,1],3]
  Exit code:    1
--------------------------------------------------------------------------

I meet “variable in data clause is partially present on the device” errors frequently.
I know why this message occured, the size is different device memory with host memory.
but in this case use fortran pointer and multi-dimension array.

I can’t find this example… I only can find 1d array pointer example or scalar case.
https://forums.developer.nvidia.com/t/openacc-fortran-pointers/135604
https://forums.developer.nvidia.com/t/openacc-fortran-pointer-in-the-structured-data-construct/176679

I try to many cases.

    !!$acc parallel loop collapse(3) present(AK(:),BK(:),CK(:),GK(:), &
    !!$acc parallel loop collapse(3) present(AK(1:(n1sub-Ibc)*n2msub*n3msub),BK(1:(n1sub-Ibc)*n2msub*n3msub),CK(1:(n1sub-Ibc)*n2msub*n3msub),GK(1:(n1sub-Ibc)*n2msub*n3msub), &
    !!$acc parallel loop collapse(3) present(AK(1:n1sub-Ibc,1:n2msub,1:n3msub),BK(1:n1sub-Ibc,1:n2msub,1:n3msub),CK(1:n1sub-Ibc,1:n2msub,1:n3msub),GK(1:n1sub-Ibc,1:n2msub,1:n3msub), &
    !!$acc parallel loop collapse(3) present(AK(:,:,:),BK(:,:,:),CK(:,:,:),GK(:,:,:), &
    !$acc parallel loop collapse(3) present( &
    !$acc FIXKL_sub(:),FIXKU_sub(:),TNUY(:,:,:),RHS1(:,:,:,:),CKUV_sub(:),AKUV_sub(:))
  1. typing the size explicit pointer full size 1d → because original Alloc1 is 1d array
  2. typing the size explicit pointer size 1d →
  3. typing the size explicit pointer size 3d → because pointer is 3d array and in loop use like this “AK(i,j,k)”
  4. typing the size explicit pointer full size 3d
  5. using implicit copy (no typing)

As you can see the 3d array size is

AK(1:n1sub-Ibc,1:n2msub,1:n3msub)=>Alloc1

and in the loop my index start at 1. It means do not accessing all size of array.
so i think i need explicitly talk to compiler array size.

Q. Should i use !$acc enter data create(AK,BK,CK,GK) ?
or only i need !$acc enter data create(Alloc1) ?

Could i ask some samples or examples about using this practical cases using openACC in fortran?
I need advanced many code samples not basic tutorials…

This is likely the issue. While we used to allow implicit sections, it wasn’t standard compliant and hence you need to use triple notation to define the range. i.e. add a “1:” or “:” before the extent. Without it, you’re only creating a single element on the device. Note that the lower bound is optional and assume to be 1 in Fortran

        !$acc enter data create(Alloc1(1:(n1sub-Ibc)*n2msub*n3msub))
        !$acc enter data create(Alloc2(1:(n1sub-Ibc)*n2msub*n3msub))
        !$acc enter data create(Alloc3(1:(n1sub-Ibc)*n2msub*n3msub))
        !$acc enter data create(Alloc4(1:(n1sub-Ibc)*n2msub*n3msub))

Alternately, you can add the flag “-gpu=implicitsections” to revert to the old behavior, the caveat being that it may not work with other compilers.

To illustrate, I pulled to together this example from your code:

test.F90

   program LHSU

        implicit none
        INTEGER*8     :: I,J,K
        REAL*8, DIMENSION (:), ALLOCATABLE, target  :: Alloc1
        REAL*8, dimension(:,:,:), pointer :: AK

        integer :: n1,n2,n3,Ibc
        n1=32
        n2=32
        n3=32
        Ibc=16

        allocate(Alloc1(n1*n2*n3))

#ifdef NO_SECTION
        !$acc enter data create(Alloc1(n1*n2*n3))
#else
        !$acc enter data create(Alloc1(1:n1*n2*n3))
#endif
        AK(1:n1-Ibc,1:n2,1:n3)=>Alloc1

        !$acc parallel loop collapse(3)
        DO K=1,n3
        DO J=1,n2
        DO I=Ibc,n1
            AK(I-Ibc+1,J,K)=1.0
        ENDDO
        ENDDO
        ENDDO

        !$acc exit data copyout(Alloc1)
        print *, Alloc1(1:10)


end program LHSU
% nvfortran test.F90 -acc -Minfo=accel ; a.out
lhsu:
     19, Generating enter data create(alloc1(1:(n2*n1)*n3))
     23, Generating NVIDIA GPU code
         24, !$acc loop gang, vector(128) collapse(3) ! blockidx%x threadidx%x
         25,   ! blockidx%x threadidx%x collapsed
         26,   ! blockidx%x threadidx%x collapsed
     23, Generating implicit copyout(ak(1:17,1:32,1:32)) [if not already present]
     32, Generating exit data copyout(alloc1(:))
    1.000000000000000         1.000000000000000         1.000000000000000
    1.000000000000000         1.000000000000000         1.000000000000000
    1.000000000000000         1.000000000000000         1.000000000000000
    1.000000000000000
% nvfortran test.F90 -acc -Minfo=accel -DNO_SECTION; a.out
lhsu:
     17, Generating enter data create(alloc1((n2*n1)*n3))
     23, Generating NVIDIA GPU code
         24, !$acc loop gang, vector(128) collapse(3) ! blockidx%x threadidx%x
         25,   ! blockidx%x threadidx%x collapsed
         26,   ! blockidx%x threadidx%x collapsed
     23, Generating implicit copyout(ak(1:17,1:32,1:32)) [if not already present]
     32, Generating exit data copyout(alloc1(:))
Present table dump for device[1]: NVIDIA Tesla GPU 0, compute capability 7.0, threadid=1
Hint: specify 0x800 bit in NV_ACC_DEBUG for verbose info.
host:0x14fad13fb018 device:0x14faa5afa000 size:8 presentcount:0+1 line:17 name:alloc1((n2*n1)*n3)
allocated block device:0x14faa5afa000 size:512 thread:1
deleted block   device:0x14faa5afa200 size:131072 threadid=1

Present table errors:
alloc1(:) lives at 0x14fad13bb020 size 262144 partially present in
host:0x14fad13fb018 device:0x14faa5afa000 size:8 presentcount:0+1 line:17 name:alloc1((n2*n1)*n3) file:/local/home/mcolgrove/test.F90
FATAL ERROR: variable in data clause is partially present on the device: name=alloc1(:)
 file:/local/home/mcolgrove/test.F90 lhsu line:32

Q. Should i use !$acc enter data create(AK,BK,CK,GK) ?
or only i need !$acc enter data create(Alloc1) ?

You only need to create Alloc1. The pointers are present since they point to Alloc which are already on the device.

Also, adding the explicit creation of the pointers will create the pointers on the device but get orphaned when the pointers are assigned to Alloc. Not too big of an issue, but It could cause other issues given these are stack variables, the stack address would be present on the device so if a different routine tries to create device data for it’s stack variables, that address may already be present.

1 Like

I really appreciate about your help.
I can solve my problems through your code example.
I need to check the result, but I can learn about using fortran pointer to openACC.

Thank you. Mat!

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.