function inline problem

Hi all,

I used PGI13.10 for parallel,and I have met some questions about function inline.The following is my code,please give some advices.



subroutine Contact_Detection_Cgrid()
    use contact_control_mod, only : get_contact_default_parameters
    use element_mod, only : get_total_number_dem, obtain_DEM_kinematic, &
                            get_dem_radius, add_element_contactid
    use public_mod
  
      integer,parameter::m=40
      integer i,j,g,k,r,w,t,n_box,n_particle,ni,nj
      integer nb
	  integer n1,n2,n3
	  integer,allocatable::A(:)
	  real,allocatable::X(:)
	  real,allocatable::Y(:)
	  real,allocatable::Z(:)
	  real,allocatable::B(:)
	  real,allocatable::C(:)
	  real,allocatable::D(:)
      real xi,yi,zi,xj,yj,zj
	  real boundx,boundy,boundz,mboundx,mboundy,mboundz
	  integer :: EDID
      real( kind = RK ) :: Pos(3)
      real( kind = RK ) :: Dis(3)
      real( kind = RK ) :: Vel(3)
      real( kind = RK ) :: Acc(3)
      real( kind = RK ) :: UnitE(3),UnitE1(3),UnitE2(3),UnitE3(3)
      real( kind = RK ) :: RVel(3)
      real( kind = RK ) :: RAcc(3)
	  real( kind = RK )::vector
	  real( kind = RK ), allocatable ::ELE(:,:)
	  real distant,distance
	  call allocate_variables() 
	  n1=boundary%nx                
      n2=boundary%ny
      n3=boundary%nz
	  n_box=n1*n2*n3
      n_particle=get_total_number_dem()  
	
      allocate(A(m))     
      allocate(X(n1))   
      allocate(Y(n2))     
      allocate(Z(n3))
      allocate(B(n1))          
      allocate(C(n2))           
      allocate(D(n3))   
	  allocate(ELE(4,n_particle))  
!$acc kernels
      do r=1,n1
          boundx=(r-1)*boundary%DcellX+boundary%xmin
          mboundx=r*boundary%DcellX+boundary%xmin
          X(r)=boundx
          B(r)=mboundx
      end do
!$acc end kernels
!$acc kernels
	  do w=1,n2
	      boundy=(w-1)*boundary%DcellY+boundary%ymin
	      mboundy=w*boundary%DcellY+boundary%ymin
	      Y(w)=boundy
	      C(w)=mboundy
      end do
!$acc end kernels
!$acc kernels
      do k=1,n3
	      boundz=(k-1)*boundary%DcellZ+boundary%xmin
	      mboundz=k*boundary%DcellZ+boundary%xmin
	      Z(k)=boundz
	      D(k)=mboundz
      end do      
!$acc end kernels

	  do i=1,n_particle
	      call obtain_DEM_kinematic(i,Pos,Dis,Vel,Acc, UnitE, UnitE1,UnitE2,UnitE3,RVel, RAcc ) 
		  ELE(1,i)=Pos(1)
		  ELE(2,i)=Pos(2)
		  ELE(3,i)=Pos(3)
		  ELE(4,i)=get_dem_radius(i)
      end do                     
	   
!$acc data present_or_copyin(ELE,X,Y,Z,B,C,D,A)
!$acc kernels                                
      do k=1,n3

        do w=1,n2
          do r=1,n1
              nb=0

!$acc do private(nb)
            
			 do t=1,n_particle  
			    if((ELE(1,t)-ELE(4,t)<=X(r) .and. X(r)<=ELE(1,t)+ELE(4,t)) &
                      &.or. (ELE(1,t)-ELE(4,t)<=B(r) .and. B(r)<=ELE(1,t)+ELE(4,t))) then
                  if((ELE(2,t)-ELE(4,t)<=Y(w) .and. Y(w)<=ELE(2,t)+ELE(4,t)) &
                        &.or. (ELE(2,t)-ELE(4,t)<=C(w) .and. C(w)<=ELE(2,t)+ELE(4,t)))then
                    if((ELE(3,t)-ELE(4,t)<=Z(k) .and. Z(k)<=ELE(3,t)+ELE(4,t)) &
                          &.or. (ELE(3,t)-ELE(4,t)<=D(k) .and. D(k)<=ELE(3,t)+ELE(4,t)))then 
              	     nb=nb+1
              	     A(nb)=t
                  end if
                end if
              end if
            end do 

!$acc do private(i)
			do i=1,nb-1
               ni=A(i)
			   xi=ELE(1,ni)
			   yi=ELE(2,ni)
			   zi=ELE(3,ni)
               do j=i+1,nb
                  nj=A(j)
				  xj=ELE(1,nj)
				  yj=ELE(2,nj)
				  zj=ELE(3,nj)
                 ! if( ni==0 .or. nj==0 ) exit
               	 ! if((xi<X(r) .and. xj<X(r)) .or. (yi<Y(w) .and. yj<Y(w)) .or. (zi<Z(k) .and. zj<Z(k))) exit
               	  distance=0.0  
               	  distant=0.0	  
				  do g=1,3
				    vector=ELE(g,ni)-ELE(g,nj)
				    distance=distance+vector*vector
				  end do
				  distant=distance-(ELE(4,ni)+ELE(4,nj))*1.1
				  if( distant<0 ) then                                     
                     if( ni < nj ) then 
                        call add_element_contactID(ni, nj)
                     else
                        call add_element_contactID(nj, ni)
                     end if
                  end if
               end do
            end do 
          end do           
        end do 
      end do 
!$acc end kernels
!$acc end data
  return
end subroutine Contact_Detection_Cgrid



subroutine add_element_contactID(ni, nj)
  !
  ! Function:
  !
  !  Add element ID, NJ, to the list of contactID of NI
  !
   implicit none
   integer, intent(in) :: ni, nj
   integer :: total_dem
   character(len = MNL) :: msg, msg1, msg2, cni, cnj

   total_dem = get_total_number_dem()

   if( ni > total_dem .or. ni < 0 ) goto 555
   if( nj > total_dem .or. nj < 0 ) goto 555

   if( .not. associated( dem(ni) % p_ContactID ) ) then
     allocate( dem(ni) % ContactID )
     dem(ni) % contactID % DEID = nj
     dem(ni) % p_ContactID => dem(ni) % ContactID
     return
   end if


   allocate( dem(ni) % p_ContactID % next )
   dem(ni) % p_ContactID => dem(ni) % p_ContactID % next
   dem(ni) % p_ContactID % DEID = nj

   return
  555 continue
   write(cni, *) ni;    cni = adjustl(cni)
   write(cnj, *) nj;    cnj = adjustl(cnj)
   msg = 'Subprogram Exception. In ADD_ELEMENT_CONTACTID.'
   msg1 = 'Discrete element ID is greater than the total number of DEM.'
   msg2 = 'Discrete Element ID: '//trim(cni)//'    Discrete Element ID: '//trim(cnj)
   call ErrorMSG( msg, msg1, msg2 )
   return
  end subroutine add_element_contactID

The error is

F:\zmz\code\Cgrid_mod.f90(191) : warning W0155 : Accelerator region ignored; see -Minfo messages 
contact_detection_cgrid:
    157, Generating present_or_copyout(x(1:n1))
         Generating present_or_copyout(b(1:n1))
         Generating NVIDIA code
         Generating compute capability 1.3 binary
         Generating compute capability 2.0 binary
         Generating compute capability 3.0 binary
    158, Loop is parallelizable
         Accelerator kernel generated
        158, !$acc loop gang, vector(128) ! blockidx%x threadidx%x
    165, Generating present_or_copyout(y(1:n2))
         Generating present_or_copyout(c(1:n2))
         Generating NVIDIA code
         Generating compute capability 1.3 binary
         Generating compute capability 2.0 binary
         Generating compute capability 3.0 binary
    166, Loop is parallelizable
         Accelerator kernel generated
        166, !$acc loop gang, vector(128) ! blockidx%x threadidx%x
    173, Generating present_or_copyout(z(1:n3))
         Generating present_or_copyout(d(1:n3))
         Generating NVIDIA code
         Generating compute capability 1.3 binary
         Generating compute capability 2.0 binary
         Generating compute capability 3.0 binary
    174, Loop is parallelizable
         Accelerator kernel generated
        174, !$acc loop gang, vector(128) ! blockidx%x threadidx%x
    190, Generating present_or_copyin(a(:))
         Generating present_or_copyin(d(:))
         Generating present_or_copyin(c(:))
         Generating present_or_copyin(b(:))
         Generating present_or_copyin(z(:))
         Generating present_or_copyin(y(:))
         Generating present_or_copyin(x(:))
         Generating present_or_copyin(ele(:,:))
    191, Accelerator region ignored
    220, Accelerator restriction: function/procedure calls are not supported
    236, Accelerator restriction: unsupported call to 'add_element_contactid'
  0 inform,   1 warnings,   0 severes, 0 fatal for contact_detection_cgrid
..\code\contact_mod.f90
..\code\KeyWord_mod.f90
..\code\complete_mod.f90
..\code\information_mod.f90
..\code\read_keyword_mod.f90
..\code\restart_mod.f90
..\code\program main.f90
F:\zmz\code\program main.f90(49) : error F0000 : Internal compiler error. readin_func:can't scan 1st line    1333
parale2 build failed.

-bigwbxu

Hi bigwbxu,

Sorry but I’m unclear as to the specific question but will attempt to answer in a more general way.

In OpenACC compute regions, calls to subroutines must either be inlined or decorated with the OpenACC “routine” directive. Note that “routine” was first added to the 14.1/14.2 releases so since you have 13.10, you’ll need to inline this routine or update your compiler.

If “add_element_contactid” is in the same module file as “contact_detection_cgrid”, then try adding the flag “-Minline”. If they are in separate files, then you must either use IPA, -Mipa=inline, or first perform and extraction pass over (-Mextract) your files before using -Minline. (See chapter4 of the PGI User’s Guide for details on Inlining http://www.pgroup.com/doc/pgiug.pdf).

For the “routine” directive, you would simply add “!$acc routine seq” in the definition of “add_element_contactid”. However, “add_element_contactid” does need an interface (either explicit or implicitly defined as part of the module). There’s a few other restrictions such as for Radeon targets “routine” can only be used within the same source file (Telsa can be used across source files), we currently don’t allow structs to be returned from functions, and the “bind” clause isn’t supported yet. Please refer the most recent release notes for more details.

  • Mat