Problem with if statment in acc region

Hi,

I am getting some wrong values after a kernel in a fortran code with acc directives. It seems to be related with if statments. I have been able to reproduce the behaviour in a reduced example:


module compute
  implicit none
  CONTAINS

    SUBROUTINE compute_gpu(N,ke,ntur,&
         tke,fr_land,tfm,tfh,dz_0a_m,dz_0a_h,vel_2d,z0m_2d,l_tur_z0, lo_ice, &
         gz0, z0d_2d,tkvm,tkvh,tcm,tch)
      REAL*8, INTENT(IN) :: tke(N,ke+1,ntur),fr_land(N),tfm(N),tfh(N), &
           dz_0a_m(N),dz_0a_h(N),vel_2d(N),z0m_2d(N),l_tur_z0(N)   
      REAL*8, INTENT(INOUT) :: gz0(N), z0d_2d(N),tkvm(N,ke+1),tkvh(N,ke+1), &
           tcm(N),tch(N)
      LOGICAL, INTENT(IN) :: lo_ice(N)
      INTEGER*4, INTENT(IN) :: N,ke,ntur    
      INTEGER*4 :: ip,ke1
      REAL*8 :: grav, z0_ice, len_min, alpha0, z10, z0m_dia,z1d2
      REAL*8 :: fakt,velh,wert,zgz0
      !$acc reflected(tke,fr_land,tfm,tfh,dz_0a_m,dz_0a_h,vel_2d,z0m_2d,l_tur_z0, lo_ice)
      !$acc reflected(gz0, z0d_2d,tkvm,tkvh,tcm,tch)

      grav=9.8
      z0_ice=1e-3
      len_min=1e-8
      alpha0=1.23e-2
      z10=10.0
      z0m_dia=0.2
      z1d2=0.5
      ke1=ke+1
            
      !$acc region 
      DO ip = 1, N
         fakt=l_tur_z0(ip)*tke(ip,ke1,ntur)
         tkvm(ip,ke1)=fakt*tkvm(ip,ke1)
         tkvh(ip,ke1)=fakt*tkvh(ip,ke1)
         !--------------------------------
         tcm(ip)=tkvm(ip,ke1)*tfm(ip)/(dz_0a_m(ip)*vel_2d(ip))
         tch(ip)=tkvh(ip,ke1)*tfh(ip)/(dz_0a_h(ip)*vel_2d(ip))
         !---------------------------------
         IF (fr_land(ip).LT.z1d2) THEN
            !-------------------------
            IF ( lo_ice(ip) ) THEN
               !----------------------
               gz0(ip)=grav*z0_ice
            ELSE
               velh=(tke(ip,ke1,ntur)+tke(ip,ke,ntur))*z1d2
               wert=tcm(ip)*vel_2d(ip)*SQRT(vel_2d(ip)**2+velh**2)
               gz0(ip)=MAX( grav*len_min, alpha0*wert )
            END IF
            !--------------------
            z0d_2d(ip)=z0m_2d(ip)
         ELSE
            z0d_2d(ip)=MIN( z10, z0m_dia )
         END IF
      ENDDO
      !$acc end region 

    END SUBROUTINE compute_gpu

end module compute


program main
  USE compute
  implicit none
  integer*4 :: N,ke,ntur,ke1 
  real*8, allocatable ::  tke(:,:,:),fr_land(:),tfm(:),tfh(:), &
           dz_0a_m(:),dz_0a_h(:),vel_2d(:),z0m_2d(:), &
           gz0(:), z0d_2d(:),tkvm(:,:),tkvh(:,:), &
           tcm(:),tch(:),l_tur_z0(:)
  LOGICAL, ALLOCATABLE :: lo_ice(:)

  N=4E3
  ke=2
  ke1=ke+1
  ntur=2

  allocate(tke(N,ke+1,ntur),fr_land(N),tfm(N),tfh(N))
  allocate(dz_0a_m(N),dz_0a_h(N),vel_2d(N),z0m_2d(N))
  allocate(gz0(N), z0d_2d(N),tkvm(N,ke+1),tkvh(N,ke+1))
  allocate(tcm(N),tch(N),lo_ice(N),l_tur_z0(N))

!----------------------------------
!initit values
l_tur_z0(:)=0.06
tke(:,:,:) =0.47
tke(:,ke1,ntur)=0.55
tkvm(:,:)=0.39
tkvh(:,:)=0.49
tfm(:)=1.0
tfh(:)=0.61
dz_0a_m(:)=0.607
dz_0a_h(:)=0.606
vel_2d(:)=2.19
z0m_2d(:)=0.15
gz0(:)=1.48

fr_land(:)=1.0D0
fr_land(N/2-N/4:N/2+N/4)=0.0D0
lo_ice(:)=.FALSE.
lo_ice(N/8-N/4:N/2+N/8)=.TRUE.

!$acc data region local(tke,fr_land,tfm,tfh,dz_0a_m,dz_0a_h,vel_2d,z0m_2d,l_tur_z0, lo_ice) &
!$acc local(gz0, z0d_2d,tkvm,tkvh,tcm,tch)

!$acc update device(tke,fr_land,tfm,tfh,dz_0a_m,dz_0a_h,vel_2d,z0m_2d,l_tur_z0, lo_ice)
!$acc update device(gz0, z0d_2d,tkvm,tkvh,lo_ice)

call compute_gpu(N,ke,ntur,&
         tke,fr_land,tfm,tfh,dz_0a_m,dz_0a_h,vel_2d,z0m_2d,l_tur_z0, lo_ice, &
         gz0, z0d_2d,tkvm,tkvh,tcm,tch)

!$acc update host(gz0, z0d_2d,tkvm,tkvh,tcm,tch)
!$acc end data region

print*, 'Mean values'
print*, 'gz0', sum(gz0)/N
print*, 'z0d_2d', sum(z0d_2d)/N
print*, 'tkvm', sum(tkvm)/N
print*, 'tkvh', sum(tkvh)/N
print*, 'tcm', sum(tcm)/N
print*, 'tch', sum(tch)/N

print*, 'Some gz0 values:'
print*, 'gz0(1)', gz0(1)
print*, 'gz0(N/2)', gz0(N/2)
print*, 'gz0(N)', gz0(N)

end program main

Here are the results when compiling for the CPU:

pgf90 -o test_gz0 test_gz0.f90
./test_gz0
Mean values
gz0 0.7433807619914817
z0d_2d 0.1749875044710934
tkvm 0.7928699709091234
tkvh 0.9961700193771321
tcm 9.6815684024946901E-003
tch 7.4322980680181670E-003
Some gz0 values:
gz0(1) 1.480000019073486
gz0(N/2) 9.8000006562098951E-003
gz0(N) 1.480000019073486

On the GPU one gets:

pgf90 -ta=nvidia -o test_gz0 test_gz0.f90
./test_gz0
Mean values
gz0 1.8594320466921980E+019
z0d_2d 0.1749875044710934
tkvm 0.7928699709091234
tkvh 0.9961700193771321
tcm 9.6815684024946901E-003
tch 7.4322980680181679E-003
Some gz0 values:
gz0(1) 7.8125018339407840E-003
gz0(N/2) 9.8000006562098951E-003
gz0(N) 7.8125018480932340E-003

One sees that mean(gz0) is different, when running on the GPU or on the CPU.

Looking at the details of this array it seems that the wrong values correspond to indices for which nothing is done at the IF statment in the GPU kernel:

         IF (fr_land(ip).LT.z1d2) THEN

It seems that the problem is related to the combination of the if statment and gz0 being assigned to a register (I am getting the Using register for ‘gz0’ message).

Indeed if I replace gz0(ip) with some scalar zgz0:

        IF (fr_land(ip).LT.z1d2) THEN
            !-------------------------
            IF ( lo_ice(ip) ) THEN
               !----------------------
               !gz0(ip)=grav*z0_ice
               zgz0=grav*z0_ice
            ELSE
               velh=(tke(ip,ke1,ntur)+tke(ip,ke,ntur))*z1d2
               wert=tcm(ip)*vel_2d(ip)*SQRT(vel_2d(ip)**2+velh**2)
               zgz0=MAX( grav*len_min, alpha0*wert )
               !gz0(ip)=MAX( grav*len_min, alpha0*wert )
            END IF
            gz0(ip)=zgz0
            !--------------------
            z0d_2d(ip)=z0m_2d(ip)
         ELSE
            z0d_2d(ip)=MIN( z10, z0m_dia )
         END IF

the GPU results are then correct:

pgf90 -ta=nvidia -o test_gz0 test_gz0.f90
./test_gz0
Mean values
gz0 0.7433807619914817
z0d_2d 0.1749875044710934
tkvm 0.7928699709091234
tkvh 0.9961700193771321
tcm 9.6815684024946901E-003
tch 7.4322980680181679E-003
Some gz0 values:
gz0(1) 1.480000019073486
gz0(N/2) 9.8000006562098951E-003
gz0(N) 1.480000019073486


Thanks for your help,

Xavier


PS :1) I am using PGI 11.9
2) I did send this code last week (01.11.2011) to trs@pgroup.com, but I didn’t get any response so far and I don’t know if someone already had a look to it. I thought I would take a chance on the forum.[/b]

Hi Xavier,

What’s happening is that by putting gz0 in a register, all elements of gz0 are being updated, not just those in the true section of the first IF-statement. I have submitted a problem report (TPR#18264) and sent it on to our engineers.

  1. I did send this code last week (01.11.2011) to > trs@pgroup.com> , but I didn’t get any response so far and I don’t know if someone already had a look to it. I thought I would take a chance on the forum.

Sorry about that. Dave had sent your code on to engineering for investigation but hadn’t gotten a response yet. He should have sent you an note as to the status.

Best Regards,
Mat