ACC routine in Fortran

Hi, I have this simple Fortran code

         ! Driver for an OpenACC "routine" test: x is filled one m-element
         ! segment at a time by calls to mult made from inside a
         ! gang-parallel loop, then summed on the host and printed.
         ! There is no "implicit none": i, im and ik are implicitly
         ! integer and aa is implicitly default real.
         parameter (n=100000,m=10)
         real*8 x(n),y(n)
         real*8 A(m),B(m)
         ! Declares mult as a routine that is called from compute regions.
         ! No level-of-parallelism clause is given here, while mult itself
         ! says "routine seq".
         ! NOTE(review): if the clause inside mult is changed, this
         ! directive should presumably carry the same clause.
!$acc   routine(mult)
         ! Coefficients: A is all 2, B is all 3.
         do i=1,m
           A(i)=2.0d0
           B(i)=3.0d0
         enddo
         ! x starts at 0 (it is overwritten by mult), y is all 1.
         do i=1,n
          x(i)=0.0d0
          y(i)=1.0d0
         enddo
         ! x is copied to the device and back; y, A and B are input only.
!$acc data copy(x) copyin(y,A,B)
!$acc parallel loop gang
         ! i takes the odd values 1,3,...,9999 (5000 iterations). Each
         ! iteration handles two consecutive m-element segments:
         ! x(im:im+m-1) using A and x(ik:ik+m-1) using B. For i=9999 the
         ! second segment ends at index 100000 = n.
         do i=1,10000,2
           im=m*(i-1)+1
           ik=m*i+1
           ! The actual arguments x(im), y(im) etc. are the first elements
           ! of the segments; mult treats them as assumed-size arrays.
           call mult(m,A,x(im),y(im))
           call mult(m,B,x(ik),y(ik))
         enddo
!$acc end parallel
!$acc end data
         ! Host-side check. If every call to mult completed correctly,
         ! each x(i) is 2 or 3 and the sum is
         ! 5000*10*2 + 5000*10*3 = 250000.
         aa=0.0d0
         do i=1,n
          aa=aa+x(i)
         enddo
         print*,aa
         stop
         end

         ! Element-wise product over one segment:
         !   x(i) = A(i)*y(i)  for i = 1..m
         ! A, x and y are assumed-size arrays; the caller passes the first
         ! element of each segment (e.g. x(im)). m and i are implicitly
         ! integer because there is no "implicit none".
         subroutine mult(m,A,x,y)
         real*8 A(*),x(*),y(*)
         ! "routine seq" makes this a device routine whose loop runs
         ! sequentially. The loop directive below names no gang, worker
         ! or vector level.
!$acc   routine seq
!$acc   loop 
         do i=1,m
             x(i)=A(i)*y(i)
         enddo
         return
         end

The loop inside the subroutine is executed sequentially. To parallelize this loop, I replaced seq with vector in the subroutine's !$acc routine seq directive, but then the computed result is wrong. How can I fix it?

Manolis

Hi Manolis,

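Did you also add vector to the routine directive in the main program? The vector clause needs to appear in both places: on the !$acc routine(mult) directive in the caller and on the !$acc routine directive inside the subroutine, so that both agree on the level of parallelism.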

% cat test.f90

          parameter (n=100000,m=10)   ! driver: x(1:n) is filled by mult in segments of m, then summed
          real*8 x(n),y(n)            ! no implicit none: i, im, ik are integer, aa is default real
          real*8 A(m),B(m)            ! next line: the vector clause here matches "routine vector" in mult
 !$acc   routine(mult) vector
          do i=1,m                    ! coefficients: A is all 2, B is all 3
            A(i)=2.0d0
            B(i)=3.0d0
          enddo
          do i=1,n                    ! x starts at 0 (overwritten by mult), y is all 1
           x(i)=0.0d0
           y(i)=1.0d0
          enddo
 !$acc data copy(x) copyin(y,A,B)
 !$acc parallel loop gang
          do i=1,10000,2              ! odd i only: 1,3,...,9999 (5000 iterations)
            im=m*(i-1)+1              ! first index of this iteration's first m-element segment
            ik=m*i+1                  ! first index of the segment right after it; last one ends at n
            call mult(m,A,x(im),y(im))   ! x(im:im+m-1) = A * y(im:im+m-1)
            call mult(m,B,x(ik),y(ik))   ! x(ik:ik+m-1) = B * y(ik:ik+m-1)
          enddo
 !$acc end parallel
 !$acc end data
          aa=0.0d0                    ! host-side check: sum of x
          do i=1,n                    ! expected: 5000*10*2 + 5000*10*3 = 250000
           aa=aa+x(i)
          enddo
          print*,aa
          stop
          end

          subroutine mult(m,A,x,y)    ! element-wise product: x(i) = A(i)*y(i) for i = 1..m
          real*8 A(*),x(*),y(*)       ! assumed-size; callers pass the first element of a segment
 !$acc   routine vector
 !$acc   loop
          do i=1,m                    ! m and i are implicitly integer (no implicit none)
              x(i)=A(i)*y(i)
          enddo
          return
          end
% pgfortran -acc -Minfo=accel -ta=tesla:cc60 test.f90 -V17.4; a.out
MAIN:
     14, Generating copyin(b(:))
         Generating copy(x(:))
         Generating copyin(y(:),a(:))
     15, Accelerator kernel generated
         Generating Tesla code
         16, !$acc loop gang ! blockidx%x
mult:
     32, Generating Tesla code
         36, !$acc loop vector ! threadidx%x
     36, Loop is parallelizable
    250000.0
Warning: ieee_inexact is signaling
FORTRAN STOP

-Mat

This solved the problem.
Thank you,