Hi, I am a beginner in CUDA Fortran and I am testing the following program. The code is compiled with pgf95 -ta=nvidia sumAB.cuf; it runs, but it gives me the wrong results. Any suggestions? Thanks,
!----------------module for sumAB--------------------------
module m_sumAB
  use cudafor
  implicit none
contains
  !-------------kernel subroutine-----------------
  ! Device kernel: each thread computes one element, C(i) = A(i) + B(i).
  !   n      number of elements in A, B and C (passed by value)
  !   A, B   input vectors
  !   C      output vector
  attributes(global) subroutine k_sumAB(n,A,B,C)
    integer, value :: n
    real, dimension (n), intent(in) :: A,B
    real, dimension (n), intent(out) :: C
    integer :: i
    ! Global 1-based element index handled by this thread.
    i=(blockidx%x-1)*blockdim%x+threadidx%x
    ! The grid may hold more threads than elements; skip the surplus ones.
    if (i<=n) C(i)=A(i)+B(i)
  end subroutine k_sumAB
  !-------------host subroutine--------------------
  ! Host wrapper: copies A and B to device arrays, launches k_sumAB and
  ! copies the result back into C.
  !   n      number of elements; nothing is done when n <= 0
  !   bdim   number of threads per block; must be positive
  !   A, B   input vectors (host)
  !   C      output vector (host), C = A + B on return
  ! The program is stopped with a message if bdim is invalid or if the
  ! CUDA runtime reports an error for the launch or the kernel execution.
  subroutine h_sumAB(n,bdim,A,B,C)
    integer, intent(in) :: n,bdim
    real, dimension (n), intent(in) :: A,B
    real, dimension (n), intent(out) :: C
    real, device, dimension (n) :: Adev,Bdev,Cdev
    integer :: nblocks,istat

    if (n<=0) return
    if (bdim<=0) then
      print *,'h_sumAB: block size must be positive, got ',bdim
      stop 1
    end if

    ! Round the block count up so that a trailing partial block is still
    ! launched when n is not a multiple of bdim.
    nblocks=(n+bdim-1)/bdim

    Adev=A
    Bdev=B
    call k_sumAB<<<nblocks, bdim>>>(n,Adev,Bdev,Cdev)
    ! A bad launch configuration is reported here, not by the call itself.
    istat=cudaGetLastError()
    if (istat/=cudaSuccess) then
      print *,'h_sumAB: kernel launch failed: ',trim(cudaGetErrorString(istat))
      stop 1
    end if

    C=Cdev
    ! Check again after the device-to-host copy for errors raised while
    ! the kernel was running.
    istat=cudaGetLastError()
    if (istat/=cudaSuccess) then
      print *,'h_sumAB: kernel execution failed: ',trim(cudaGetErrorString(istat))
      stop 1
    end if
  end subroutine h_sumAB
end module m_sumAB
!---------------------------end module----------------------
program sumAB
  !----------------------------------------------------
  !
  ! Purpose: sum two vectors A and B of n elements on the CPU and on the
  !          GPU, time both, and compare the results.
  !
  !----------------------------------------------------
  use m_sumAB
  implicit none
  ! n must be a named constant: it is the bound of explicit-shape arrays
  ! declared in the main program.
  integer, parameter :: n=1000
  integer, parameter :: bdim=100
  real :: times,timef,maxdiff
  real, dimension (n) :: A,B,C,D
  !-----------------end declaration variable-----------
  ! Initialize arrays
  A=1.2
  B=2.2
  C=0.
  D=0.
  ! CPU calculation (reference result in D)
  call cpu_time(times)
  D=A+B
  call cpu_time(timef)
  print *,'CPU time required is: ',timef-times,' seconds'
  ! GPU calculation (result in C); the timing includes host/device copies
  call cpu_time(times)
  call h_sumAB(n,bdim,A,B,C)
  call cpu_time(timef)
  print *,'GPU time required is: ',timef-times,' seconds'
  ! Difference between results: the largest absolute element-wise error,
  ! so that errors of opposite sign cannot cancel each other out.
  maxdiff=maxval(abs(C-D))
  print *,'Difference between results is: ',maxdiff,C(1),D(1)
end program sumAB