I tried a simple matrix addition test. I tried compiling and got the following message:
[gschoend@exalted acctest]$ pgfortran -ta=nvidia,time -Mcuda -Minfo -mcmodel=medium test.f90
PGF90-W-0155-Accelerator region ignored; no parallel kernels found (test.f90: 33)
test:
33, No parallel kernels found, accelerator region ignored
0 inform, 1 warnings, 0 severes, 0 fatal for test
/share/apps/pgi/linux86-64/12.2/libso/libacc1.so: undefined reference to `__pgi_cu_close_noprof’
However, if I indent everything six spaces to use fixed form code, then there is no problem compiling. Does anyone know why there would be an issue with the parallel region in version 12.2? Also, I can compile and run this just fine with version 12.1 as long as I use fixed form. Free form code gives the same error as above.
! Requires mcmodel=medium to avoid exceeding 2GB limit
program test
use accel_lib
implicit double precision(a-h,o-z)
parameter (lda=10000)
integer :: i, j, t_host, t1_host, t2_host
integer :: t_d, t1_d, t2_d
double precision, dimension(lda,lda) :: a, b, c, d, e, a1, b1
! Fix a1 and b1 before continuing
! Construct a and b matrices
do i=1,lda
do j=1,lda
a(i,j)=(i+j)*0.5
b(i,j)=(i-j)*0.5
end do
end do
call acc_init( acc_device_nvidia )
call system_clock(count=t1_host)
! c=a+b
do i=1,lda
do j=1,lda
c(i,j)=a(i,j)+b(i,j)
end do
end do
call system_clock(count=t2_host)
t_host = t2_host - t1_host
call system_clock(count=t1_d)
! d=a+b
!$acc region
do i=1,lda
do j=1,lda
d(i,j)=a(i,j)+b(i,j)
end do
end do
!$acc end region
call system_clock(count=t2_d)
t_d = t2_d - t1_d
print *, t_host, ' microseconds on the host'
print *, t_d, ' microseconds on GPU'
end program
[/code]