Hi,
When I compile and run a program on my computer with GeForce 1660Ti, it works fine. Yet when I compile the same code on a remote computer with Quadro GV100 (I just changed -ta=tesla:cc75 to cc70), it gives the following error
Current file: /home/yunus/openacc/jacobi_acc.f90
function: main
line: 89
This file was compiled: -ta=tesla:cc70
Command exited with non-zero status 1
0.02user 0.00system 0:00.05elapsed 64%CPU (0avgtext+0avgdata 14184maxresident)k
0inputs+0outputs (0major+999minor)pagefaults 0swaps
make: *** [Makefile:10: jacobi_acc] Error 1
The code is
module generator
implicit none
contains
subroutine init_diag_dom_mat(A)
real*4, intent(out), dimension(:,:) :: A
integer :: i,j,nsize
real*4 :: sum, x
nsize = ubound(A,1)
do i = 1, nsize
sum = 0
do j = 1, nsize
call random_number(x)
x = mod(x, 23.0) / 1000.0
A(j,i) = x
sum = sum + x
end do
A(i,i) = A(i,i) + sum
! in order make it like identity matrix
do j = 1, nsize
A(j,i) = A(j,i) / sum
end do
end do
end subroutine
end module generator
program main
use generator
use omp_lib
implicit none
integer :: nsize, i, j, iters, max_iters, riter
real*4, allocatable :: A(:,:), b(:)
real*4, allocatable, target :: x1(:), x2(:)
real*4, pointer, contiguous :: xnew(:), xold(:), xtmp(:)
real*4 :: r, residual, rsum, dif, err, chksum
real*4, parameter :: TOLERANCE = 0.00000000000000000000000001
real*8 :: start_time, elapsed_time
nsize = 600
write(*,*) "nsize", nsize
! CONSTANTS--------------------------------------------------------
max_iters = 100000
riter = 10000000
! -----------------------------------------------------------------
allocate(A(nsize,nsize))
allocate(b(nsize), x1(nsize), x2(nsize))
! configuration of the matrix A
call init_diag_dom_mat(A)
! configuration of the vectors x1, x2, b
x1 = 0
x2 = 0
do i = 1, nsize
call random_number(r)
b(i) = mod(r, 51.0) / 100.0
end do
residual = TOLERANCE + 1.0 ! + 1.0d0 is put to meet the while condition at the first step
iters = 0
! swap these in each iteration
xnew => x1
xold => x2
start_time = omp_get_wtime()
!$acc data copyin(A(:,:), b(:)) copy(x1(:), x2(:))
do while(residual > TOLERANCE .and. iters < max_iters)
iters = iters + 1
! swap of input and output vectors
xtmp => xnew
xnew => xold
xold => xtmp
!$acc parallel loop private(rsum) async
do i = 1, nsize
rsum = 0
!$acc loop reduction(+:rsum)
do j = 1, nsize
if ( i /= j ) rsum = rsum + A(j,i) * xold(j)
end do
xnew(i) = (b(i) - rsum) / A(i,i)
end do
residual = 0
!$acc parallel loop reduction(+:residual) private(dif) async
do i = 1, nsize
dif = xnew(i) - xold(i)
residual = residual + dif * dif
end do
!$acc wait
residual = sqrt(residual)
if( mod(iters, riter) == 0) write (*,*) "Iteration", iters, ", & residual is", residual
end do
!$acc end data
elapsed_time = omp_get_wtime() - start_time
write (*,*) "Converged after ", iters, " iterations"
write (*,*) " and ", elapsed_time, " seconds"
write (*,*) " residual is ", residual
deallocate(A, b, x1, x2)
end program main
and the Makefile is
FC=nvfortran
TIMER=/usr/bin/time
OPT=
NOPT=-fast -Minfo=opt $(OPT)
jacobi_acc: jacobi_acc.o
$(TIMER) ./jacobi_acc.o $(STEPS)
jacobi_acc.o: jacobi_acc.f90
$(FC) -o $@ $< $(NOPT) -ta:tesla:cc70 -Minfo=accel -acc
clean:
rm -f *.o *.exe *.s *.mod a.out
Should I add any other command to run the code?
Thanks