Hi,
I’m just getting started with CUDA C and CUDA Fortran. I’m starting by verifying that the simple CUDA C and CUDA Fortran examples hosted at NVIDIA compile and execute correctly. However, while the CUDA C example generates the expected result for the example, “Max Error = 0.000000”, the CUDA Fortran code generates an incorrect result, “Max Error = 2.00000”. The links to the programs are here, and my source code is below the links. I am running an Intel i7-6800K with two GeForce GTX 1060 cards. Could there be something off with my cuda-8-0 distribution? I would like to note that the same source code file compiled and executed correctly on a different machine running cuda-7-5 (AMD processor, one NVIDIA 560 Ti).
https://devblogs.nvidia.com/parallelforall/easy-introduction-cuda-c-and-c/ and https://devblogs.nvidia.com/parallelforall/easy-introduction-cuda-fortran/
!> Device kernels for simple vector operations.
module mathOps
  implicit none
contains

  !> SAXPY kernel: computes y(i) = y(i) + a*x(i), with each thread
  !! handling at most one element.
  !!
  !! x : input vector (read only)
  !! y : vector updated in place
  !! a : scalar multiplier, passed by value
  !!
  !! The bound check uses size(x) only; y is assumed to be at least
  !! as long as x -- confirm at the call site.
  attributes(global) subroutine saxpy(x, y, a)
    implicit none
    real, intent(in)    :: x(:)
    real, intent(inout) :: y(:)
    real, value :: a
    integer :: i, n

    n = size(x)
    ! Global element index for this thread, built from the block and
    ! thread indices (the "- 1" offsets the block index).
    i = blockDim%x * (blockIdx%x - 1) + threadIdx%x
    ! Threads whose index falls past the end of x do nothing.
    if (i <= n) y(i) = y(i) + a*x(i)
  end subroutine saxpy

end module mathOps
!> Driver for the saxpy kernel: fills x with 1.0 and y with 2.0 on the
!! host, copies both to the device, runs y = y + 2.0*x there, copies y
!! back, and prints the largest deviation from the expected value 4.0.
program testSaxpy
  use mathOps
  use cudafor
  implicit none
  integer, parameter :: N = 40000
  integer, parameter :: threadsPerBlock = 256
  real :: x(N), y(N), a
  real, device :: x_d(N), y_d(N)
  type(dim3) :: grid, tBlock
  integer :: istat

  tBlock = dim3(threadsPerBlock,1,1)
  ! Integer ceiling division: enough blocks to cover all N elements,
  ! without the rounding a default-real division could introduce.
  grid = dim3((N + threadsPerBlock - 1)/threadsPerBlock,1,1)

  x = 1.0; y = 2.0; a = 2.0
  x_d = x
  y_d = y

  call saxpy<<<grid, tBlock>>>(x_d, y_d, a)

  ! A kernel launch does not report failure by itself. If the kernel
  ! never runs, y_d keeps its initial value 2.0 and the result below
  ! would read 2.0 with no other diagnostic, so check explicitly.
  istat = cudaGetLastError()
  if (istat /= cudaSuccess) then
    write(*,*) 'saxpy launch failed: ', trim(cudaGetErrorString(istat))
    stop 1
  end if

  ! Wait for the kernel to finish and pick up any error raised while
  ! it was executing.
  istat = cudaDeviceSynchronize()
  if (istat /= cudaSuccess) then
    write(*,*) 'saxpy execution failed: ', trim(cudaGetErrorString(istat))
    stop 1
  end if

  y = y_d
  write(*,*) 'Max error: ', maxval(abs(y-4.0))
end program testSaxpy