Hello,
we are experiencing serious issues when trying to use some Fortran features in CUDA Fortran kernels.
(1) Kinds defined through preprocessor macros (#define) cannot be applied to literal constants
(2) Automatic arrays declared with sizes passed as arguments unpredictably fail at runtime, depending on their own size or on the size of other variables
We attach a reproducing example below.
! Reproducer for two CUDA Fortran issues:
!   [1] local arrays declared inside a kernel (fixed-size or automatic),
!   [2] applying a kind to a literal constant when the kind comes from a
!       preprocessor macro.
! The source must be run through the preprocessor so that RTYPE is expanded.
#define RTYPE 8

!> Device code: a single kernel that writes into a 2-D array with ghost cells.
module kernels_test
  use cudafor
  implicit none
contains

  !> Each thread computes its (i,j) index from the block/thread indices and,
  !> when the index lies inside 1..nx by 1..ny, fills the local array v_auto
  !> with 20 and stores 1000 + v_auto(1) into v(i,j).
  !>
  !> nx, ny : extents of the index range that is written (passed by value)
  !> ng     : number of extra cells on each side of both dimensions of v
  !> v      : device array with bounds (1-ng:nx+ng, 1-ng:ny+ng)
  attributes(global) subroutine mykernel(nx, ny, ng, v)
    integer, value :: nx, ny, ng
    real(RTYPE), device :: v(1-ng:nx+ng, 1-ng:ny+ng) ! [1] It works
    real(RTYPE) :: v_auto(-1:62) ! [1] It does NOT work (at run-time)
    ! Alternative declarations of v_auto tried by the reporter:
    !real(RTYPE) :: v_auto(1-ng:nx+ng)
    !real(RTYPE) :: v_auto(-1:62)
    !real(RTYPE) :: v_auto(10,10,10,10)
    ! Named constant carrying the macro value, so that it can be used as a
    ! kind suffix on literals (see [2] below).
    integer, parameter :: rtypepar = RTYPE
    integer :: i, j

    ! 1-based global thread indices along x and y.
    i = blockDim%x*(blockIdx%x - 1) + threadIdx%x
    j = blockDim%y*(blockIdx%y - 1) + threadIdx%y

    ! Threads whose index falls outside 1..nx by 1..ny do nothing.
    if (i <= nx .and. j <= ny) then
      v_auto = 20. ! [2] It works
      v(i,j) = 1000._rtypepar + v_auto(1) ! [2] It does NOT work (at compilation time)
      !v(i,j) = 1000._RTYPE + v_auto(1)
    end if
  end subroutine mykernel

end module kernels_test

!> Shared problem sizes and the host/device copies of the field.
module const
  use cudafor
  implicit none
  integer :: nx, ny, ng, iermpi
  real(RTYPE), allocatable, dimension(:,:) :: v, v_gpu
  attributes(device) :: v_gpu
end module const

!> Host driver: allocates the arrays, copies random data to the device,
!> launches the kernel, reports the CUDA status and copies the result back.
program main
  use const
  use cudafor
  use kernels_test
  implicit none
  type(dim3) :: dimGrid, dimBlock
  integer :: iercuda

  iermpi = cudaSetDevice(0)

  nx = 60
  ny = 400
  ng = 2

  allocate(v(1-ng:nx+ng, 1-ng:ny+ng))
  allocate(v_gpu(1-ng:nx+ng, 1-ng:ny+ng))

  call random_number(v)
  !print*,'before: ',v
  v_gpu = v

  ! 16x16 threads per block; the grid size is rounded up so that the whole
  ! 1..nx by 1..ny range is covered.
  dimBlock = dim3(16,16,1)
  dimGrid = dim3((nx+dimBlock%x-1)/(dimBlock%x), (ny+dimBlock%y-1)/(dimBlock%y), 1)

  call mykernel<<<dimGrid, dimBlock>>>(nx, ny, ng, v_gpu)

  ! Wait for the kernel to finish and print the status returned by the sync.
  iercuda = cudaDeviceSynchronize()
  print*,'cuda error: ',cudaGetErrorString(iercuda)

  v = v_gpu
  !print*,'after: ',v
end program main
Thanks
Francesco