Hi,
I have some trouble using the sqrt() function in the CUDA Fortran kernel. The kernel input is a matrix, calculate the square root of each element in the matrix, but the output matrix is all 0. Below is the kernel code:
module cudaModule
use cudafor
contains
attributes(global) subroutine test_sqrt(jdim,kdim,idim,input,output)
implicit none
integer, intent(in) :: jdim,kdim,idim
real, intent(in) :: input(jdim-1,kdim-1,idim-1)
real, intent(inout) :: output(jdim-1,kdim-1,idim-1)
integer gtidx,gtidy,gtidz
gtidx = blockDim%x * (blockIdx%x - 1) + threadIdx%x
gtidy = blockDim%y * (blockIdx%y - 1) + threadIdx%y
gtidz = blockDim%z * (blockIdx%z - 1) + threadIdx%z
if (gtidy <= idim-1 .and. gtidx <= jdim-1 .and. gtidz <= kdim-1) then
output(gtidx,gtidz,gtidy)=SQRT(input(gtidx,gtidz,gtidy))
endif
end subroutine test_sqrt
end module cudaModule
program testTwoeqn
use cudafor
use cudaModule
implicit none
integer, parameter :: jdim=17
integer, parameter :: kdim=25
integer, parameter :: idim=2
integer result,i,j,k
real :: input(jdim-1,kdim-1,idim-1),cpu_output(jdim-1,kdim-1,idim-1), &
gpu_output(jdim-1,kdim-1,idim-1)
! device data
integer, parameter :: TILE_X = 32
integer, parameter :: TILE_Y = 32
integer, parameter :: TILE_Z = 2
integer, device :: jdim_d, kdim_d, idim_d
real, device :: input_d(jdim-1,kdim-1,idim-1),output_d(jdim-1,kdim-1,idim-1)
type(dim3) :: grid, tBlock
! init
do k=1,kdim-1
do i=1,idim-1
do j=1,jdim-1
input(j,k,i)=j
cpu_output(j,k,i)=0.0
gpu_output(j,k,i)=0.0
enddo
enddo
enddo
! CPU TEST
do k=1,kdim-1
do i=1,idim-1
do j=1,jdim-1
cpu_output(j,k,i)=sqrt(input(j,k,i))
enddo
enddo
enddo
! GPU TEST
jdim_d=jdim
kdim_d=kdim
idim_d=idim
input_d=input
output_d=gpu_output
tBlock = dim3(TILE_X,TILE_Y,TILE_Z)
grid = dim3(CEILING(REAL(jdim-1)/TILE_X), &
CEILING(REAL(kdim-1)/TILE_Y), &
CEILING(REAL(idim-1)/TILE_Z))
call test_sqrt<<<grid, tBlock>>>(jdim_d,kdim_d,idim_d,input_d,output_d)
gpu_output=output_d
call compare3D(cpu_output,gpu_output,jdim-1,kdim-1,idim-1,result)
if (result == 0) then
print*, "Pass Test!"
else
print*, "Test Failed!"
endif
end program testTwoeqn
Is there anything wrong here?
Thank you for your help.