Declaring a dim3 variable using %x, %y doesn't work

Tuan · January 9, 2011, 10:14pm

It seems that CUDA Fortran code only works when I declare a 2D/3D thread block using the following approach:

! Launch configuration built with the dim3 structure constructor.
type(dim3) :: dimGrid, dimBlock

 ! Grid of (N/16) x (L/16) blocks; integer division, so this presumably
 ! assumes N and L are multiples of 16 -- confirm against the kernel.
 dimGrid = dim3( N/16, L/16, 1 )
 ! 16 x 16 threads per block.
 dimBlock = dim3( 16, 16, 1 )
 call mmul_kernel<<<dimGrid,dimBlock>>>( Adev,Bdev,Cdev,N,M,L )

I get a runtime error if I use the C-like declaration:

! Launch configuration filled in one component at a time (%x, %y, %z).
type(dim3) :: dimGrid, dimBlock

 ! Grid of (N/16) x (L/16) x 1 blocks (integer division).
 dimGrid%x = N/16
 dimGrid%y = L/16
 dimGrid%z = 1
 ! 16 x 16 x 1 threads per block.
 dimBlock%x = 16
 dimBlock%y = 16
  dimBlock%z = 1
 call mmul_kernel<<<dimGrid,dimBlock>>>( Adev,Bdev,Cdev,N,M,L )

I think it should be okay to use either approach. Any idea?

Tuan

MatColgrove · January 10, 2011, 9:45pm

Hi Tuan,

Something else is going on since both methods work for me. Can you post a reproducer?

Go Ducks!
Mat

Example:

% cat test2.cuf 

!> Device code for the dim3 launch-configuration reproducer.
module testme
use cudafor
implicit none

contains

!> Kernel: every thread writes the product of its global indices into A.
!!
!! A  device array that receives A(ix,iy) = ix*iy
!! N  number of rows to fill (passed by value)
!! L  number of columns to fill (passed by value)
!!
!! Threads whose global index falls outside N x L do nothing, so the launch
!! grid may be larger than the region being filled.
attributes (global) subroutine mmul_kernel(A,N,L)
  ! cudafor is already visible through the enclosing module.
  real, dimension(:,:), intent(inout) :: A
  integer, value :: N,L
  integer :: ix,iy

  ! Global indices of this thread across the whole grid; the (blockidx-1)
  ! offset follows the 1-based built-in indices of CUDA Fortran.
  ix = threadidx%x + blockdim%x*(blockidx%x-1)
  iy = threadidx%y + blockdim%y*(blockidx%y-1)

  ! Bounds guard: skip threads outside the requested N x L region.
  if (ix <= N .and. iy <= L) then
    A(ix,iy) = real(ix*iy)
  end if

end subroutine mmul_kernel

end module testme

!> Reproducer: launch mmul_kernel with a launch configuration whose dim3
!! components are assigned one by one (%x, %y, %z), then copy the result
!! back to the host and print its first and last elements.
program test
use cudafor
use testme
implicit none

! Threads per block along each of x and y.
integer, parameter :: tile = 16

real, dimension(:,:), allocatable, device :: Adev
real, dimension(:,:), allocatable :: A
integer :: N,L
integer :: istat
type(dim3) :: dimGrid, dimBlock

N=64
L=64
allocate(Adev(N,L), A(N,L))

! Component-wise assignment of the launch configuration. The block counts
! are rounded up so the grid still covers the array when N or L is not a
! multiple of tile; the kernel's bounds guard discards the surplus threads.
 dimGrid%x = (N + tile - 1)/tile
 dimGrid%y = (L + tile - 1)/tile
 dimGrid%z = 1
 dimBlock%x = tile
 dimBlock%y = tile
 dimBlock%z = 1
call mmul_kernel<<<dimGrid,dimBlock>>>( Adev,N,L )

! Report a bad launch configuration here rather than at the later copy.
istat = cudaGetLastError()
if (istat /= cudaSuccess) then
   print *, 'mmul_kernel launch failed: ', cudaGetErrorString(istat)
   stop 1
end if

! Device-to-host copy of the result.
A=Adev

print *, A(1,1), A(N,L)

deallocate(Adev, A)
end program test
% pgf90 test2.cuf -o test2.out -V11.0 -fast
% test2.out
    1.000000        4096.000