How to use a derived type in a kernel subroutine

I need to use an array of a derived type in my code, so I started with the following test code, which worked:

!> Derived types used by the test program: one with an ordinary allocatable
!> component and one whose component carries the CUDA Fortran `managed`
!> attribute.
module structure
  implicit none

  !> Host-side container holding a plain allocatable real array.
  type mytype
    real,allocatable :: a(:)
  end type mytype

  !> Same layout as mytype, but the component is declared `managed`.
  type mytypeDevice
    real,managed,allocatable :: a(:)
  end type mytypeDevice
end module structure

!> Host-only test: builds an array of mytype, builds a managed array of
!> mytypeDevice of the same shape, and copies each host component into the
!> corresponding managed component.
program main
  use cudafor
  use structure
  implicit none

  integer :: i   ! loop index, previously implicitly typed

  type(mytype),allocatable :: T(:)

  type(mytypeDevice),allocatable,managed :: T_Device(:)

  ! For the host
  allocate(T(3))
  do i=1,3
    allocate(T(i)%a(10))
  end do
  T(1)%a=1; T(2)%a=2; T(3)%a=3
  print *, 'Host derived array is ok'

  ! For the managed derived array
  allocate(T_Device(3))
  do i=1,3
    allocate(T_Device(i)%a(10))
  end do
  do i=1,3
    T_Device(i)%a=T(i)%a
  end do
  print *, 'Managed derivd array is okay.'

  ! Release the storage explicitly: managed components first, then the
  ! containing arrays.
  do i=1,3
    deallocate(T_Device(i)%a)
  end do
  deallocate(T_Device)
  deallocate(T)

end program main

However, the following code failed to compile when I tried to access the derived-type array inside a kernel subroutine:

!> Derived types shared by the host program and the kernel: one with an
!> ordinary allocatable component and one whose component carries the
!> CUDA Fortran `managed` attribute.
module structure
  implicit none

  !> Host-side container holding a plain allocatable real array.
  type mytype
    real,allocatable :: a(:)
  end type mytype

  !> Same layout as mytype, but the component is declared `managed`.
  type mytypeDevice
    real,managed,allocatable :: a(:)
  end type mytypeDevice
end module structure

!> Fills a host array of mytype, mirrors it into a managed array of
!> mytypeDevice, and passes the managed array to the testkernel kernel.
program main
  use cudafor
  use structure
  implicit none

  ! Explicit interface for the external kernel defined after the program.
  interface
    attributes(global) subroutine testkernel(T_device)
      use structure
      type(mytypeDevice),managed :: T_device(3)
    end subroutine testkernel
  end interface

  integer :: i       ! loop index, previously implicitly typed
  integer :: istat   ! status returned by the CUDA runtime call

  type(mytype),allocatable :: T(:)
  type(mytypeDevice),allocatable,managed :: T_Device(:)

  ! For the host
  allocate(T(3))
  do i=1,3
    allocate(T(i)%a(10))
  end do
  T(1)%a=1; T(2)%a=2; T(3)%a=3
  print *, 'Host derived array is ok'

  ! For the managed
  allocate(T_Device(3))
  do i=1,3
    allocate(T_Device(i)%a(10))
  end do
  do i=1,3
    T_Device(i)%a=T(i)%a
  end do
  print *, 'Managed derivd array is okay.'

  call testkernel<<<1,256>>>(T_device)

  ! The launch returns immediately; wait for the kernel so its output is
  ! produced before the program ends and before the data is released.
  istat = cudaDeviceSynchronize()
  if (istat /= cudaSuccess) print *, cudaGetErrorString(istat)

  ! Release the storage explicitly: managed components first, then the
  ! containing arrays.
  do i=1,3
    deallocate(T_Device(i)%a)
  end do
  deallocate(T_Device)
  deallocate(T)

end program main

! Kernel that tries to print every managed component of T_device from a
! single thread.
! NOTE(review): this is the version that the compiler rejects with
! "Unexpected runtime function call"; the write statements below pass a string
! label and a whole array, which looks like the trigger - confirm with the
! -Minfo output.
attributes(global) subroutine testkernel(T_device)
  use cudafor
  use structure
  implicit none
  type(mytypeDevice),managed :: T_device(3)
  integer i

  ! 1-based global thread index computed from the block and thread indices.
  i = (blockidx%x-1)*blockdim%x+threadidx%x

  ! Only the thread with index 1 writes, so each line is emitted once.
  if(i.eq.1)write(*,*)'T_device(1)%a=',T_device(1)%a
  if(i.eq.1)write(*,*)'T_device(2)%a=',T_device(2)%a
  if(i.eq.1)write(*,*)'T_device(3)%a=',T_device(3)%a
end subroutine testkernel

The compilation error message is as follows:
PGF90-F-0155-Compiler failed to translate accelerator region (see -Minfo messages): Unexpected runtime function call (testDerived2.cuf: 1)
PGF90/x86-64 Linux 16.5-0: compilation aborted.

Please help me understand what is wrong with the code, or what the right way is to use a derived-type array in kernel routines. Thanks a lot!

Hi XZHU,

Your use of the derived types is fine. The problem here is with the write statements. Printing on the device is fairly rudimentary, so things like formats, strings, and printing whole arrays with array syntax don’t work well.

Try something like the following:

% cat test3.cuf
!> Derived types shared by the host program and the kernel: one with an
!> ordinary allocatable component and one whose component carries the
!> CUDA Fortran `managed` attribute.
module structure
  implicit none

  !> Host-side container holding a plain allocatable real array.
  type mytype
    real,allocatable :: a(:)
  end type mytype

  !> Same layout as mytype, but the component is declared `managed`;
  !> the host program fills it and the kernel reads it.
  type mytypeDevice
    real,managed,allocatable :: a(:)
  end type mytypeDevice
end module structure

!> Fills a host array of mytype, mirrors it into a managed array of
!> mytypeDevice, launches testkernel on the managed array, and reports the
!> CUDA error status once the kernel has finished.
program main
  use cudafor
  use structure
  implicit none

  ! Explicit interface for the external kernel defined after the program.
  interface
    attributes(global) subroutine testkernel(T_device)
      use structure
      type(mytypeDevice),managed :: T_device(3)
    end subroutine testkernel
  end interface

  type(mytype),allocatable :: T(:)
  type(mytypeDevice),allocatable,managed :: T_Device(:)
  integer :: rc   ! status returned by the CUDA runtime calls
  integer :: i    ! loop index, previously implicitly typed

  ! For the host
  allocate(T(3))
  do i=1,3
    allocate(T(i)%a(10))
  end do
  T(1)%a=1; T(2)%a=2; T(3)%a=3
  print *, 'Host derived array is ok'

  ! For the managed
  allocate(T_Device(3))
  do i=1,3
    allocate(T_Device(i)%a(10))
  end do
  do i=1,3
    T_Device(i)%a=T(i)%a
  end do
  print *, 'Managed derivd array is okay.'

  call testkernel<<<1,256>>>(T_device)
  ! Wait for the kernel to finish so its output appears before the status
  ! line and before the managed data is released.
  rc = cudaDeviceSynchronize()
  rc = cudaGetLastError()
  print *, cudaGetErrorString(rc)

  ! Release the storage explicitly: managed components first, then the
  ! containing arrays.
  do i=1,3
    deallocate(T_Device(i)%a)
  end do
  deallocate(T_Device)
  deallocate(T)
end program main

!> Prints the first element of each managed component of T_device from
!> device code. Each value is copied into a scalar and printed on its own,
!> without a string label or array syntax.
attributes(global) subroutine testkernel(T_device)
  use cudafor
  use structure
  implicit none
  type(mytypeDevice),managed :: T_device(3)
  integer :: i, j
  real :: x

  ! 1-based global thread index computed from the block and thread indices.
  i = (blockidx%x-1)*blockdim%x+threadidx%x

  ! Only the thread with index 1 reports, so each value is printed once.
  if (i.eq.1) then
      do j = 1, size(T_device)
          x = T_device(j)%a(1)
          ! A labelled form such as  print *, 'T_device(j)%a=', x  is
          ! deliberately not used here.
          print *, x
      end do
  endif
end subroutine testkernel
% pgfortran -Mcuda=cc70 test3.cuf ; a.out
 Host derived array is ok
 Managed derivd array is okay.
   1.000000
   2.000000
   3.000000
 no error

Hope this helps,
Mat

Wow! CUDA Fortran is not as straightforward as conventional Fortran.
Thanks a lot for your help Mat!

Hi Mat,

In this case, do we need to free the managed variables?

thanks.

Just like other allocatable arrays, it’s best practice to deallocate them.

Thanks!