Using Streams

I’m trying my hand at using streams. Below is the code I’ve written. If it works correctly, the code should print 2. Can somebody help me find what is wrong?

Thanks.

! Module as originally posted: a one-thread CUDA Fortran kernel that adds two
! integers, plus a host wrapper that launches it on a newly created stream.
! The NOTE(review) comments mark what the visible code does that prevents it
! from producing the expected result; the code itself is left untouched.
module addnum
  use cudafor
    contains
    
  ! Kernel: stores a sum into the device scalar zn3.
  ! NOTE(review): the second dummy argument is spelled nz2, but the sum below
  ! reads zn2. zn2 is not a dummy argument and is never declared or assigned
  ! in this kernel (there is no implicit none), so nz2 is never used.
  attributes(global) subroutine addnum_kernel( zn1, nz2, zn3 )
    integer, device :: zn1, nz2, zn3
    zn3 = zn1 + zn2
  end subroutine addnum_kernel


  ! Host wrapper: creates a stream, launches addnum_kernel on it, and copies
  ! the device result zdnewval back into the host argument znewval.
  !   zconst, zthreadidx : host integers supplied by the caller
  !   znewval            : host integer that receives the device result
  ! NOTE(review): zconst and zthreadidx are never copied into zdconst and
  ! zdthreadidx, so the kernel is launched with device inputs that were never
  ! assigned.
  ! NOTE(review): istat is not declared in this scope, and the status it
  ! receives from cudaStreamCreate is never inspected.
  subroutine plladdnum( zconst, zthreadidx, znewval )
    integer :: zconst, zthreadidx, znewval
    integer, device :: zdconst, zdthreadidx, zdnewval
 !   integer ( kind = cuda_stream_kind ) :: streamnum
    ! The stream handle is declared as a default integer here; the
    ! cuda_stream_kind declaration above is commented out.
    integer:: streamnum
    istat = cudaStreamCreate ( streamnum )

    ! Launch configuration: 1 block, 1 thread per block, 0 bytes of dynamic
    ! shared memory, on stream streamnum.
    call addnum_kernel <<< 1,1,0,streamnum >>> (zdconst, zdthreadidx, zdnewval)
    ! Assignment from a device variable to a host variable copies the value
    ! back to the host.
    znewval = zdnewval

  end subroutine plladdnum

end module addnum

program hccode
  ! Test driver for a heterogeneous compute code (intended for MPI plus
  ! CUDA Fortran; this version contains no MPI calls). It selects a CUDA
  ! device, hands two host integers to plladdnum so the device can add them,
  ! and prints the value that comes back.
  use addnum

  integer :: istat
  integer :: idevice
  integer :: zconst
  integer :: zthreadidx
  integer :: znewval

  ! Operands handed to the device: a constant and a thread-specific number.
  zthreadidx = 1
  zconst = 1

  ! Select the device just before it is first used.
  idevice = 0
  istat = cudaSetDevice(idevice)

  write(*,*) 'calling plladdnum'
  call plladdnum(zconst, zthreadidx, znewval)
  write(*,*) 'znewval', znewval
end program hccode

Hi iamaditya,

There are two issues here. First, you either need to assign the device variables, zdconst and zdthreadidx, before passing them to the kernel, or pass in the host variables by value. (Passing by value is preferred here.) Second, you have a typo in your kernel, where you use the variable “zn2” but declared it as “nz2”. Here is the fixed code:

% cat test.cuf 
module addnum
  ! One-thread CUDA Fortran example: a kernel that adds two integers and a
  ! host wrapper that runs it on its own stream.
  !
  ! The module is left default-public so that callers which reach cudafor
  ! names (e.g. cudaSetDevice) through "use addnum" keep working.
  use cudafor
  implicit none

  private :: check_cuda

contains

  ! Kernel: zn3 = zn1 + zn2.
  !   zn1, zn2 : operands, passed by value from the host
  !   zn3      : device scalar that receives the sum
  attributes(global) subroutine addnum_kernel( zn1, zn2, zn3 )
    integer, value :: zn1, zn2
    integer, device :: zn3
    zn3 = zn1 + zn2
  end subroutine addnum_kernel


  ! Host wrapper: adds zconst and zthreadidx on the device and returns the
  ! sum in znewval.
  !   zconst, zthreadidx : host operands (passed to the kernel by value)
  !   znewval            : receives the sum computed on the device
  ! Any CUDA call that fails prints a message and stops with exit code 1.
  subroutine plladdnum( zconst, zthreadidx, znewval )
    integer, intent(in)  :: zconst, zthreadidx
    integer, intent(out) :: znewval
    integer, device :: zdnewval
    ! Stream handles must use cuda_stream_kind; a default integer may be
    ! too small to hold the handle.
    integer(kind=cuda_stream_kind) :: streamnum
    integer :: istat

    istat = cudaStreamCreate( streamnum )
    call check_cuda( istat, 'cudaStreamCreate' )

    ! 1 block of 1 thread, no dynamic shared memory, on streamnum.
    call addnum_kernel <<< 1,1,0,streamnum >>> (zconst, zthreadidx, zdnewval)
    istat = cudaGetLastError()
    call check_cuda( istat, 'addnum_kernel launch' )

    ! Wait for the kernel so that an execution error is reported here
    ! rather than surfacing at the copy below.
    istat = cudaStreamSynchronize( streamnum )
    call check_cuda( istat, 'cudaStreamSynchronize' )

    ! Device-to-host copy of the result.
    znewval = zdnewval

    ! Release the stream; the original code leaked one stream per call.
    istat = cudaStreamDestroy( streamnum )
    call check_cuda( istat, 'cudaStreamDestroy' )

  end subroutine plladdnum


  ! Stops the program with a diagnostic if a CUDA runtime call failed.
  !   istat : status returned by the CUDA runtime call
  !   what  : short label of the call, used in the message
  subroutine check_cuda( istat, what )
    integer, intent(in) :: istat
    character(len=*), intent(in) :: what

    if (istat /= cudaSuccess) then
      print *, what, ' failed: ', trim(cudaGetErrorString(istat))
      stop 1
    end if
  end subroutine check_cuda

end module addnum

program hccode
  ! Test driver for a heterogeneous compute code (intended for MPI and
  ! CUDA Fortran; this version contains no MPI calls). The host passes a
  ! constant and a thread-specific number to the device, the device adds
  ! them, the sum is returned to the host, and the host prints it.
  !
  ! cudafor is used directly so that cudaSetDevice, cudaSuccess and
  ! cudaGetErrorString do not depend on being re-exported by addnum.
  use cudafor
  use addnum
  implicit none

  integer :: zconst, zthreadidx, znewval
  integer :: idevice, istat

  idevice = 0
  zconst = 1
  zthreadidx = 1

  ! Stop early if the device cannot be selected; otherwise the failure
  ! would only show up later inside plladdnum.
  istat = cudaSetDevice(idevice)
  if (istat /= cudaSuccess) then
    print*,'cudaSetDevice failed: ', trim(cudaGetErrorString(istat))
    stop 1
  end if

  print*,'calling plladdnum'
  call plladdnum( zconst, zthreadidx, znewval)

  print*,'znewval',znewval
end program hccode

% pgf90 test.cuf
% a.out
 calling plladdnum
 znewval            2

Hope this helps,
Mat

Yes.
Thanks.