I’m trying my hand at using CUDA streams. Below is the code I’ve written. If it were correct, it should print 2. Can somebody help me find what is wrong?
Thanks.
! Module holding a one-thread CUDA Fortran kernel that adds two integers and a
! host wrapper that launches it on a user-created stream.
module addnum
use cudafor
contains
! Device kernel: writes the sum of its first two inputs into zn3.
! NOTE(review): the second dummy argument is spelled nz2, but the body reads zn2,
! which is not declared anywhere in this kernel -- the two names do not match.
attributes(global) subroutine addnum_kernel( zn1, nz2, zn3 )
integer, device :: zn1, nz2, zn3
zn3 = zn1 + zn2
end subroutine addnum_kernel
! Host wrapper: creates a stream, launches addnum_kernel with a 1x1 launch
! configuration on that stream, then assigns the device result to znewval.
! NOTE(review): zconst and zthreadidx are never assigned to zdconst / zdthreadidx,
! so the kernel is handed device variables that were never given a value here.
subroutine plladdnum( zconst, zthreadidx, znewval )
integer :: zconst, zthreadidx, znewval
integer, device :: zdconst, zdthreadidx, zdnewval
! integer ( kind = cuda_stream_kind ) :: streamnum
integer:: streamnum
! istat is not declared in this scope and its value is not checked afterwards.
istat = cudaStreamCreate ( streamnum )
call addnum_kernel <<< 1,1,0,streamnum >>> (zdconst, zdthreadidx, zdnewval)
! Device-to-host assignment of the kernel's output.
znewval = zdnewval
end subroutine plladdnum
end module addnum
! Driver: selects device 0, calls plladdnum with zconst = 1 and zthreadidx = 1,
! and prints the value returned in znewval.
program hccode
use addnum
! Program to test a heterogeneous compute code using MPI and CUDA Fortran.
! Each MPI thread copies a number to the device, the device adds a thread-specific
! number to it, the number is returned to the host, and the host prints it.
! NOTE(review): no MPI calls appear in this listing -- presumably a single-process test.
integer :: zconst, zthreadidx, znewval
integer :: idevice, istat
idevice = 0
zconst = 1
zthreadidx = 1
! The status returned in istat is not checked.
istat = cudaSetDevice(idevice)
print*,'calling plladdnum'
call plladdnum( zconst, zthreadidx, znewval)
print*,'znewval',znewval
end program hccode
Hi iamaditya,
There are two issues here. First, you either need to assign the device variables, zdconst and zdthreadidx, before passing them to the kernel, or pass in the host variables by value. (Passing by value is preferred here.) Secondly, you have a typo in your kernel where you use the variable “zn2” but declared it as “nz2”. Here is the fixed code:
% cat test.cuf
!> Minimal CUDA Fortran example: add two integers on the device, launching the
!> kernel on a user-created stream.
!>
!> The module deliberately keeps default (public) accessibility so that code
!> which reaches the cudafor API through `use addnum` keeps working.
module addnum
  use cudafor
  implicit none
  private :: check_cuda
contains

  !> Device kernel: store zn1 + zn2 in zn3.
  !> zn1 and zn2 are received by value; zn3 is a device variable that
  !> receives the result.
  attributes(global) subroutine addnum_kernel(zn1, zn2, zn3)
    integer, value :: zn1, zn2
    integer, device :: zn3
    zn3 = zn1 + zn2
  end subroutine addnum_kernel

  !> Abort with a readable message when a CUDA runtime call did not succeed.
  !> @param istat  status code returned by the CUDA runtime call
  !> @param what   short description of the call, used in the message
  subroutine check_cuda(istat, what)
    integer, intent(in) :: istat
    character(len=*), intent(in) :: what
    if (istat /= cudaSuccess) then
      print *, what, ' failed: ', trim(cudaGetErrorString(istat))
      error stop 1
    end if
  end subroutine check_cuda

  !> Host wrapper: compute zconst + zthreadidx on the device.
  !> @param zconst      first addend (host value, passed to the kernel by value)
  !> @param zthreadidx  second addend (host value, passed to the kernel by value)
  !> @param znewval     on return, the sum computed by the kernel
  subroutine plladdnum(zconst, zthreadidx, znewval)
    integer, intent(in) :: zconst, zthreadidx
    integer, intent(out) :: znewval
    integer, device :: zdnewval
    ! Stream handles must use the kind supplied by cudafor, not default integer.
    integer(kind=cuda_stream_kind) :: streamnum
    integer :: istat

    istat = cudaStreamCreate(streamnum)
    call check_cuda(istat, 'cudaStreamCreate')

    ! Launch configuration: grid = 1, block = 1, 0 bytes of dynamic shared
    ! memory, on streamnum.
    call addnum_kernel<<<1, 1, 0, streamnum>>>(zconst, zthreadidx, zdnewval)
    ! Kernel launches return no status; query the runtime for launch errors.
    istat = cudaGetLastError()
    call check_cuda(istat, 'addnum_kernel launch')

    ! Wait for the stream explicitly so the copy-back below does not depend on
    ! implicit ordering between streamnum and the default stream.
    istat = cudaStreamSynchronize(streamnum)
    call check_cuda(istat, 'cudaStreamSynchronize')

    ! Device-to-host transfer of the result.
    znewval = zdnewval

    ! Release the stream; the original code leaked one stream per call.
    istat = cudaStreamDestroy(streamnum)
    call check_cuda(istat, 'cudaStreamDestroy')
  end subroutine plladdnum

end module addnum
!> Driver for the addnum example.
!>
!> Selects device 0, asks plladdnum to add zconst and zthreadidx on the GPU,
!> and prints the result. Written as a first step towards a heterogeneous
!> MPI + CUDA Fortran code; this listing itself makes no MPI calls.
program hccode
  use cudafor, only: cudaSetDevice, cudaSuccess, cudaGetErrorString
  use addnum, only: plladdnum
  implicit none
  integer :: zconst, zthreadidx, znewval
  integer :: idevice, istat

  idevice = 0
  zconst = 1
  zthreadidx = 1

  ! Stop early with a readable message if the device cannot be selected,
  ! rather than failing later inside the kernel launch.
  istat = cudaSetDevice(idevice)
  if (istat /= cudaSuccess) then
    print *, 'cudaSetDevice failed: ', trim(cudaGetErrorString(istat))
    error stop 1
  end if

  print*,'calling plladdnum'
  call plladdnum( zconst, zthreadidx, znewval)
  print*,'znewval',znewval
end program hccode
% pgf90 test.cuf
% a.out
calling plladdnum
znewval 2
Hope this helps,
Mat