Calling CUBLAS routines from inside a kernel

Hi,

I’ve been trying to write some simple code that calls a CUBLAS routine from inside a kernel, but I keep getting the following error message when I compile with the command:

pgfortran -Mcuda test_sgemm_from_kernel.cuf -o test_sgemm_from_kernel -lcublas

PGF90-S-0155-Calls from device code to a host subroutine are allowed only in emulation mode - sgemm (test_sgemm_from_kernel.cuf: 25)
0 inform, 0 warnings, 1 severes, 0 fatal for kernel

Any help would be appreciated. My code is appended below.

module kernel_mod
! Interface to cublasSgemm (bound under the Fortran name sgemm) plus a global kernel that calls it.
  interface
   subroutine sgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) bind(c,name='cublasSgemm')
     use iso_c_binding
     integer(c_int), value :: m, n, k, lda, ldb, ldc   ! passed by value
     real(c_float), device, dimension(m,n) :: a, b, c  ! NOTE(review): a, b and c all share shape (m,n) -- confirm
     real(c_float), value :: alpha, beta
     character(kind=c_char), value :: transa, transb
   end subroutine sgemm
! cublasInit binding, left commented out (unused).
!   subroutine cublasinit() bind(c,name='cublasInit')
!   end subroutine cublasinit
  end interface

  contains
! kernel: global subroutine whose only executable statement is a call to sgemm.
  attributes(global) subroutine kernel(k,m,n,alpha,a,lda,b,ldb,beta,c,ldc)
! a, b, c are device arrays; alpha, beta and the integers are declared without the value attribute.
    real, device :: a(k,m), b(m,n), c(k,m)
    real :: alpha, beta
    integer :: m,n,k,lda,ldb,ldc

! CUBLAS call issued from device code; sgemm's interface above has no device attribute.
    call sgemm('n','n',m,n,k,alpha,a,lda,b,ldb,beta,c,ldc)
! NOTE(review): this call is the statement PGF90-S-0155 rejects (host subroutine called from device code).

  end subroutine kernel

end module kernel_mod



program sgemm_mod
! Host driver for the SGEMM-from-kernel experiment.
!
! Fills three 1000x1000 host matrices with constants, prints the top-left 5x5 corner of
! A and B, copies the matrices and the SGEMM scalars to the device, launches kernel
! (from kernel_mod), copies C back and prints its top-left 5x5 corner.

  use cudafor
  use kernel_mod

  ! Without this, a misspelled name silently becomes a new host variable.
  implicit none

  integer, parameter :: nsize = 1000   ! order of the square matrices
  integer, parameter :: tile  = 16     ! threads per block along each axis
  integer, parameter :: nshow = 5      ! rows/columns printed from each matrix
  character(len=*), parameter :: row_fmt = '(5F15.5)'

  real, dimension(nsize,nsize) :: a, b, c
  real, device, dimension(nsize,nsize) :: dA, dB, dC
  real :: alpha, beta
  real, device :: dAlpha, dBeta
  integer :: m, n, k, lda, ldb, ldc
  integer, device :: dM, dN, dK, dLda, dLdb, dLdc
  integer :: row, col
! dim3 variables to define the grid and block shapes
  type(dim3) :: dimGrid, dimBlock

  m = nsize
  n = nsize
  k = nsize
  lda = nsize
  ldb = nsize
  ldc = nsize
  alpha = 1.0
  beta = 0.0

! initialize all entries of A, B, and C
  a = 2.0e0
  b = 1.5e0
  c = -9.9e0

! print 25 values of A and B
  write(*,*) '25 entries of A:'
  do row = 1, nshow
    write(*,row_fmt) (a(row,col), col = 1, nshow)
  end do

  write(*,*) '25 entries of B:'
  do row = 1, nshow
    write(*,row_fmt) (b(row,col), col = 1, nshow)
  end do

!call cublasinit()

! Copy data to device.  The kernel declares its scalar dummies without the value
! attribute, so device-resident copies of the scalars are passed as well.
  dA = a
  dB = b
  dC = c
  dAlpha = alpha
  dBeta = beta          ! was misspelled, which left dBeta uninitialised
  dM = m
  dN = n
  dK = k
  dLda = lda
  dLdb = ldb
  dLdc = ldc

! Create the grid and block dimensions.  The second grid extent previously used an
! undefined variable; it now uses m.  Ceiling division so that sizes that are not a
! multiple of the tile edge (1000 is not a multiple of 16) still get a full grid.
  dimGrid = dim3( (n + tile - 1)/tile, (m + tile - 1)/tile, 1 )
  dimBlock = dim3( tile, tile, 1 )
! The execution configuration needs both the grid and the block shape.
  call kernel<<<dimGrid, dimBlock>>>(dK,dM,dN,dAlpha,dA,dLda,dB,dLdb,dBeta,dC,dLdc)

! Copy results from device
  c = dC

! print results in C
  write(*,*) '25 entries of C:'
  do row = 1, nshow
    write(*,row_fmt) (c(row,col), col = 1, nshow)
  end do

end program sgemm_mod

Hi gv1579,

The error is correct. To call a routine from a kernel, it must be defined within the same module as your kernel and have the “device” attribute. CUBLAS Sgemm is callable only from the host. You can find an example of how to call CUBLAS sgemm in the examples included with your compiler installation and in the following PGInsider article: http://www.pgroup.com/lit/articles/insider/v3n1a5.htm

  • Mat