I want to use the cuBLAS library in device code to perform a matrix multiplication. According to the manual (http://www.pgroup.com/doc/pgi17cudaint.pdf), the cuBLAS library can be called from CUDA Fortran device
code. However, I get the following error:
PGF90-S-0155-Calls from device code to a host subroutine are allowed only in emulation mode
My code is given below:
!> Kind parameters shared by the host program and the kernel.
!>
!> `single` and `double` are the kinds of default real and double precision
!> literals; `fp_kind` selects which of the two the application works in.
module precision
  implicit none

  integer, parameter :: single = kind(0.0)
  integer, parameter :: double = kind(0.0d0)

  ! Working precision. Swap the active line to build in single precision.
  integer, parameter :: fp_kind = double
  ! integer, parameter :: fp_kind = single
end module precision
!> Host driver: launches the solve_c kernel with one block of one thread,
!> copies the m-by-k result back from device memory and prints it.
program gemm_test
  use precision
  use cublas
  implicit none

  integer, parameter :: m = 9, k = 1

  ! The kernel's dummy argument is default real and is passed to sgemm, a
  ! single-precision routine, so the buffers handed to it are declared with
  ! kind `single` rather than `fp_kind`. With fp_kind = double the host would
  ! reinterpret the kernel's 4-byte results as 8-byte values.
  real(single)         :: c_h(m, k)   ! host copy of the result
  real(single), device :: c(m, k)     ! result buffer in device memory

  ! solve_c is an external procedure; give the chevron launch an explicit
  ! interface so the compiler can check the argument it is passed.
  interface
    attributes(global) subroutine solve_c(c)
      import :: m, k
      real :: c(m, k)
    end subroutine solve_c
  end interface

  call solve_c<<<1, 1>>>(c)

  c_h = c        ! device-to-host copy
  print *, c_h
end program gemm_test
!> Kernel: fills a (m x n) with 1 and b (n x k) with 2, then calls sgemm to
!> form c = 1.0*a*b + 0.0*c.
!>
!> c  m-by-k output matrix. With the inputs set here every element of the
!>    product is n * (1 * 2) = 18.
!>
!> NOTE(review): the reported PGF90-S-0155 diagnostic means the `sgemm` call
!> below is being bound to a host procedure. Whether `cublas_device` exports
!> the legacy name `sgemm` at all, or only handle-based entry points, cannot be
!> determined from this file -- confirm against the PGI library-interface
!> manual and switch the call if needed.
!> NOTE(review): a and b are local arrays of the kernel; confirm that the
!> device library accepts such arrays as operands.
attributes(global) subroutine solve_c(c)
  use precision, only: single
  use cublas_device
  implicit none

  integer, parameter :: m = 9, n = 9, k = 1
  real(single), parameter :: alpha = 1.0_single, beta = 0.0_single

  real(single) :: a(m, n), b(n, k), c(m, k)

  a = 1
  b = 2

  ! Leading dimensions are the row counts of the arrays as declared:
  ! lda = m, ldb = n, ldc = m. (c is m-by-k, so ldc must be m, not k.)
  call sgemm('N', 'N', m, k, n, alpha, a, m, b, n, beta, c, m)
end subroutine solve_c
! Declaration-only sgemm: it spells out a BLAS-style argument list (operation
! flags, dimensions, scalars, and matrices with their leading dimensions) but
! contains no executable statements, so calling it computes nothing.
!
! NOTE(review): this subroutine has no attributes(device) or
! attributes(global) prefix, so it is compiled as a host procedure. A call to
! `sgemm` from device code that binds to it is a device-to-host call, which
! matches the reported PGF90-S-0155 diagnostic. It looks like an interface
! description copied from the library manual rather than code meant to be
! compiled -- confirm, and if so remove it from the source file.
subroutine sgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
character*1 :: transa, transb
integer :: m, n, k, lda, ldb, ldc
real(4), device, dimension(lda, *) :: a ! device or host variable
real(4), device, dimension(ldb, *) :: b ! device or host variable
real(4), device, dimension(ldc, *) :: c ! device or host variable
real(4), device :: alpha, beta ! device or host variable
end subroutine
I compile it with:
pgfortran -Mcuda=cc35 gemm.cuf -lcublas_device