Hi Mat,
I changed the dA and dB matrices to allocatable arrays and found something weird.
After allocating the device matrices, the VIRT size increased abruptly to 63,400,828 KB!
- [root@manysplendid ~]# ps aux | grep testCublas.x
root 1794 0.0 0.0 246412 7656 pts/5 S+ 13:37 0:00 ./testCublas.x
root 1796 0.0 0.0 105452 932 pts/4 S+ 13:37 0:00 grep testCublas.x
[root@manysplendid ~]# ps aux | grep testCublas.x
root 1794 2.5 0.6 63400828 199280 pts/5 Sl+ 13:37 0:00 ./testCublas.x
root 1800 0.0 0.0 105452 932 pts/4 S+ 13:37 0:00 grep testCublas.x
[root@manysplendid ~]#
The PGI version is 16.10, the GPU is a GTX 780 Ti, and the OS is CentOS 6.6.
The code and the compile command are shown below.
Is there something wrong?
Thank you
CY
module testBLAS
  ! Test module that calls the device-side cuBLAS library from inside a
  ! CUDA Fortran kernel.
  use cublas_device
  implicit none
contains
  ! Kernel: computes BB = AA * transpose(AA) with one device-side SGEMM call.
  !
  ! Arguments:
  !   AA : device array of shape (N,M), the input matrix.
  !   N  : number of rows of AA; BB is N-by-N (passed by value).
  !   M  : number of columns of AA (passed by value).
  !   BB : device array of shape (N,N), overwritten with the product.
  !
  ! Only the thread with threadidx%x == 1 performs the call; all other
  ! threads of the launch do nothing.
  attributes(global) subroutine testKernelCallCublas(AA,N,M,BB)
    implicit none
    integer, value :: N, M
    real, device :: AA(N,M), BB(N,N)
    integer, device :: istat, transa, transb
    type(cublasHandle) :: handle
    if( threadidx%x==1 )then
      istat = cublasCreate(handle)
      ! A zero status is CUBLAS_STATUS_SUCCESS; do not use the handle
      ! if it could not be created.
      if( istat==0 )then
        transa=0   ! 0 = CUBLAS_OP_N: first operand used as is
        transb=1   ! 1 = CUBLAS_OP_T: second operand transposed
        ! SGEMM dimensions are (m,n,k) = (N,N,M); every leading dimension
        ! is N because AA and BB both have N rows. Using the dummy
        ! arguments instead of literal sizes keeps the call consistent
        ! with the declared array shapes.
        istat= cublasSgemm(handle,transa,transb,N,N,M,1.0,AA,N,AA,N,0.,BB,N)
        istat = cublasDestroy(handle)
      end if
    end if
  end subroutine testKernelCallCublas
end module testBLAS
program prog
  ! Host driver for the device-side cuBLAS test: fills a 4x2 host matrix,
  ! copies it to the device, launches the test kernel and prints the
  ! resulting 4x4 matrix one column per line.
  use testBLAS
  implicit none
  integer, parameter :: nrow = 4, ncol = 2
  real, allocatable, device :: dA(:,:), dB(:,:)
  real :: A(nrow,ncol), B(nrow,nrow)
  integer :: j

  ! First column is all ones, second column is all 0.5.
  A(:,1)=1.
  A(:,2)=0.5

  ! Stop before the device allocation so the process can be inspected
  ! from outside (for example with ps).
  pause
  allocate(dA(nrow,ncol))
  allocate(dB(nrow,nrow))
  print*, "after allocating"
  ! Stop again after the allocation for a second inspection.
  pause

  ! Host-to-device copy by assignment.
  dA=A
  ! Launch with one block of 16 threads.
  call testKernelCallCublas<<<1,16>>>(dA,nrow,ncol,dB)
  ! Device-to-host copy of the result.
  B=dB

  do j=1,nrow
    print*,B(:,j)
  end do

  deallocate(dA)
  deallocate(dB)
end program prog
- [root@manysplendid cublas]# pgfortran -Mcuda=cc3.5 -lcublas_device test2.cuf -o testCublas.x
nvlink warning : SM Arch (‘sm_35’) not found in ‘/opt/pgi/linux86-64/2016/cuda/7.0/lib64/libcublas_device.a:maxwell_sgemm.asm.o’
nvlink warning : SM Arch (‘sm_35’) not found in ‘/opt/pgi/linux86-64/2016/cuda/7.0/lib64/libcublas_device.a:maxwell_sm50_sgemm.o’
nvlink warning : SM Arch (‘sm_35’) not found in ‘/opt/pgi/linux86-64/2016/cuda/7.0/lib64/libcublas_device.a:maxwell_sm50_ssyrk.o’
[root@manysplendid cublas]# ./testCublas.x
FORTRAN PAUSE: enter or d to continue>
after allocating
FORTRAN PAUSE: enter or d to continue>
1.250000 1.250000 1.250000 1.250000
1.250000 1.250000 1.250000 1.250000
1.250000 1.250000 1.250000 1.250000
1.250000 1.250000 1.250000 1.250000
[root@manysplendid cublas]#