Please reply to me with more information you might need from me.
I have been in this tight spot for a couple of days. I do not understand why the compiler is not accepting the check_status_ reference in particular… are there any libraries I am not linking right?
I hope you could answer. I have reached a dead end.
I have also done the following to reduce the warnings and I got an exe file. But an error occurs when I run the cula.exe…please have a look:
module cula_test
use cudafor
contains
! cula test (device interface)
subroutine do_cula_device_test(n,nrhs,ain,bin)
! input
real,dimension(:,:) :: ain,bin
! allocations (all on host)
real,dimension(:,:),allocatable :: a,b,ans
integer n,nrhs,status
integer c1,c2,cr,cm
real norm
! gpu memory
real,device,dimension(:,:),allocatable :: a_dev,b_dev
integer,device,dimension(:),allocatable :: ipiv_dev
! back up input for reconstruction test
allocate( a(n,n), b(n,nrhs), ans(n,nrhs) )
a(1:n,1:n) = ain
b(1:n,1:nrhs) = bin
open (6, file='output.txt', status='unknown')
! allocate gpu memory
allocate( a_dev(n,n), b_dev(n,nrhs), ipiv_dev(n) )
! start test
call system_clock( c1,cr,cm )
print *, 'starting cula (device interface) test...'
! copy memory to gpu
a_dev = a
b_dev = b
! call cula solver (device interface)
status = cula_device_sgesv(n,nrhs,a_dev,n,ipiv_dev,b_dev,n)
! copy answer to host
b = b_dev
! stop test
call system_clock( count=c2 )
print *, ' runtime:', 1.e3*real(c2-c1) / real(cr), 'ms'
print *, ' gflops:', (0.66*n**3.)/(real(c2-c1)/real(cr))/(1.e9)
write(6,*) 'runtime:', 1.e3*real(c2-c1) / real(cr), 'ms'
write(6,*) 'gflops:', (0.66*n**3.)/(real(c2-c1)/real(cr))/(1.e9)
pause
! cleanup
deallocate(a,b,ans)
deallocate(a_dev,b_dev,ipiv_dev)
end subroutine do_cula_device_test
end module cula_test
! main program
program cula
use cula_test
real error,eps
! Host memory
real,dimension(:,:),allocatable :: a, b
integer n, info, i, j, status
n = 10000
nrhs = 1
print *,'cula + pgfortran test (matrix solve)'
print *,' array size: ', n, ' by ', n
print *,' right hand sides: ', nrhs
print *,''
allocate( a(n,n), b(n,nrhs) )
! intialize a and b
call random_number(a)
call random_number(b)
! Make sure a() isn't singular
do i=1,n
a(i,i) = 10. * a(i,i) + 10.
enddo
pause
! initialize cula
status = cula_initialize()
pause
! do gpu test (device interface)
call do_cula_device_test(n,nrhs,a,b)
pause
end program cula
compiled with: pgfortran cula_.cuf -lcula_pgfortran
and when I run the Cula.exe file I get this:
0: ALLOCATE: 400000000 bytes requested: status = 49(unspecified driver error)
I am really counting on you to give me a push.
Ahmed