The code below fails at line 37 where it tries to perform a dot product using cublas and tries to store the result into an element of an array on the device.
Is this not allowed?
program test
!GPU\
use cudafor
use cublas
!GPU/
implicit none
integer, parameter :: size = 10
double precision, dimension (size) :: vec_a, vec_b, dotproduct
double precision, allocatable, dimension (:), device :: Dvec_a,
> Dvec_b, Ddotproduct
integer, i, j, istat, imode
type(cublashandle) :: h
c
istat=0
c
istat=istat+cublasInit()
if(istat.ne.0) write(*,*) 'cublasInit'
h=cublasgethandle()
istat=istat+cublasCreate(h)
if(istat.ne.0) write(*,*) 'cublasCreate'
istat=istat+cublasalloc(size,8,Dvec_a)
istat=istat+cublasalloc(size,8,Dvec_b)
istat=istat+cublasalloc(size,8,Ddotproduct)
if(istat.ne.0) write(*,*) 'cublasalloc'
do i=1, size
do j=1, size
vec_a(j)=dble(j)
vec_b(j)=dble(size-j)
enddo
#ifdef _ACCEL
istat=istat+cublassetvector(size,8,vec_a,1,Dvec_a,1)
if(istat.ne.0) write(*,*) 'set vector Dvec_a'
istat=istat+cublassetvector(size,8,vec_b,1,Dvec_b,1)
if(istat.ne.0) write(*,*) 'set vector Dvec_b'
istat=istat+
> cublasDdot_v2(h,size,Dvec_a,1,Dvec_b,1,Ddotproduct(i)) !<< code crashes here
if(istat.ne.0) write(*,*) 'dot product'
dotproduct(i)=dotproduct(i)+dble(i)
#else
dotproduct(i)=dot_product(vec_a,vec_b)+dble(i)
#endif
enddo
#ifdef _ACCEL
istat=istat+cublasgetvector(size,8,Ddotproduct,1,dotproduct,1)
if(istat.ne.0) write(*,*) 'get vector'
#endif
write(*,*) 'dotproduct := ', dotproduct
istat=istat+cublasShutdown()
end
Also, the cublas 4.0 manual states that cublasAlloc and cublasFree have been depreciated. Would the “in thing” be to use allocate and deallocate or cudaMalloc and cudaFree?
If either could be used is there a benefit to using one over the other (i.e. for 1D arrays vs. 2D or 3D arrays?) I do gather that their usage should not be mix.