cublas_sdot appears to deliver a double precision result. Shouldn't this be a single precision BLAS function?
(cublas_ddot is the double precision function.)
System : CUDA 2.0 on Linux - RedHat EL5 x86_64.
Test code follows. The answer should be 204 (the sum of the squares of 1 through 8), but the test does not produce it when cublas_sdot is declared real.
! sdot_test: compare cublas_sdot on a device vector with host sdot.
! The vector holds 1..n (n = 8), so each dot product should be 204.
! Comments use "!" and statements start in column 7, so this source
! is accepted as either fixed-form or free-form Fortran.
      program sdot_test
      implicit none
      integer, parameter :: n = 8
! Element size handed to the cublas alloc/copy wrappers.
      integer*4, parameter :: elem_size = 4
! Device handle filled in by cublas_Alloc.  Widened from integer*4.
! NOTE(review): the CUBLAS Fortran wrappers are believed to use a
! pointer-sized handle (devptr_t) on x86_64, which a 4-byte integer
! cannot hold -- confirm against the wrapper's fortran_common.h.
      integer*8 :: dev_x
      integer :: j
      real :: y(n), z(n)
      real :: s0, s1, s2
      real, external :: sdot
! Workaround under test: the expected 204 is only obtained when
! cublas_sdot is declared double precision, not real.
! NOTE(review): presumably the C wrapper was built with the g77/f2c
! return convention (REAL functions return double) -- confirm the
! wrapper's build flags before switching to the declaration below.
      double precision, external :: cublas_sdot
!     real, external :: cublas_sdot

      call cublas_init()

! Host input vector: y = (1, 2, ..., n).
      do j = 1, n
         y(j) = real(j)
      end do

! Copy y to the device, then read it back into z so the round trip
! can be checked separately from the device dot product.
      call cublas_Alloc(n, elem_size, dev_x)
      call cublas_Set_Vector(n, elem_size, y, 1, dev_x, 1)
      call cublas_Get_Vector(n, elem_size, dev_x, 1, z(1), 1)

! s0: device result; s1: host result on the original data;
! s2: host result on the data read back from the device.
      s0 = cublas_sdot(n, dev_x, 1, dev_x, 1)
      s1 = sdot(n, y(1), 1, y(1), 1)
      s2 = sdot(n, z(1), 1, z(1), 1)

      print *, 's0,s1,s2', s0, s1, s2

      call cublas_free(dev_x)
      stop
      end program sdot_test