I built and ran trand2.cuf from NVHPC/Linux_x86_64/22.2/examples/CUDA-Libraries/cuRAND/test_rand_cuf, and the test passed.
But when I change seq or offset from 0 to 1, the test fails.
I then ran more tests, and some of them fail every time.
!> Reproducer for the cuRAND device API from CUDA Fortran.
!!
!! Holds one generator state per thread for three generator types
!! (XORWOW, Philox4_32_10, MRG32k3a), a kernel that initialises those
!! states, a kernel that draws uniform and normal samples from them, and a
!! host-side sanity check of the resulting samples.
module mtests
  use curand_device
  implicit none

  !> Number of samples drawn per generator and per distribution.
  integer, parameter :: n = 10000

  !> Per-thread generator states; the caller allocates them before launching
  !> the kernels (element `iam` belongs to global thread `iam`).
  type(curandStateXORWOW),       allocatable, device, target :: h1(:)
  type(curandStatePhilox4_32_10), allocatable, device, target :: h2(:)
  type(curandStateMRG32k3a),     allocatable, device, target :: h3(:)

contains

  !> Initialise the three generator states owned by the calling thread.
  !!
  !! Every thread uses its own seed (derived from its global index) together
  !! with the caller-supplied `seq` and `offset`, which are forwarded
  !! unchanged to `curand_init`.
  !!
  !! @param seq     sequence argument forwarded to `curand_init`
  !! @param offset  offset argument forwarded to `curand_init`
  attributes(global) subroutine init1(seq, offset)
    integer(8), intent(in) :: seq, offset
    integer(8) :: seed
    integer :: iam

    ! Global 1-based thread index. For a single-block launch this equals
    ! threadIdx%x; with several blocks it keeps blocks from sharing (and
    ! racing on) the same state element.
    iam = (blockIdx%x - 1)*blockDim%x + threadIdx%x

    ! test1 never uses more than n states, so surplus threads have nothing
    ! to initialise. NOTE(review): assumes h1/h2/h3 hold at least n
    ! elements, as the driver program allocates them - confirm for other
    ! callers.
    if (iam > n) return

    seed = iam*121 + 12345
    call curand_init(seed, seq, offset, h1(iam))
    call curand_init(seed, seq, offset, h2(iam))
    call curand_init(seed, seq, offset, h3(iam))
  end subroutine init1

  !> Fill `a` with uniform samples and `b` with normal samples.
  !!
  !! Both arrays are split into three consecutive segments of length `n`:
  !! segment 1 comes from the XORWOW state, segment 2 from Philox4_32_10 and
  !! segment 3 from MRG32k3a. Each thread walks the index range 1..n with a
  !! stride equal to the total number of launched threads and draws from its
  !! own states, which must have been set up by `init1` with the same launch
  !! configuration.
  !!
  !! @param a  receives the uniform samples (3*n values)
  !! @param b  receives the normal samples (3*n values)
  attributes(global) subroutine test1(a, b)
    real, device, intent(inout) :: a(3*n), b(3*n)
    integer :: iam, i, nthreads

    iam = (blockIdx%x - 1)*blockDim%x + threadIdx%x
    nthreads = blockDim%x*gridDim%x

    ! The call order (three uniforms, then three normals) is kept exactly as
    ! is: every call advances the state it is given, so reordering would
    ! change which values land where.
    do i = iam, n, nthreads
      a(i)       = curand_uniform(h1(iam))
      a(i + n)   = curand_uniform(h2(iam))
      a(i + 2*n) = curand_uniform(h3(iam))
      b(i)       = curand_normal(h1(iam))
      b(i + n)   = curand_normal(h2(iam))
      b(i + 2*n) = curand_normal(h3(iam))
    end do
  end subroutine test1

  !> Run the statistical sanity check once per generator.
  !!
  !! Each generator's segment of `a` (uniform) and `b` (normal) is checked
  !! independently and gets its own "Test SUCCESS" / "Test FAILED" line.
  !!
  !! @param a  uniform samples, three segments of length n
  !! @param b  normal samples, three segments of length n
  subroutine check(a, b)
    real, intent(in) :: a(3*n), b(3*n)
    ! Verdict for the segment currently being checked; subcheck reads and
    ! clears it through host association.
    logical :: passing

    passing = .true.
    call subcheck(a(1:n), b(1:n))
    passing = .true.
    call subcheck(a(n+1:2*n), b(n+1:2*n))
    passing = .true.
    call subcheck(a(2*n+1:3*n), b(2*n+1:3*n))

  contains

    !> Check one generator's samples and print the verdict.
    !!
    !! Uniform samples must lie in [0, 1] with a mean in [0.49, 0.51].
    !! Normal samples must be roughly balanced around zero (the counts in
    !! (-4, 0) and (0, 4) may differ by at most n/10) and have a mean in
    !! [-0.1, 0.1]. All samples are also dumped to 'a.txt' (uniform) and
    !! 'b.txt' (normal); every call reopens the same file names, so
    !! presumably only the most recently checked generator's samples remain.
    subroutine subcheck(a, b)
      real, intent(in) :: a(n), b(n)
      integer, parameter :: dump_unit = 10
      real :: c(n), rmean
      integer :: i, nc1, nc2

      ! ---- uniform samples ----
      c = a
      print *,"Should be uniform around 0.5"
      open (unit=dump_unit, file='a.txt')
      do i = 1, n
        ! Echo a small sample (indices 1-9 and 33-41) to the terminal.
        if ((i.lt.10) .or. (i.lt.42 .and. i .gt. 32)) print *,i,c(i)
        write(dump_unit,*) c(i)
        if ((c(i).lt.0.0) .or. (c(i).gt.1.0)) passing = .false.
      end do
      close(dump_unit)
      rmean = sum(c)/n
      print *,"Mean is ",rmean
      if ((rmean .lt. 0.49) .or. (rmean .gt. 0.51)) passing = .false.

      ! ---- normal samples ----
      c = b
      print *,"Should be normal around 0.0"
      nc1 = 0
      nc2 = 0
      open (unit=dump_unit, file='b.txt')
      do i = 1, n
        if ((i.lt.10) .or. (i.lt.42 .and. i .gt. 32)) print *,i,c(i)
        write(dump_unit,*) c(i)
        if ((c(i) .gt. -4.0) .and. (c(i) .lt. 0.0)) nc1 = nc1 + 1
        if ((c(i) .gt. 0.0) .and. (c(i) .lt. 4.0)) nc2 = nc2 + 1
      end do
      close(dump_unit)
      print *,"Found on each side of zero ",nc1,nc2
      ! Fixed: this used to assign to the misspelled name `npassing`, so an
      ! unbalanced distribution never failed the test.
      if (abs(nc1-nc2) .gt. (n/10)) passing = .false.

      ! The first mean ignores outliers beyond +/-4 and is informational
      ! only; the pass/fail decision uses the mean over all samples.
      rmean = sum(c,mask=abs(c).lt.4.0)/n
      print *,"Mean is ",rmean
      rmean = sum(c)/n
      print *,"Mean is ",rmean
      if ((rmean .lt. -0.1) .or. (rmean .gt. 0.1)) passing = .false.

      if (passing) then
        print *,"Test SUCCESS"
      else
        print *,"Test FAILED"
      endif
    end subroutine subcheck

  end subroutine check

end module mtests
!> Driver: initialises the generators, draws samples on the device and
!! checks them on the host, first with one thread and then with 32 threads
!! in a single block.
program t
  use cudafor
  use mtests
  implicit none

  ! Sequence and offset handed to curand_init. Change them here; the printed
  ! label follows automatically.
  integer(8), parameter :: seq_value = 0_8, offset_value = 0_8

  integer(8), device :: seq, offset
  real, device :: a_d(3*n), b_d(3*n)
  real :: a(3*n), b(3*n)

  a_d = 0.0
  b_d = 0.0
  seq = seq_value
  offset = offset_value
  allocate(h1(n), h2(n), h3(n))

  call run_case(1)
  call run_case(32)

  deallocate(h1, h2, h3)

contains

  !> Initialise, sample and check using one block of `nthreads` threads.
  subroutine run_case(nthreads)
    integer, intent(in) :: nthreads

    ! Same text as the former hard-coded labels (e.g. " <<<1,1>>> (0, 0)"),
    ! but built from the values actually in use.
    print '(1x,"<<<1,",i0,">>> (",i0,", ",i0,")")', nthreads, seq_value, offset_value

    call init1<<<1,nthreads>>> (seq, offset)
    call check_launch('init1')
    call test1<<<1,nthreads>>> (a_d, b_d)
    call check_launch('test1')

    a = a_d
    b = b_d
    call check(a, b)
  end subroutine run_case

  !> Stop with a message if the preceding kernel failed to launch or run.
  !! Without this, a failed kernel would silently leave the sample buffers
  !! untouched and the statistics below would be meaningless.
  subroutine check_launch(label)
    character(len=*), intent(in) :: label
    integer :: istat

    istat = cudaGetLastError()
    if (istat == cudaSuccess) istat = cudaDeviceSynchronize()
    if (istat /= cudaSuccess) then
      print *, "Kernel ", label, " failed: ", trim(cudaGetErrorString(istat))
      stop 1
    end if
  end subroutine check_launch

end program t
If seq is set to 1, these are the numbers given by XORWOW when I use one seed and call it n times:
Should be uniform around 0.5
1 0.5458559
2 0.5461090
3 0.5461934
4 0.5464466
5 0.5465310
Should be normal around 0.0
1 -0.3137481
2 -1.054540
3 -0.3158224
4 -1.053334
5 -0.3178933
Philox4_32_10 and MRG32k3a both have problems as well.