When seq and offset are not zero, the distributions curand given are error

I build and run trand2.cuf in NVHPC/Linux_x86_64/22.2/examples/CUDA-Libraries/cuRAND/test_rand_cuf. And it Test Passed.
But I change seq or offect from 0 to 1 and then it Test Failed.

Then I do more test, some of them are test failed everytime.

module mtests
  use curand_device
  integer, parameter :: n = 10000
  type(curandStateXORWOW), allocatable, device, target :: h1(:)
  type(curandStatePhilox4_32_10), allocatable, device, target :: h2(:)
  type(curandStateMRG32k3a), allocatable, device, target :: h3(:)
contains
attributes(global) subroutine init1(seq, offset)
  integer(8), intent(in) :: seq, offset
  integer(8) :: seed
  integer :: iam
  iam = threadIdx%x
  seed = iam*121 + 12345
  call curand_init(seed, seq, offset, h1(iam))
  call curand_init(seed, seq, offset, h2(iam))
  call curand_init(seed, seq, offset, h3(iam))
  return
end subroutine

attributes(global) subroutine test1(a, b)
  real, device :: a(3*n), b(3*n)
  integer :: iam
  iam = threadIdx%x
  do i = iam, n, blockdim%x
    a(i) = curand_uniform(h1(iam))
    a(i+n) = curand_uniform(h2(iam))
    a(i+2*n) = curand_uniform(h3(iam))
    b(i) = curand_normal(h1(iam))
    b(i+n) = curand_normal(h2(iam))
    b(i+2*n) = curand_normal(h3(iam))
  end do
  return
end subroutine

subroutine check(a, b)
  real, intent(in) :: a(3*n), b(3*n)
  logical :: passing

  passing = .true.; call subcheck(a(1:n), b(1:n))
  passing = .true.; call subcheck(a(n+1:2*n), b(n+1:2*n))
  passing = .true.; call subcheck(a(2*n+1:3*n), b(2*n+1:3*n))
contains
subroutine subcheck(a, b)
  real, intent(in) :: a(n), b(n)
  real :: c(n), rmean

  c = a; print *,"Should be uniform around 0.5"
  open (unit=10, file='a.txt')
  do i = 1, n
    if ((i.lt.10) .or. (i.lt.42 .and. i .gt. 32)) print *,i,c(i)
    write(10,*) c(i)
    if ((c(i).lt.0.0) .or. (c(i).gt.1.0)) passing = .false.
  end do
  close(10)
  rmean = sum(c)/n; print *,"Mean is ",rmean
  if ((rmean .lt. 0.49) .or. (rmean .gt. 0.51)) passing = .false.

  c = b; print *,"Should be normal around 0.0"
  nc1 = 0; nc2 = 0
  open (unit=10, file='b.txt')
  do i = 1, n
    if ((i.lt.10) .or. (i.lt.42 .and. i .gt. 32)) print *,i,c(i)
    write(10,*) c(i)
    if ((c(i) .gt. -4.0) .and. (c(i) .lt. 0.0)) nc1 = nc1 + 1
    if ((c(i) .gt.  0.0) .and. (c(i) .lt. 4.0)) nc2 = nc2 + 1
  end do
  close(10)
  print *,"Found on each side of zero ",nc1,nc2
  if (abs(nc1-nc2) .gt. (n/10)) npassing = .false.
  rmean = sum(c,mask=abs(c).lt.4.0)/n; print *,"Mean is ",rmean
  rmean = sum(c)/n; print *,"Mean is ",rmean
  if ((rmean .lt. -0.1) .or. (rmean .gt. 0.1)) passing = .false.

  if (passing) then
    print *,"Test SUCCESS"
  else
    print *,"Test FAILED"
  endif
end subroutine
end subroutine
end module mtests

program t
  use mtests
  integer(8), device :: seq, offset
  real, device :: a_d(3*n), b_d(3*n)
  real :: a(3*n), b(3*n)
  a_d = 0.0; b_d = 0.0; seq = 0; offset = 0

  allocate(h1(n), h2(n), h3(n))
  print*,"<<<1,1>>> (0, 0)"
  call init1<<<1,1>>> (seq, offset)
  call test1<<<1,1>>> (a_d, b_d)
  a=a_d; b=b_d; call check(a, b)

  print*,"<<<1,32>>> (0, 0)"
  call init1<<<1,32>>> (seq, offset)
  call test1<<<1,32>>> (a_d, b_d)
  a=a_d; b=b_d; call check(a, b)
end

And if seq is set to 1. Numbers given by XORWOW when I use one seed and call n times:
Should be uniform around 0.5
1 0.5458559
2 0.5461090
3 0.5461934
4 0.5464466
5 0.5465310
Should be normal around 0.0
1 -0.3137481
2 -1.054540
3 -0.3158224
4 -1.053334
5 -0.3178933

Philox4_32_10 and MRG32k3a both have some problems, too.

I will have to open a bug on this. It looks to be an error that was introduced in how we build our Fortran device runtime for curand calls.

Thanks.

I do another test for Philox4_32_10. The seq and offect are both zero this time, and I find two problems.
First, Philox4_32_10 should give me the pseudorandom sequences, but it give me different sequences.
Second, some seeds for Philox4_32_10 will also give the non random sequences, such as 9280*64+12345.
curand_uniform() show me the same number and curand_normal() gives a sequence of two numbers after few random numbers.

The code and output is below.

module mtests
  use curand_device
  integer, parameter :: n = 10000
  type(curandStatePhilox4_32_10), device :: h(n)
contains
attributes(global) subroutine curandinit(seq, offset)
  integer(8), intent(in) :: seq, offset
  integer(8) :: seed
  integer :: iam
  iam = (blockidx%x-1)*blockdim%x+threadidx%x
  seed = iam*64 + 12345; if (iam > n) return
  call curand_init(seed, seq, offset, h(iam))
end subroutine
attributes(global) subroutine testany(a,b)
  real, device :: a(n), b(n)
  integer :: iam
  iam = (blockidx%x-1)*blockdim%x+threadidx%x
  if (iam > n) return
  a(iam) = curand_uniform(h(iam))
  b(iam) = curand_normal(h(iam))
  return
end subroutine
end module mtests

program t
  use mtests
  real, device :: a(n,n), b(n,n)
  real c(n,n), rmean
  logical passing
  integer(8), device :: seq, offset
  seq = 0; offset = 0; a = 0.0; b = 0.0
  call curandinit<<<int((n-1)/32+1),32>>>(seq, offset)
  do i=1,n
    call testany<<<int((n-1)/32+1),32>>> (a(:,i),b(:,i))
  end do
  print*, "Philox4_32_10; seed=9280*64+12345; seq=0; offset=0"
  c = a; print*, "curand_uniform():"
  print*, c(9280,1:16)
  c = b; print*, "curand_normal()"
  print*, c(9280,1:16)
end

Output:

 Philox4_32_10; seed=9280*64+12345; seq=0; offset=0
 curand_uniform():
   1.1641532E-10   5.3212352E-02   9.6049003E-02   9.6049003E-02
   9.6049003E-02   9.6049003E-02   9.6049003E-02   9.6049003E-02
   9.6049003E-02   9.6049003E-02   9.6049003E-02   9.6049003E-02
   9.6049003E-02   9.6049003E-02   9.6049003E-02   9.6049003E-02
 curand_normal()
    1.587809        6.574692        1.228498        1.782297
    1.228498        1.782297        1.228498        1.782297
    1.228498        1.782297        1.228498        1.782297
    1.228498        1.782297        1.228498        1.782297

or

 Philox4_32_10; seed=9280*64+12345; seq=0; offset=0
 curand_uniform():
   1.1641532E-10   5.3212352E-02   9.6049003E-02   0.3583073
   0.6605009       0.8449480       0.3111651       0.1627964
   0.6180061       0.6180061       0.6180061       0.6180061
   0.6180061       0.6180061       0.6180061       0.6180061
 curand_normal()
    1.587809        6.574692       0.3850557       0.7791733
  -0.3509679       0.1806471      -0.4914827      -0.8889445
  -0.6625820      -0.7235323      -0.6625820      -0.7235323
  -0.6625820      -0.7235323      -0.6625820      -0.7235323

iam = 4000, 6144… have the similar non random sequences.

btw, I could not call curand_normal2 or 4 and the errors is below. They are both device api in cuRand Document but error message regard them as host functions.
Is it bug or should I call them in emulation mode?

NVFORTRAN-S-0155-Calls from device code to a host function are allowed only in emulation mode - curand_uniform4
NVFORTRAN-S-0155-Calls from device code to a host function are allowed only in emulation mode - curand_normal2

I think it may be caused because I use ‘do’ in host code, so I edit the code.
Even if the program do the cycle in device code, it stilll has above problem. but non random sequence begins from the about 20th or 30th number.

Here is how I check the sequences.
I save the two n*n matrixs and load them to matlab, and then get the average(mu and sigma if curand_normal) of every column or line. At last, get the max or min data and their index of averages.

It’s possible there is a bug, we’ve opened a ticket

This was not a problem in the curand implementation itself, just in how the Fortran runtime calls into it. This has been addressed, and the fix will be in our next NVHPC release, version 22.3. Thanks for the bug report.

Thank you.

There is a new problem in 22.3 about Philox4_32_10 and it doesn’t occur in 22.2. Could you help me to find the reason? Thanks.
I Creat a new topic for it. Error when call curand_init() with the even-th elements of a philox type array in NVHPC 22.3 - Accelerated Computing / GPU-Accelerated Libraries - NVIDIA Developer Forums

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.