# Consistency Problem with the cuRAND Philox_4x32_10 Pseudorandom Generator

The random sequence should be the same whether the 10 numbers are generated in a single call or across several separate calls.

test1
0: 1713891541
1: 3781805453
2: 3159862348
3: 2600524760
4: 2219120097
5: 4035800746
6: 253345875
7: 2214098416
8: 107075191
9: 3061999449
test2
0: 1713891541
1: 3781805453
2: 3159862348
3: 3159862348
4: 2600524760
5: 2219120097
6: 4035800746
7: 253345875
8: 2214098416
9: 107075191

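The sequences differ: in test2 the value 3159862348 appears twice (at indices 2 and 3), and every later value is shifted by one position. The test2 result should be the same as the test1 result.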
Environment: cuRAND 10.1.2, CUDA 11.0.194, Windows 10, Quadro M1200.
nvcc '.\curand_issue.cu' -arch=sm_50 -lcurand
.\a.exe

curand_issue.cu

```cpp
#include <curand.h>
#include <iostream>
void test1()
{
using T     = unsigned int;
const int n = 10;
T *       h_data, *d_data;
h_data = new T[n];
cudaMalloc(&d_data, sizeof(T) * n);

curandGenerator_t generator;
curandRngType_t   rng_type = CURAND_RNG_PSEUDO_PHILOX4_32_10;
curandCreateGenerator(&generator, rng_type);

int total = 0;
int num   = 10;
curandGenerate(generator, d_data, num);
cudaMemcpy(h_data, d_data, sizeof(T) * n, cudaMemcpyDeviceToHost);
for (int i = 0; i < num; i++)
{
std::cout << i + total << ": " << h_data[i] << std::endl;
}
total += num;

cudaFree(d_data);
delete[] h_data;
}

void test2()
{
using T     = unsigned int;
const int n = 10;
T *       h_data, *d_data;
h_data = new T[n];
cudaMalloc(&d_data, sizeof(T) * n);

curandGenerator_t generator;
curandRngType_t   rng_type = CURAND_RNG_PSEUDO_PHILOX4_32_10;
curandCreateGenerator(&generator, rng_type);

int total = 0;
int num   = 1;
curandGenerate(generator, d_data, num);
cudaMemcpy(h_data, d_data, sizeof(T) * n, cudaMemcpyDeviceToHost);
for (int i = 0; i < num; i++)
{
std::cout << i + total << ": " << h_data[i] << std::endl;
}
total += num;

num = 2;
curandGenerate(generator, d_data, num);
cudaMemcpy(h_data, d_data, sizeof(T) * n, cudaMemcpyDeviceToHost);
for (int i = 0; i < num; i++)
{
std::cout << i + total << ": " << h_data[i] << std::endl;
}
total += num;

num = 7;
curandGenerate(generator, d_data, num);
cudaMemcpy(h_data, d_data, sizeof(T) * n, cudaMemcpyDeviceToHost);
for (int i = 0; i < num; i++)
{
std::cout << i + total << ": " << h_data[i] << std::endl;
}
total += num;

cudaFree(d_data);
delete[] h_data;
}

// Entry point: prints a label line and runs test1, then prints a label line
// and runs test2, so the two printed sequences can be compared side by side.
int main()
{
// Label, then the sequence printed by test1.
std::cout << "test1" << std::endl;
test1();
// Label, then the sequence printed by test2.
std::cout << "test2" << std::endl;
test2();
}``````