The random sequence should be consistent whether the 10 numbers are generated at once or in separate calls.
test1
0: 1713891541
1: 3781805453
2: 3159862348
3: 2600524760
4: 2219120097
5: 4035800746
6: 253345875
7: 2214098416
8: 107075191
9: 3061999449
test2
0: 1713891541
1: 3781805453
2: 3159862348
3: 3159862348
4: 2600524760
5: 2219120097
6: 4035800746
7: 253345875
8: 2214098416
9: 107075191
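The sequences differ: in test2 the value 3159862348 is repeated at index 3 and every later value is shifted by one position. The test2 result should be the same as test1.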
cuRAND 10.1.2, CUDA 11.0.194, Windows 10, Quadro M1200
nvcc '.\curand_issue.cu' -arch=sm_50 -lcurand
.\a.exe
curand_issue.cu
#include <curand.h>
#include <iostream>
// Reference case: creates a fresh CURAND_RNG_PSEUDO_PHILOX4_32_10 generator
// (no seed or offset is set), draws all 10 values with a single curandGenerate
// call and prints them to std::cout as "index: value" lines.
//
// If any CUDA or cuRAND call fails, a diagnostic is written to std::cerr and no
// values are printed. The device buffer and the generator are released on
// every path.
void test1()
{
    using T = unsigned int;
    const int n = 10;

    // Report a failed CUDA runtime call; returns true on success.
    auto cuda_ok = [](cudaError_t err, const char* what) -> bool {
        if (err == cudaSuccess)
        {
            return true;
        }
        std::cerr << what << " failed: " << cudaGetErrorString(err) << std::endl;
        return false;
    };
    // Report a failed cuRAND call; returns true on success.
    auto curand_ok = [](curandStatus_t status, const char* what) -> bool {
        if (status == CURAND_STATUS_SUCCESS)
        {
            return true;
        }
        std::cerr << what << " failed with curandStatus_t "
                  << static_cast<int>(status) << std::endl;
        return false;
    };

    T h_data[n] = {};
    T* d_data = nullptr;
    curandGenerator_t generator = nullptr;

    // Each step only runs if every previous step succeeded.
    bool ok = cuda_ok(cudaMalloc(&d_data, sizeof(T) * n), "cudaMalloc");
    ok = ok && curand_ok(curandCreateGenerator(&generator, CURAND_RNG_PSEUDO_PHILOX4_32_10),
                         "curandCreateGenerator");
    ok = ok && curand_ok(curandGenerate(generator, d_data, n), "curandGenerate");
    ok = ok && cuda_ok(cudaMemcpy(h_data, d_data, sizeof(T) * n, cudaMemcpyDeviceToHost),
                       "cudaMemcpy");

    if (ok)
    {
        for (int i = 0; i < n; i++)
        {
            std::cout << i << ": " << h_data[i] << std::endl;
        }
    }

    // The original never destroyed the generator, leaking it on every call.
    if (generator != nullptr)
    {
        curandDestroyGenerator(generator);
    }
    cudaFree(d_data); // a null pointer is accepted and ignored
}
// Split case: creates a fresh CURAND_RNG_PSEUDO_PHILOX4_32_10 generator (no
// seed or offset is set) and draws the same 10 values as three consecutive
// curandGenerate calls of 1, 2 and 7 elements. After each call the freshly
// generated chunk is copied back and printed to std::cout as "index: value",
// where index is the running position in the overall sequence.
//
// If any CUDA or cuRAND call fails, a diagnostic is written to std::cerr and
// printing stops at that point. The device buffer and the generator are
// released on every path.
void test2()
{
    using T = unsigned int;
    const int n = 10;
    // Sizes of the successive requests; they add up to n, so the n-element
    // buffers can hold any single chunk.
    const int chunk_sizes[] = {1, 2, 7};

    // Report a failed CUDA runtime call; returns true on success.
    auto cuda_ok = [](cudaError_t err, const char* what) -> bool {
        if (err == cudaSuccess)
        {
            return true;
        }
        std::cerr << what << " failed: " << cudaGetErrorString(err) << std::endl;
        return false;
    };
    // Report a failed cuRAND call; returns true on success.
    auto curand_ok = [](curandStatus_t status, const char* what) -> bool {
        if (status == CURAND_STATUS_SUCCESS)
        {
            return true;
        }
        std::cerr << what << " failed with curandStatus_t "
                  << static_cast<int>(status) << std::endl;
        return false;
    };

    T h_data[n] = {};
    T* d_data = nullptr;
    curandGenerator_t generator = nullptr;

    bool ok = cuda_ok(cudaMalloc(&d_data, sizeof(T) * n), "cudaMalloc");
    ok = ok && curand_ok(curandCreateGenerator(&generator, CURAND_RNG_PSEUDO_PHILOX4_32_10),
                         "curandCreateGenerator");

    int total = 0; // number of values printed so far
    for (int num : chunk_sizes)
    {
        if (!ok)
        {
            break;
        }
        // Every chunk is written to the start of d_data, overwriting the
        // previous one; only the num fresh elements are copied back.
        ok = curand_ok(curandGenerate(generator, d_data, num), "curandGenerate") &&
             cuda_ok(cudaMemcpy(h_data, d_data, sizeof(T) * num, cudaMemcpyDeviceToHost),
                     "cudaMemcpy");
        if (!ok)
        {
            break;
        }
        for (int i = 0; i < num; i++)
        {
            std::cout << i + total << ": " << h_data[i] << std::endl;
        }
        total += num;
    }

    // The original never destroyed the generator, leaking it on every call.
    if (generator != nullptr)
    {
        curandDestroyGenerator(generator);
    }
    cudaFree(d_data); // a null pointer is accepted and ignored
}
// Entry point: runs the two scenarios in order, printing each scenario's label
// on its own line before invoking it.
int main()
{
    struct Scenario
    {
        const char* label;
        void (*run)();
    };
    const Scenario scenarios[] = {
        {"test1", test1},
        {"test2", test2},
    };

    for (const Scenario& scenario : scenarios)
    {
        std::cout << scenario.label << std::endl;
        scenario.run();
    }
    return 0;
}