Consistency problem with the cuRAND Philox4_32_10 pseudo-random generator

The random sequence should be the same whether the 10 numbers are generated in a single call (test1) or across several consecutive calls on the same generator (test2).

test1
0: 1713891541
1: 3781805453
2: 3159862348
3: 2600524760
4: 2219120097
5: 4035800746
6: 253345875
7: 2214098416
8: 107075191
9: 3061999449
test2
0: 1713891541
1: 3781805453
2: 3159862348
3: 3159862348
4: 2600524760
5: 2219120097
6: 4035800746
7: 253345875
8: 2214098416
9: 107075191

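The sequences differ: in test2 the value 3159862348 appears twice (indices 2 and 3), and every later value is shifted by one position relative to test1. The test2 output should be identical to the test1 output.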
Environment: cuRAND 10.1.2, CUDA 11.0.194, Windows 10, Quadro M1200
nvcc '.\curand_issue.cu' -arch=sm_50 -lcurand
.\a.exe

curand_issue.cu

#include <curand.h>
#include <iostream>
void test1()
{
    using T     = unsigned int;
    const int n = 10;
    T *       h_data, *d_data;
    h_data = new T[n];
    cudaMalloc(&d_data, sizeof(T) * n);

    curandGenerator_t generator;
    curandRngType_t   rng_type = CURAND_RNG_PSEUDO_PHILOX4_32_10;
    curandCreateGenerator(&generator, rng_type);

    int total = 0;
    int num   = 10;
    curandGenerate(generator, d_data, num);
    cudaMemcpy(h_data, d_data, sizeof(T) * n, cudaMemcpyDeviceToHost);
    for (int i = 0; i < num; i++)
    {
        std::cout << i + total << ": " << h_data[i] << std::endl;
    }
    total += num;

    cudaFree(d_data);
    delete[] h_data;
}

void test2()
{
    using T     = unsigned int;
    const int n = 10;
    T *       h_data, *d_data;
    h_data = new T[n];
    cudaMalloc(&d_data, sizeof(T) * n);

    curandGenerator_t generator;
    curandRngType_t   rng_type = CURAND_RNG_PSEUDO_PHILOX4_32_10;
    curandCreateGenerator(&generator, rng_type);

    int total = 0;
    int num   = 1;
    curandGenerate(generator, d_data, num);
    cudaMemcpy(h_data, d_data, sizeof(T) * n, cudaMemcpyDeviceToHost);
    for (int i = 0; i < num; i++)
    {
        std::cout << i + total << ": " << h_data[i] << std::endl;
    }
    total += num;

    num = 2;
    curandGenerate(generator, d_data, num);
    cudaMemcpy(h_data, d_data, sizeof(T) * n, cudaMemcpyDeviceToHost);
    for (int i = 0; i < num; i++)
    {
        std::cout << i + total << ": " << h_data[i] << std::endl;
    }
    total += num;

    num = 7;
    curandGenerate(generator, d_data, num);
    cudaMemcpy(h_data, d_data, sizeof(T) * n, cudaMemcpyDeviceToHost);
    for (int i = 0; i < num; i++)
    {
        std::cout << i + total << ": " << h_data[i] << std::endl;
    }
    total += num;

    cudaFree(d_data);
    delete[] h_data;
}

/**
 * Runs test1 and test2 in that order, printing each test's name on its own
 * line before running it.
 */
int main()
{
    struct Case
    {
        const char *label;
        void (*run)();
    };

    const Case cases[] = {
        {"test1", test1},
        {"test2", test2},
    };

    for (const Case &c : cases)
    {
        std::cout << c.label << std::endl;
        c.run();
    }

    return 0;
}