/*
 * Initialise one cuRAND generator state per launched thread.
 *
 * Launch layout: 1D grid of 1D blocks. `state` must hold at least
 * gridDim.x * blockDim.x elements, because each thread owns state[id].
 *
 * All threads share the seed 1234ULL but use their flat global index as the
 * sequence number, so every thread gets its own subsequence. N (defined
 * elsewhere in the file) is passed as the offset into that subsequence.
 */
__global__ void setup_kernel(curandState *state)
{
    /* Flat global thread index. The previous threadIdx.x * blockIdx.x was not
       unique: all of block 0 and every thread 0 collapsed onto index 0. */
    const int id = blockIdx.x * blockDim.x + threadIdx.x;
    curand_init(1234ULL, id, N, &state[id]);
}

/*
 * Fill tabDev[0..N) with uniformly distributed floats in (0.0, 1.0].
 *
 * Launch layout: 1D grid of 1D blocks, same configuration as the
 * setup_kernel launch that initialised `state`, so that state[id] is valid
 * for every thread. `tabDev` must hold at least N floats.
 *
 * The N outputs are split across the threads, each thread writing the
 * elements id, id + stride, id + 2*stride, ... so no two threads write the
 * same element.
 */
__global__ void rnd_kernel(curandState *state, float *tabDev)
{
    const int id = blockIdx.x * blockDim.x + threadIdx.x;
    const int stride = gridDim.x * blockDim.x;

    /* Work on a local copy of the generator state and store it back once at
       the end, so that a later launch continues the sequence. */
    curandState localState = state[id];

    for (int i = id; i < N; i += stride)
    {
        /* curand() yields raw 32-bit unsigned integers; curand_uniform()
           yields a float in (0.0, 1.0]. */
        tabDev[i] = curand_uniform(&localState);
    }

    state[id] = localState;
}
And here is my problem. With each run of the program I get the same results. Obviously this comes from the definition of the seed, which does not change between runs of my program. In normal C/C++ I would use the time as my seed, which gives me different results with each run. So is there any way to pass the time as an argument to the curand_init function? Of course I can tamper with the sequence number and offset, but that won't do the trick I need, especially if I need to loop my kernel about 10k times.
Also, I'm not quite sure whether this is correct, but my results look like 125486578,0000… The specification tells me that I should get “random” numbers between 0.0 and 1.0, excluding 0.0 or 1.0.
Now change the kernel to use the variable seconds instead of 1234ULL:
/*
 * Initialise one cuRAND generator state per launched thread, seeded from a
 * caller-supplied value (e.g. the host's wall-clock time in seconds) so that
 * separate program runs produce different sequences.
 *
 * Launch layout: 1D grid of 1D blocks. `state` must hold at least
 * gridDim.x * blockDim.x elements, because each thread owns state[id].
 *
 * state   - device array of generator states to initialise.
 * seconds - seed value; all threads share it and are separated by using
 *           their flat global index as the sequence number.
 */
__global__ void setup_kernel(curandState *state, int seconds)
{
    /* Flat global thread index; threadIdx.x * blockIdx.x is not unique. */
    const int id = blockIdx.x * blockDim.x + threadIdx.x;
    /* N (defined elsewhere in the file) is passed as the sequence offset. */
    curand_init((unsigned int) seconds, id, N, &state[id]);
}
Please give more details about how you use the random numbers. It makes a difference whether you generate the random numbers on the GPU and use them later on the CPU, or generate them to be used directly inside a kernel.
/*
 * Constants and state for the multiplicative congruential generator
 * implemented by randf(): seed is advanced as
 * (MULTIPLIER * seed) mod MODULUS.
 */
#define MODULUS 2147483647 /* DON'T CHANGE THIS VALUE */
#define MULTIPLIER 48271 /* use 16807 for the "minimal standard" */
/* NOTE(review): CHECK is not referenced by the code visible here; presumably
   a self-test reference value for the chosen MULTIPLIER - confirm. */
#define CHECK 399268537 /* use 1043616065 for the "minimal standard" */
#define DEFAULT 123456789 /* initial seed, use 0 < DEFAULT < MODULUS */
/* A single device-wide variable: every thread that calls randf() reads and
   writes this same value, and randf() uses no atomics or other
   synchronisation when doing so. */
__device__ long seed = DEFAULT; /* seed is the state of the generator */
/*
 * Advance the global generator state `seed` by one step and return it
 * scaled by MODULUS as a float.
 *
 * The update computes (MULTIPLIER * seed) mod MODULUS using Schrage's
 * decomposition (quotient/remainder of MODULUS by MULTIPLIER), which keeps
 * every intermediate product within the range of a 32-bit signed value.
 *
 * NOTE(review): `seed` is one device-wide variable that is read and written
 * here without synchronisation, so concurrent callers race on it and may
 * observe identical or interleaved values.
 */
inline __device__ float randf(void)
{
    const long quot = MODULUS / MULTIPLIER;
    const long rem = MODULUS % MULTIPLIER;

    /* Split the current state into its high and low parts relative to quot. */
    const long high = seed / quot;
    const long low = seed % quot;
    const long next = MULTIPLIER * low - rem * high;

    /* Fold a non-positive intermediate back into the range 1..MODULUS-1. */
    seed = (next > 0) ? next : next + MODULUS;

    return ((float) seed / MODULUS);
}
You are right, the numbers should be between 0 and 1. My first guess is that id is bigger than the size of tabDev. Alternatively, just compile the code with the -g -G flags and then run it with cuda-memcheck.