Passing generator state by pointer

Hi,

Is the following code correct when the random generator state is passed by pointer to the functions CalculateValue(curandState *localStat) and GetExponential(curandState *localState)?

Thanks

/*
 * Draws one exponentially distributed sample (unit rate) using the caller's
 * generator state.
 *
 * localState: pointer to the calling thread's cuRAND state. The draw advances
 *             the state in place, so the caller must keep the updated state.
 *
 * Returns -log(u) for a single uniform draw u (inverse-transform sampling).
 * A non-void function must return on every path; falling off the end is
 * undefined behaviour.
 */
__device__ double GetExponential(curandState *localState)
{
	/* curand_uniform_double yields values in (0, 1], so log(u1) is finite. */
	double u1 = curand_uniform_double(localState);

	return -log(u1);
}

/*
 * Produces one simulated value for the calling thread.
 *
 * localStat: pointer to the calling thread's cuRAND state; it is forwarded
 *            unchanged to GetExponential.
 *
 * Returns the value produced by GetExponential for that state.
 */
__device__  double CalculateValue(curandState *localStat)
{
	/* Forward the parameter itself: no `localState` is declared in this scope. */
	double x = GetExponential(localStat);

	return x;
}

/*
 * Kernel: every thread loads its own generator state, computes one value
 * through CalculateValue, stores it in `results`, and saves the advanced
 * state again.
 *
 * state:   per-thread cuRAND states, indexed by the flat global thread index.
 * results: output array, one double per thread, indexed the same way.
 *
 * There is no bounds check, so both arrays need at least
 * gridDim.x * blockDim.x elements.
 */
__global__ void RunMonteCarloKernel(curandState *state, double *results)
{
	const int tid = blockIdx.x * blockDim.x + threadIdx.x;

	/* Work on a thread-local copy of the state instead of the global-memory entry */
	curandState rng = state[tid];

	const double value = CalculateValue(&rng);
	results[tid] = value;

	/* Write the advanced state back so the next launch continues the sequence */
	state[tid] = rng;
}

/*
 * Kernel: initialises one cuRAND generator state per thread.
 *
 * state: array of generator states indexed by the flat global thread index.
 *        There is no bounds check, so it needs at least
 *        gridDim.x * blockDim.x elements.
 */
__global__ void setup_kernel(curandState *state)
{
	const int tid = blockIdx.x * blockDim.x + threadIdx.x;
	curandState *slot = state + tid;

	/* The thread index serves as both seed and sequence number; the offset is 0 */
	curand_init(tid, tid, 0, slot);
}

/*
 * Host entry point: allocates the per-thread generator states and the result
 * buffer on the device, initialises the generators, then launches the Monte
 * Carlo kernel 1000 times.
 *
 * totalThreads, totalBlocks, threadsPerBlock and CUDA_CALL are defined outside
 * this function. The buffer sizes assume
 * totalThreads == totalBlocks * threadsPerBlock -- TODO confirm against their
 * definitions, since the kernels do no bounds checking.
 */
int main(void)
{
	double *devResults;
	curandState *devStates;

	/* Allocate space for prng states on device */
	CUDA_CALL(cudaMalloc((void **)&devStates, totalThreads * sizeof(curandState)));

	/* Allocate space for the results: the kernel writes one double per thread */
	CUDA_CALL(cudaMalloc((void **)&devResults, totalThreads * sizeof(double)));

	/* Setup prng states */
	setup_kernel<<<totalBlocks, threadsPerBlock>>>(devStates);
	/* A launch returns no status itself; query it explicitly */
	CUDA_CALL(cudaGetLastError());

	for (int i = 0; i < 1000; i++)
	{
		/* A __global__ function has to be launched with <<<grid, block>>> */
		RunMonteCarloKernel<<<totalBlocks, threadsPerBlock>>>(devStates, devResults);
		CUDA_CALL(cudaGetLastError());
	}

	/* Wait for all queued launches and surface any execution error */
	CUDA_CALL(cudaDeviceSynchronize());

	/* Release device memory */
	CUDA_CALL(cudaFree(devResults));
	CUDA_CALL(cudaFree(devStates));

	return 0;
}

Hi,

Is the following code correct when the random generator state is passed by pointer to the functions CalculateValue(curandState *localStat) and GetExponential(curandState *localState)?

Thanks

/*
 * Draws one exponentially distributed sample (unit rate) using the caller's
 * generator state.
 *
 * localState: pointer to the calling thread's cuRAND state. The draw advances
 *             the state in place, so the caller must keep the updated state.
 *
 * Returns -log(u) for a single uniform draw u (inverse-transform sampling).
 * A non-void function must return on every path; falling off the end is
 * undefined behaviour.
 */
__device__ double GetExponential(curandState *localState)
{
	/* curand_uniform_double yields values in (0, 1], so log(u1) is finite. */
	double u1 = curand_uniform_double(localState);

	return -log(u1);
}

/*
 * Produces one simulated value for the calling thread.
 *
 * localStat: pointer to the calling thread's cuRAND state; it is forwarded
 *            unchanged to GetExponential.
 *
 * Returns the value produced by GetExponential for that state.
 */
__device__  double CalculateValue(curandState *localStat)
{
	/* Forward the parameter itself: no `localState` is declared in this scope. */
	double x = GetExponential(localStat);

	return x;
}

/*
 * Kernel: every thread loads its own generator state, computes one value
 * through CalculateValue, stores it in `results`, and saves the advanced
 * state again.
 *
 * state:   per-thread cuRAND states, indexed by the flat global thread index.
 * results: output array, one double per thread, indexed the same way.
 *
 * There is no bounds check, so both arrays need at least
 * gridDim.x * blockDim.x elements.
 */
__global__ void RunMonteCarloKernel(curandState *state, double *results)
{
	const int tid = blockIdx.x * blockDim.x + threadIdx.x;

	/* Work on a thread-local copy of the state instead of the global-memory entry */
	curandState rng = state[tid];

	const double value = CalculateValue(&rng);
	results[tid] = value;

	/* Write the advanced state back so the next launch continues the sequence */
	state[tid] = rng;
}

/*
 * Kernel: initialises one cuRAND generator state per thread.
 *
 * state: array of generator states indexed by the flat global thread index.
 *        There is no bounds check, so it needs at least
 *        gridDim.x * blockDim.x elements.
 */
__global__ void setup_kernel(curandState *state)
{
	const int tid = blockIdx.x * blockDim.x + threadIdx.x;
	curandState *slot = state + tid;

	/* The thread index serves as both seed and sequence number; the offset is 0 */
	curand_init(tid, tid, 0, slot);
}

/*
 * Host entry point: allocates the per-thread generator states and the result
 * buffer on the device, initialises the generators, then launches the Monte
 * Carlo kernel 1000 times.
 *
 * totalThreads, totalBlocks, threadsPerBlock and CUDA_CALL are defined outside
 * this function. The buffer sizes assume
 * totalThreads == totalBlocks * threadsPerBlock -- TODO confirm against their
 * definitions, since the kernels do no bounds checking.
 */
int main(void)
{
	double *devResults;
	curandState *devStates;

	/* Allocate space for prng states on device */
	CUDA_CALL(cudaMalloc((void **)&devStates, totalThreads * sizeof(curandState)));

	/* Allocate space for the results: the kernel writes one double per thread */
	CUDA_CALL(cudaMalloc((void **)&devResults, totalThreads * sizeof(double)));

	/* Setup prng states */
	setup_kernel<<<totalBlocks, threadsPerBlock>>>(devStates);
	/* A launch returns no status itself; query it explicitly */
	CUDA_CALL(cudaGetLastError());

	for (int i = 0; i < 1000; i++)
	{
		/* A __global__ function has to be launched with <<<grid, block>>> */
		RunMonteCarloKernel<<<totalBlocks, threadsPerBlock>>>(devStates, devResults);
		CUDA_CALL(cudaGetLastError());
	}

	/* Wait for all queued launches and surface any execution error */
	CUDA_CALL(cudaDeviceSynchronize());

	/* Release device memory */
	CUDA_CALL(cudaFree(devResults));
	CUDA_CALL(cudaFree(devStates));

	return 0;
}

The function GetExponential should have a return value. The function RunMonteCarloKernel is a global function, which means it should be called using the triple angle bracket syntax <<<>>>. The pass-by-pointer code looks OK ;)

As a performance note, the code has each thread calculate one output value. It will be much faster to loop and have each thread calculate many result values.

The function GetExponential should have a return value. The function RunMonteCarloKernel is a global function, which means it should be called using the triple angle bracket syntax <<<>>>. The pass-by-pointer code looks OK ;)

As a performance note, the code has each thread calculate one output value. It will be much faster to loop and have each thread calculate many result values.

Thanks. Very helpful.

Thanks. Very helpful.