Hi,
We have recently got the new conformant OpenCL SDK and we have tried to convert the CUDA SDK Mersenne Twister algorithm and we have encountered a weird error. Where screen is garbled and cannot be taken back, the only solution is to restart the Windows Vista x64. (BTW : We are not registered users :) )
We are giving the following “problematic” code.
#define MT_RNG_COUNT 4096
#define MT_MM 9
#define MT_NN 19
#define MT_WMASK 0xFFFFFFFFU
#define MT_UMASK 0xFFFFFFFEU
#define MT_LMASK 0x1U
#define MT_SHIFT0 12
#define MT_SHIFTB 7
#define MT_SHIFTC 15
#define MT_SHIFT1 18
typedef struct
{
unsigned int matrix_a;
unsigned int mask_b;
unsigned int mask_c;
unsigned int seed;
} mt_struct_stripped;
// OpenCL Kernel Function for element by element vector addition
__kernel void Rand(__global float *d_Random, __constant mt_struct_stripped* ds_MT, __global int NPerRng)
{
const int tid = get_global_id(0);
const int THREAD_N = get_global_size(0);
unsigned int mt[MT_NN];
int iState, iState1, iStateM, iOut, iRng;
unsigned int mti, mti1, mtiM, x;
//Initialize current state
for(iRng = tid; iRng < MT_RNG_COUNT; iRng += THREAD_N)
{
mt_struct_stripped config = ds_MT[iRng];
mt[0] = config.seed;
for(iState = 1; iState < MT_NN; iState++)
mt[iState] = (1812433253U * (mt[iState - 1] ^ (mt[iState - 1] >> 30)) + iState) & MT_WMASK;
iState = 0;
mti1 = mt[0];
for(int iOut = 0; iOut < NPerRng; iOut++)
{
iState1 = iState + 1;
iStateM = iState + MT_MM;
if(iState1 >= MT_NN) iState1 -= MT_NN;
if(iStateM >= MT_NN) iStateM -= MT_NN;
mti = mti1;
mti1 = mt[iState1];
mtiM = mt[iStateM];
x = (mti & MT_UMASK) | (mti1 & MT_LMASK);
x = mtiM ^ (x >> 1) ^ ((x & 1) ? config.matrix_a : 0);
mt[iState] = x;
iState = iState1;
//Tempering transformation
x ^= (x >> MT_SHIFT0);
x ^= (x << MT_SHIFTB) & config.mask_b;
x ^= (x << MT_SHIFTC) & config.mask_c;
x ^= (x >> MT_SHIFT1);
//Convert to (0, 1] float and write to global memory
d_Random[iRng + iOut * MT_RNG_COUNT] = ((float)x + 1.0f) / 4294967296.0f;
}
}
}
We have found that this line of code causes the error (we have a workaround for it)
x = mtiM ^ (x >> 1) ^ ((x & 1) ? config.matrix_a : 0);
The following code is safe but “a lot” slower.
x = mtiM ^ (x >> 1) ^ ((x & 1) ? ds_MT[iRng].matrix_a : 0);
Any comments on this subject may be useful.
Thanks in advance.