Hi !
Trying to use the new MRG32k3a,curand_init causes unspecified launch failure.
Studyiing the code, I found that the scratch pointer of curandStateMRG32k3a_t is used before being set:
void curand_init(unsigned long long seed,
unsigned long long subsequence,
unsigned long long offset,
curandStateMRG32k3a_t *state)
{
int i;
for ( i=0; i<3; i++ ) {
state->s1[i] = 12345.;
state->s2[i] = 12345.;
}
if (seed != 0ull) {
unsigned int x1 = ((unsigned int)seed) ^ 0x55555555UL;
unsigned int x2 = (unsigned int)((seed >> 32) ^ 0xAAAAAAAAUL);
state->s1[0] = curand_MRGmodMul(x1, state->s1[0], MRG32K3A_MOD1);
state->s1[1] = curand_MRGmodMul(x2, state->s1[1], MRG32K3A_MOD1);
state->s1[2] = curand_MRGmodMul(x1, state->s1[2], MRG32K3A_MOD1);
state->s2[0] = curand_MRGmodMul(x2, state->s2[0], MRG32K3A_MOD2);
state->s2[1] = curand_MRGmodMul(x1, state->s2[1], MRG32K3A_MOD2);
state->s2[2] = curand_MRGmodMul(x2, state->s2[2], MRG32K3A_MOD2);
}
skipahead_subsequence( subsequence, state );
skipahead( offset, state );
}
void skipahead_subsequence(unsigned long long n, curandStateMRG32k3a_t *state)
{
double t[3][3];
struct sMRG32k3aSkipSubSeq * pSkip;
pSkip = ((curandMRG32k3aPtrs_t *)state->scratch)->subSeqM1;
curand_MRGmatPow3x3( pSkip->m, t, MRG32K3A_MOD1, n);
curand_MRGmatVecMul3x3( t, state->s1, MRG32K3A_MOD1);
pSkip = ((curandMRG32k3aPtrs_t *)state->scratch)->subSeqM2;
curand_MRGmatPow3x3( pSkip->m, t, MRG32K3A_MOD2, n);
curand_MRGmatVecMul3x3( t, state->s2, MRG32K3A_MOD2);
}
changing curand_init to
void curand_init(unsigned long long seed,
unsigned long long subsequence,
unsigned long long offset,
curandStateMRG32k3a_t *state)
{
int i;
for ( i=0; i<3; i++ ) {
state->s1[i] = 12345.;
state->s2[i] = 12345.;
}
if (seed != 0ull) {
unsigned int x1 = ((unsigned int)seed) ^ 0x55555555UL;
unsigned int x2 = (unsigned int)((seed >> 32) ^ 0xAAAAAAAAUL);
state->s1[0] = curand_MRGmodMul(x1, state->s1[0], MRG32K3A_MOD1);
state->s1[1] = curand_MRGmodMul(x2, state->s1[1], MRG32K3A_MOD1);
state->s1[2] = curand_MRGmodMul(x1, state->s1[2], MRG32K3A_MOD1);
state->s2[0] = curand_MRGmodMul(x2, state->s2[0], MRG32K3A_MOD2);
state->s2[1] = curand_MRGmodMul(x1, state->s2[1], MRG32K3A_MOD2);
state->s2[2] = curand_MRGmodMul(x2, state->s2[2], MRG32K3A_MOD2);
}
curandMRG32k3aPtrs_t scratch;
sMRG32k3aSkipUnits_t unitsM1;
sMRG32k3aSkipUnits_t unitsM2;
sMRG32k3aSkipSubSeq_t subSeqM1;
sMRG32k3aSkipSubSeq_t subSeqM2;
sMRG32k3aSkipSeq_t seqM1;
sMRG32k3aSkipSeq_t seqM2;
scratch.seqM1 = &seqM1;
scratch.seqM2 = &seqM2;
scratch.subSeqM1 = &subSeqM1;
scratch.subSeqM2 = &subSeqM2;
scratch.unitsM2 = &unitsM2;
scratch.unitsM1 = &unitsM1;
state->scratch = &scratch;
skipahead_subsequence( subsequence, state );
skipahead( offset, state );
}
solve the problem.
However, by reading the rest of the code, it seems that the temporaty elements that I allocate are too big and are not completely used.
Hence, I think that the solution intended by the author of this code is different.
Can you provide me with the good usage of this PRNG ?
Thank you.
Reagrds,
Wilfried K.