// Mixes the pair (val0, val1) through N rounds of shift/add/xor arithmetic
// and returns the first mixed word. The name and the 0x9e3779b9 increment
// suggest a TEA-style (Tiny Encryption Algorithm) round function.
//
// N    - number of mixing rounds (compile-time).
// val0 - first input word; its mixed value is the return value.
// val1 - second input word.
// p    - debug flag; only referenced by the disabled trace lines below.
template<unsigned int N>
static __device__ __inline__ unsigned int tea( unsigned int val0, unsigned int val1, bool p=false )
{
    // Added to the running sum once per round.
    const unsigned int kDelta = 0x9e3779b9;

    unsigned int first  = val0;
    unsigned int second = val1;
    unsigned int sum    = 0;
    // if(p)printf("Iteration %u: v0 = %u, v1 = %u, s0 = %u\n", N, first, second, sum);

    unsigned int round = 0;
    while( round < N )
    {
        sum += kDelta;
        // Each word is updated from the other; the second update sees the
        // already-updated first word, so the order of these lines matters.
        first  += ((second << 4) + 0xa341316c) ^ (second + sum) ^ ((second >> 5) + 0xc8013ea4);
        second += ((first  << 4) + 0xad90777d) ^ (first  + sum) ^ ((first  >> 5) + 0x7e95761e);
        // if(p)printf("Iteration %u: v0 = %u, v1 = %u, s0 = %u\n", round, first, second, sum);
        ++round;
    }
    // if(p)printf("over %u: v0 = %u, v1 = %u, s0 = %u\n", N, first, second, sum);
    return first;
}
I call tea<16>(0,1). The CPU result is 1892921073, but in the raygen program it returns 4118247010. Then I turned on the printf calls, like this:
// Mixes the pair (val0, val1) through N rounds of shift/add/xor arithmetic
// and returns the first mixed word. The name and the 0x9e3779b9 increment
// suggest a TEA-style (Tiny Encryption Algorithm) round function.
//
// N    - number of mixing rounds (compile-time).
// val0 - first input word; its mixed value is the return value.
// val1 - second input word.
// p    - when true, prints the state before the loop, after every round,
//        and once more after the loop.
template<unsigned int N>
static __device__ __inline__ unsigned int tea( unsigned int val0, unsigned int val1,bool p=false )
{
    unsigned int v0 = val0;
    unsigned int v1 = val1;
    unsigned int s0 = 0;
    // All printed values are unsigned int, so every specifier is %u; %d would
    // show values >= 2^31 as negative numbers.
    if(p)printf("Iteration %u: v0 = %u, v1 = %u, s0 = %u\n", N,v0, v1, s0);
    for( unsigned int n = 0; n < N; n++ )
    {
        s0 += 0x9e3779b9;
        // v1's update reads the v0 written on the previous line, so the
        // order of these two statements matters.
        v0 += ((v1<<4)+0xa341316c)^(v1+s0)^((v1>>5)+0xc8013ea4);
        v1 += ((v0<<4)+0xad90777d)^(v0+s0)^((v0>>5)+0x7e95761e);
        if(p)printf("Iteration %u: v0 = %u, v1 = %u, s0 = %u\n", n, v0, v1, s0);
    }
    if(p)printf("over %u: v0 = %u, v1 = %u, s0 = %u\n", N,v0, v1, s0);
    return v0;
}
With the printf calls enabled, the behavior changes:
Iteration 16: v0 = 0, v1 = 1, s0 = 0
Iteration 0: v0 = 4118247010, v1 = 283647064, s0 = 2654435769
Iteration 1: v0 = 697629954, v1 = 1877591623, s0 = 1013904242
Iteration 2: v0 = 1247946618, v1 = 1715258152, s0 = 3668340011
Iteration 3: v0 = 1569133783, v1 = 941708474, s0 = 2027808484
Iteration 4: v0 = 19502569, v1 = 3735762688, s0 = 387276957
Iteration 5: v0 = 3525557839, v1 = 2890237912, s0 = 3041712726
Iteration 6: v0 = 660004408, v1 = 3560623501, s0 = 1401181199
Iteration 7: v0 = 353526449, v1 = 1535207764, s0 = 4055616968
Iteration 8: v0 = 2417009224, v1 = 1762521688, s0 = 2415085441
Iteration 9: v0 = 4167167376, v1 = 4236864917, s0 = 774553914
Iteration 10: v0 = 1698174388, v1 = 2914919014, s0 = 3428989683
Iteration 11: v0 = 257148189, v1 = 1374668584, s0 = 1788458156
Iteration 12: v0 = 1640083417, v1 = 1955196887, s0 = 147926629
Iteration 13: v0 = 2644043492, v1 = 3032958913, s0 = 2802362398
Iteration 14: v0 = 1974379242, v1 = 3852325290, s0 = 1161830871
Iteration 15: v0 = 1892921073, v1 = 1379537219, s0 = 3816266640
over 16: v0 = 1892921073, v1 = 1379537219, s0 = 3816266640
Now the result is correct. So why does this happen? I can't understand… I am using OptiX 8 and the /fp:precise option.