Excellent! That means I don’t have to go to bizarre lengths to emulate gotos with normal function calls!

As for rand(), I know that it’s not built in, so I have my own implementation. Here’s the code from my old CUDA version, in case anyone’s interested. It’s a combined Tausworthe generator, adjusted to share part of its state across a warp; this allows higher performance and a smaller total state while still retaining good quality.

```
// Per-component Tausworthe parameters. randInt() selects entry k = threadIdx.x & 3
// and passes (shift1[k], shift2[k], shift3[k], offset[k]) to TausStep() as
// (S1, S2, S3, M).
// NOTE(review): the four parameter sets look like those of L'Ecuyer's LFSR113
// generator -- confirm against the published tables.
__constant__ unsigned int shift1[4] = {6u, 2u, 13u, 3u};
__constant__ unsigned int shift2[4] = {13u, 27u, 21u, 12u};
__constant__ unsigned int shift3[4] = {18u, 2u, 7u, 13u};
// State masks: all ones with the low 1, 3, 4 and 7 bits cleared respectively.
__constant__ unsigned int offset[4] = {0xFFFFFFFEu, 0xFFFFFFF8u, 0xFFFFFFF0u, 0xFFFFFF80u};
// One 32-bit state word per warp lane, indexed by threadIdx.x & 31, so every
// warp of a block addresses the same 32 words.
// No initial values are given here; presumably the states are seeded elsewhere
// before the first randInt() call -- TODO confirm.
__shared__ unsigned int randStates[32];
// Advances a single Tausworthe component by one step.
//   z          : component state; overwritten in place with the new state
//   S1, S2, S3 : shift amounts for this component
//   M          : mask applied to z before the final left shift
// Returns the updated state (the same value that is stored back into z).
__device__ unsigned int TausStep(unsigned int &z, int S1, int S2, int S3, unsigned int M)
{
    const unsigned int feedback = ((z << S1) ^ z) >> S2;
    const unsigned int shifted = (z & M) << S3;
    z = shifted ^ feedback;
    return z;
}
// Returns a 32-bit pseudo-random integer for the calling thread.
// The thread first advances its own state word, randStates[threadIdx.x & 31],
// using the parameter set selected by threadIdx.x & 3. It then XORs that word
// with the state words of the next three lanes (indices wrap around at 32).
// There is no barrier between the write and the neighbour reads.
// NOTE(review): this appears to rely on all lanes of a warp executing in step,
// and on at most one warp per block touching randStates -- confirm for the
// target hardware and launch configuration.
__device__ unsigned int randInt()
{
    const unsigned int lane = threadIdx.x & 31;
    const unsigned int component = threadIdx.x & 3;

    // Return value ignored on purpose: the new state is re-read from shared memory below.
    TausStep(randStates[lane], shift1[component], shift2[component], shift3[component], offset[component]);

    unsigned int combined = randStates[lane];
    combined ^= randStates[(lane + 1) & 31];
    combined ^= randStates[(lane + 2) & 31];
    combined ^= randStates[(lane + 3) & 31];
    return combined;
}
// Returns a pseudo-random float in [0, 1) and advances the generator state.
// The low 23 bits of randInt() are used as the mantissa of a float whose
// sign/exponent bits are 0x3F800000 (a value in [1, 2)); subtracting 1.0f
// shifts the result down to [0, 1).
__device__ float randFloat()
{
    const unsigned int mantissa = randInt() & 0x007FFFFF;
    const unsigned int bits = mantissa | 0x3F800000;
    return __int_as_float(bits) - 1.0f;
}
// Helper for obtaining one random float shared by a whole warp.
// Advances the generator via randInt(), then converts the calling lane's OWN
// state word (not the combined randInt() value) to a float in [0, 1).
// Only lane 0's result is built from randStates[0]; the call site overwrites
// the value in the other lanes so that the whole warp ends up with that number.
__device__ float randFloatWarp()
{
    // Called only for its side effect of stepping this lane's state.
    randInt();

    const unsigned int lane = threadIdx.x & 31;
    const unsigned int bits = (randStates[lane] & 0x007FFFFF) | 0x3F800000;
    return __int_as_float(bits) - 1.0f;
}
...
// Draw one random float shared by the whole warp. Every lane advances its
// state; lane 0's return value is the one built from randStates[0].
w=randFloatWarp();
// Lanes 1..31 re-read randStates[0] so that they end up with lane 0's value.
// Lane 0 is deliberately excluded: this is a workaround for a strange bug with
// compute 1.0 hardware where the device crashes if all threads in a warp try
// to read from the same address.
if ((threadIdx.x&31)!=0)
w = __int_as_float((randStates[0]&0x007FFFFF)|0x3F800000)-1.0f;
...
```

You might notice the strange workaround around randFloatWarp and its call site. It is there because of an apparent bug in compute 1.0 hardware, where having an entire warp read from a single location in shared memory causes a crash (note that compute 1.3 hardware works as expected in this case).