I'm working on a rainbow table generator in CUDA and have hit a roadblock. Whenever I put my function call inside a for loop, the compiler back end (be.exe) never finishes; it just hangs.
Relevant code:
// Caller-side fragment: calls plainToHash from inside a two-iteration loop.
// `i` and `index` are declared outside this fragment.
// In CUDA, `#pragma unroll 1` requests that the loop that follows not be unrolled.
#pragma unroll 1
for(i = 0; i < 2; i++)
{
// All arguments are constants; index is overwritten on each pass.
index = plainToHash( 1, 2,3, 12);
}
/*
 * Hash a short message held in three 32-bit words and return 64 bits of the
 * result.
 *
 * The initial state, per-round shift amounts and additive constants match the
 * MD4 single-block layout; R0/R1/R2 are defined elsewhere, so this presumably
 * is MD4 -- confirm against those macros.
 *
 * x0, x1, x2  first three 32-bit words of the message block (up to 12 bytes).
 * size        message length in bytes. It is used as the byte offset of the
 *             0x80 padding marker and, multiplied by 8, as the bit length
 *             stored in X[14]. NOTE(review): only 12 message bytes are passed
 *             in, so size is expected to be <= 12; a value >= 56 would place
 *             the marker on top of the length word or past the end of X.
 *
 * Returns ((uint64)B << 32) | A taken from the final state.
 */
__device__ uint64 plainToHash(uint32 x0, uint32 x1, uint32 x2, uint32 size)
{
    uint32 A, B, C, D;
    /* Message block: the three input words followed by zeros. */
    uint32 X[16] = { x0, x1, x2 };

    A = 0x67452301; B = 0xEFCDAB89; C = 0x98BADCFE; D = 0x10325476;

    /* Bit length of the message goes in word 14. */
    X[14] = size << 3;

    /*
     * Padding marker: a single 0x80 byte directly after the message bytes.
     * The original line assigned 0x80 to the cast pointer itself, which is
     * not an lvalue; the byte at offset `size` is what must be written.
     */
    ((uchar8 *)X)[size] = 0x80;

    /* Round group 0 */
    R0(A,B,C,D,X[ 0], 3,0); R0(D,A,B,C,X[ 1], 7,0);
    R0(C,D,A,B,X[ 2],11,0); R0(B,C,D,A,X[ 3],19,0);
    R0(A,B,C,D,X[ 4], 3,0); R0(D,A,B,C,X[ 5], 7,0);
    R0(C,D,A,B,X[ 6],11,0); R0(B,C,D,A,X[ 7],19,0);
    R0(A,B,C,D,X[ 8], 3,0); R0(D,A,B,C,X[ 9], 7,0);
    R0(C,D,A,B,X[10],11,0); R0(B,C,D,A,X[11],19,0);
    R0(A,B,C,D,X[12], 3,0); R0(D,A,B,C,X[13], 7,0);
    R0(C,D,A,B,X[14],11,0); R0(B,C,D,A,X[15],19,0);

    /* Round group 1 */
    R1(A,B,C,D,X[ 0], 3,0x5A827999); R1(D,A,B,C,X[ 4], 5,0x5A827999);
    R1(C,D,A,B,X[ 8], 9,0x5A827999); R1(B,C,D,A,X[12],13,0x5A827999);
    R1(A,B,C,D,X[ 1], 3,0x5A827999); R1(D,A,B,C,X[ 5], 5,0x5A827999);
    R1(C,D,A,B,X[ 9], 9,0x5A827999); R1(B,C,D,A,X[13],13,0x5A827999);
    R1(A,B,C,D,X[ 2], 3,0x5A827999); R1(D,A,B,C,X[ 6], 5,0x5A827999);
    R1(C,D,A,B,X[10], 9,0x5A827999); R1(B,C,D,A,X[14],13,0x5A827999);
    R1(A,B,C,D,X[ 3], 3,0x5A827999); R1(D,A,B,C,X[ 7], 5,0x5A827999);
    R1(C,D,A,B,X[11], 9,0x5A827999); R1(B,C,D,A,X[15],13,0x5A827999);

    /* Round group 2 */
    R2(A,B,C,D,X[ 0], 3,0x6ED9EBA1); R2(D,A,B,C,X[ 8], 9,0x6ED9EBA1);
    R2(C,D,A,B,X[ 4],11,0x6ED9EBA1); R2(B,C,D,A,X[12],15,0x6ED9EBA1);
    R2(A,B,C,D,X[ 2], 3,0x6ED9EBA1); R2(D,A,B,C,X[10], 9,0x6ED9EBA1);
    R2(C,D,A,B,X[ 6],11,0x6ED9EBA1); R2(B,C,D,A,X[14],15,0x6ED9EBA1);
    R2(A,B,C,D,X[ 1], 3,0x6ED9EBA1); R2(D,A,B,C,X[ 9], 9,0x6ED9EBA1);
    R2(C,D,A,B,X[ 5],11,0x6ED9EBA1); R2(B,C,D,A,X[13],15,0x6ED9EBA1);
    R2(A,B,C,D,X[ 3], 3,0x6ED9EBA1); R2(D,A,B,C,X[11], 9,0x6ED9EBA1);
    R2(C,D,A,B,X[ 7],11,0x6ED9EBA1); R2(B,C,D,A,X[15],15,0x6ED9EBA1);

    /* Add the initial state back in before extracting the result. */
    A += 0x67452301; B += 0xEFCDAB89; C += 0x98BADCFE; D += 0x10325476;

    return ((uint64)B << 32) | A;
}
Removing the loop makes it compile, and the `#pragma unroll 1` directive doesn't help either.