I am new to CUDA but want to learn.
I’m trying to add functionality to a CUDA-enabled rainbow table generator. Currently it only supports MD5, and I want to add LM and NTLM support.
I have encountered a strange piece of code in this program that is stopping me from continuing.
[codebox]// Kernel launch: inside <<< >>> the first value is the grid size (number of
// thread blocks) and the second is the block size (threads per block).
// ddata and dol are passed as the kernel's retd and ol parameters.
bow_md5<<<nVsetup.BLOKS,nVsetup.THREADS>>>(ddata,dol);[/codebox]
I don’t understand what this line does. Here is the function it calls:
[codebox]// bow_md5 -- CUDA kernel in which every thread repeatedly performs
// "64-bit index -> plaintext -> MD5 -> new 64-bit index" on its own value.
//
// Parameters:
//   retd  Work area holding 4 DWORDs per thread, starting at
//         tid = (threadIdx.x + blockIdx.x * indada.THREADS) * 4.
//         The starting index is read from retd[tid+0..1] when
//         indada.LengChineS is 0, otherwise from retd[tid+2..3].
//         The resulting index is written to retd[tid+2..3].
//   ol    ol[0] receives the column counter reached when the loop ends
//         (every thread stores to this same element).
//
// Configuration (plaintext length range, charset, column range, ...) is read
// from `indada`, which is declared outside this function.
// NOTE(review): presumably a __constant__ structure filled by the host -- confirm.
//
// Launch layout: 1-D grid of 1-D blocks. tid is derived from indada.THREADS
// rather than blockDim.x, so the launch block size is expected to match it.
// No bounds check is made on retd; the buffer must cover every launched thread.
__global__ static void bow_md5(DWORD * retd, DWORD * ol)
{
    Ddata dd;   // message buffer, accessed as bytes (dd.b) and as words (dd.d)
    DWORD tid = (threadIdx.x + (blockIdx.x*indada.THREADS))*4;
    all_tipe nIndex;   // current index; .dd is the 64-bit view, .d[] the 32-bit halves
    all_tipe O_col ;
    O_col.dd = indada.m_nPlainSpaceTotal;
    all_tipe dell;
    dell.dd = indada._m_nPlainSpaceTotal;
    all_tipe dellc = dell;   // pristine copy; dell is shifted during each reduction
    DWORD m_nPlainLenMin = indada.m_nPlainLenMin;
    DWORD m_nPlainLenMax = indada.m_nPlainLenMax;
    // Zero the tail of the message buffer (bytes m_nPlainLenMax..15) once.
    // NOTE(review): i is int but m_nPlainLenMax is a DWORD, so the comparison
    // is presumably unsigned -- a value of 0 would never terminate; confirm the
    // host never passes 0.
    for (int i = 15; i >= m_nPlainLenMax; i--)
    {
        dd.b[i]=0;
    }
    unsigned char MAXst = indada.m_nPlainCharsetLen;
    DWORD ind=indada.ind;
    DWORD nv8600gt = indada.LengChineE;   // loop runs while col < LengChineE - 1
    DWORD col = indada.LengChineS;        // first column to process
    if (col == 0)
    {
        nIndex.d[0] = retd[tid+0];
        nIndex.d[1] = retd[tid+1];
    }
    else
    {
        // Resume from the value a previous launch left in slots 2..3.
        nIndex.d[0] = retd[tid+2];
        nIndex.d[1] = retd[tid+3];
    }
    int m_nPlainLen=1;
    DWORD datae;   // message length in bits, used where MD5 expects word 14
    // Per-thread copy of the cumulative plain-space table.
    // NOTE(review): the local array has 15 entries, so m_nPlainLenMax must not
    // exceed 15 -- confirm against the host-side setup.
    unsigned long long m_nPlainSpaceUpToX[15];
    for (int i = 0 ; i < m_nPlainLenMax ; i++)
    {
        m_nPlainSpaceUpToX[i] = indada.m_nPlainSpaceUpToX[i].dd;
    }
    //--------------------------------------
    for (;col<(nv8600gt-1);col++)
    {
        // NOTE(review): no __shared__ data is visible in this kernel, so this
        // barrier does not appear to be needed for correctness -- confirm.
        __syncthreads(); //---
        // Reference form of the length search implemented below:
        // for (ii = m_nPlainLenMax - 1; ii >= m_nPlainLenMin - 1; ii--)
        // {
        // if (nIndex.dd >= m_nPlainSpaceUpToX[ii]) break;
        // }
        // m_nPlainLen = ii + 1;
        int ii;
        ii = m_nPlainLenMax - 1;
        // Walk down from the longest length until the index is no longer
        // below the table entry for that length.
        while (nIndex.dd < m_nPlainSpaceUpToX[ii])
        {
            if (ii < (m_nPlainLenMin - 1)) break;
            ii--;
        }
        m_nPlainLen = ii + 1;
        // Make the index relative to the start of its length class.
        nIndex.dd -= m_nPlainSpaceUpToX[m_nPlainLen-1];
        datae = m_nPlainLen* 8;
        // MD5 padding: 0x80 marker directly after the plaintext, zeros after it.
        dd.b[m_nPlainLen] = 0x80;
        for (int i = m_nPlainLen+1; i < (m_nPlainLenMax+1); i++)
        {
            dd.b[i]=0;
        }
        // Emit the plaintext from the last character to the first by
        // repeatedly taking the index modulo the charset length.
        for (int iii = (m_nPlainLen -1); iii >= 0; iii--)
        {
            if (nIndex.d[1] != 0)
            {
                // 64-bit path; remainder computed as n - (n / m) * m instead of '%':
                // dd.b[iii] = indada.m_PlainCharset[(nIndex.dd % MAXst)];
                // nIndex.dd /= MAXst;
                unsigned long long tmp = nIndex.dd / MAXst;
                int tmpc = nIndex.dd - ( tmp * MAXst );
                dd.b[iii] = indada.m_PlainCharset[tmpc];
                nIndex.dd = tmp ;
            }
            else
            {
                // d[1] is zero, so only d[0] is divided, with 32-bit arithmetic.
                // NOTE(review): assumes d[1] is the upper half of dd -- the
                // union layout is not shown here.
                dd.b[iii] = indada.m_PlainCharset[nIndex.d[0] % MAXst];
                nIndex.d[0] /= MAXst;
            }
            //__syncthreads(); //---
        }
        /*
        ol[0]=5;
        retd[tid+0]=dd.d[1];
        retd[tid+1]=dd.d[2];
        retd[tid+2]=nIndex.d[0];
        retd[tid+3]=nIndex.d[1];
        return;
        */
        __syncthreads(); //---
        // MD5 initial chaining values.
        DWORD a=0x67452301;
        DWORD b=0xefcdab89;
        DWORD c=0x98badcfe;
        DWORD d=0x10325476;
        // Only message words 0..3 and the bit length are fed in; every other
        // step below adds just its round constant, i.e. treats its message
        // word as zero.
        DWORD data0=dd.d[0];
        DWORD data1=dd.d[1];
        DWORD data2=dd.d[2];
        DWORD data3=dd.d[3];
        // ---- Round 1: F(x,y,z) written as z ^ (x & (y ^ z)) ----
        a= b + lrot32(a + (d ^ (b & (c ^ d))) + data0 + 0xd76aa478,7);
        d= a + lrot32(d + (c ^ (a & (b ^ c))) + data1 + 0xe8c7b756,12);
        c= d + lrot32(c + (b ^ (d & (a ^ b))) + data2 + 0x242070db,17);
        b= c + lrot32(b + (a ^ (c & (d ^ a))) + data3 + 0xc1bdceee,22);
        a= b + lrot32(a + (d ^ (b & (c ^ d))) + 0xf57c0faf,7);
        d= a + lrot32(d + (c ^ (a & (b ^ c))) + 0x4787c62a,12);
        c= d + lrot32(c + (b ^ (d & (a ^ b))) + 0xa8304613,17);
        b= c + lrot32(b + (a ^ (c & (d ^ a))) + 0xfd469501,22);
        a= b + lrot32(a + (d ^ (b & (c ^ d))) + 0x698098d8,7);
        d= a + lrot32(d + (c ^ (a & (b ^ c))) + 0x8b44f7af,12);
        c= d + lrot32(c + (b ^ (d & (a ^ b))) + 0xffff5bb1,17);
        b= c + lrot32(b + (a ^ (c & (d ^ a))) + 0x895cd7be,22);
        a= b + lrot32(a + (d ^ (b & (c ^ d))) + 0x6b901122,7);
        d= a + lrot32(d + (c ^ (a & (b ^ c))) + 0xfd987193,12);
        c= d + lrot32(c + (b ^ (d & (a ^ b))) + datae + 0xa679438e,17);
        b= c + lrot32(b + (a ^ (c & (d ^ a))) + 0x49b40821,22);
        // ---- Round 2: G(x,y,z) written as y ^ (z & (x ^ y)) ----
        a= b + lrot32(a + (c ^ (d & (b ^ c))) + data1 + 0xf61e2562,5);
        d= a + lrot32(d + (b ^ (c & (a ^ b))) + 0xc040b340,9);
        c= d + lrot32(c + (a ^ (b & (d ^ a))) + 0x265e5a51,14);
        b= c + lrot32(b + (d ^ (a & (c ^ d))) + data0 + 0xe9b6c7aa,20);
        a= b + lrot32(a + (c ^ (d & (b ^ c))) + 0xd62f105d,5);
        d= a + lrot32(d + (b ^ (c & (a ^ b))) + 0x02441453,9);
        c= d + lrot32(c + (a ^ (b & (d ^ a))) + 0xd8a1e681,14);
        b= c + lrot32(b + (d ^ (a & (c ^ d))) + 0xe7d3fbc8,20);
        a= b + lrot32(a + (c ^ (d & (b ^ c))) + 0x21e1cde6,5);
        d= a + lrot32(d + (b ^ (c & (a ^ b))) + datae + 0xc33707d6,9);
        c= d + lrot32(c + (a ^ (b & (d ^ a))) + data3 + 0xf4d50d87,14);
        b= c + lrot32(b + (d ^ (a & (c ^ d))) + 0x455a14ed,20);
        a= b + lrot32(a + (c ^ (d & (b ^ c))) + 0xa9e3e905,5);
        d= a + lrot32(d + (b ^ (c & (a ^ b))) + data2 + 0xfcefa3f8,9);
        c= d + lrot32(c + (a ^ (b & (d ^ a))) + 0x676f02d9,14);
        b= c + lrot32(b + (d ^ (a & (c ^ d))) + 0x8d2a4c8a,20);
        // ---- Round 3: H(x,y,z) = x ^ y ^ z ----
        a= b + lrot32(a + (b ^ c ^ d) + 0xfffa3942,4);
        d= a + lrot32(d + (a ^ b ^ c) + 0x8771f681,11);
        c= d + lrot32(c + (d ^ a ^ b) + 0x6d9d6122,16);
        b= c + lrot32(b + (c ^ d ^ a) + datae + 0xfde5380c,23);
        a= b + lrot32(a + (b ^ c ^ d) + data1 + 0xa4beea44,4);
        d= a + lrot32(d + (a ^ b ^ c) + 0x4bdecfa9,11);
        c= d + lrot32(c + (d ^ a ^ b) + 0xf6bb4b60,16);
        b= c + lrot32(b + (c ^ d ^ a) + 0xbebfbc70,23);
        a= b + lrot32(a + (b ^ c ^ d) + 0x289b7ec6,4);
        d= a + lrot32(d + (a ^ b ^ c) + data0 + 0xeaa127fa,11);
        c= d + lrot32(c + (d ^ a ^ b) + data3 + 0xd4ef3085,16);
        b= c + lrot32(b + (c ^ d ^ a) + 0x04881d05,23);
        a= b + lrot32(a + (b ^ c ^ d) + 0xd9d4d039,4);
        d= a + lrot32(d + (a ^ b ^ c) + 0xe6db99e5,11);
        c= d + lrot32(c + (d ^ a ^ b) + 0x1fa27cf8,16);
        b= c + lrot32(b + (c ^ d ^ a) + data2 + 0xc4ac5665,23);
        // ---- Round 4: I(x,y,z) = y ^ (x | ~z) ----
        a= b + lrot32(a + (c ^ (b | (~ d))) + data0 + 0xf4292244,6);
        d= a + lrot32(d + (b ^ (a | (~ c))) + 0x432aff97,10);
        c= d + lrot32(c + (a ^ (d | (~ b))) + datae + 0xab9423a7,15);
        b= c + lrot32(b + (d ^ (c | (~ a))) + 0xfc93a039,21);
        a= b + lrot32(a + (c ^ (b | (~ d))) + 0x655b59c3,6);
        d= a + lrot32(d + (b ^ (a | (~ c))) + data3 + 0x8f0ccc92,10);
        c= d + lrot32(c + (a ^ (d | (~ b))) + 0xffeff47d,15);
        b= c + lrot32(b + (d ^ (c | (~ a))) + data1 + 0x85845dd1,21);
        a= b + lrot32(a + (c ^ (b | (~ d))) + 0x6fa87e4f,6);
        d= a + lrot32(d + (b ^ (a | (~ c))) + 0xfe2ce6e0,10);
        c= d + lrot32(c + (a ^ (d | (~ b))) + 0xa3014314,15);
        b= c + lrot32(b + (d ^ (c | (~ a))) + 0xeb86d391,21);
        // Only the first two digest words are needed: they become the new index.
        a=0x67452301+a;
        b=0xefcdab89+b;
        nIndex.d[1] = b;
        nIndex.d[0] = a;
        // Offset by the column number (and by ind when it is non-zero).
        nIndex.dd += col;
        if (ind!=0)
            nIndex.dd += ind;
        // Bring the index below m_nPlainSpaceTotal without a 64-bit modulo:
        // nIndex.dd %= O_col.dd;
        // fast version
        // NOTE(review): relies on indada._m_nPlainSpaceTotal being a suitably
        // shifted multiple of m_nPlainSpaceTotal prepared by the host -- confirm.
        dell.dd = dellc.dd;
        while (nIndex.dd >= O_col.dd)
        {
            if ( nIndex.dd >= dell.dd ) nIndex.dd -= dell.dd;
            dell.dd>>=1;
        }
    }//while
    ol[0]=col;
    retd[tid+2]=nIndex.d[0];
    retd[tid+3]=nIndex.d[1];
    return;
}[/codebox]
Can you please help me understand the “<<<” and “>>>” part?
Thanks