Anyone know what this does?

I am new to CUDA but want to learn.

I’m trying to add functionality to a CUDA-enabled rainbow table generator. Currently it only supports MD5, and I want to add LM and NTLM support.

I have encountered a strange piece of code in this program that is stopping me from continuing.

[codebox]// Kernel launch: the <<<grid, block>>> execution configuration runs bow_md5
// on the GPU with nVsetup.BLOKS thread blocks of nVsetup.THREADS threads each;
// ddata and dol are passed as the kernel's two arguments.
bow_md5<<<nVsetup.BLOKS,nVsetup.THREADS>>>(ddata,dol);[/codebox]

I don’t know what this does, because here is the function:

[codebox]/*
 * bow_md5 -- advances one rainbow-table chain per GPU thread.
 *
 * For every column in [indada.LengChineS, indada.LengChineE - 1) a thread:
 *   1. turns its 64-bit chain index into a plaintext: the length is picked
 *      from indada.m_nPlainSpaceUpToX, the characters from
 *      indada.m_PlainCharset;
 *   2. hashes that plaintext with a single-block MD5 (only message words
 *      0..3 and the bit-length word 14 are ever non-zero);
 *   3. takes the first two digest words as a new 64-bit value, adds the
 *      column number and indada.ind, and reduces it below
 *      indada.m_nPlainSpaceTotal to obtain the next index.
 *
 * Parameters:
 *   retd  in/out, four DWORDs per thread starting at
 *         (threadIdx.x + blockIdx.x * indada.THREADS) * 4:
 *           [0..1] index read when indada.LengChineS == 0,
 *           [2..3] index read otherwise, and always written on exit.
 *   ol    ol[0] receives the column counter after the loop; every thread
 *         stores the same value.
 *
 * NOTE(review): there is no bounds check on retd, and the per-thread slot is
 * derived from indada.THREADS rather than blockDim.x -- the launch block size
 * presumably has to match indada.THREADS; confirm against the host code.
 * NOTE(review): the 15-entry local table and the 16-byte message buffer imply
 * m_nPlainLenMax <= 15; nothing in this kernel enforces it.
 * NOTE(review): all_tipe is used as if dd (64-bit) overlays d[0] (low word)
 * and d[1] (high word), and Ddata as if b[] (bytes) overlays d[] (DWORDs) --
 * confirm against their declarations.
 */
__global__ static void bow_md5(DWORD * retd, DWORD * ol)
{
    Ddata dd;

    // First of the four DWORD slots in retd that belong to this thread.
    DWORD tid = (threadIdx.x + (blockIdx.x*indada.THREADS))*4;

    all_tipe nIndex;
    all_tipe O_col;

    // Upper bound the reduced index must stay below.
    O_col.dd = indada.m_nPlainSpaceTotal;

    // Starting subtrahend for the shift-and-subtract reduction further down.
    // NOTE(review): presumably m_nPlainSpaceTotal scaled up by a power of two
    // on the host -- confirm.
    all_tipe dell;
    dell.dd = indada._m_nPlainSpaceTotal;
    all_tipe dellc = dell;

    DWORD m_nPlainLenMin = indada.m_nPlainLenMin;
    DWORD m_nPlainLenMax = indada.m_nPlainLenMax;

    // Clear the tail of the 16-byte message buffer (bytes m_nPlainLenMax..15).
    // NOTE(review): if DWORD is unsigned, i is compared as unsigned here, so
    // m_nPlainLenMax == 0 would never terminate -- confirm it is always >= 1.
    for (int i = 15; i >= m_nPlainLenMax; i--)
    {
        dd.b[i]=0;
    }

    unsigned char MAXst = indada.m_nPlainCharsetLen;
    DWORD ind=indada.ind;
    DWORD nv8600gt = indada.LengChineE;   // column at which this launch stops
    DWORD col      = indada.LengChineS;   // column at which this launch starts

    // Column 0 starts from slots [0..1]; any later start column picks up the
    // running index that a previous launch left in slots [2..3].
    if (col == 0)
    {
        nIndex.d[0] = retd[tid+0];
        nIndex.d[1] = retd[tid+1];
    }
    else
    {
        nIndex.d[0] = retd[tid+2];
        nIndex.d[1] = retd[tid+3];
    }

    int m_nPlainLen=1;
    DWORD datae;

    // Thread-local copy of the per-length plain-space offsets.
    unsigned long long m_nPlainSpaceUpToX[15];
    for (int i = 0 ; i < m_nPlainLenMax ; i++)
    {
        m_nPlainSpaceUpToX[i] = indada.m_nPlainSpaceUpToX[i].dd;
    }

    //--------------------------------------

    for (;col<(nv8600gt-1);col++)
    {
        // NOTE(review): no shared memory is visible in this kernel, so the
        // purpose of this barrier is not evident from here.
        __syncthreads(); //---

        // ---- index -> plaintext length ------------------------------------
        // Earlier form, kept for reference:
        //	for (ii = m_nPlainLenMax - 1; ii >= m_nPlainLenMin - 1; ii--)
        //	{
        //		if (nIndex.dd >= m_nPlainSpaceUpToX[ii]) break;
        //	}
        //	m_nPlainLen = ii + 1;

        int ii;
        ii = m_nPlainLenMax - 1;

        // Walk down from the longest length until the index is at or above
        // that length's offset.
        // NOTE(review): termination appears to rely on the table holding an
        // entry <= index (e.g. 0) at or above position m_nPlainLenMin - 1;
        // the break test compares ii against an unsigned expression.
        while (nIndex.dd < m_nPlainSpaceUpToX[ii])
        {
            if (ii < (m_nPlainLenMin - 1)) break;
            ii--;
        }

        m_nPlainLen = ii + 1;

        // Make the index relative to the start of this length's range.
        nIndex.dd -= m_nPlainSpaceUpToX[m_nPlainLen-1];

        // MD5 message length in bits (goes into message word 14).
        datae = m_nPlainLen* 8;

        // MD5 padding: a single 0x80 byte right after the plaintext, then zeros.
        dd.b[m_nPlainLen] = 0x80;
        for (int i = m_nPlainLen+1; i < (m_nPlainLenMax+1); i++)
        {
            dd.b[i]=0;
        }

        // ---- index -> plaintext characters --------------------------------
        // Peel off base-MAXst digits from the least significant end, filling
        // the plaintext from its last character to its first.
        for (int iii = (m_nPlainLen -1); iii >= 0; iii--)
        {
            if (nIndex.d[1] != 0)
            {
                // High word still set: 64-bit divide, remainder obtained by
                // multiply-and-subtract instead of a second '%'.
                //	dd.b[iii] = indada.m_PlainCharset[(nIndex.dd % MAXst)];
                //	nIndex.dd /= MAXst;
                unsigned long long tmp =  nIndex.dd / MAXst;
                int tmpc = nIndex.dd - ( tmp * MAXst );
                dd.b[iii] = indada.m_PlainCharset[tmpc];
                nIndex.dd = tmp ;
            }
            else
            {
                // Value fits in the low word: 32-bit arithmetic is enough.
                dd.b[iii] = indada.m_PlainCharset[nIndex.d[0] % MAXst];
                nIndex.d[0] /= MAXst;
            }
            //__syncthreads(); //---
        }

        /*
        Debug early-out, kept for reference:
        ol[0]=5;
        retd[tid+0]=dd.d[1];
        retd[tid+1]=dd.d[2];
        retd[tid+2]=nIndex.d[0];
        retd[tid+3]=nIndex.d[1];
        return;
        */

        __syncthreads(); //---

        // ---- single-block MD5 ---------------------------------------------
        // Standard MD5 initial state. Message words 4..13 and 15 are always
        // zero here, so their additions are simply left out of the steps.
        DWORD a=0x67452301;
        DWORD b=0xefcdab89;
        DWORD c=0x98badcfe;
        DWORD d=0x10325476;

        DWORD data0=dd.d[0];
        DWORD data1=dd.d[1];
        DWORD data2=dd.d[2];
        DWORD data3=dd.d[3];

        // Round 1
        a= b + lrot32(a + (d ^ (b & (c ^ d))) + data0 + 0xd76aa478,7);
        d= a + lrot32(d + (c ^ (a & (b ^ c))) + data1 + 0xe8c7b756,12);
        c= d + lrot32(c + (b ^ (d & (a ^ b))) + data2 + 0x242070db,17);
        b= c + lrot32(b + (a ^ (c & (d ^ a))) + data3 + 0xc1bdceee,22);
        a= b + lrot32(a + (d ^ (b & (c ^ d))) + 0xf57c0faf,7);
        d= a + lrot32(d + (c ^ (a & (b ^ c))) + 0x4787c62a,12);
        c= d + lrot32(c + (b ^ (d & (a ^ b))) + 0xa8304613,17);
        b= c + lrot32(b + (a ^ (c & (d ^ a))) + 0xfd469501,22);
        a= b + lrot32(a + (d ^ (b & (c ^ d))) + 0x698098d8,7);
        d= a + lrot32(d + (c ^ (a & (b ^ c))) + 0x8b44f7af,12);
        c= d + lrot32(c + (b ^ (d & (a ^ b))) + 0xffff5bb1,17);
        b= c + lrot32(b + (a ^ (c & (d ^ a))) + 0x895cd7be,22);
        a= b + lrot32(a + (d ^ (b & (c ^ d))) + 0x6b901122,7);
        d= a + lrot32(d + (c ^ (a & (b ^ c))) + 0xfd987193,12);
        c= d + lrot32(c + (b ^ (d & (a ^ b))) + datae + 0xa679438e,17);
        b= c + lrot32(b + (a ^ (c & (d ^ a))) + 0x49b40821,22);

        // Round 2
        a= b + lrot32(a + (c ^ (d & (b ^ c))) + data1 + 0xf61e2562,5);
        d= a + lrot32(d + (b ^ (c & (a ^ b))) + 0xc040b340,9);
        c= d + lrot32(c + (a ^ (b & (d ^ a))) + 0x265e5a51,14);
        b= c + lrot32(b + (d ^ (a & (c ^ d))) + data0 + 0xe9b6c7aa,20);
        a= b + lrot32(a + (c ^ (d & (b ^ c))) + 0xd62f105d,5);
        d= a + lrot32(d + (b ^ (c & (a ^ b))) + 0x02441453,9);
        c= d + lrot32(c + (a ^ (b & (d ^ a))) + 0xd8a1e681,14);
        b= c + lrot32(b + (d ^ (a & (c ^ d))) + 0xe7d3fbc8,20);
        a= b + lrot32(a + (c ^ (d & (b ^ c))) + 0x21e1cde6,5);
        d= a + lrot32(d + (b ^ (c & (a ^ b))) + datae + 0xc33707d6,9);
        c= d + lrot32(c + (a ^ (b & (d ^ a))) + data3 + 0xf4d50d87,14);
        b= c + lrot32(b + (d ^ (a & (c ^ d))) + 0x455a14ed,20);
        a= b + lrot32(a + (c ^ (d & (b ^ c))) + 0xa9e3e905,5);
        d= a + lrot32(d + (b ^ (c & (a ^ b))) + data2 + 0xfcefa3f8,9);
        c= d + lrot32(c + (a ^ (b & (d ^ a))) + 0x676f02d9,14);
        b= c + lrot32(b + (d ^ (a & (c ^ d))) + 0x8d2a4c8a,20);

        // Round 3
        a= b + lrot32(a + (b ^ c ^ d) + 0xfffa3942,4);
        d= a + lrot32(d + (a ^ b ^ c) + 0x8771f681,11);
        c= d + lrot32(c + (d ^ a ^ b) + 0x6d9d6122,16);
        b= c + lrot32(b + (c ^ d ^ a) + datae + 0xfde5380c,23);
        a= b + lrot32(a + (b ^ c ^ d) + data1 + 0xa4beea44,4);
        d= a + lrot32(d + (a ^ b ^ c) + 0x4bdecfa9,11);
        c= d + lrot32(c + (d ^ a ^ b) + 0xf6bb4b60,16);
        b= c + lrot32(b + (c ^ d ^ a) + 0xbebfbc70,23);
        a= b + lrot32(a + (b ^ c ^ d) + 0x289b7ec6,4);
        d= a + lrot32(d + (a ^ b ^ c) + data0 + 0xeaa127fa,11);
        c= d + lrot32(c + (d ^ a ^ b) + data3 + 0xd4ef3085,16);
        b= c + lrot32(b + (c ^ d ^ a) + 0x04881d05,23);
        a= b + lrot32(a + (b ^ c ^ d) + 0xd9d4d039,4);
        d= a + lrot32(d + (a ^ b ^ c) + 0xe6db99e5,11);
        c= d + lrot32(c + (d ^ a ^ b) + 0x1fa27cf8,16);
        b= c + lrot32(b + (c ^ d ^ a) + data2 + 0xc4ac5665,23);

        // Round 4
        a= b + lrot32(a + (c ^ (b | (~ d))) + data0 + 0xf4292244,6);
        d= a + lrot32(d + (b ^ (a | (~ c))) + 0x432aff97,10);
        c= d + lrot32(c + (a ^ (d | (~ b))) + datae + 0xab9423a7,15);
        b= c + lrot32(b + (d ^ (c | (~ a))) + 0xfc93a039,21);
        a= b + lrot32(a + (c ^ (b | (~ d))) + 0x655b59c3,6);
        d= a + lrot32(d + (b ^ (a | (~ c))) + data3 + 0x8f0ccc92,10);
        c= d + lrot32(c + (a ^ (d | (~ b))) + 0xffeff47d,15);
        b= c + lrot32(b + (d ^ (c | (~ a))) + data1 + 0x85845dd1,21);
        a= b + lrot32(a + (c ^ (b | (~ d))) + 0x6fa87e4f,6);
        d= a + lrot32(d + (b ^ (a | (~ c))) + 0xfe2ce6e0,10);
        c= d + lrot32(c + (a ^ (d | (~ b))) + 0xa3014314,15);
        b= c + lrot32(b + (d ^ (c | (~ a))) + 0x4e0811a1,21);
        a= b + lrot32(a + (c ^ (b | (~ d))) + 0xf7537e82,6);
        d= a + lrot32(d + (b ^ (a | (~ c))) + 0xbd3af235,10);
        c= d + lrot32(c + (a ^ (d | (~ b))) + data2 + 0x2ad7d2bb,15);
        b= c + lrot32(b + (d ^ (c | (~ a))) + 0xeb86d391,21);

        // Only the first two digest words are consumed below, so the final
        // additions for c and d are skipped.
        a=0x67452301+a;
        b=0xefcdab89+b;

        // ---- hash -> next index (reduction) -------------------------------
        nIndex.d[1] = b;
        nIndex.d[0] = a;
        nIndex.dd += col;
        if (ind!=0)
            nIndex.dd += ind;

        // nIndex.dd %= O_col.dd;

        // fast version: replaces the 64-bit modulo above with conditional
        // subtractions of a subtrahend that is halved on every pass.
        // NOTE(review): if dell.dd reaches 0 while nIndex.dd is still
        // >= O_col.dd this loop never ends; correctness depends on how the
        // host prepares indada._m_nPlainSpaceTotal.
        dell.dd = dellc.dd;
        while (nIndex.dd >= O_col.dd)
        {
            if ( nIndex.dd >= dell.dd )	nIndex.dd -= dell.dd;
            dell.dd>>=1;
        }
    } // end of column loop

    // Publish the column reached and this thread's running index.
    ol[0]=col;
    retd[tid+2]=nIndex.d[0];
    retd[tid+3]=nIndex.d[1];
    return;
}[/codebox]

Can you please help me understand the “<<<” and “>>>” part?

Thanks

Time to read some CUDA manuals and “Getting Started” guides or tutorials. It’s all explained there. I don’t think anyone here would be willing to explain to you all the basics from the ground up.

NOTE: You will need the CUDA toolkit to compile the part with the <<< and >>>. It’s a vendor specific extension to the C/C++ language. It launches the computation on the graphics card.

I am willing to read them, but I can’t find them in the CUDA SDK. Can you point me to where I can find the “Getting Started” guide or the API documentation? All I have is example code.

EDIT:

Found a useful source now that I know what to look for. Thanks!

They are part of the CUDA Toolkit, not the CUDA SDK.