Anyone know what this does?

eat_pb · April 24, 2010, 9:47pm

I am new to CUDA but want to learn.

I’m trying to add functionality to a CUDA-enabled rainbow table generator. Currently it only supports MD5, and I want to add LM and NTLM to its capabilities.

I have encountered a strange piece of code in this program that is stopping me from continuing.

[codebox]bow_md5<<<nVsetup.BLOKS,nVsetup.THREADS>>>(ddata,dol);[/codebox]

I don’t know what this does, because here is the function:

[codebox]/**
 * Rainbow-chain kernel: each thread repeatedly maps a 64-bit index to a
 * plaintext, hashes it with a single-block MD5, and reduces the hash back to
 * an index.
 *
 * Inputs come from `indada`, a parameter structure declared elsewhere in the
 * file (presumably in constant memory -- confirm against its declaration).
 *
 * @param retd  Per-thread state, four DWORDs per thread starting at
 *              (threadIdx.x + blockIdx.x * indada.THREADS) * 4.
 *              The 64-bit starting index is read from words [0..1] when
 *              indada.LengChineS == 0, otherwise from words [2..3].
 *              The resulting index is always written to words [2..3].
 * @param ol    ol[0] receives the column counter reached when the loop ends
 *              (every thread stores the same value).
 *
 * NOTE: this listing was recovered from a forum post whose renderer had
 * stripped the underscores from `__global__` and upper-cased `b` in front of
 * `)`; both are restored here, otherwise the code does not compile.
 */
__global__ static void bow_md5(DWORD * retd, DWORD * ol)
{
    Ddata dd;   // message buffer, used as bytes (dd.b) and as 32-bit words (dd.d)

    // First of the four retd words owned by this thread.
    DWORD tid = (threadIdx.x + (blockIdx.x*indada.THREADS))*4;

    all_tipe nIndex;    // current chain index: 64-bit .dd aliased with 32-bit halves .d[0]/.d[1]
    all_tipe O_col;     // upper bound the reduced index must stay below
    O_col.dd = indada.m_nPlainSpaceTotal;

    // Starting subtrahend for the shift-and-subtract reduction at the end of
    // every iteration. Presumably m_nPlainSpaceTotal shifted left so that it
    // exceeds any 64-bit hash prefix -- verify against the host-side setup.
    all_tipe dell;
    dell.dd = indada._m_nPlainSpaceTotal;
    all_tipe dellc = dell;

    DWORD m_nPlainLenMin = indada.m_nPlainLenMin;
    DWORD m_nPlainLenMax = indada.m_nPlainLenMax;

    // Clear the tail of the 16-byte message buffer that no plaintext reaches.
    // NOTE(review): `i` is converted to unsigned for the comparison, so this
    // loop does not terminate if m_nPlainLenMax is 0.
    for (int i = 15; i >= m_nPlainLenMax; i--)
    {
        dd.b[i] = 0;
    }

    unsigned char MAXst = indada.m_nPlainCharsetLen;    // charset size (radix of the index -> plaintext conversion)
    DWORD ind      = indada.ind;                        // extra offset added to every reduced index
    DWORD nv8600gt = indada.LengChineE;                 // end column; the loop stops at LengChineE - 1
    DWORD col      = indada.LengChineS;                 // start column

    // A fresh run (col == 0) starts from words [0..1]; a resumed run continues
    // from the index previously stored in words [2..3].
    if (col == 0)
    {
        nIndex.d[0] = retd[tid+0];
        nIndex.d[1] = retd[tid+1];
    }
    else
    {
        nIndex.d[0] = retd[tid+2];
        nIndex.d[1] = retd[tid+3];
    }

    int m_nPlainLen = 1;
    DWORD datae;    // message length in bits, fed to MD5 as message word 14

    // Local copy of the per-length thresholds used to pick the plaintext length.
    unsigned long long m_nPlainSpaceUpToX[15];
    for (int i = 0 ; i < m_nPlainLenMax ; i++)
    {
        m_nPlainSpaceUpToX[i] = indada.m_nPlainSpaceUpToX[i].dd;
    }

    //--------------------------------------
    for (; col < (nv8600gt-1); col++)
    {
        __syncthreads();

        // --- 1. index -> plaintext length ---------------------------------
        // Walk down from the longest length until the index reaches the
        // threshold for that length (or the minimum length is passed).
        int ii;
        ii = m_nPlainLenMax - 1;
        while (nIndex.dd < m_nPlainSpaceUpToX[ii])
        {
            if (ii < (m_nPlainLenMin - 1)) break;
            ii--;
        }
        m_nPlainLen = ii + 1;

        // Make the index relative to the first plaintext of this length.
        nIndex.dd -= m_nPlainSpaceUpToX[m_nPlainLen-1];

        // --- 2. MD5 padding -----------------------------------------------
        datae = m_nPlainLen * 8;        // length in bits
        dd.b[m_nPlainLen] = 0x80;       // mandatory 1-bit right after the message
        for (int i = m_nPlainLen+1; i < (m_nPlainLenMax+1); i++)
        {
            dd.b[i] = 0;                // wipe leftovers of a longer previous plaintext
        }

        // --- 3. index -> plaintext characters (base-MAXst digits) ----------
        for (int iii = (m_nPlainLen - 1); iii >= 0; iii--)
        {
            if (nIndex.d[1] != 0)
            {
                // High half still non-zero: full 64-bit divide; the remainder
                // is derived by multiply-subtract instead of a second `%`.
                unsigned long long tmp = nIndex.dd / MAXst;
                int tmpc = nIndex.dd - ( tmp * MAXst );
                dd.b[iii] = indada.m_PlainCharset[tmpc];
                nIndex.dd = tmp;
            }
            else
            {
                // Index fits in 32 bits: cheaper 32-bit divide/modulo.
                dd.b[iii] = indada.m_PlainCharset[nIndex.d[0] % MAXst];
                nIndex.d[0] /= MAXst;
            }
        }

        __syncthreads();

        // --- 4. single-block MD5 --------------------------------------------
        // Only message words 0..3 (plaintext + padding) and word 14 (bit
        // length, `datae`) are added explicitly; steps for the other message
        // words add only the round constant.
        DWORD a = 0x67452301;
        DWORD b = 0xefcdab89;
        DWORD c = 0x98badcfe;
        DWORD d = 0x10325476;

        DWORD data0 = dd.d[0];
        DWORD data1 = dd.d[1];
        DWORD data2 = dd.d[2];
        DWORD data3 = dd.d[3];

        // Round 1
        a= b + lrot32(a + (d ^ (b & (c ^ d))) + data0 + 0xd76aa478,7);
        d= a + lrot32(d + (c ^ (a & (b ^ c))) + data1 + 0xe8c7b756,12);
        c= d + lrot32(c + (b ^ (d & (a ^ b))) + data2 + 0x242070db,17);
        b= c + lrot32(b + (a ^ (c & (d ^ a))) + data3 + 0xc1bdceee,22);
        a= b + lrot32(a + (d ^ (b & (c ^ d))) + 0xf57c0faf,7);
        d= a + lrot32(d + (c ^ (a & (b ^ c))) + 0x4787c62a,12);
        c= d + lrot32(c + (b ^ (d & (a ^ b))) + 0xa8304613,17);
        b= c + lrot32(b + (a ^ (c & (d ^ a))) + 0xfd469501,22);
        a= b + lrot32(a + (d ^ (b & (c ^ d))) + 0x698098d8,7);
        d= a + lrot32(d + (c ^ (a & (b ^ c))) + 0x8b44f7af,12);
        c= d + lrot32(c + (b ^ (d & (a ^ b))) + 0xffff5bb1,17);
        b= c + lrot32(b + (a ^ (c & (d ^ a))) + 0x895cd7be,22);
        a= b + lrot32(a + (d ^ (b & (c ^ d))) + 0x6b901122,7);
        d= a + lrot32(d + (c ^ (a & (b ^ c))) + 0xfd987193,12);
        c= d + lrot32(c + (b ^ (d & (a ^ b))) + datae + 0xa679438e,17);
        b= c + lrot32(b + (a ^ (c & (d ^ a))) + 0x49b40821,22);

        // Round 2
        a= b + lrot32(a + (c ^ (d & (b ^ c))) + data1 + 0xf61e2562,5);
        d= a + lrot32(d + (b ^ (c & (a ^ b))) + 0xc040b340,9);
        c= d + lrot32(c + (a ^ (b & (d ^ a))) + 0x265e5a51,14);
        b= c + lrot32(b + (d ^ (a & (c ^ d))) + data0 + 0xe9b6c7aa,20);
        a= b + lrot32(a + (c ^ (d & (b ^ c))) + 0xd62f105d,5);
        d= a + lrot32(d + (b ^ (c & (a ^ b))) + 0x02441453,9);
        c= d + lrot32(c + (a ^ (b & (d ^ a))) + 0xd8a1e681,14);
        b= c + lrot32(b + (d ^ (a & (c ^ d))) + 0xe7d3fbc8,20);
        a= b + lrot32(a + (c ^ (d & (b ^ c))) + 0x21e1cde6,5);
        d= a + lrot32(d + (b ^ (c & (a ^ b))) + datae + 0xc33707d6,9);
        c= d + lrot32(c + (a ^ (b & (d ^ a))) + data3 + 0xf4d50d87,14);
        b= c + lrot32(b + (d ^ (a & (c ^ d))) + 0x455a14ed,20);
        a= b + lrot32(a + (c ^ (d & (b ^ c))) + 0xa9e3e905,5);
        d= a + lrot32(d + (b ^ (c & (a ^ b))) + data2 + 0xfcefa3f8,9);
        c= d + lrot32(c + (a ^ (b & (d ^ a))) + 0x676f02d9,14);
        b= c + lrot32(b + (d ^ (a & (c ^ d))) + 0x8d2a4c8a,20);

        // Round 3
        a= b + lrot32(a + (b ^ c ^ d) + 0xfffa3942,4);
        d= a + lrot32(d + (a ^ b ^ c) + 0x8771f681,11);
        c= d + lrot32(c + (d ^ a ^ b) + 0x6d9d6122,16);
        b= c + lrot32(b + (c ^ d ^ a) + datae + 0xfde5380c,23);
        a= b + lrot32(a + (b ^ c ^ d) + data1 + 0xa4beea44,4);
        d= a + lrot32(d + (a ^ b ^ c) + 0x4bdecfa9,11);
        c= d + lrot32(c + (d ^ a ^ b) + 0xf6bb4b60,16);
        b= c + lrot32(b + (c ^ d ^ a) + 0xbebfbc70,23);
        a= b + lrot32(a + (b ^ c ^ d) + 0x289b7ec6,4);
        d= a + lrot32(d + (a ^ b ^ c) + data0 + 0xeaa127fa,11);
        c= d + lrot32(c + (d ^ a ^ b) + data3 + 0xd4ef3085,16);
        b= c + lrot32(b + (c ^ d ^ a) + 0x04881d05,23);
        a= b + lrot32(a + (b ^ c ^ d) + 0xd9d4d039,4);
        d= a + lrot32(d + (a ^ b ^ c) + 0xe6db99e5,11);
        c= d + lrot32(c + (d ^ a ^ b) + 0x1fa27cf8,16);
        b= c + lrot32(b + (c ^ d ^ a) + data2 + 0xc4ac5665,23);

        // Round 4
        a= b + lrot32(a + (c ^ (b | (~ d))) + data0 + 0xf4292244,6);
        d= a + lrot32(d + (b ^ (a | (~ c))) + 0x432aff97,10);
        c= d + lrot32(c + (a ^ (d | (~ b))) + datae + 0xab9423a7,15);
        b= c + lrot32(b + (d ^ (c | (~ a))) + 0xfc93a039,21);
        a= b + lrot32(a + (c ^ (b | (~ d))) + 0x655b59c3,6);
        d= a + lrot32(d + (b ^ (a | (~ c))) + data3 + 0x8f0ccc92,10);
        c= d + lrot32(c + (a ^ (d | (~ b))) + 0xffeff47d,15);
        b= c + lrot32(b + (d ^ (c | (~ a))) + data1 + 0x85845dd1,21);
        a= b + lrot32(a + (c ^ (b | (~ d))) + 0x6fa87e4f,6);
        d= a + lrot32(d + (b ^ (a | (~ c))) + 0xfe2ce6e0,10);
        c= d + lrot32(c + (a ^ (d | (~ b))) + 0xa3014314,15);
        b= c + lrot32(b + (d ^ (c | (~ a))) + 0x4e0811a1,21);
        a= b + lrot32(a + (c ^ (b | (~ d))) + 0xf7537e82,6);
        d= a + lrot32(d + (b ^ (a | (~ c))) + 0xbd3af235,10);
        c= d + lrot32(c + (a ^ (d | (~ b))) + data2 + 0x2ad7d2bb,15);
        b= c + lrot32(b + (d ^ (c | (~ a))) + 0xeb86d391,21);

        // Only the first two digest words are needed, so the final additions
        // for c and d are skipped.
        a = 0x67452301 + a;
        b = 0xefcdab89 + b;

        // --- 5. reduction: hash -> next index -------------------------------
        nIndex.d[1] = b;
        nIndex.d[0] = a;
        nIndex.dd += col;       // column-dependent offset
        if (ind != 0)
            nIndex.dd += ind;

        // Bring the index below O_col by conditional subtraction of a halving
        // subtrahend; the original author marks this as the fast replacement
        // for `nIndex.dd %= O_col.dd`.
        // NOTE(review): relies on dellc being large enough and aligned with
        // O_col; if dell reaches 0 first, this loop never exits.
        dell.dd = dellc.dd;
        while (nIndex.dd >= O_col.dd)
        {
            if ( nIndex.dd >= dell.dd ) nIndex.dd -= dell.dd;
            dell.dd >>= 1;
        }
    }

    // Publish the column reached and this thread's current chain index.
    ol[0] = col;
    retd[tid+2] = nIndex.d[0];
    retd[tid+3] = nIndex.d[1];
    return;
}[/codebox]

Can you please help me understand the “<<<” and “>>>” part?

Thanks

cbuchner1 · April 24, 2010, 9:59pm

Time to read some CUDA manuals and “Getting Started” guides or tutorials. It’s all explained there. I don’t think anyone here would be willing to explain to you all the basics from the ground up.

NOTE: You will need the CUDA toolkit to compile the part with the <<< and >>>. It’s a vendor specific extension to the C/C++ language. It launches the computation on the graphics card.

eat_pb · April 25, 2010, 2:58am

I am willing to read them, but I can’t find them in the CUDA SDK. Can you point me to where I can find the “Getting Started” guide or the API documentation? All I have is example code.

EDIT:

Found a useful source now that I know what to look for. Thanks!

laughingrice · April 27, 2010, 11:40pm

They are part of the CUDA Toolkit, not the CUDA SDK.

Topic		Replies	Views
What does <<< >>> mean? CUDA Programming and Performance	6	4896	August 15, 2007
New to CUDA Some troubles with computing MD5 CUDA Programming and Performance	30	11595	November 28, 2008
Annoying problems with memory and/or syntax CUDA Programming and Performance	19	4972	April 8, 2008
.CU syntax "<<< >>>" CUDA Programming and Performance	1	1981	August 18, 2009
Generating 62^8 WiFi password in 0.5s I need cloud for testing CUDA Programming and Performance	35	968	March 10, 2023
Cuda program taking more time. CUDA Programming and Performance	15	7191	November 21, 2010
Salsa 20 Ransomware Brute Force program CUDA Programming and Performance	1	1187	May 26, 2016
CUDA SUCKS!!! Why <block, thread> cannot be judged by itself CUDA Programming and Performance	20	8429	February 17, 2015
더욱 쉬워진 CUDA 입문 Technical Blog - South Korea	1	83	September 6, 2024
CUDA 1.0 FAQ (OBSOLETE) Frequently asked questions about CUDA Announcements	2	75944	February 9, 2009

Anyone know what this does?

Related topics