Hello everybody
For my degree dissertation I coded a bruteforce NTML (windows-hash) cracker in C. Now I would like to transform this code on CUDA to be able to make a performance analysis. However I’m stuck…
Attached you can find my code (please bear in mind that it is only a code for testing purpose, so currently all user inputs are ignored). What is wrong?
I coded the cu-file with the thought that all my threads will come and execute the global function. So I could use threadIdx.x as a counter. But at the end I think this thought isn’t right.
Thanks in advance!
#include <string.h> //espinosa
#include <windows.h>
#include <stdio.h> //standard IO
#include <math.h> //f¸r pow
#include <conio.h> // f¸r _getch()
#include <ctype.h> // f¸r toupper() und tolower()
#include "I:\Program Files\NVIDIA Corporation\NVIDIA CUDA SDK\common\inc\cutil.h" //cutil funtions
#include <time.h> //f¸r clock
//This is the MD4 compress function
__device__ void ntlm_crypt(int* nt_buffer, int* output)
{
unsigned int INIT_A = 0x67452301;
unsigned int INIT_B = 0xefcdab89;
unsigned int INIT_C = 0x98badcfe;
unsigned int INIT_D = 0x10325476;
unsigned int a = INIT_A;
unsigned int b = INIT_B;
unsigned int c = INIT_C;
unsigned int d = INIT_D;
unsigned int SQRT_2 = 0x5a827999;
unsigned int SQRT_3 = 0x6ed9eba1;
/* Round 1 */
a += (d ^ (b & (c ^ d))) + nt_buffer[0] ;a = (a << 3 ) | (a >> 29);
d += (c ^ (a & (b ^ c))) + nt_buffer[1] ;d = (d << 7 ) | (d >> 25);
c += (b ^ (d & (a ^ b))) + nt_buffer[2] ;c = (c << 11) | (c >> 21);
b += (a ^ (c & (d ^ a))) + nt_buffer[3] ;b = (b << 19) | (b >> 13);
a += (d ^ (b & (c ^ d))) + nt_buffer[4] ;a = (a << 3 ) | (a >> 29);
d += (c ^ (a & (b ^ c))) + nt_buffer[5] ;d = (d << 7 ) | (d >> 25);
c += (b ^ (d & (a ^ b))) + nt_buffer[6] ;c = (c << 11) | (c >> 21);
b += (a ^ (c & (d ^ a))) + nt_buffer[7] ;b = (b << 19) | (b >> 13);
a += (d ^ (b & (c ^ d))) + nt_buffer[8] ;a = (a << 3 ) | (a >> 29);
d += (c ^ (a & (b ^ c))) + nt_buffer[9] ;d = (d << 7 ) | (d >> 25);
c += (b ^ (d & (a ^ b))) + nt_buffer[10];c = (c << 11) | (c >> 21);
b += (a ^ (c & (d ^ a))) + nt_buffer[11];b = (b << 19) | (b >> 13);
a += (d ^ (b & (c ^ d))) + nt_buffer[12];a = (a << 3 ) | (a >> 29);
d += (c ^ (a & (b ^ c))) + nt_buffer[13];d = (d << 7 ) | (d >> 25);
c += (b ^ (d & (a ^ b))) + nt_buffer[14];c = (c << 11) | (c >> 21);
b += (a ^ (c & (d ^ a))) + nt_buffer[15];b = (b << 19) | (b >> 13);
/* Round 2 */
a += ((b & (c | d)) | (c & d)) + nt_buffer[0] +SQRT_2; a = (a<<3 ) | (a>>29);
d += ((a & (b | c)) | (b & c)) + nt_buffer[4] +SQRT_2; d = (d<<5 ) | (d>>27);
c += ((d & (a | b)) | (a & b)) + nt_buffer[8] +SQRT_2; c = (c<<9 ) | (c>>23);
b += ((c & (d | a)) | (d & a)) + nt_buffer[12]+SQRT_2; b = (b<<13) | (b>>19);
a += ((b & (c | d)) | (c & d)) + nt_buffer[1] +SQRT_2; a = (a<<3 ) | (a>>29);
d += ((a & (b | c)) | (b & c)) + nt_buffer[5] +SQRT_2; d = (d<<5 ) | (d>>27);
c += ((d & (a | b)) | (a & b)) + nt_buffer[9] +SQRT_2; c = (c<<9 ) | (c>>23);
b += ((c & (d | a)) | (d & a)) + nt_buffer[13]+SQRT_2; b = (b<<13) | (b>>19);
a += ((b & (c | d)) | (c & d)) + nt_buffer[2] +SQRT_2; a = (a<<3 ) | (a>>29);
d += ((a & (b | c)) | (b & c)) + nt_buffer[6] +SQRT_2; d = (d<<5 ) | (d>>27);
c += ((d & (a | b)) | (a & b)) + nt_buffer[10]+SQRT_2; c = (c<<9 ) | (c>>23);
b += ((c & (d | a)) | (d & a)) + nt_buffer[14]+SQRT_2; b = (b<<13) | (b>>19);
a += ((b & (c | d)) | (c & d)) + nt_buffer[3] +SQRT_2; a = (a<<3 ) | (a>>29);
d += ((a & (b | c)) | (b & c)) + nt_buffer[7] +SQRT_2; d = (d<<5 ) | (d>>27);
c += ((d & (a | b)) | (a & b)) + nt_buffer[11]+SQRT_2; c = (c<<9 ) | (c>>23);
b += ((c & (d | a)) | (d & a)) + nt_buffer[15]+SQRT_2; b = (b<<13) | (b>>19);
/* Round 3 */
a += (d ^ c ^ b) + nt_buffer[0] + SQRT_3; a = (a << 3 ) | (a >> 29);
d += (c ^ b ^ a) + nt_buffer[8] + SQRT_3; d = (d << 9 ) | (d >> 23);
c += (b ^ a ^ d) + nt_buffer[4] + SQRT_3; c = (c << 11) | (c >> 21);
b += (a ^ d ^ c) + nt_buffer[12] + SQRT_3; b = (b << 15) | (b >> 17);
a += (d ^ c ^ b) + nt_buffer[2] + SQRT_3; a = (a << 3 ) | (a >> 29);
d += (c ^ b ^ a) + nt_buffer[10] + SQRT_3; d = (d << 9 ) | (d >> 23);
c += (b ^ a ^ d) + nt_buffer[6] + SQRT_3; c = (c << 11) | (c >> 21);
b += (a ^ d ^ c) + nt_buffer[14] + SQRT_3; b = (b << 15) | (b >> 17);
a += (d ^ c ^ b) + nt_buffer[1] + SQRT_3; a = (a << 3 ) | (a >> 29);
d += (c ^ b ^ a) + nt_buffer[9] + SQRT_3; d = (d << 9 ) | (d >> 23);
c += (b ^ a ^ d) + nt_buffer[5] + SQRT_3; c = (c << 11) | (c >> 21);
b += (a ^ d ^ c) + nt_buffer[13] + SQRT_3; b = (b << 15) | (b >> 17);
a += (d ^ c ^ b) + nt_buffer[3] + SQRT_3; a = (a << 3 ) | (a >> 29);
d += (c ^ b ^ a) + nt_buffer[11] + SQRT_3; d = (d << 9 ) | (d >> 23);
c += (b ^ a ^ d) + nt_buffer[7] + SQRT_3; c = (c << 11) | (c >> 21);
b += (a ^ d ^ c) + nt_buffer[15] + SQRT_3; b = (b << 15) | (b >> 17);
output[0] = a + INIT_A;
output[1] = b + INIT_B;
output[2] = c + INIT_C;
output[3] = d + INIT_D;
}
//This include the unicode conversion and the padding
__device__ void prepare_key(char* key, int length, int* nt_buffer)
{
int i=0;
memset(nt_buffer,0,16*4);
//The length of key need to be <= 27
for(;i<length/2;i++)
nt_buffer[i] = key[2*i] | (key[2*i+1]<<16);
//padding
if(length%2==1)
nt_buffer[i] = key[length-1] | 0x800000;
else
nt_buffer[i]=0x80;
//put the length
nt_buffer[14] = length << 4;
}
//This convert the output to hexadecimal form
__device__ void convert_hex(int* output, char* hex_format)
{
char itoa16[17] = "0123456789abcdef";
int i=0;
//Iterate the integer
for(;i<4;i++)
{
int j=0;
unsigned int n=output[i];
//iterate the bytes of the integer
for(;j<4;j++)
{
unsigned int convert=n%256;
hex_format[i*8+j*2+1]=itoa16[convert%16];
convert=convert/16;
hex_format[i*8+j*2+0]=itoa16[convert%16];
n=n/256;
}
}
//null terminate the string
hex_format[33]=0;
}
__device__ int adistrcmp (const char * src, const char * dst)
{
int ret = 0;
while( ! (ret = *(unsigned char *)src - *(unsigned char *)dst) && *dst)
++src, ++dst;
if ( ret < 0 )
ret = -1;
else if ( ret > 0 )
ret = 1;
return( ret );
}
void gotolower(char *str){
int i=0;
while (str[i]){
// Hier kann ich char = char, weil ich mich auf eine Referenz (*) beziehe.
str[i] = tolower(str[i]);
i++;
}
}
__global__ void bruteforce(char* g_charset, char* g_hash_hex, unsigned int* g_counter, char* g_pstorer, bool* g_found, int maxPwLength){
unsigned int* nt_buffer[16]; //memory zuordnen und von global ¸bergeben
unsigned int* output[4]; //memory zuordnen und von global ¸bergeben
char* hex_format[33];
*g_counter = *g_counter+2;
int position = 0;
unsigned int decounter;
decounter = blockIdx.x * blockDim.x + threadIdx.x;
//storer reinigen
for (int i = 0; i < maxPwLength; i++) {
g_pstorer[i] = '
#include <string.h> //espinosa
#include <windows.h>
#include <stdio.h> //standard IO
#include <math.h> //f¸r pow
#include <conio.h> // f¸r _getch()
#include <ctype.h> // f¸r toupper() und tolower()
#include “I:\Program Files\NVIDIA Corporation\NVIDIA CUDA SDK\common\inc\cutil.h” //cutil funtions
#include <time.h> //f¸r clock
//This is the MD4 compress function
device void ntlm_crypt(int* nt_buffer, int* output)
{
unsigned int INIT_A = 0x67452301;
unsigned int INIT_B = 0xefcdab89;
unsigned int INIT_C = 0x98badcfe;
unsigned int INIT_D = 0x10325476;
unsigned int a = INIT_A;
unsigned int b = INIT_B;
unsigned int c = INIT_C;
unsigned int d = INIT_D;
unsigned int SQRT_2 = 0x5a827999;
unsigned int SQRT_3 = 0x6ed9eba1;
/* Round 1 */
a += (d ^ (b & (c ^ d))) + nt_buffer[0] ;a = (a << 3 ) | (a >> 29);
d += (c ^ (a & (b ^ c))) + nt_buffer[1] ;d = (d << 7 ) | (d >> 25);
c += (b ^ (d & (a ^ b))) + nt_buffer[2] ;c = (c << 11) | (c >> 21);
b += (a ^ (c & (d ^ a))) + nt_buffer[3] ;b = (b << 19) | (b >> 13);
a += (d ^ (b & (c ^ d))) + nt_buffer[4] ;a = (a << 3 ) | (a >> 29);
d += (c ^ (a & (b ^ c))) + nt_buffer[5] ;d = (d << 7 ) | (d >> 25);
c += (b ^ (d & (a ^ b))) + nt_buffer[6] ;c = (c << 11) | (c >> 21);
b += (a ^ (c & (d ^ a))) + nt_buffer[7] ;b = (b << 19) | (b >> 13);
a += (d ^ (b & (c ^ d))) + nt_buffer[8] ;a = (a << 3 ) | (a >> 29);
d += (c ^ (a & (b ^ c))) + nt_buffer[9] ;d = (d << 7 ) | (d >> 25);
c += (b ^ (d & (a ^ b))) + nt_buffer[10];c = (c << 11) | (c >> 21);
b += (a ^ (c & (d ^ a))) + nt_buffer[11];b = (b << 19) | (b >> 13);
a += (d ^ (b & (c ^ d))) + nt_buffer[12];a = (a << 3 ) | (a >> 29);
d += (c ^ (a & (b ^ c))) + nt_buffer[13];d = (d << 7 ) | (d >> 25);
c += (b ^ (d & (a ^ b))) + nt_buffer[14];c = (c << 11) | (c >> 21);
b += (a ^ (c & (d ^ a))) + nt_buffer[15];b = (b << 19) | (b >> 13);
/* Round 2 */
a += ((b & (c | d)) | (c & d)) + nt_buffer[0] +SQRT_2; a = (a<<3 ) | (a>>29);
d += ((a & (b | c)) | (b & c)) + nt_buffer[4] +SQRT_2; d = (d<<5 ) | (d>>27);
c += ((d & (a | b)) | (a & b)) + nt_buffer[8] +SQRT_2; c = (c<<9 ) | (c>>23);
b += ((c & (d | a)) | (d & a)) + nt_buffer[12]+SQRT_2; b = (b<<13) | (b>>19);
a += ((b & (c | d)) | (c & d)) + nt_buffer[1] +SQRT_2; a = (a<<3 ) | (a>>29);
d += ((a & (b | c)) | (b & c)) + nt_buffer[5] +SQRT_2; d = (d<<5 ) | (d>>27);
c += ((d & (a | b)) | (a & b)) + nt_buffer[9] +SQRT_2; c = (c<<9 ) | (c>>23);
b += ((c & (d | a)) | (d & a)) + nt_buffer[13]+SQRT_2; b = (b<<13) | (b>>19);
a += ((b & (c | d)) | (c & d)) + nt_buffer[2] +SQRT_2; a = (a<<3 ) | (a>>29);
d += ((a & (b | c)) | (b & c)) + nt_buffer[6] +SQRT_2; d = (d<<5 ) | (d>>27);
c += ((d & (a | b)) | (a & b)) + nt_buffer[10]+SQRT_2; c = (c<<9 ) | (c>>23);
b += ((c & (d | a)) | (d & a)) + nt_buffer[14]+SQRT_2; b = (b<<13) | (b>>19);
a += ((b & (c | d)) | (c & d)) + nt_buffer[3] +SQRT_2; a = (a<<3 ) | (a>>29);
d += ((a & (b | c)) | (b & c)) + nt_buffer[7] +SQRT_2; d = (d<<5 ) | (d>>27);
c += ((d & (a | b)) | (a & b)) + nt_buffer[11]+SQRT_2; c = (c<<9 ) | (c>>23);
b += ((c & (d | a)) | (d & a)) + nt_buffer[15]+SQRT_2; b = (b<<13) | (b>>19);
/* Round 3 */
a += (d ^ c ^ b) + nt_buffer[0] + SQRT_3; a = (a << 3 ) | (a >> 29);
d += (c ^ b ^ a) + nt_buffer[8] + SQRT_3; d = (d << 9 ) | (d >> 23);
c += (b ^ a ^ d) + nt_buffer[4] + SQRT_3; c = (c << 11) | (c >> 21);
b += (a ^ d ^ c) + nt_buffer[12] + SQRT_3; b = (b << 15) | (b >> 17);
a += (d ^ c ^ b) + nt_buffer[2] + SQRT_3; a = (a << 3 ) | (a >> 29);
d += (c ^ b ^ a) + nt_buffer[10] + SQRT_3; d = (d << 9 ) | (d >> 23);
c += (b ^ a ^ d) + nt_buffer[6] + SQRT_3; c = (c << 11) | (c >> 21);
b += (a ^ d ^ c) + nt_buffer[14] + SQRT_3; b = (b << 15) | (b >> 17);
a += (d ^ c ^ b) + nt_buffer[1] + SQRT_3; a = (a << 3 ) | (a >> 29);
d += (c ^ b ^ a) + nt_buffer[9] + SQRT_3; d = (d << 9 ) | (d >> 23);
c += (b ^ a ^ d) + nt_buffer[5] + SQRT_3; c = (c << 11) | (c >> 21);
b += (a ^ d ^ c) + nt_buffer[13] + SQRT_3; b = (b << 15) | (b >> 17);
a += (d ^ c ^ b) + nt_buffer[3] + SQRT_3; a = (a << 3 ) | (a >> 29);
d += (c ^ b ^ a) + nt_buffer[11] + SQRT_3; d = (d << 9 ) | (d >> 23);
c += (b ^ a ^ d) + nt_buffer[7] + SQRT_3; c = (c << 11) | (c >> 21);
b += (a ^ d ^ c) + nt_buffer[15] + SQRT_3; b = (b << 15) | (b >> 17);
output[0] = a + INIT_A;
output[1] = b + INIT_B;
output[2] = c + INIT_C;
output[3] = d + INIT_D;
}
//This include the unicode conversion and the padding
device void prepare_key(char* key, int length, int* nt_buffer)
{
int i=0;
memset(nt_buffer,0,16*4);
//The length of key need to be <= 27
for(;i<length/2;i++)
nt_buffer[i] = key[2i] | (key[2i+1]<<16);
//padding
if(length%2==1)
nt_buffer[i] = key[length-1] | 0x800000;
else
nt_buffer[i]=0x80;
//put the length
nt_buffer[14] = length << 4;
}
//This convert the output to hexadecimal form
device void convert_hex(int* output, char* hex_format)
{
char itoa16[17] = "0123456789abcdef";
int i=0;
//Iterate the integer
for(;i<4;i++)
{
int j=0;
unsigned int n=output[i];
//iterate the bytes of the integer
for(;j<4;j++)
{
unsigned int convert=n%256;
hex_format[i*8+j*2+1]=itoa16[convert%16];
convert=convert/16;
hex_format[i*8+j*2+0]=itoa16[convert%16];
n=n/256;
}
}
//null terminate the string
hex_format[33]=0;
}
device int adistrcmp (const char * src, const char * dst)
{
int ret = 0;
while( ! (ret = *(unsigned char *)src - *(unsigned char *)dst) && *dst)
++src, ++dst;
if ( ret < 0 )
ret = -1;
else if ( ret > 0 )
ret = 1;
return( ret );
}
void gotolower(char *str){
int i=0;
while (str[i]){
// Hier kann ich char = char, weil ich mich auf eine Referenz (*) beziehe.
str[i] = tolower(str[i]);
i++;
}
}
global void bruteforce(char* g_charset, char* g_hash_hex, unsigned int* g_counter, char* g_pstorer, bool* g_found, int maxPwLength){
unsigned int* nt_buffer[16]; //memory zuordnen und von global ¸bergeben
unsigned int* output[4]; //memory zuordnen und von global ¸bergeben
char* hex_format[33];
*g_counter = *g_counter+2;
int position = 0;
unsigned int decounter;
decounter = blockIdx.x * blockDim.x + threadIdx.x;
//storer reinigen
for (int i = 0; i < maxPwLength; i++) {
g_pstorer[i] = ’
#include <string.h> //espinosa
#include <windows.h>
#include <stdio.h> //standard IO
#include <math.h> //f¸r pow
#include <conio.h> // f¸r _getch()
#include <ctype.h> // f¸r toupper() und tolower()
#include "I:\Program Files\NVIDIA Corporation\NVIDIA CUDA SDK\common\inc\cutil.h" //cutil funtions
#include <time.h> //f¸r clock
//This is the MD4 compress function
__device__ void ntlm_crypt(int* nt_buffer, int* output)
{
unsigned int INIT_A = 0x67452301;
unsigned int INIT_B = 0xefcdab89;
unsigned int INIT_C = 0x98badcfe;
unsigned int INIT_D = 0x10325476;
unsigned int a = INIT_A;
unsigned int b = INIT_B;
unsigned int c = INIT_C;
unsigned int d = INIT_D;
unsigned int SQRT_2 = 0x5a827999;
unsigned int SQRT_3 = 0x6ed9eba1;
/* Round 1 */
a += (d ^ (b & (c ^ d))) + nt_buffer[0] ;a = (a << 3 ) | (a >> 29);
d += (c ^ (a & (b ^ c))) + nt_buffer[1] ;d = (d << 7 ) | (d >> 25);
c += (b ^ (d & (a ^ b))) + nt_buffer[2] ;c = (c << 11) | (c >> 21);
b += (a ^ (c & (d ^ a))) + nt_buffer[3] ;b = (b << 19) | (b >> 13);
a += (d ^ (b & (c ^ d))) + nt_buffer[4] ;a = (a << 3 ) | (a >> 29);
d += (c ^ (a & (b ^ c))) + nt_buffer[5] ;d = (d << 7 ) | (d >> 25);
c += (b ^ (d & (a ^ b))) + nt_buffer[6] ;c = (c << 11) | (c >> 21);
b += (a ^ (c & (d ^ a))) + nt_buffer[7] ;b = (b << 19) | (b >> 13);
a += (d ^ (b & (c ^ d))) + nt_buffer[8] ;a = (a << 3 ) | (a >> 29);
d += (c ^ (a & (b ^ c))) + nt_buffer[9] ;d = (d << 7 ) | (d >> 25);
c += (b ^ (d & (a ^ b))) + nt_buffer[10];c = (c << 11) | (c >> 21);
b += (a ^ (c & (d ^ a))) + nt_buffer[11];b = (b << 19) | (b >> 13);
a += (d ^ (b & (c ^ d))) + nt_buffer[12];a = (a << 3 ) | (a >> 29);
d += (c ^ (a & (b ^ c))) + nt_buffer[13];d = (d << 7 ) | (d >> 25);
c += (b ^ (d & (a ^ b))) + nt_buffer[14];c = (c << 11) | (c >> 21);
b += (a ^ (c & (d ^ a))) + nt_buffer[15];b = (b << 19) | (b >> 13);
/* Round 2 */
a += ((b & (c | d)) | (c & d)) + nt_buffer[0] +SQRT_2; a = (a<<3 ) | (a>>29);
d += ((a & (b | c)) | (b & c)) + nt_buffer[4] +SQRT_2; d = (d<<5 ) | (d>>27);
c += ((d & (a | b)) | (a & b)) + nt_buffer[8] +SQRT_2; c = (c<<9 ) | (c>>23);
b += ((c & (d | a)) | (d & a)) + nt_buffer[12]+SQRT_2; b = (b<<13) | (b>>19);
a += ((b & (c | d)) | (c & d)) + nt_buffer[1] +SQRT_2; a = (a<<3 ) | (a>>29);
d += ((a & (b | c)) | (b & c)) + nt_buffer[5] +SQRT_2; d = (d<<5 ) | (d>>27);
c += ((d & (a | b)) | (a & b)) + nt_buffer[9] +SQRT_2; c = (c<<9 ) | (c>>23);
b += ((c & (d | a)) | (d & a)) + nt_buffer[13]+SQRT_2; b = (b<<13) | (b>>19);
a += ((b & (c | d)) | (c & d)) + nt_buffer[2] +SQRT_2; a = (a<<3 ) | (a>>29);
d += ((a & (b | c)) | (b & c)) + nt_buffer[6] +SQRT_2; d = (d<<5 ) | (d>>27);
c += ((d & (a | b)) | (a & b)) + nt_buffer[10]+SQRT_2; c = (c<<9 ) | (c>>23);
b += ((c & (d | a)) | (d & a)) + nt_buffer[14]+SQRT_2; b = (b<<13) | (b>>19);
a += ((b & (c | d)) | (c & d)) + nt_buffer[3] +SQRT_2; a = (a<<3 ) | (a>>29);
d += ((a & (b | c)) | (b & c)) + nt_buffer[7] +SQRT_2; d = (d<<5 ) | (d>>27);
c += ((d & (a | b)) | (a & b)) + nt_buffer[11]+SQRT_2; c = (c<<9 ) | (c>>23);
b += ((c & (d | a)) | (d & a)) + nt_buffer[15]+SQRT_2; b = (b<<13) | (b>>19);
/* Round 3 */
a += (d ^ c ^ b) + nt_buffer[0] + SQRT_3; a = (a << 3 ) | (a >> 29);
d += (c ^ b ^ a) + nt_buffer[8] + SQRT_3; d = (d << 9 ) | (d >> 23);
c += (b ^ a ^ d) + nt_buffer[4] + SQRT_3; c = (c << 11) | (c >> 21);
b += (a ^ d ^ c) + nt_buffer[12] + SQRT_3; b = (b << 15) | (b >> 17);
a += (d ^ c ^ b) + nt_buffer[2] + SQRT_3; a = (a << 3 ) | (a >> 29);
d += (c ^ b ^ a) + nt_buffer[10] + SQRT_3; d = (d << 9 ) | (d >> 23);
c += (b ^ a ^ d) + nt_buffer[6] + SQRT_3; c = (c << 11) | (c >> 21);
b += (a ^ d ^ c) + nt_buffer[14] + SQRT_3; b = (b << 15) | (b >> 17);
a += (d ^ c ^ b) + nt_buffer[1] + SQRT_3; a = (a << 3 ) | (a >> 29);
d += (c ^ b ^ a) + nt_buffer[9] + SQRT_3; d = (d << 9 ) | (d >> 23);
c += (b ^ a ^ d) + nt_buffer[5] + SQRT_3; c = (c << 11) | (c >> 21);
b += (a ^ d ^ c) + nt_buffer[13] + SQRT_3; b = (b << 15) | (b >> 17);
a += (d ^ c ^ b) + nt_buffer[3] + SQRT_3; a = (a << 3 ) | (a >> 29);
d += (c ^ b ^ a) + nt_buffer[11] + SQRT_3; d = (d << 9 ) | (d >> 23);
c += (b ^ a ^ d) + nt_buffer[7] + SQRT_3; c = (c << 11) | (c >> 21);
b += (a ^ d ^ c) + nt_buffer[15] + SQRT_3; b = (b << 15) | (b >> 17);
output[0] = a + INIT_A;
output[1] = b + INIT_B;
output[2] = c + INIT_C;
output[3] = d + INIT_D;
}
//This include the unicode conversion and the padding
__device__ void prepare_key(char* key, int length, int* nt_buffer)
{
int i=0;
memset(nt_buffer,0,16*4);
//The length of key need to be <= 27
for(;i<length/2;i++)
nt_buffer[i] = key[2*i] | (key[2*i+1]<<16);
//padding
if(length%2==1)
nt_buffer[i] = key[length-1] | 0x800000;
else
nt_buffer[i]=0x80;
//put the length
nt_buffer[14] = length << 4;
}
//This convert the output to hexadecimal form
__device__ void convert_hex(int* output, char* hex_format)
{
char itoa16[17] = "0123456789abcdef";
int i=0;
//Iterate the integer
for(;i<4;i++)
{
int j=0;
unsigned int n=output[i];
//iterate the bytes of the integer
for(;j<4;j++)
{
unsigned int convert=n%256;
hex_format[i*8+j*2+1]=itoa16[convert%16];
convert=convert/16;
hex_format[i*8+j*2+0]=itoa16[convert%16];
n=n/256;
}
}
//null terminate the string
hex_format[33]=0;
}
__device__ int adistrcmp (const char * src, const char * dst)
{
int ret = 0;
while( ! (ret = *(unsigned char *)src - *(unsigned char *)dst) && *dst)
++src, ++dst;
if ( ret < 0 )
ret = -1;
else if ( ret > 0 )
ret = 1;
return( ret );
}
void gotolower(char *str){
int i=0;
while (str[i]){
// Hier kann ich char = char, weil ich mich auf eine Referenz (*) beziehe.
str[i] = tolower(str[i]);
i++;
}
}
__global__ void bruteforce(char* g_charset, char* g_hash_hex, unsigned int* g_counter, char* g_pstorer, bool* g_found, int maxPwLength){
unsigned int* nt_buffer[16]; //memory zuordnen und von global ¸bergeben
unsigned int* output[4]; //memory zuordnen und von global ¸bergeben
char* hex_format[33];
*g_counter = *g_counter+2;
int position = 0;
unsigned int decounter;
decounter = blockIdx.x * blockDim.x + threadIdx.x;
//storer reinigen
for (int i = 0; i < maxPwLength; i++) {
g_pstorer[i] = '\0';
}
do{
g_pstorer[position] = g_charset[decounter%((int)(sizeof(g_charset)/sizeof(char)))];
decounter = decounter/((int)(sizeof(g_charset)/sizeof(char)));
position++;
} while (decounter != 0);
prepare_key(g_pstorer, position, (int*)nt_buffer);
__syncthreads();
ntlm_crypt((int*)nt_buffer, (int*)output);
__syncthreads();
convert_hex((int*)output, (char*)hex_format);
__syncthreads();
//Vergleich machen...
if (adistrcmp((char*)hex_format,g_hash_hex) == 0){
*g_found = true;
}
__syncthreads();
}
void print_device_information(){
int deviceCount;
CUDA_SAFE_CALL(cudaGetDeviceCount(&deviceCount));
if (deviceCount == 0)
printf("There is no device supporting CUDA\n");
int dev;
for (dev = 0; dev < deviceCount; ++dev) {
cudaDeviceProp deviceProp;
CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, dev));
if (dev == 0) {
if (deviceProp.major == 9999 && deviceProp.minor == 9999)
printf("There is no device supporting CUDA.\n");
else if (deviceCount == 1)
printf("There is 1 device supporting CUDA\n");
else
printf("There are %d devices supporting CUDA\n", deviceCount);
}
printf("\nDevice %d: \"%s\"\n", dev, deviceProp.name);
printf(" Major revision number: %d\n",
deviceProp.major);
printf(" Minor revision number: %d\n",
deviceProp.minor);
printf(" Total amount of global memory: %u bytes\n",
deviceProp.totalGlobalMem);
#if CUDART_VERSION >= 2000
printf(" Number of multiprocessors: %d\n",
deviceProp.multiProcessorCount);
printf(" Number of cores: %d\n",
8 * deviceProp.multiProcessorCount);
#endif
printf(" Total amount of constant memory: %u bytes\n",
deviceProp.totalConstMem);
printf(" Total amount of shared memory per block: %u bytes\n",
deviceProp.sharedMemPerBlock);
printf(" Total number of registers available per block: %d\n",
deviceProp.regsPerBlock);
printf(" Warp size: %d\n",
deviceProp.warpSize);
printf(" Maximum number of threads per block: %d\n",
deviceProp.maxThreadsPerBlock);
printf(" Maximum sizes of each dimension of a block: %d x %d x %d\n",
deviceProp.maxThreadsDim[0],
deviceProp.maxThreadsDim[1],
deviceProp.maxThreadsDim[2]);
printf(" Maximum sizes of each dimension of a grid: %d x %d x %d\n",
deviceProp.maxGridSize[0],
deviceProp.maxGridSize[1],
deviceProp.maxGridSize[2]);
printf(" Maximum memory pitch: %u bytes\n",
deviceProp.memPitch);
printf(" Texture alignment: %u bytes\n",
deviceProp.textureAlignment);
printf(" Clock rate: %.2f GHz\n",
deviceProp.clockRate * 1e-6f);
#if CUDART_VERSION >= 2000
printf(" Concurrent copy and execution: %s\n",
deviceProp.deviceOverlap ? "Yes" : "No");
#endif
}
}
int main(int argc, char* argv[])
{
const int maxPwLength = 10;
unsigned int start = 0;
unsigned int ende = 1000;
char charset[63] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
char hash_hex[33];
unsigned int counter;
char pstorer[maxPwLength];
bool found = false;
//nur main
char switcher[100];
int pwLength;
// -> header
printf(" \n");
printf(" ################################################################# \n");
printf(" # # \n");
printf(" # NTLM Brutforce Attack by Adrian Schwalller # \n");
printf(" # Algorithm by Alain Espinosa # \n");
printf(" # # \n");
printf(" ################################################################# \n");
printf(" \n");
printf(" \n");
printf("Press any key to start the programm...\n");
// _getch() iest ein Zeichen (kˆnnte es auch zur¸ckgeben)
_getch();
//print device information
print_device_information();
printf("Please insert the HTML-Hash-Value:\n");
scanf ("%s",hash_hex);
gotolower(hash_hex);
printf("Would you like to insert a maxPwLength then insert 'pw'\n. Else you work with the counter.\n");
scanf ("%s",switcher);
if (strcmp(switcher, "pw") == 0){
printf("\nPlease insert the max passwort length:\n");
scanf ("%d",&pwLength);
start = 0;
ende = (int)pow((double)strlen(charset), pwLength);
}else{
printf("\nPlease initialise the counter:\n");
scanf ("%d",&start);
printf("\nPlease insert the maxCount for the counter:\n");
scanf ("%d",&ende);
pwLength = 1;
for(unsigned int i = ende; i > (unsigned int)strlen(charset); i = (unsigned int) ceil((double)i/62)){
pwLength++;
}
}
printf("PwLength: %d\n", pwLength);
printf("Start: %d\n", start);
printf("Ende: %d\n", ende);
//Variablen verschieben
char* g_charset;
char* g_hash_hex;
unsigned int* g_counter;
char* g_pstorer;
bool* g_found;
CUDA_SAFE_CALL(cudaMalloc((void**) &g_charset, sizeof(char)*strlen(charset)));
CUDA_SAFE_CALL(cudaMalloc((void**) &g_hash_hex, sizeof(char)*strlen(hash_hex)));
CUDA_SAFE_CALL(cudaMalloc((void**) &g_counter, sizeof(int)));
CUDA_SAFE_CALL(cudaMalloc((void**) &g_pstorer, sizeof(char)*maxPwLength));
CUDA_SAFE_CALL(cudaMalloc((void**) &g_found, sizeof(bool)));
printf("I'am here0");
CUDA_SAFE_CALL(cudaMemcpy(g_charset, charset, sizeof(char)*strlen(charset), cudaMemcpyHostToDevice));
CUDA_SAFE_CALL(cudaMemcpy(g_hash_hex, hash_hex, sizeof(char)*strlen(hash_hex), cudaMemcpyHostToDevice));
//CUDA_SAFE_CALL(cudaMemcpy(g_counter, counter, sizeof(int), cudaMemcpyHostToDevice));
CUDA_SAFE_CALL(cudaMemcpy(g_pstorer, pstorer, sizeof(char)*maxPwLength, cudaMemcpyHostToDevice));
//CUDA_SAFE_CALL(cudaMemcpy(g_found, found, sizeof(bool), cudaMemcpyHostToDevice));
int difference = ende - start;
//int words_per_thread = 10;
const int threads_per_block = 512;
int threads = 512;
int blocks = 65535; //65535*512=33553920
dim3 grid(blocks,1);
dim3 threadblock(threads,1,1);
printf("I'am here1");
clock_t t1,t2;
t1=clock();
bruteforce<<< grid, threadblock >>>(g_charset, g_hash_hex, g_counter, g_pstorer, g_found, maxPwLength);
t2=clock();
double time=((float)(t2-t1)/CLOCKS_PER_SEC);
printf("I'am here2");
CUDA_SAFE_CALL(cudaMemcpy(&counter, g_counter, sizeof(int), cudaMemcpyDeviceToHost));
CUDA_SAFE_CALL(cudaMemcpy(pstorer, g_pstorer, maxPwLength, cudaMemcpyDeviceToHost));
CUDA_SAFE_CALL(cudaMemcpy(&found, g_found, sizeof(bool), cudaMemcpyDeviceToHost));
if(found){
printf("\nThe Password is: %s\n", pstorer);
}else{
printf("\nNo Password found in the list.\n");
}
printf("\n\n + Done. \n\n");
printf("%d words checked\n\n", counter);
printf("Rechenzeit: %f\n\n", time);
getchar(); //equivalent zu _getch()
return 0;
}
';
}
do{
g_pstorer[position] = g_charset[decounter%((int)(sizeof(g_charset)/sizeof(char)))];
decounter = decounter/((int)(sizeof(g_charset)/sizeof(char)));
position++;
} while (decounter != 0);
prepare_key(g_pstorer, position, (int*)nt_buffer);
__syncthreads();
ntlm_crypt((int*)nt_buffer, (int*)output);
__syncthreads();
convert_hex((int*)output, (char*)hex_format);
__syncthreads();
//Vergleich machen…
if (adistrcmp((char*)hex_format,g_hash_hex) == 0){
*g_found = true;
}
__syncthreads();
}
void print_device_information(){
int deviceCount;
CUDA_SAFE_CALL(cudaGetDeviceCount(&deviceCount));
if (deviceCount == 0)
printf(“There is no device supporting CUDA\n”);
int dev;
for (dev = 0; dev < deviceCount; ++dev) {
cudaDeviceProp deviceProp;
CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, dev));
if (dev == 0) {
if (deviceProp.major == 9999 && deviceProp.minor == 9999)
printf(“There is no device supporting CUDA.\n”);
else if (deviceCount == 1)
printf(“There is 1 device supporting CUDA\n”);
else
printf(“There are %d devices supporting CUDA\n”, deviceCount);
}
printf(“\nDevice %d: "%s"\n”, dev, deviceProp.name);
printf(" Major revision number: %d\n",
deviceProp.major);
printf(" Minor revision number: %d\n",
deviceProp.minor);
printf(" Total amount of global memory: %u bytes\n",
deviceProp.totalGlobalMem);
#if CUDART_VERSION >= 2000
printf(" Number of multiprocessors: %d\n",
deviceProp.multiProcessorCount);
printf(" Number of cores: %d\n",
8 * deviceProp.multiProcessorCount);
printf(" Total amount of constant memory: %u bytes\n",
deviceProp.totalConstMem);
printf(" Total amount of shared memory per block: %u bytes\n",
deviceProp.sharedMemPerBlock);
printf(" Total number of registers available per block: %d\n",
deviceProp.regsPerBlock);
printf(" Warp size: %d\n",
deviceProp.warpSize);
printf(" Maximum number of threads per block: %d\n",
deviceProp.maxThreadsPerBlock);
printf(" Maximum sizes of each dimension of a block: %d x %d x %d\n",
deviceProp.maxThreadsDim[0],
deviceProp.maxThreadsDim[1],
deviceProp.maxThreadsDim[2]);
printf(" Maximum sizes of each dimension of a grid: %d x %d x %d\n",
deviceProp.maxGridSize[0],
deviceProp.maxGridSize[1],
deviceProp.maxGridSize[2]);
printf(" Maximum memory pitch: %u bytes\n",
deviceProp.memPitch);
printf(" Texture alignment: %u bytes\n",
deviceProp.textureAlignment);
printf(" Clock rate: %.2f GHz\n",
deviceProp.clockRate * 1e-6f);
#if CUDART_VERSION >= 2000
printf(" Concurrent copy and execution: %s\n",
deviceProp.deviceOverlap ? “Yes” : “No”);
}
}
int main(int argc, char* argv)
{
const int maxPwLength = 10;
unsigned int start = 0;
unsigned int ende = 1000;
char charset[63] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
char hash_hex[33];
unsigned int counter;
char pstorer[maxPwLength];
bool found = false;
//nur main
char switcher[100];
int pwLength;
// -> header
printf(" \n");
printf(" ################################################################# \n");
printf(" # # \n");
printf(" # NTLM Brutforce Attack by Adrian Schwalller # \n");
printf(" # Algorithm by Alain Espinosa # \n");
printf(" # # \n");
printf(" ################################################################# \n");
printf(" \n");
printf(" \n");
printf("Press any key to start the programm...\n");
// _getch() iest ein Zeichen (kˆnnte es auch zur¸ckgeben)
_getch();
//print device information
print_device_information();
printf("Please insert the HTML-Hash-Value:\n");
scanf ("%s",hash_hex);
gotolower(hash_hex);
printf("Would you like to insert a maxPwLength then insert 'pw'\n. Else you work with the counter.\n");
scanf ("%s",switcher);
if (strcmp(switcher, "pw") == 0){
printf(“\nPlease insert the max passwort length:\n”);
scanf (“%d”,&pwLength);
start = 0;
ende = (int)pow((double)strlen(charset), pwLength);
}else{
printf(“\nPlease initialise the counter:\n”);
scanf (“%d”,&start);
printf(“\nPlease insert the maxCount for the counter:\n”);
scanf (“%d”,&ende);
pwLength = 1;
for(unsigned int i = ende; i > (unsigned int)strlen(charset); i = (unsigned int) ceil((double)i/62)){
pwLength++;
}
}
printf("PwLength: %d\n", pwLength);
printf("Start: %d\n", start);
printf("Ende: %d\n", ende);
//Variablen verschieben
char* g_charset;
char* g_hash_hex;
unsigned int* g_counter;
char* g_pstorer;
bool* g_found;
CUDA_SAFE_CALL(cudaMalloc((void**) &g_charset, sizeof(char)*strlen(charset)));
CUDA_SAFE_CALL(cudaMalloc((void**) &g_hash_hex, sizeof(char)*strlen(hash_hex)));
CUDA_SAFE_CALL(cudaMalloc((void**) &g_counter, sizeof(int)));
CUDA_SAFE_CALL(cudaMalloc((void**) &g_pstorer, sizeof(char)*maxPwLength));
CUDA_SAFE_CALL(cudaMalloc((void**) &g_found, sizeof(bool)));
printf("I'am here0");
CUDA_SAFE_CALL(cudaMemcpy(g_charset, charset, sizeof(char)*strlen(charset), cudaMemcpyHostToDevice));
CUDA_SAFE_CALL(cudaMemcpy(g_hash_hex, hash_hex, sizeof(char)*strlen(hash_hex), cudaMemcpyHostToDevice));
//CUDA_SAFE_CALL(cudaMemcpy(g_counter, counter, sizeof(int), cudaMemcpyHostToDevice));
CUDA_SAFE_CALL(cudaMemcpy(g_pstorer, pstorer, sizeof(char)*maxPwLength, cudaMemcpyHostToDevice));
//CUDA_SAFE_CALL(cudaMemcpy(g_found, found, sizeof(bool), cudaMemcpyHostToDevice));
int difference = ende - start;
//int words_per_thread = 10;
const int threads_per_block = 512;
int threads = 512;
int blocks = 65535; //65535*512=33553920
dim3 grid(blocks,1);
dim3 threadblock(threads,1,1);
printf("I'am here1");
clock_t t1,t2;
t1=clock();
bruteforce<<< grid, threadblock >>>(g_charset, g_hash_hex, g_counter, g_pstorer, g_found, maxPwLength);
t2=clock();
double time=((float)(t2-t1)/CLOCKS_PER_SEC);
printf("I'am here2");
CUDA_SAFE_CALL(cudaMemcpy(&counter, g_counter, sizeof(int), cudaMemcpyDeviceToHost));
CUDA_SAFE_CALL(cudaMemcpy(pstorer, g_pstorer, maxPwLength, cudaMemcpyDeviceToHost));
CUDA_SAFE_CALL(cudaMemcpy(&found, g_found, sizeof(bool), cudaMemcpyDeviceToHost));
if(found){
printf(“\nThe Password is: %s\n”, pstorer);
}else{
printf(“\nNo Password found in the list.\n”);
}
printf("\n\n + Done. \n\n");
printf("%d words checked\n\n", counter);
printf("Rechenzeit: %f\n\n", time);
getchar(); //equivalent zu _getch()
return 0;
}
';
}
do{
g_pstorer[position] = g_charset[decounter%((int)(sizeof(g_charset)/sizeof(char)))];
decounter = decounter/((int)(sizeof(g_charset)/sizeof(char)));
position++;
} while (decounter != 0);
prepare_key(g_pstorer, position, (int*)nt_buffer);
__syncthreads();
ntlm_crypt((int*)nt_buffer, (int*)output);
__syncthreads();
convert_hex((int*)output, (char*)hex_format);
__syncthreads();
//Vergleich machen...
if (adistrcmp((char*)hex_format,g_hash_hex) == 0){
*g_found = true;
}
__syncthreads();
}
void print_device_information(){
int deviceCount;
CUDA_SAFE_CALL(cudaGetDeviceCount(&deviceCount));
if (deviceCount == 0)
printf("There is no device supporting CUDA\n");
int dev;
for (dev = 0; dev < deviceCount; ++dev) {
cudaDeviceProp deviceProp;
CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, dev));
if (dev == 0) {
if (deviceProp.major == 9999 && deviceProp.minor == 9999)
printf("There is no device supporting CUDA.\n");
else if (deviceCount == 1)
printf("There is 1 device supporting CUDA\n");
else
printf("There are %d devices supporting CUDA\n", deviceCount);
}
printf("\nDevice %d: \"%s\"\n", dev, deviceProp.name);
printf(" Major revision number: %d\n",
deviceProp.major);
printf(" Minor revision number: %d\n",
deviceProp.minor);
printf(" Total amount of global memory: %u bytes\n",
deviceProp.totalGlobalMem);
#if CUDART_VERSION >= 2000
printf(" Number of multiprocessors: %d\n",
deviceProp.multiProcessorCount);
printf(" Number of cores: %d\n",
8 * deviceProp.multiProcessorCount);
#endif
printf(" Total amount of constant memory: %u bytes\n",
deviceProp.totalConstMem);
printf(" Total amount of shared memory per block: %u bytes\n",
deviceProp.sharedMemPerBlock);
printf(" Total number of registers available per block: %d\n",
deviceProp.regsPerBlock);
printf(" Warp size: %d\n",
deviceProp.warpSize);
printf(" Maximum number of threads per block: %d\n",
deviceProp.maxThreadsPerBlock);
printf(" Maximum sizes of each dimension of a block: %d x %d x %d\n",
deviceProp.maxThreadsDim[0],
deviceProp.maxThreadsDim[1],
deviceProp.maxThreadsDim[2]);
printf(" Maximum sizes of each dimension of a grid: %d x %d x %d\n",
deviceProp.maxGridSize[0],
deviceProp.maxGridSize[1],
deviceProp.maxGridSize[2]);
printf(" Maximum memory pitch: %u bytes\n",
deviceProp.memPitch);
printf(" Texture alignment: %u bytes\n",
deviceProp.textureAlignment);
printf(" Clock rate: %.2f GHz\n",
deviceProp.clockRate * 1e-6f);
#if CUDART_VERSION >= 2000
printf(" Concurrent copy and execution: %s\n",
deviceProp.deviceOverlap ? "Yes" : "No");
#endif
}
}
int main(int argc, char* argv[])
{
const int maxPwLength = 10;
unsigned int start = 0;
unsigned int ende = 1000;
char charset[63] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
char hash_hex[33];
unsigned int counter;
char pstorer[maxPwLength];
bool found = false;
//nur main
char switcher[100];
int pwLength;
// -> header
printf(" \n");
printf(" ################################################################# \n");
printf(" # # \n");
printf(" # NTLM Brutforce Attack by Adrian Schwalller # \n");
printf(" # Algorithm by Alain Espinosa # \n");
printf(" # # \n");
printf(" ################################################################# \n");
printf(" \n");
printf(" \n");
printf("Press any key to start the programm...\n");
// _getch() iest ein Zeichen (kˆnnte es auch zur¸ckgeben)
_getch();
//print device information
print_device_information();
printf("Please insert the HTML-Hash-Value:\n");
scanf ("%s",hash_hex);
gotolower(hash_hex);
printf("Would you like to insert a maxPwLength then insert 'pw'\n. Else you work with the counter.\n");
scanf ("%s",switcher);
if (strcmp(switcher, "pw") == 0){
printf("\nPlease insert the max passwort length:\n");
scanf ("%d",&pwLength);
start = 0;
ende = (int)pow((double)strlen(charset), pwLength);
}else{
printf("\nPlease initialise the counter:\n");
scanf ("%d",&start);
printf("\nPlease insert the maxCount for the counter:\n");
scanf ("%d",&ende);
pwLength = 1;
for(unsigned int i = ende; i > (unsigned int)strlen(charset); i = (unsigned int) ceil((double)i/62)){
pwLength++;
}
}
printf("PwLength: %d\n", pwLength);
printf("Start: %d\n", start);
printf("Ende: %d\n", ende);
//Variablen verschieben
char* g_charset;
char* g_hash_hex;
unsigned int* g_counter;
char* g_pstorer;
bool* g_found;
CUDA_SAFE_CALL(cudaMalloc((void**) &g_charset, sizeof(char)*strlen(charset)));
CUDA_SAFE_CALL(cudaMalloc((void**) &g_hash_hex, sizeof(char)*strlen(hash_hex)));
CUDA_SAFE_CALL(cudaMalloc((void**) &g_counter, sizeof(int)));
CUDA_SAFE_CALL(cudaMalloc((void**) &g_pstorer, sizeof(char)*maxPwLength));
CUDA_SAFE_CALL(cudaMalloc((void**) &g_found, sizeof(bool)));
printf("I'am here0");
CUDA_SAFE_CALL(cudaMemcpy(g_charset, charset, sizeof(char)*strlen(charset), cudaMemcpyHostToDevice));
CUDA_SAFE_CALL(cudaMemcpy(g_hash_hex, hash_hex, sizeof(char)*strlen(hash_hex), cudaMemcpyHostToDevice));
//CUDA_SAFE_CALL(cudaMemcpy(g_counter, counter, sizeof(int), cudaMemcpyHostToDevice));
CUDA_SAFE_CALL(cudaMemcpy(g_pstorer, pstorer, sizeof(char)*maxPwLength, cudaMemcpyHostToDevice));
//CUDA_SAFE_CALL(cudaMemcpy(g_found, found, sizeof(bool), cudaMemcpyHostToDevice));
int difference = ende - start;
//int words_per_thread = 10;
const int threads_per_block = 512;
int threads = 512;
int blocks = 65535; //65535*512=33553920
dim3 grid(blocks,1);
dim3 threadblock(threads,1,1);
printf("I'am here1");
clock_t t1,t2;
t1=clock();
bruteforce<<< grid, threadblock >>>(g_charset, g_hash_hex, g_counter, g_pstorer, g_found, maxPwLength);
t2=clock();
double time=((float)(t2-t1)/CLOCKS_PER_SEC);
printf("I'am here2");
CUDA_SAFE_CALL(cudaMemcpy(&counter, g_counter, sizeof(int), cudaMemcpyDeviceToHost));
CUDA_SAFE_CALL(cudaMemcpy(pstorer, g_pstorer, maxPwLength, cudaMemcpyDeviceToHost));
CUDA_SAFE_CALL(cudaMemcpy(&found, g_found, sizeof(bool), cudaMemcpyDeviceToHost));
if(found){
printf("\nThe Password is: %s\n", pstorer);
}else{
printf("\nNo Password found in the list.\n");
}
printf("\n\n + Done. \n\n");
printf("%d words checked\n\n", counter);
printf("Rechenzeit: %f\n\n", time);
getchar(); //equivalent zu _getch()
return 0;
}