I’m not really sure if this is a CUDA Problem but I strictly followed this guide and the result isn’t what I expect. There is only one message block because it will be a bruteforcer sooner or later -.- I compared the results with a working sha-function but it is wrong.
I’m pretty sure the memInit-function does the right thing. With the input ‘aaaaaa’ it precomputes:
61616161 61618000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000030
Here is the code. Maybe it is a problem with some type length, endians or something.
#include "sha_function.h"
/* f1 to f4 */
#define f1(x,y,z) ( ( x & y ) | ( ~x & z ) )
#define f2(x,y,z) ( x ^ y ^ z )
#define f3(x,y,z) ( ( x & y ) | ( x & z ) | ( y & z ) )
#define f4(x,y,z) ( x ^ y ^ z )
/* SHA init values */
#define I1 0x67452301L
#define I2 0xEFCDAB89L
#define I3 0x98BADCFEL
#define I4 0x10325476L
#define I5 0xC3D2E1F0L
/* SHA constants */
#define C1 0x5a827999L
#define C2 0x6ed9eba1L
#define C3 0x8f1bbcdcL
#define C4 0xca62c1d6L
/* 32-bit rotate */
#define ROT(x,n) ( ( x << n ) | ( x >> ( 32 - n ) ) )
/* main function */
#define CALC(n,i) temp = ROT ( A , 5 ) + f##n( B , C, D ) + W[i] + E + C##n ; E = D; D = C; C = ROT ( B , 30 ); B = A; A = temp
int main()
{
unsigned long * hash = (unsigned long *) malloc(4);
int length = 6;
start(hash, length);
return 1;
}
void start(unsigned long * hash, int length)
{
unsigned long * buffer = 0;
unsigned long * res = 0;
unsigned char * input;
res = (unsigned long*) malloc(5 * sizeof(unsigned long));
cudaMalloc((void** ) &buffer, 5 * sizeof(unsigned long));
cudaMalloc((void** ) &input, length * sizeof(unsigned char));
unsigned char input_tmp[length];
input_tmp[0] = 'a';
input_tmp[1] = 'a';
input_tmp[2] = 'a';
input_tmp[3] = 'a';
input_tmp[4] = 'a';
input_tmp[5] = 'a';
cudaMemcpy(input, input_tmp, length * sizeof(unsigned char), cudaMemcpyHostToDevice);
doSHA<<<1,1>>>(input, length, buffer);
cudaMemcpy(res, buffer, 5 * sizeof(unsigned long), cudaMemcpyDeviceToHost);
cudaError_t err = cudaGetLastError();
if( cudaSuccess != err)
printf( "Cuda error: %s.\n", cudaGetErrorString( err) );
printf("%X\n", res[0]);
}
__global__ void doSHA(unsigned char * input, int length, unsigned long * buffer)
{
unsigned long W[80],A,B,C,D,E,temp;
memInit(W, input, length);
// buffer[0] = W[1];
for(int i = 16; i < 80; i++)
W[i] = ROT( ( W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16] ) , 1 );
A = I1; B = I2; C = I3; D = I4; E = I5;
int i = 0;
for (i = 0; i < 20; ++i)
CALC(1,i);
for (i = 20; i < 40; ++i)
CALC(2,i);
for (i = 40; i < 60; ++i)
CALC(3,i);
for (i = 60; i < 80; ++i)
CALC(4,i);
buffer[0] = A + I1;
buffer[1] = B + I2;
buffer[2] = C + I3;
buffer[3] = D + I4;
buffer[4] = E + I5;
}
__device__ void memInit(unsigned long * tmp, unsigned char * input, int length)
{
int stop = 0;
// reseting tmp
for(int i = 0; i < 80; i++) tmp[i] = 0;
// fill tmp like: message char c0,c1,c2,...,cn,10000000,00...000
for(int i = 0; i < length; i+=4)
{
for(int j = 0; j < 4; j++)
if(i + j < length)
tmp[i/4] |= input[i+j] << (24-j * 8);
else
{
tmp[i/4] |= 0x80 << (24-j * 8); // Append 1 then zeros
stop = 1;
break;
}
if(stop)
break;
}
// Adding length as last value
tmp[15] |= length * 8;
}
Thank you for helping me, very appreciated!