Hey, guys,
I tried to compile SHA-512 with CUDA v3.1, but I ran into a very strange compile error involving 64-bit integers (int64).
Just look at the following code.
/* 64-bit unsigned integer alias used for the SHA-512 state and message words below. */
typedef unsigned long long uint64_t;
/*
 * Overlay that exposes the same storage as 64 x uint8_t, 16 x uint32_t or
 * 8 x uint64_t.
 * NOTE(review): uint8_t and uint32_t are declared outside this excerpt; the
 * three views only cover the same 64 bytes if they are 1 and 4 bytes wide.
 */
typedef union
{
uint8_t b[64];   /* byte view */
uint32_t d[16];  /* 32-bit word view */
uint64_t ll[8];  /* 64-bit word view */
} Byte64;
/*
 * Overlay that exposes the same storage as 128 x uint8_t, 32 x uint32_t or
 * 16 x uint64_t. SHA512_Block reads and rewrites it through the ll[] view.
 * NOTE(review): uint8_t and uint32_t are declared outside this excerpt; the
 * three views only cover the same 128 bytes if they are 1 and 4 bytes wide.
 */
typedef union
{
uint8_t b[128];   /* byte view */
uint32_t d[32];   /* 32-bit word view */
uint64_t ll[16];  /* 64-bit word view */
}Byte128;
/*
 * SHA-512 hashing context. It holds only the running hash value;
 * SHA512_Init and SHA512_Block access it as eight 64-bit words via hash.ll[0..7].
 */
typedef struct
{
Byte64 hash;  /* current hash state */
}SHA512_CTX;
/*
 * Reset a SHA-512 context to its starting state.
 *
 * Writes the eight 64-bit state words ctx->hash.ll[0..7] from the constants
 * INIT_A0..INIT_A7.
 * NOTE(review): INIT_A0..INIT_A7 are defined outside this excerpt; presumably
 * the standard SHA-512 initial hash values -- confirm.
 *
 * ctx: context to initialise; must be a valid, writable pointer (not checked).
 *
 * The execution-space qualifier is spelled __device__: the bare word `device`
 * is not a CUDA keyword and does not compile.
 */
__device__ void SHA512_Init(SHA512_CTX *ctx)
{
    ctx->hash.ll[0] = INIT_A0;
    ctx->hash.ll[1] = INIT_A1;
    ctx->hash.ll[2] = INIT_A2;
    ctx->hash.ll[3] = INIT_A3;
    ctx->hash.ll[4] = INIT_A4;
    ctx->hash.ll[5] = INIT_A5;
    ctx->hash.ll[6] = INIT_A6;
    ctx->hash.ll[7] = INIT_A7;
}
/*
 * Run the unrolled SHA-512 round sequence over one block and fold the result
 * into the context.
 *
 * ctx:  hashing context; ctx->hash.ll[0..7] is read at entry and each word is
 *       incremented by the corresponding working variable at exit.
 * b128: message block, accessed as sixteen 64-bit words. It is MODIFIED IN
 *       PLACE: after round i (0..15) word ll[i] is replaced by
 *       ll[i] + sigma1(ll[(i+14)%16]) + ll[(i+9)%16] + sigma0(ll[(i+1)%16]).
 *
 * _RSHA512, sigma0 and sigma1 are macros defined outside this excerpt.
 * Instead of shuffling a..h after every round, each successive _RSHA512
 * invocation passes the eight working variables rotated by one position.
 *
 * NOTE(review): only round indices 0..15 and 64..79 appear here. SHA-512 has
 * 80 rounds, so indices 16..63 were presumably elided from the post --
 * confirm against the full source before relying on this function.
 *
 * The execution-space qualifier is spelled __device__: the bare word `device`
 * is not a CUDA keyword and does not compile.
 */
__device__ void SHA512_Block(SHA512_CTX *ctx, Byte128 *b128)
{
    /* Working variables, seeded from the current hash state. */
    uint64_t a=ctx->hash.ll[0];
    uint64_t b=ctx->hash.ll[1];
    uint64_t c=ctx->hash.ll[2];
    uint64_t d=ctx->hash.ll[3];
    uint64_t e=ctx->hash.ll[4];
    uint64_t f=ctx->hash.ll[5];
    uint64_t g=ctx->hash.ll[6];
    uint64_t h=ctx->hash.ll[7];
    /* Not referenced directly below; presumably scratch used inside _RSHA512 -- confirm. */
    uint64_t t1=0,t2=0;

    /* Round indices 0..15: one round, then refresh the word just consumed. */
    _RSHA512(a,b,c,d,e,f,g,h,0,b128->ll[0]);b128->ll[0]+=sigma1(b128->ll[14])+b128->ll[9]+sigma0(b128->ll[1]);
    _RSHA512(h,a,b,c,d,e,f,g,1,b128->ll[1]);b128->ll[1]+=sigma1(b128->ll[15])+b128->ll[10]+sigma0(b128->ll[2]);
    _RSHA512(g,h,a,b,c,d,e,f,2,b128->ll[2]);b128->ll[2]+=sigma1(b128->ll[0])+b128->ll[11]+sigma0(b128->ll[3]);
    _RSHA512(f,g,h,a,b,c,d,e,3,b128->ll[3]);b128->ll[3]+=sigma1(b128->ll[1])+b128->ll[12]+sigma0(b128->ll[4]);
    _RSHA512(e,f,g,h,a,b,c,d,4,b128->ll[4]);b128->ll[4]+=sigma1(b128->ll[2])+b128->ll[13]+sigma0(b128->ll[5]);
    _RSHA512(d,e,f,g,h,a,b,c,5,b128->ll[5]);b128->ll[5]+=sigma1(b128->ll[3])+b128->ll[14]+sigma0(b128->ll[6]);
    _RSHA512(c,d,e,f,g,h,a,b,6,b128->ll[6]);b128->ll[6]+=sigma1(b128->ll[4])+b128->ll[15]+sigma0(b128->ll[7]);
    _RSHA512(b,c,d,e,f,g,h,a,7,b128->ll[7]);b128->ll[7]+=sigma1(b128->ll[5])+b128->ll[0]+sigma0(b128->ll[8]);
    _RSHA512(a,b,c,d,e,f,g,h,8,b128->ll[8]);b128->ll[8]+=sigma1(b128->ll[6])+b128->ll[1]+sigma0(b128->ll[9]);
    _RSHA512(h,a,b,c,d,e,f,g,9,b128->ll[9]);b128->ll[9]+=sigma1(b128->ll[7])+b128->ll[2]+sigma0(b128->ll[10]);
    _RSHA512(g,h,a,b,c,d,e,f,10,b128->ll[10]);b128->ll[10]+=sigma1(b128->ll[8])+b128->ll[3]+sigma0(b128->ll[11]);
    _RSHA512(f,g,h,a,b,c,d,e,11,b128->ll[11]);b128->ll[11]+=sigma1(b128->ll[9])+b128->ll[4]+sigma0(b128->ll[12]);
    _RSHA512(e,f,g,h,a,b,c,d,12,b128->ll[12]);b128->ll[12]+=sigma1(b128->ll[10])+b128->ll[5]+sigma0(b128->ll[13]);
    _RSHA512(d,e,f,g,h,a,b,c,13,b128->ll[13]);b128->ll[13]+=sigma1(b128->ll[11])+b128->ll[6]+sigma0(b128->ll[14]);
    _RSHA512(c,d,e,f,g,h,a,b,14,b128->ll[14]);b128->ll[14]+=sigma1(b128->ll[12])+b128->ll[7]+sigma0(b128->ll[15]);
    _RSHA512(b,c,d,e,f,g,h,a,15,b128->ll[15]);b128->ll[15]+=sigma1(b128->ll[13])+b128->ll[8]+sigma0(b128->ll[0]);

    /* Round indices 64..79: the words are consumed without being refreshed. */
    _RSHA512(a,b,c,d,e,f,g,h,64,b128->ll[0]);
    _RSHA512(h,a,b,c,d,e,f,g,65,b128->ll[1]);
    _RSHA512(g,h,a,b,c,d,e,f,66,b128->ll[2]);
    _RSHA512(f,g,h,a,b,c,d,e,67,b128->ll[3]);
    _RSHA512(e,f,g,h,a,b,c,d,68,b128->ll[4]);
    _RSHA512(d,e,f,g,h,a,b,c,69,b128->ll[5]);
    _RSHA512(c,d,e,f,g,h,a,b,70,b128->ll[6]);
    _RSHA512(b,c,d,e,f,g,h,a,71,b128->ll[7]);
    _RSHA512(a,b,c,d,e,f,g,h,72,b128->ll[8]);
    _RSHA512(h,a,b,c,d,e,f,g,73,b128->ll[9]);
    _RSHA512(g,h,a,b,c,d,e,f,74,b128->ll[10]);
    _RSHA512(f,g,h,a,b,c,d,e,75,b128->ll[11]);
    _RSHA512(e,f,g,h,a,b,c,d,76,b128->ll[12]);
    _RSHA512(d,e,f,g,h,a,b,c,77,b128->ll[13]);
    _RSHA512(c,d,e,f,g,h,a,b,78,b128->ll[14]);
    _RSHA512(b,c,d,e,f,g,h,a,79,b128->ll[15]);

    /* Fold the working variables back into the hash state. */
    ctx->hash.ll[0]+=a;
    ctx->hash.ll[1]+=b;
    ctx->hash.ll[2]+=c;
    ctx->hash.ll[3]+=d;
    ctx->hash.ll[4]+=e;
    ctx->hash.ll[5]+=f;
    ctx->hash.ll[6]+=g;
    ctx->hash.ll[7]+=h;
}
_RSHA512, sigma1 and sigma0 are SHA-512 macros.
Code snippet:
// Minimal reproduction: two separate contexts, each initialised and then
// run through SHA512_Block with its own Byte128 block.
// NOTE(review): the i/o "Pad" names suggest HMAC inner/outer pads -- confirm;
// how k_iPad and k_oPad are filled is not shown in this excerpt.
SHA512_CTX i_Ctx;
Byte128 k_iPad;
SHA512_CTX o_Ctx;
Byte128 k_oPad;
SHA512_Init(&i_Ctx);
SHA512_Block(&i_Ctx,&k_iPad);// first call to SHA512_Block: reported to compile without error
SHA512_Init(&o_Ctx);
SHA512_Block(&o_Ctx,&k_oPad); // second call to SHA512_Block: reported to trigger the compiler error below
Compile error:
1>Kernel.compute_10.cudafe1.gpu
1>Kernel.compute_10.cudafe2.gpu
1>### Assertion failure at line 1923 of ../../be/cg/cgemit.cxx:
1>### Compiler Error in file Kernel.compute_20.cpp3.i during Assembly phase:
1>### incorrect register class for operand 0
1>nvopencc ERROR: C:\CUDA\bin/../open64/lib//be.exe returned non-zero status 1
Does anybody know why this happens? It has really got me stuck.
Thanks!