hi
Is there a more efficient float-to-half and half-to-float conversion than the code below?
Any tips and tricks on this topic?
Handling of NaN and +/-Inf is probably not required :)
//
// Converts a single-precision float to the bit pattern of an IEEE-754
// half-precision value (1 sign bit, 5 exponent bits with bias 15, 10 mantissa
// bits).
//
// Value   - the float to convert.
// Returns - the 16-bit half encoding.
//
// Behaviour:
//  - The 13 dropped mantissa bits are rounded to nearest; an exact tie is
//    rounded away from zero (this is NOT round-to-nearest-even).
//  - Magnitudes below 2^-25 (this includes float zeros and float denormals)
//    become a zero that keeps the sign of the input, so -0.0f -> 0x8000.
//  - Magnitudes between 2^-25 and the smallest normal half become half
//    denormals.
//  - Finite values that round past the largest half become signed infinity.
//  - Infinity stays infinity; NaN stays NaN (upper 10 payload bits are kept,
//    and the lowest bit is forced on if they would otherwise all be zero).
//
// NOTE(review): identifiers starting with a double underscore are reserved for
// the implementation in C++; the name is kept only so existing callers still
// compile. cuda_fp16.h also ships conversion intrinsics (__float2half_rn /
// __half2float) - worth checking whether they can replace this routine.
//
__device__ unsigned short __float_as_half(float Value)
{
    // Work on unsigned bits so that shifts never depend on sign extension.
    const unsigned int bits = static_cast<unsigned int>(__float_as_int(Value));
    const unsigned int sign = (bits >> 16) & 0x00008000u;

    // Exponent re-biased from float (127) to half (15); may be <= 0.
    int e = static_cast<int>((bits >> 23) & 0x000000ffu) - (127 - 15);
    unsigned int m = bits & 0x007fffffu;

    if (e <= 0)
    {
        //
        // Below half_min: result is a signed zero or a half denormal.
        //
        if (e < -10)
        {
            // Too small even for the smallest denormal: flush to zero but
            // keep the sign (also covers +0.0f and -0.0f).
            return static_cast<unsigned short>(sign);
        }

        // Make the implicit leading 1 explicit, then shift it into denormal
        // position (shift amount is in the range 1..11 here).
        m = (m | 0x00800000u) >> (1 - e);

        // Round on the first dropped bit; a carry out of the mantissa lands
        // in the exponent field and yields the smallest normal half.
        if (m & 0x00001000u)
            m += 0x00002000u;

        return static_cast<unsigned short>(sign | (m >> 13));
    }

    if (e == 0xff - (127 - 15))
    {
        //
        // Float exponent was all ones: infinity or NaN.
        //
        if (m == 0u)
            return static_cast<unsigned short>(sign | 0x7c00u);

        // Keep the top payload bits; never let a NaN collapse into infinity.
        m >>= 13;
        return static_cast<unsigned short>(sign | 0x7c00u | m | (m == 0u ? 1u : 0u));
    }

    //
    // Normal range: round the mantissa, propagating a carry to the exponent.
    //
    if (m & 0x00001000u)
    {
        m += 0x00002000u;
        if (m & 0x00800000u)
        {
            m = 0u;
            e += 1;
        }
    }

    // Exponent no longer fits in 5 bits: overflow to signed infinity.
    if (e > 30)
        return static_cast<unsigned short>(sign | 0x7c00u);

    return static_cast<unsigned short>(sign | (static_cast<unsigned int>(e) << 10) | (m >> 13));
}
//
// Converts an IEEE-754 half-precision bit pattern to a single-precision float.
//
// Value   - the half encoding in the low 16 bits (1 sign bit, 5 exponent bits
//           with bias 15, 10 mantissa bits). Higher bits are ignored.
// Returns - the float with the same value. Every half value is exactly
//           representable as a float, so no rounding takes place.
//
// Handles all encodings: signed zeros, denormals (renormalised into a normal
// float), normal numbers, and exponent 31, which is mapped to float
// infinity / NaN so that infinities round-trip instead of decoding to a large
// finite number.
//
// NOTE(review): identifiers starting with a double underscore are reserved for
// the implementation in C++; the name is kept only so existing callers still
// compile.
//
__device__ float __half_as_float(unsigned int Value)
{
    const unsigned int Sign = (Value & 0x8000u) << 16;
    const unsigned int HalfExponent = (Value >> 10) & 0x1Fu;
    unsigned int Mantissa = Value & 0x03FFu;
    unsigned int Result;

    if (HalfExponent == 0x1Fu)
    {
        //
        // Infinity (mantissa == 0) or NaN: float exponent must be all ones.
        // The NaN payload is moved to the top of the float mantissa.
        //
        Result = Sign | 0x7F800000u | (Mantissa << 13);
    }
    else if (HalfExponent != 0u)
    {
        //
        // Normal number: re-bias the exponent from 15 to 127 (+112).
        //
        Result = Sign | ((HalfExponent + 112u) << 23) | (Mantissa << 13);
    }
    else if (Mantissa != 0u)
    {
        //
        // Half denormal: shift the mantissa left until its leading 1 reaches
        // the implicit-bit position (bit 10), lowering the exponent once per
        // shift. At most 10 iterations since Mantissa has 10 bits.
        //
        int Exponent = 1;
        do
        {
            Exponent--;
            Mantissa <<= 1;
        } while ((Mantissa & 0x0400u) == 0u);

        // Drop the now-implicit leading 1.
        Mantissa &= 0x03FFu;

        Result = Sign | (static_cast<unsigned int>(Exponent + 112) << 23) | (Mantissa << 13);
    }
    else
    {
        //
        // Zero: only the sign bit survives (+0.0f or -0.0f).
        //
        Result = Sign;
    }

    return __int_as_float(static_cast<int>(Result));
}