Hello,

I am trying to reduce the register usage of the following quaternion class. Especially I need to reduce the register usage of the rotateVector method. From what I have read in the forum, there is no deterministic rule of how the compiler assigns registers. I got stuck with the following code which I tried to optimize via trial and error. Hopefully you can give me some hints how I can further reduce the register usage.

Thanks for your help

Christoph

the Code:

[codebox]class cuQuat

{

public:

```
float4 m_fDat;
__device__ cuQuat(){}
__device__ cuQuat(const float4 &q)
{
m_fDat.x = q.x;
m_fDat.y = q.y;
m_fDat.z = q.z;
m_fDat.w = q.w;
}
__device__ cuQuat(const cuQuat &q)
{
m_fDat.x = q.m_fDat.x;
m_fDat.y = q.m_fDat.y;
m_fDat.z = q.m_fDat.z;
m_fDat.w = q.m_fDat.w;
}
__device__ void set(const volatile float &x, const volatile float &y, const volatile float &z)
{
volatile float tmp;
m_fDat.x = x;
m_fDat.y = y;
m_fDat.z = z;
tmp = 1.0f - x*x - y*y - z*z;
if(tmp > 0.000001)
m_fDat.w = sqrtf(tmp);
else
{
m_fDat.w = 0.0f;
tmp = x*x + y*y + z*z;
tmp = rsqrtf(tmp);
if(tmp > 0.000001)
{
m_fDat.x *= tmp;
m_fDat.y *= tmp;
m_fDat.z *= tmp;
}
}
}
// vector v3f has to be normalized, angle is in radian
__device__ void fromAxisAngle(const float3 &v3f, float fAngle)
{
fAngle *= 0.5f;
float sinA = __sinf(fAngle);
m_fDat.x = v3f.x * sinA;
m_fDat.y = v3f.y * sinA;
m_fDat.z = v3f.z * sinA;
m_fDat.w = __cosf(fAngle);
}
__device__ void rotateVector(const volatile float &x,
const volatile float &y,
const volatile float &z,
volatile float &xRet,
volatile float &yRet,
volatile float &zRet)
{
volatile float f1, f2, f3, f4;
f1 = m_fDat.x; f1 *= f1;
f2 = m_fDat.y; f2 *= f2;
f3 = m_fDat.z; f3 *= f3;
f4 = m_fDat.w; f4 *= f4;
xRet = (f4 + f1 - f2 - f3) * x;
yRet = (f4 - f1 + f2 - f3) * y;
zRet = (f4 - f1 - f2 + f3) * z;
f1 = m_fDat.x;
f2 = m_fDat.y;
f3 = m_fDat.z;
f4 = m_fDat.w;
f1 *= f2; f3 *= f4;
f2 = f1 - f3; f2 *= y; xRet += 2.0f * f2;
f2 = f1 + f3; f2 *= x; yRet += 2.0f * f2;
f1 = m_fDat.x;
f2 = m_fDat.y;
f3 = m_fDat.z;
f4 = m_fDat.w;
f1 *= f3; f2 *= f4;
f3 = f1 - f2; f3 *= x; zRet += 2.0f * f3;
f3 = f1 + f2; f3 *= z; xRet += 2.0f * f3;
f1 = m_fDat.x;
f2 = m_fDat.y;
f3 = m_fDat.z;
f4 = m_fDat.w;
f1 *= f4; f2 *= f3;
f3 = f2 - f1; f3 *= z; yRet += 2.0f * f3;
f3 = f2 + f1; f3 *= y; zRet += 2.0f * f3;
}
__device__ void conjugate(const cuQuat &q)
{
m_fDat.x = -q.m_fDat.x;
m_fDat.y = -q.m_fDat.y;
m_fDat.z = -q.m_fDat.z;
m_fDat.w = q.m_fDat.w;
}
__device__ void normalize()
{
float fNorm = rsqrtf(m_fDat.x*m_fDat.x + m_fDat.y*m_fDat.y +
m_fDat.z*m_fDat.z + m_fDat.w*m_fDat.w);
if(fNorm > 0.000001)
{
m_fDat.x *= fNorm;
m_fDat.y *= fNorm;
m_fDat.z *= fNorm;
m_fDat.w *= fNorm;
}
}
__device__ friend cuQuat operator*(cuQuat &q1, cuQuat &q2)
{
cuQuat res;
res.m_fDat.x = q1.m_fDat.x * q2.m_fDat.w;
res.m_fDat.x += q1.m_fDat.y * q2.m_fDat.z;
res.m_fDat.x -= q1.m_fDat.z * q2.m_fDat.y;
res.m_fDat.x += q1.m_fDat.w * q2.m_fDat.x;
res.m_fDat.y = q1.m_fDat.y * q2.m_fDat.w;
res.m_fDat.y -= q1.m_fDat.x * q2.m_fDat.z;
res.m_fDat.y += q1.m_fDat.z * q2.m_fDat.x;
res.m_fDat.y += q1.m_fDat.w * q2.m_fDat.y;
res.m_fDat.z = q1.m_fDat.x * q2.m_fDat.y;
res.m_fDat.z -= q1.m_fDat.y * q2.m_fDat.x;
res.m_fDat.z += q1.m_fDat.z * q2.m_fDat.w;
res.m_fDat.z += q1.m_fDat.w * q2.m_fDat.z;
res.m_fDat.w = q1.m_fDat.w * q2.m_fDat.w;
res.m_fDat.w -= q1.m_fDat.x * q2.m_fDat.x;
res.m_fDat.w -= q1.m_fDat.y * q2.m_fDat.y;
res.m_fDat.w -= q1.m_fDat.z * q2.m_fDat.z;
return res;
}
```

};

[/codebox]