I want to do some basic math involving 3x3 matrices and 3x1 vectors.
For my specific application, I need the following:
- outer product of a vector3 with itself to create a matrix3x3
- matrix3x3 inverse
- ability to multiply a 3x1 vector by a 3x3 matrix
- dot product of two 3x1 vectors
In HLSL there were basic matrix types available to do this. Are there equivalents in CUDA, or is it recommended that I write my own? If so, can anyone suggest the best way to lay out the struct for alignment purposes?
Alright, so I’m assuming I have to write it myself. Here’s what I’ve got:
// 3x3 matrix of floats. Member mRC is the entry at row R, column C;
// members are declared row by row, so the nine floats are laid out
// in row-major order.
struct float3x3
{
    // Row 0
    float m00;
    float m01;
    float m02;
    // Row 1
    float m10;
    float m11;
    float m12;
    // Row 2
    float m20;
    float m21;
    float m22;
};
// Outer product of a vector with itself: returns the matrix whose entry
// (r, c) is v[r] * v[c]. The result is symmetric, so each off-diagonal
// product is computed once and stored in both mirrored entries.
__device__ float3x3 outer_product( float3 v )
{
    // Off-diagonal products, shared by the upper and lower triangles.
    const float xy = v.x*v.y;
    const float xz = v.x*v.z;
    const float yz = v.y*v.z;

    float3x3 result;

    result.m00 = v.x*v.x;
    result.m01 = xy;
    result.m02 = xz;

    result.m10 = xy;
    result.m11 = v.y*v.y;
    result.m12 = yz;

    result.m20 = xz;
    result.m21 = yz;
    result.m22 = v.z*v.z;

    return result;
}
// Multiplies a 3x3 matrix by a column vector and returns m * v.
//
// m - matrix; mRC is the entry at row R, column C.
// v - vector treated as a 3x1 column.
//
// Both arguments are only read, so they are taken by const reference. This
// lets callers pass const objects and temporaries (e.g. the result of
// another function call), which a non-const reference would reject.
__device__ float3 transform( const float3x3 &m, const float3 &v )
{
    float3 res;
    // Each component is the dot product of one matrix row with v.
    res.x = m.m00*v.x + m.m01*v.y + m.m02*v.z;
    res.y = m.m10*v.x + m.m11*v.y + m.m12*v.z;
    res.z = m.m20*v.x + m.m21*v.y + m.m22*v.z;
    return res;
}
// Returns the dot product a.x*b.x + a.y*b.y + a.z*b.z of two 3-vectors.
//
// Neither argument is modified, so both are taken by const reference;
// this allows const objects and temporaries to be passed directly.
__device__ float dot( const float3 &a, const float3 &b )
{
    return a.x*b.x + a.y*b.y + a.z*b.z;
}
bool inverse( float3x3 *in, float3x3 *out, float *det )
{
float A0 = in->m11*in->m22,
A1 = in->m10*in->m22,
A2 = in->m10*in->m21,
B0 = in->m12*in->m21,
B1 = in->m12*in->m20,
B2 = in->m11*in->m20;
float K0 = A0 - B0,
K1 = A1 - B1,
K2 = A2 - B2;
float d = in->m00*K0 - in->m01*K1 + in->m02*K2;
*det = d;
if (d != 0){
float detInv = 1/d;
out->m00 = K0*detInv;
out->m10 = -K1*detInv;
out->m20 = K2*detInv;
out->m01 = (in->m02*in->m21 - in->m01*in->m22)*detInv;
out->m11 = (in->m00*in->m22 - in->m02*in->m20)*detInv;
out->m21 = (in->m01*in->m20 - in->m00*in->m21)*detInv;
out->m02 = (in->m01*in->m12 - in->m02*in->m11)*detInv;
out->m12 = (in->m02*in->m10 - in->m00*in->m12)*detInv;
out->m22 = (in->m00*in->m11 - in->m01*in->m10)*detInv;
return true;
}
return false;
}
I’m still having some issues getting operator overloading working. It seems this used to be provided in the cutil_math.h file, but it is no longer provided… or has it been renamed? I tried writing these, but they don’t work:
// Scales a vector by a scalar: returns (v.x*s, v.y*s, v.z*s).
// Only the vector-times-scalar order (v * s) is covered by this overload.
__device__ float3 operator*( float3 v, float s )
{
    float3 scaled;
    scaled.x = v.x*s;
    scaled.y = v.y*s;
    scaled.z = v.z*s;
    return scaled;
}
// Component-wise in-place addition: adds b to a.
//
// Returns a reference to `a`, following the usual convention for compound
// assignment, so the result can be used in a larger expression
// (e.g. `(a += b) += c`). Callers that ignore the result are unaffected.
__device__ float3& operator+=( float3 &a, float3 b )
{
    a.x += b.x;
    a.y += b.y;
    a.z += b.z;
    return a;
}
Again, if there’s an existing solution, please let me know as I feel like I’m reinventing the wheel here.