// Pivot-search stage of a 3x3 matrix inversion with full pivoting.
//
// m1 : 3x3 matrix. It is only read: its entries are copied into a local
//      working array and nothing is written back by the code present here.
//
// For every step k the entry of largest magnitude inside the trailing
// sub-matrix (rows and columns k..2) is located and its position is stored in
// is[k] / js[k]. The function returns as soon as that magnitude falls below
// the pivot tolerance.
//
// NOTE(review): the row/column exchange, the elimination and the write-back of
// the result are not part of this excerpt, so the function currently has no
// observable effect -- confirm against the complete implementation.
__device__ void inverse3x3(float m1[3][3])
{
    const float kPivotEps = 0.0001f;  // below this the best pivot counts as zero

    int is[3] = {0, 0, 0};  // row index of the pivot chosen at step k
    int js[3] = {0, 0, 0};  // column index of the pivot chosen at step k

    // Work on a private copy so the caller's matrix stays untouched.
    float m[3][3];
    for (int r = 0; r < 3; ++r)
    {
        for (int c = 0; c < 3; ++c)
            m[r][c] = m1[r][c];
    }

    for (int k = 0; k < 3; ++k)
    {
        float fMax = 0.0f;
        for (int i = k; i < 3; ++i)
        {
            for (int j = k; j < 3; ++j)
            {
                // fabsf keeps the magnitude computation in single precision.
                const float fAbs = fabsf(m[i][j]);
                if (fAbs > fMax)
                {
                    fMax = fAbs;
                    is[k] = i;
                    js[k] = j;
                }
            }
        }

        // fMax is never negative, so it can be compared directly.
        if (fMax < kPivotEps) //@@@1
            return;

        if (is[k] != k) //@@@2
        {
            // Intentionally empty in this excerpt.
        }
    }
}
// Minimal kernel used to reproduce the compile problem: every launched thread
// simply forwards the matrix argument to inverse3x3.
//
// mx : array parameter, i.e. a pointer to rows of three floats.
//      NOTE(review): presumably has to reference device-accessible memory --
//      confirm at the launch site.
__global__ void test(float mx[3][3])
{
    inverse3x3(mx);
}
It compiled without a problem. I think you need to post a bit more code so that the error can be reproduced. Please also make it self-contained, with no calls to assign_, which is not defined…
Here is my program; I've commented out everything else.
The problem turns out to lie between the lines marked @@@3 (mult_…) and @@@4 (inverse3x3). When I comment out either line, the compiler is happy, but the two of them seem to hate each other.
// Matrix multiply: r = x * y, with (m x t) * (t x n) ==> (m x n).
// All three matrices are dense, row-major and addressed through flat pointers.
//
// r : output, m * n floats; every element is overwritten.
// x : left operand, m * t floats.
// y : right operand, t * n floats.
// m, t, n : matrix dimensions as described above.
//
// r must not overlap x or y: each output element is accumulated in a local
// variable and stored exactly once.
__device__ void mult_ (float *r, const float*x, const float*y, int m, int t, int n)
{
    for (int i = 0; i < m; ++i)
    {
        for (int j = 0; j < n; ++j)
        {
            float acc = 0.0f;
            for (int k = 0; k < t; ++k)
            {
                // Row k of y starts at k * n because its rows hold n elements.
                // Stepping by t instead picks the wrong elements whenever
                // t != n and runs past the end of y whenever t > n.
                acc += x[i * t + k] * y[k * n + j];
            }
            r[i * n + j] = acc;
        }
    }
}
// Matrix inverse, 3 x 3 version -- pivot-search stage with full pivoting.
//
// m1 : 3x3 matrix. It is only read: its entries are copied into a local
//      working array and nothing is written back by the code present here.
//
// For every step k the entry of largest magnitude inside the trailing
// sub-matrix (rows and columns k..2) is located and its position is stored in
// is[k] / js[k]. The function returns as soon as that magnitude falls below
// the pivot tolerance.
//
// NOTE(review): the row/column exchange, the elimination and the write-back of
// the result have been stripped from this excerpt, so the function currently
// has no observable effect -- confirm against the complete implementation.
__device__ void inverse3x3(float m1[3][3])
{
    const float kPivotEps = 0.0001f;  // below this the best pivot counts as zero

    int is[3] = {0, 0, 0};  // row index of the pivot chosen at step k
    int js[3] = {0, 0, 0};  // column index of the pivot chosen at step k

    // Load the input into the working copy. Without this the working matrix
    // stays all zero, the search never finds a pivot and the function always
    // leaves at k == 0 regardless of m1.
    float m[3][3];
    for (int r = 0; r < 3; ++r)
    {
        for (int c = 0; c < 3; ++c)
            m[r][c] = m1[r][c];
    }

    for (int k = 0; k < 3; ++k)
    {
        float fMax = 0.0f;
        for (int i = k; i < 3; ++i)
        {
            for (int j = k; j < 3; ++j)
            {
                // fabsf keeps the magnitude computation in single precision.
                const float fAbs = fabsf(m[i][j]);
                if (fAbs > fMax)
                {
                    fMax = fAbs;
                    is[k] = i;
                    js[k] = j;
                }
            }
        }

        // fMax is never negative, so it can be compared directly.
        if (fMax < kPivotEps)
            return;

        if (is[k] != k)
        {
            // Intentionally empty in this excerpt.
        }
    }
}
// Scratch set-up for a 3D line computation: forms the 3x3 product
// AtA = At * A with mult_ and hands it to inverse3x3.
//
// pt, l1, l2, l1pt, l2pt : three-element arrays; none of them is read or
//                          written by the code present here.
//
// NOTE(review): A and At are still all zero when they are multiplied (the
// transpose call is commented out and nothing fills A), and Li / AtL are
// declared but not used -- the rest of the routine appears to have been
// stripped from this excerpt.
__device__ void LineCross3d(float pt[3], float l1[3], float l2[3], float l1pt[3], float l2pt[3])
{
    // Every local buffer starts out zero-filled.
    float A[4][3]   = {};
    float At[3][4]  = {};
    float AtA[3][3] = {};
    float Li[4]     = {};
    float AtL[3]    = {};

    // transpose((float *)At, (const float *)A, 4, 3);

    // (3 x 4) * (4 x 3) ==> (3 x 3); the 2D arrays are passed as flat storage.
    mult_(&AtA[0][0], &At[0][0], &A[0][0], 3, 4, 3); //@@@3

    inverse3x3(AtA); //@@@4
}
// Per-pixel kernel: each thread derives an (x, y) position from its block and
// thread indices and calls LineCross3d for that position.
//
// output  : not referenced by the code present here.
// iImgNum : not referenced by the code present here.
// width, height : extent of the index space; threads whose x or y lies at or
//                 beyond the extent return immediately.
//
// Launch layout: x comes from the .x components and y from the .y components
// of blockIdx / blockDim / threadIdx, so a 2D grid of 2D blocks covering
// width x height is expected.
__global__ void IBR_Kernel(uchar4 *output, int iImgNum, int width, int height)
{
    const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Reject non-positive extents first: converting a negative int to unsigned
    // would otherwise turn it into a huge bound that no thread exceeds.
    if (width <= 0 || height <= 0 ||
        x >= (unsigned int)width || y >= (unsigned int)height)
        return;

    // Pixel position with a trailing 1. The conversions are explicit because
    // an implicit unsigned -> float conversion inside a braced initializer is
    // a narrowing conversion. Not read by the code present here.
    const float imgxy_[3] = {(float)x, (float)y, 1.0f};
    (void)imgxy_;

    // Zero-filled so the callee never sees indeterminate values; nothing in
    // this excerpt assigns real data to them.
    float li[3]   = {0.0f, 0.0f, 0.0f};
    float lc[3]   = {0.0f, 0.0f, 0.0f};
    float lipt[3] = {0.0f, 0.0f, 0.0f};
    float lcpt[3] = {0.0f, 0.0f, 0.0f};

    //Get the depth from cross point to DesView eye.
    float cpt[3] = {0.0f, 0.0f, 0.0f};
    LineCross3d(cpt, li, lc, lipt, lcpt);
}
The only time I got the 'unexpected mtype' error was when I was accessing arrays out of bounds in the kernel, so you might want to recheck your code. The error message is totally bogus: if you comment out some totally harmless code it goes away, but the program still won't work until you hunt down the actual error. At least, that is how it was for me.