help! unexpected mtype ; /...be.exe returned non-zero status 1

1>tmpxft_00000980_00000000-3_render.cudafe1.gpu

1>tmpxft_00000980_00000000-8_render.cudafe2.gpu

1>### Assertion failure at line 123 of ../../be/cg/NVISA/expand.cxx:

1>### Compiler Error in file C:\Users\CGILab\AppData\Local\Temp/tmpxft_00000980_00000000-9_render.cpp3.i during Code_Expansion phase:

1>### unexpected mtype

1>nvopencc ERROR: D:\CUDA\bin/../open64/lib//be.exe returned non-zero status 1

// First stage of a 3x3 matrix inverse, as posted: only the search for the
// largest-magnitude entry of each trailing sub-matrix is shown; the rest of
// the routine is elided (see the "//............." marker below).
//   m1 : 3x3 input matrix. The code shown here never writes to it.
// NOTE(review): assign_ is not defined in this snippet; presumably it copies
// 9 floats from m1 into the local matrix m -- confirm against its definition.
__device__ void inverse3x3(float m1[3][3]) 

{

	// is[k] / js[k]: row / column of the largest-magnitude entry found in step k.
	int is[3] = {0, 0, 0};

	int js[3] = {0, 0, 0};

	// fDet and f are not used by the part of the routine shown here.
	float fDet = 1.0f; 

	int f = 1; 

	int i, j, k;

	// Local working matrix, zero-initialised before the assign_ call.
	float m[3][3] = {0,0,0, 0,0,0, 0,0,0};

	assign_((float *)m, (const float *)m1, 9);

	for (k = 0; k < 3; k ++) 

	{ 

		// Largest |m[i][j]| seen so far for rows and columns k..2.
		float fMax = 0.0f; 

		for (i = k; i < 3; i ++) 

		{ 

			for (j = k; j < 3; j ++) 

			{ 

				// This float f shadows the int f declared at function scope.
				const float f = fabs(m[i][j]); 

				if (f > fMax) 

				{ 

					fMax = f; 

					is[k] = i; 

					js[k] = j; 

				} 

			} 

		} 

		// The two statements below are commented out in the post; @@@1 and
		// @@@2 are the markers the accompanying text refers to.
//		 if (fabs(fMax) < 0.0001f)  //@@@1

//			 return; 

//		if (is[k] != k)				  //@@@2

//		{ 

//		} 

	// Remainder of the loop body was left out of the post.
	//.............

	}

}

In my program, a __global__ function calls this device function.

The compile error appears when the code at either @@@1 or @@@2 is enabled.

I couldn't figure out what's wrong…

help!

thanks,

lyso External Image

Uh, I hate those compiler bugs. Still, I tried:

/*
 * Self-contained test version of the 3x3 inverse.
 *
 * Copies m1 into a local work matrix and then, for each step k = 0..2, scans
 * rows and columns k..2 for the entry with the largest magnitude, recording
 * where it was found.  The function returns early as soon as that magnitude
 * is below 0.0001f.  Nothing is written back to m1 in this version.
 */
__device__ void inverse3x3(float m1[3][3])
{
	int pivotRow[3] = {0, 0, 0};
	int pivotCol[3] = {0, 0, 0};
	float det = 1.0f;	// not used by the code in this version
	int sign = 1;		// not used by the code in this version
	float work[3][3];

	// Element-wise copy of the input into the local work matrix.
	for (int r = 0; r < 3; ++r)
	{
		for (int c = 0; c < 3; ++c)
			work[r][c] = m1[r][c];
	}

	for (int step = 0; step < 3; ++step)
	{
		// Search the trailing sub-matrix for its largest-magnitude entry.
		float largest = 0.0f;
		for (int r = step; r < 3; ++r)
		{
			for (int c = step; c < 3; ++c)
			{
				const float magnitude = fabs(work[r][c]);
				if (magnitude > largest)
				{
					largest = magnitude;
					pivotRow[step] = r;
					pivotCol[step] = c;
				}
			}
		}

		if (fabs(largest) < 0.0001f)	//@@@1
			return;

		if (pivotRow[step] != step)	//@@@2
		{
			// intentionally empty in this test version
		}
	}
}

// Minimal kernel that only forwards its argument to inverse3x3, so that the
// device function is reachable from a __global__ entry point.  It does not
// use any thread or block index, so every launched thread makes the same call.
__global__ void test(float mx[3][3]) {

	inverse3x3(mx);

}

And it did compile without a problem. I think you need to publish a bit more code so that the error can be reproduced. And please make it self-contained, no calls to assign_ which is not defined…

Please see the post below.

thanks to Cygnus !

Here is my program; I've commented out everything else.

The problem turns out to be between @@@3 (mult_) and @@@4 (inverse3x3): when I comment out either line, the compiler says OK, but they seem to hate each other.

// Matrix multiply: r = x * y, i.e. (m x t) * (t x n) ==> (m x n).
// All three matrices are dense float arrays stored row by row.
//   r : output, m * n elements, fully overwritten; must not overlap x or y
//   x : left operand,  m * t elements
//   y : right operand, t * n elements
//   m, t, n : dimensions as above
__device__ void mult_ (float *r, const float*x, const float*y, int m, int t, int n)
{
	for (int i = 0; i < m; ++i)
	{
		for (int j = 0; j < n; ++j)
		{
			// Accumulate locally and store once per output element.
			float acc = 0.0f;

			for (int k = 0; k < t; ++k)
			{
				// y has n columns, so its element (k, j) lives at k * n + j.
				// Using a row stride of t here reads the wrong elements and
				// runs past the end of y whenever t > n.
				acc += x[i * t + k] * y[k * n + j];
			}

			r[i * n + j] = acc;
		}
	}
}

// Matrix inverse, 3 x 3 version (reduced: everything after the search for
// the largest entry has been removed).
// In this version the local work matrix is never filled from m1 and stays
// all zeros, so the search finds nothing above 0 and the function returns
// during the first step.  m1 is neither read nor written.
__device__ void inverse3x3(float m1[3][3]) 
{
	int rowOfMax[3] = {0, 0, 0};
	int colOfMax[3] = {0, 0, 0};
	float det = 1.0f;	// not used by the code in this version
	int sign = 1;		// not used by the code in this version
	float work[3][3] = {
		{0.0f, 0.0f, 0.0f},
		{0.0f, 0.0f, 0.0f},
		{0.0f, 0.0f, 0.0f}
	};

	for (int step = 0; step < 3; ++step)
	{
		// Largest magnitude among rows and columns step..2.
		float best = 0.0f;

		for (int row = step; row < 3; ++row)
		{
			for (int col = step; col < 3; ++col)
			{
				const float mag = fabs(work[row][col]);
				if (mag > best)
				{
					best = mag;
					rowOfMax[step] = row;
					colOfMax[step] = col;
				}
			}
		}

		// Give up when the largest entry is too close to zero.
		if (fabs(best) < 0.0001f)
			return;

		if (rowOfMax[step] != step)
		{
			// intentionally empty in this reduced version
		}
	}
}

// Reduced version of LineCross3d: only the two calls under investigation
// remain.  All local matrices start out as zeros; the 3x4 matrix is
// multiplied by the 4x3 matrix into a 3x3 product, which is then handed to
// inverse3x3.  None of the five parameters is read or written here.
__device__ void LineCross3d(float pt[3], float l1[3], float l2[3], float l1pt[3], float l2pt[3])
{
	float	matA[4][3]   = {0.0f};
	float	matAt[3][4]  = {0.0f};
	float	matAtA[3][3] = {0.0f};
	float	vecL[4]      = {0.0f};	// not used in this reduced version
	float	vecAtL[3]    = {0.0f};	// not used in this reduced version

	// Disabled in the post:
	// 	transpose((float *)matAt, (const float *)matA, 4, 3);

	mult_(&matAtA[0][0], &matAt[0][0], &matA[0][0], 3, 4, 3);	//@@@3

	inverse3x3(matAtA);						//@@@4
}

// Reduced version of the rendering kernel: everything except the call to
// LineCross3d has been removed, so `output` and `iImgNum` are not used and
// nothing is written to memory.
//   output  : destination buffer (unused in this version)
//   iImgNum : unused in this version
//   width, height : extent of the (x, y) index range that is processed
// Each thread derives x and y from its block/thread indices in the first two
// grid dimensions; threads outside width x height return immediately.
__global__ void IBR_Kernel(uchar4 *output, int iImgNum, int width, int height)
{
	unsigned int z = 0;

	const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
	const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

	// Reject non-positive sizes before comparing in unsigned space; a
	// negative int converted to unsigned would otherwise become a huge bound
	// and let every thread through.
	if (width <= 0 || height <= 0 ||
	    x >= (unsigned int)width || y >= (unsigned int)height)
		return;

	// Explicit conversions: brace-initialising a float from an unsigned int
	// is a narrowing conversion.
	float imgxy_[3] = {(float)x, (float)y, 1.0f};

	// Passed to LineCross3d uninitialised; the reduced LineCross3d in this
	// post does not read them.
	float li[3], lc[3], lipt[3], lcpt[3];

	//Get the depth from cross point to DesView eye.
	float cpt[3];

	LineCross3d(cpt, li, lc, lipt, lcpt);
}

Hmm… still compiles without an error, just lots of warnings.

Also, the kernel has no output so most of the code is treated as dead and kernel does nothing.

1>ptxas info	: Compiling entry function '_Z10IBR_KernelP6uchar4iii'

1>ptxas info	: Used 0 registers, 16+16 bytes smem, 4 bytes cmem[0], 8 bytes cmem[14]

i’m using cuda build rule 2.3:

echo "D:\CUDA\bin\nvcc.exe" -arch sm_13 -ccbin "D:\Program Files\Microsoft Visual Studio 9.0\VC\bin" -Xcompiler "/EHsc /W3 /nologo /O2 /Zi /MT " -maxrregcount=32 --compile -o "Debug\Stereo10.cu.obj" "e:\0_StereoProj\Stereo10\Stereo10.vcproj"

"D:\CUDA\bin\nvcc.exe" -arch sm_13 -ccbin "D:\Program Files\Microsoft Visual Studio 9.0\VC\bin" -Xcompiler "/EHsc /W3 /nologo /O2 /Zi /MT " -maxrregcount=32 --compile -o "Debug\Stereo10.cu.obj" "e:\0_StereoProj\Stereo10\Stereo10.vcproj"

Is there anything wrong? I'm not familiar with the compiler options.

thanks~!

The only time I had the ‘unexpected mtype’ errors is when I was accessing out-of-bounds arrays in the kernel. You might want to recheck your code. The error message is totally bogus, and if you comment out some totally harmless code it’s gone, but still won’t work until you hunt down the error. At least, that was it for me.