Struct usage problem (guessing misalignment)

Hello, everybody!

I’m having problems using a struct that has only float3 members. I’ve tried it with and without forcing alignment. The error occurs only when I enable the commented-out part of the function checkRootValidity_VF(), and it is reported as an “unknown error” after the kernel that calls the function has executed. What is strange to me is that Parallel Nsight isn’t reporting any misalignment error at all, but I’m still guessing that this is the cause. The code below shows the definition of the structure and gives some context on how its instance is set up and used.

/*
 * Bundle of the eight float3 values consumed by the root-validity checks.
 *
 * Members come in (xd, x0) pairs for a, b, c and p. checkRootValidity_VF
 * evaluates each pair as  xd * t + x0 ; Intersect_VF fills a0/b0/c0/p0 from
 * its ta0/tb0/tc0/q0 arguments and ad/bd/cd/pd from tav/tbv/tcv/qv.
 *
 * The struct is explicitly aligned to 16 bytes via __align__(16).
 */
struct __align__(16) NewtonCheckData {
	float3 ad, a0, bd, b0, cd, c0, pd, p0;
};


/*
 * Validity check for a candidate root t in the vertex/face case.
 *
 * t     : candidate root at which the points are evaluated.
 * baryc : only used by the (currently disabled) _insideTriangle call.
 * data  : (xd, x0) pairs; each point is evaluated as xd * t + x0.
 *
 * In its current state only point `a` is computed and the function always
 * returns true; the evaluation of b, c, p and the _insideTriangle test are
 * commented out below.
 */
__device__ __inline__ bool checkRootValidity_VF(float t, float3& baryc, NewtonCheckData &data) {
	float3 tempd = data.ad;
	float3 temp0 = data.a0;
	float3 a = f3v_add(f3v_mul1(tempd , t) , temp0);

	/*tempd = data.bd;
	temp0 = data.b0;
	float3 b = f3v_add(f3v_mul1(tempd , t) , temp0);*/

	/*tempd = data.cd;
	temp0 = data.c0;
	float3 c = f3v_add(f3v_mul1(tempd , t) , temp0);*/

	/*tempd = data.pd;
	temp0 = data.p0;
	float3 p = f3v_add(f3v_mul1(tempd , t) , temp0);*/

	//return _insideTriangle(a , b , c , p , baryc);
	return true;
}


/*
 * Searches the interval [l, r] for a root of the cubic
 *     coeffs[3]*t^3 + coeffs[2]*t^2 + coeffs[1]*t + coeffs[0]
 * and, once the interval is narrower than ccdTimeResolution, hands the
 * candidate to checkRootValidity_VF (bVF == true) or checkRootValidity_EE.
 *
 * l, r   : interval bounds; passed by reference and narrowed in place.
 * baryc  : forwarded to the validity check.
 * bVF    : selects the VF or EE validity check.
 * data   : forwarded to the validity check.
 * coeffs : the four polynomial coefficients, constant term first.
 *
 * Returns false when the interval bounds show no root can lie in [l, r],
 * otherwise the result of the validity check on the refined interval.
 *
 * NOTE(review): the function calls itself on both halves of the interval.
 * Device-side recursion consumes per-thread stack; confirm the target
 * architecture supports it and that the stack limit is large enough.
 */
__device__ __inline__ bool solveCubicWithIntervalNewton(double &l, double &r, float3& baryc, bool bVF,
										 NewtonCheckData &data, double coeffs[])
{
	double v2[2]={l*l,r*r};
	double v[2]={l,r};
	double rBkUp;

	// minN is the top bit of byte 7 of coeffs[N]; with a little-endian
	// IEEE-754 double that is the sign bit (1 when negative). It selects
	// which interval end (index 0 = l, 1 = r) feeds the lower bound, and
	// maxN = minN ^ 1 selects the opposite end for the upper bound.
	unsigned char min3, min2, min1, max3, max2, max1;
	min3=*((unsigned char*)&coeffs[3]+7)>>7;max3=min3^1;
	min2=*((unsigned char*)&coeffs[2]+7)>>7;max2=min2^1;
	min1=*((unsigned char*)&coeffs[1]+7)>>7;max1=min1^1;

	// bound the cubic
	//Visualize the values by thinking of the components of the addition:
	//minor=coeffs[3]*v2[min3]*v[min3] which is a trivial cubic function, for example
	double minor= coeffs[3]*v2[min3]*v[min3] + coeffs[2]*v2[min2] + coeffs[1]*v[min1] + coeffs[0];
	double major= coeffs[3]*v2[max3]*v[max3] + coeffs[2]*v2[max2] + coeffs[1]*v[max1] + coeffs[0];

	if (major<0)
		return false; //No roots in the interval
	if (minor>0)
		return false; //No roots in the interval

	// starting here, the bounds have opposite values
	double m=0.5*(r+l);

	// bound the derivative
	double dminor = 3.0*coeffs[3]*v2[min3] + 2.0*coeffs[2]*v[min2] + coeffs[1];
	double dmajor = 3.0*coeffs[3]*v2[max3] + 2.0*coeffs[2]*v[max2] + coeffs[1];

	if ((dminor>0)||(dmajor<0)) // we can use Newton
	{
		// One interval-Newton step from the midpoint m.
		double m2=m*m;
		double fm=coeffs[3]*m2*m + coeffs[2]*m2 + coeffs[1]*m + coeffs[0];
		double nl=m;
		double nu=m;
		if (fm>0) {nl-=fm*(1.0/dminor);nu-=fm*(1.0/dmajor);}
		else {nu-=fm*(1.0/dminor);nl-=fm*(1.0/dmajor);}

		//intersect with [l,r]
		if (nl>r)
			return false; // no solution
		if (nu<l)
			return false; // no solution

		if (nl>l)
		{
			if (nu<r) {l=nl;r=nu;m=0.5*(l+r);}
			else {l=nl;m=0.5*(l+r);}
		}
		else
		{
			if (nu<r) {r=nu;m=0.5*(l+r);}
		}
	}

	// sufficient temporal resolution, check root validity
	if ((r-l)<ccdTimeResolution)
	{
		if (bVF)
			return checkRootValidity_VF(r, baryc, data);
		else
			return checkRootValidity_EE(r, baryc, data);
	}

	// Otherwise bisect: try [l, m] first, then [m, original r].
	rBkUp = r, r = m;
	if (solveCubicWithIntervalNewton(l,r,baryc, bVF, data, coeffs)) return true;

	l = m, r = rBkUp;
	return (solveCubicWithIntervalNewton(l,r,baryc, bVF, data, coeffs));
}


/*
 * Vertex/face intersection test over the time interval [0, deltaTime].
 *
 * ta0, tb0, tc0 / tav, tbv, tcv : stored as the x0 / xd members of
 *                                 NewtonCheckData for a, b and c.
 * q0 / qv                       : stored as p0 / pd.
 * qi        : output; set to qv * collisionTime + q0 when a root is found.
 * baryc     : forwarded to the root solver / validity check.
 * deltaTime : upper bound of the search interval.
 *
 * Returns the collision time (midpoint of the final solver interval), or
 * -1.0f when the cubic is identically zero or no valid root is found.
 */
__device__ __inline__ float
Intersect_VF(const float3 &ta0, const float3 &tb0, const float3 &tc0,
			 const float3 &tav, const float3 &tbv, const float3 &tcv,
			 const float3 &q0, const float3 &qv,
			 float3 &qi, float3 &baryc , const float deltaTime)
{
	/* Default value returned if no collision occurs */
	float collisionTime = -1.0f;

	/*
	 * Compute scalar coefficients by evaluating dot and cross-products.
	 */
	float a, b, c, d; /* cubic polynomial coefficients */
	_equateCubic_VF(ta0, tav, tb0, tbv, tc0, tcv, q0, qv, a, b, c, d);

	if (IsZero(a) && IsZero(b) && IsZero(c) && IsZero(d))
		return collisionTime;

	NewtonCheckData data;
	data.a0 = ta0, data.b0 = tb0;
	data.c0 = tc0, data.p0 = q0;
	data.ad = tav, data.bd = tbv;
	data.cd = tcv, data.pd = qv;

	//iteratively solve the cubic (scalar) equation and test for validity of the solution.
	double l = 0;
	double r = deltaTime;
	double coeffs[4];
	coeffs[3] = a, coeffs[2] = b, coeffs[1] = c, coeffs[0] = d;

	if (solveCubicWithIntervalNewton(l, r, baryc, true, data, coeffs)) {
		collisionTime = (l+r)*0.5f;
		f3s_add(qi , f3v_mul1(qv , collisionTime) , q0); //point at the time of collision
	}

	return collisionTime;
}


Does anyone have a clue about the problem? I would really appreciate any help, since I can’t figure it out alone. If more info or context is needed, just ask.

What does “_insideTriangle(a , b , c , p , baryc)” do?

Maybe some data in your struct has not been initialized, which results in out-of-bounds indexes.

Have you tried replacing the simple struct with an array, to rule out the struct as a potential cause of the failure?

Thanks for the reply. I’ve replaced the struct with an array of float3 and that did the trick.

Was the struct in shared memory? I have encountered several issues with structs in shared memory.

On a G80 card, a shared struct with 3 floats did not work, whereas a struct with 2 floats did work.

On a GF100 card, the shared version with 2 floats did not work; fortunately, using shared memory was not that important in my kernel.