Warp serialise problem - no constant mem arrays or shared mem used!

Hi everyone,

I have a strange problem: I am writing a raytracing app, and the problem only seems to manifest itself with transparent objects. I read a previous post saying that warp serialisation has nothing to do with branching, but rather with accesses to constant memory and shared memory. I’ve edited my program so that it no longer uses constant memory or shared memory - everything is stored in textures now - and yet I am still getting warp serialisation. Any ideas, anyone?

If you need to see any code let me know.

Here is the profiler output (attached):

I’ve narrowed the problem down a bit now, and I think it has something to do with some local memory arrays in my shading code:

[codebox]

// Computes the colour for the hit described by originalSR by following
// secondary rays in loops (this function does not call itself) and then folding
// the colours of those secondary hits back into entry 0.
//
// Parameters:
//   originalSR       - shade record of the starting hit; copied into sr[0].
//   backgroundColour - colour stored for any secondary ray that hits nothing.
// Returns: srC[0], the colour accumulated for originalSR.
//
// NOTE(review): "device" is presumably CUDA's __device__ qualifier with the
// underscores eaten by the forum markup - confirm against the real source.
device RGBColour shade(ShadeRec& originalSR, const RGBColour& backgroundColour)

{

// wo/wi/wt and fr/ft are scratch values used only by the post-process loop.
float3 wo, wi, wt;	

RGBColour fr, ft; 

// Fixed-size per-call work arrays. sr[k] / srC[k] hold the shade record and
// colour of entry k (entry 0 is originalSR); currRays holds the rays traced in
// the current pass and newRays collects the rays for the next pass.
ShadeRec sr[17];

Ray newRays[16];

Ray currRays[16];

RGBColour srC[17];

// NOTE(review): idx is computed but never read anywhere in this function.
volatile int idx = threadIdx.y*blockDim.x + threadIdx.x;

// zero and one stand in for the literals 0 and 1 below.
// NOTE(review): presumably volatile is used to stop the compiler from
// constant-folding them; the reason is not visible here - confirm.
volatile int zero = 0;

volatile int one = 1;

sr[0] = originalSR;

// currentSR is the next slot of sr/srC to fill; numCurrRays and numNewRays are
// the number of valid rays in currRays and newRays respectively.
int currentSR, numNewRays, numCurrRays;

currentSR = zero;

numNewRays = zero;

numCurrRays = zero;

// Direct colour of the starting hit.
srC[zero] = specularShade(sr[zero]); 



// Seed the first pass: material.y > 1 requests a ray via createRefRay, and
// material.y == 3 additionally requests one via createTransRay unless tir()
// returns true.
// NOTE(review): the meaning of the material.y codes and of tir (total internal
// reflection?) is inferred from names only - confirm.
// NOTE(review): numCurrRays is not assigned between here and the
// "numCurrRays > zero" test below, so the create*Ray helpers presumably take it
// by reference and increment it - confirm against their definitions.
if(sr[zero].material.y > one)

{

	createRefRay(currRays, numCurrRays, sr[zero], zero);

	if(sr[zero].material.y == 3)

	{

		if(!tir(sr[zero]))

		{

			createTransRay(currRays, numCurrRays, sr[zero], zero);

		}

	}

}

// Slot 0 is taken by originalSR; secondary hits start at slot 1.
currentSR = currentSR + one;

if(numCurrRays > zero)

{

	// One pass per bounce level, at most vp.maxDepth passes (vp is defined
	// outside this function).
	for(int i = zero; i < vp.maxDepth; i++)

	{

		// NOTE(review): nothing in this function checks currentSR against the 17
		// slots of sr/srC, or numNewRays against the 16 slots of newRays/currRays.
		// Each hit may request two rays, so staying in range depends on
		// vp.maxDepth and on any checks inside create*Ray - verify.
		for (int currRay = 0; currRay < numCurrRays; currRay++)

		{

			// Trace the ray, then keep the ray itself in the record: the
			// post-process loop reads ray.parent and ray.transparent from it.
			sr[currentSR] = hitObjects(currRays[currRay]);    

			sr[currentSR].ray = currRays[currRay];

			if (sr[currentSR].hitAnObject) 

			{

				srC[currentSR] = specularShade(sr[currentSR]);

				// Same spawning rule as for the starting hit, but the rays go to
				// newRays. currentSR is passed along, presumably to be recorded as
				// the new ray's parent index - confirm.
				if(sr[currentSR].material.y > one)

				{

					createRefRay(newRays, numNewRays, sr[currentSR], currentSR);

					if(sr[currentSR].material.y == 3)

					{

						if(!tir(sr[currentSR]))

							createTransRay(newRays, numNewRays, sr[currentSR], currentSR);

					}

				}

			}

			// A miss contributes the background colour.
			else

				srC[currentSR] = backgroundColour;

			currentSR = currentSR + one;

		}

		// The rays collected during this pass become the input of the next pass.
		for (int j = 0; j < numNewRays; j++) 

			currRays[j] = newRays[j];

		numCurrRays = numNewRays;

		numNewRays = 0;

	}

	//Post process ShadeRec tree and get final colour of originalSR

	// Walks the filled entries from the last one down to entry 1 and adds each
	// entry's colour into the colour of the entry named by its ray.parent.
	// Entries whose ray.transparent is 0 are weighted with reflectiveF and wi,
	// all others with transparentF and wt.
	// NOTE(review): wi and wt are never assigned in this function before being
	// passed; presumably reflectiveF/transparentF write them as output
	// parameters - confirm.
	// NOTE(review): accumulating in descending order is only complete if every
	// parent index is lower than its child's index - presumably true given how
	// currentSR is handed to create*Ray; confirm.
	for(int i = currentSR - one; i > 0; i--)

	{

		wo = -sr[sr[i].ray.parent].ray.d;

		if(sr[i].ray.transparent == 0)

		{

			fr = reflectiveF(sr[sr[i].ray.parent], wo, wi);

			srC[sr[i].ray.parent] += fr * srC[i] * fabs(dot(sr[sr[i].ray.parent].normal, wi));

		}

		else

		{

			ft = transparentF(sr[sr[i].ray.parent], wo, wt); 

			srC[sr[i].ray.parent] += ft * srC[i] * fabs(dot(sr[sr[i].ray.parent].normal, wt));

		}

	}

}

// Entry 0 now holds the starting hit's own colour plus everything folded into it.
return (srC[0]);	

}

[/codebox]

As far as I know, branching can cause serialization if the code in each branch is “big enough” that the compiler decides it is better to do that than to execute both branches using the predicate registers.
Feel free to correct me!