OpenCL: clBuildProgram return CL_BUILD_PROGRAM_FAILURE for valid cl program

Hi!

I’m using HelloWorld example from APP SDK 3.0 with your OpenCL compute program(Particles.cl), clBuildProgram return CL_BUILD_PROGRAM_FAILURE on nVidia GeForce GTX 670, but same program correct works on
AMD R7 240, nVidia 8600GT, nVida 210, Intel HD 4000.

I ahhached OpenCL Program and Sample Code with executable:

Also, Code for Particles.cl:

#ifdef cl_khr_gl_sharing
//#pragma OPENCL EXTENSION cl_khr_gl_sharing : enable
#endif

#ifdef cl_amd_printf
#pragma OPENCL EXTENSION cl_amd_printf : enable
#endif

#ifdef cl_apple_gl_sharing
#pragma OPENCL EXTENSION cl_apple_gl_sharing : enable
#endif

#ifdef cl_khr_byte_addressable_store
#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
#endif

#ifdef cl_khr_d3d11_sharing 
//#pragma OPENCL EXTENSION cl_khr_d3d11_sharing : enable
#endif

#ifdef cl_khr_global_int32_base_atomics
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#endif

typedef float3 Vector3D;
typedef float4 Vector4D;
typedef float4 Quaternion;

__attribute__((aligned(4)))

#define USE_SIMD_PARTICLE

// 64 byte
typedef struct Particle {
#ifdef USE_SIMD_PARTICLE
	Vector4D position;
#else
	float position[3];
#endif
	uchar4 colour;
	float Width;
	float Height;
	float angle;
#ifdef USE_SIMD_PARTICLE
	Vector4D direction;
#else
	float direction[3];
#endif
	float timeAlive;
	float velocity;
	float totalTimeAlive;
	uint particleEmitter;
} Particle;

//#define USE_SIMD_PARTICLE_INSTANCE

// 36 byte
typedef struct ParticleInstanceData {
#ifdef USE_SIMD_PARTICLE_INSTANCE
	float4 position;
	//
#else
	float position[4];
#endif
	uchar4 Color;
#ifdef USE_SIMD_PARTICLE_INSTANCE
	float4 SizeAngle;
#else
	float SizeAngle[4];
#endif
} ParticleInstanceData;

// 20 byte
typedef struct ParticleSimulationData {
	float dt;
	int seed;
	uint num;
	uint maxParticles;
	//uint particleVBIndex;
} ParticleSimulationData;

// 16 byte
typedef struct LinearForceData {
	Vector4D force;
} LinearForceData;

// 4 byte
typedef struct ScaleData {
	float scale;
} ScaleData;

// 32 byte
typedef struct ColourFaderData {
	float endRed;
	float endGreen;
	float endBlue;
	float endAlpha;
	float red;
	float green;
	float blue;
	float alpha;
} ColourFaderData;

// 16 byte
typedef struct SpiralMotionData {
	float startSpeed;
	float endSpeed;
	float startRadius;
	float endRadius;
} SpiralMotionData;

// 20 byte
typedef struct RotationData {
	float startAngle;
	float endAngle;
	float startSpeed;
	float endSpeed;
} RotationData;

// 24 byte
typedef struct RandomDirectionData {
	float startRandom[3];
	float endRandom[3];
} RandomDirectionData;

#define NO_VEC_EMITTER_DATA

// 112 byte
typedef struct EmitterData {
	uint particleEmitter;
#ifdef NO_VEC_EMITTER_DATA
	float Position[3];
	float Direction[3];
#else
	Vector4D Position;
	Vector4D Direction;
#endif
	float minVelocity;
	float maxVelocity;
	float velocity;
	float angle;
	float minTimeAlive;
	float maxTimeAlive;
	float timeAlive;
	float startTime;
	float endTime;
	float emitterTime;
	float dEmitterTime;
	float curTime;
	uchar startColour[4];
	uchar endColour[4];
	//uint numParticles;
	//uint numAliveParticles;
	//uint particlePerDeltaTime;
	float width;
	float height;
	uint attachToCamera;
	uint isRepeat;
} EmitterData;

// 160 byte
typedef struct BoxEmitter {
	EmitterData emitterData;
	Vector4D xRange;
	Vector4D yRange;
	Vector4D zRange;
} BoxEmitter;

// 128 byte
typedef struct EllipsoidEmitter {
	EmitterData emitterData;
	Vector4D dimension;
} EllipsoidEmitter;

// 132 byte
typedef struct RingEmitter {
	EmitterData emitterData;
	float Width;
	float Height;
	float depth;
	float InnerRadius;
	float OuterRadius;
} RingEmitter;

typedef EmitterData PointerEmitter;

__constant float TWO_PI = 2 * 3.1415f;

inline int FastRand(__global int* seed)
{
	barrier(CLK_GLOBAL_MEM_FENCE);
	return (((*seed = *seed * 214013L + 2531011L) >> 16) & 0x7fff);
}

inline float rand(__global int* seed)
{
	const int RAND_MAX = 0x7fff;
	float t = FastRand(seed) * (1.0f / RAND_MAX);
	return t;
}

inline float Rand(float minVal, float maxVal, __global int* seed)
{
	float t = rand(seed);
	//printf((__constant char*)"Rand minVal %f, maxVal %f, t %f\n", minVal, maxVal, t);
	return mix(minVal, maxVal, t);
}

inline float SymmetricRandom(__global int* seed)
{
	return 2.0f * rand(seed);
}

void DumpParticle(const Particle particle)
{
	__constant char* format = (__constant char *)"Particle width %f height %f pos %f %f %f"
		"totalTimeAlive %f timeAlive %f\n";
#ifdef USE_SIMD_PARTICLE
	printf(format, particle.Width, particle.Height, particle.position.x, particle.position.y, particle.position.z, particle.totalTimeAlive, particle.timeAlive);
#else
	printf(format, particle.Width, particle.Height, particle.position[0], particle.position[1], particle.position[2], particle.totalTimeAlive, particle.timeAlive);
#endif
}

//
void LinearForceAffector(__global Particle* particle, __global LinearForceData* data, __global const ParticleSimulationData* particleSimulationData)
{
#ifdef USE_SIMD_PARTICLE
	particle->position.xyz += (particle->direction.xyz * data->force.xyz * particleSimulationData->dt);
#else
	particle->position[0] += particle->direction[0] * data->force.x * particleSimulationData->dt;
	particle->position[1] += particle->direction[1] * data->force.y * particleSimulationData->dt;
	particle->position[2] += particle->direction[2] * data->force.y * particleSimulationData->dt;
#endif
}

void ScaleAffector(__global Particle* particle, __global ScaleData* data, __global const ParticleSimulationData* particleSimulationData)
{
	float scale = data->scale * particleSimulationData->dt;
	particle->Width *= scale;
	particle->Height *= scale;
}

uchar ApplyColor(uchar color, float dc, float clampVal)
{
	int dval = (int)(UCHAR_MAX * dc);
	int Color = color - dval;
	const int MaxVal = UCHAR_MAX;
	Color = clamp(Color, 0, MaxVal);
	return Color;
}

void ColourFaderAffector(__global Particle* particle, __global ColourFaderData* data, __global const ParticleSimulationData* particleSimulationData)
{
	if (data->red != 0.0f) {
		particle->colour.x = ApplyColor(particle->colour.x, data->red, data->endRed);
	}
	if (data->green != 0.0f) {
		particle->colour.y = ApplyColor(particle->colour.y, data->green, data->endGreen);
	}
	if (data->blue != 0.0f) {
		particle->colour.z = ApplyColor(particle->colour.z, data->blue, data->endBlue);
	}
	if (data->alpha != 0.0f) {
		particle->colour.w = ApplyColor(particle->colour.w, data->alpha, data->endAlpha);
	}
}

void GetOffset(float radius, float angle, bool* isCos, float* offset)
{
	if (*isCos) {
		*offset = radius * sin(angle);
	}
	else {
		*offset = radius * cos(angle);
		*isCos = true;
	}
}

void SpiralMotionAffector(__global Particle* particle, __global SpiralMotionData* data, __global ParticleSimulationData* particleSimulationData)
{
#ifdef USE_SIMD_PARTICLE
	float maxCoord = fmax(particle->direction.x, fmax(particle->direction.y, particle->direction.z));
#else
	float maxCoord = fmax(particle->direction[0], fmax(particle->direction[1], particle->direction[2]));
#endif
	float speed = Rand(data->startSpeed, data->endSpeed, &particleSimulationData->seed);
	particle->angle += (speed * particleSimulationData->dt);
	bool isCos = false;
	float offset = 0.0f;
	float radius = Rand(data->startRadius, data->endRadius, &particleSimulationData->seed);
	//
#ifdef USE_SIMD_PARTICLE
	if (maxCoord != particle->direction.x) {
#else
	if (maxCoord != particle->direction[0]) {
#endif
		GetOffset(radius, particle->angle, &isCos, &offset);
#ifdef USE_SIMD_PARTICLE
		particle->position.x += offset;
#else
		particle->position[0] += offset;
#endif
	}
#ifdef USE_SIMD_PARTICLE
	if (maxCoord != particle->direction.z) {
#else
	//
	if (maxCoord != particle->direction[2]) {
#endif
		GetOffset(radius, particle->angle, &isCos, &offset);
#ifdef USE_SIMD_PARTICLE
		particle->position.z += offset;
#else
		particle->position[2] += offset;
#endif
	}
#ifdef USE_SIMD_PARTICLE
	if (maxCoord != particle->direction.y) {
#else
	//
	if (maxCoord != particle->direction[1]) {
#endif
		GetOffset(radius, particle->angle, &isCos, &offset);
#ifdef USE_SIMD_PARTICLE
		particle->position.y += offset;
#else
		particle->position[1] += offset;
#endif
	}
}

void RotationAffector(__global Particle* particle, __global RotationData* data, __global ParticleSimulationData* particleSimulationData)
{
	float angle = Rand(data->startAngle, data->endAngle, &particleSimulationData->seed);
	float speed = Rand(data->startSpeed, data->endSpeed, &particleSimulationData->seed);
	particle->angle += (angle * particleSimulationData->dt * speed);
}

void RandomDirectionAffector(__global Particle* particle, __global RandomDirectionData* data, __global ParticleSimulationData* particleSimulationData)
{
	float3 offset;
	offset.x = Rand(data->startRandom[0], data->endRandom[0], &particleSimulationData->seed);
	offset.y = Rand(data->startRandom[1], data->endRandom[1], &particleSimulationData->seed);
	offset.z = Rand(data->startRandom[2], data->endRandom[2], &particleSimulationData->seed);
	//printf((__constant char*)"RandomDirectionAffector offset %f %f %f\n", offset.x, offset.y, offset.z);
#ifdef USE_SIMD_PARTICLE
	particle->direction.xyz += (offset * particleSimulationData->dt);
	particle->direction.xyz = normalize(particle->direction.xyz);
#else
	particle->direction[0] += (offset.x * particleSimulationData->dt);
	particle->direction[1] += (offset.y * particleSimulationData->dt);
	particle->direction[2] += (offset.z * particleSimulationData->dt);
#endif
}

void updateParticleInstanceData(const Particle particle, __global ParticleSimulationData* particleSimulationData, __global ParticleInstanceData* particleInstanceData)
{
	//barrier(CLK_GLOBAL_MEM_FENCE);
	//printf((__constant char*)"updateParticleInstanceData\n");
	//DumpParticle(particle);
	//particleInstanceData->position = (Vector4D)(particle.position.xyz, 1.0f);
	//uint particleVBIndex = atomic_inc(&particleSimulationData->particleVBIndex);
	//__global ParticleInstanceData* ParticleInstanceData = particleInstanceData + particleVBIndex;
	//int iGID = get_global_id(0);
	//__global ParticleInstanceData* ParticleInstanceData = particleInstanceData + iGID;
	__global ParticleInstanceData* ParticleInstanceData = particleInstanceData;
#ifdef USE_SIMD_PARTICLE
#ifdef USE_SIMD_PARTICLE_INSTANCE
	ParticleInstanceData->position = (float4)(particle.position.xyz, 1.0f);
#else
	ParticleInstanceData->position[0] = particle.position.x;
	ParticleInstanceData->position[1] = particle.position.y;
	ParticleInstanceData->position[2] = particle.position.z;
#endif
#else
	ParticleInstanceData->position[0] = particle.position[0];
	ParticleInstanceData->position[1] = particle.position[1];
	ParticleInstanceData->position[2] = particle.position[2];
#endif
	ParticleInstanceData->Color = particle.colour;
#ifdef USE_SIMD_PARTICLE_INSTANCE
	ParticleInstanceData->SizeAngle = (Vector4D)(particle.Width, particle.Height, particle.angle, 1.0f);
#else
	ParticleInstanceData->SizeAngle[0] = particle.Width;
	ParticleInstanceData->SizeAngle[1] = particle.Height;
	ParticleInstanceData->SizeAngle[2] = particle.angle;
	ParticleInstanceData->SizeAngle[3] = 1.0f;
#endif
}

inline void UpdateP(__global Particle* particle, float dt)
{
#ifdef USE_SIMD_PARTICLE
	particle->position.xyz += (particle->direction.xyz * particle->velocity * dt);
#else
	float v = particle->velocity * dt;
	particle->position[0] += (particle->direction[0] * v);
	particle->position[1] += (particle->direction[1] * v);
	particle->position[2] += (particle->direction[2] * v);
#endif
	particle->totalTimeAlive += dt;
}

void updateParticles(__global Particle* particles, int affectorType,
	__global ColourFaderData* colourData,
	__global ScaleData* scaleData,
	__global RotationData* rdata,
	__global LinearForceData* forceData,
	__global RandomDirectionData* rdData,
	__global SpiralMotionData* spData,
	__global ParticleSimulationData* particleSimulationData,
	__global ParticleInstanceData* particleInstanceData,
	__global uint* numParticles)
{
	__global Particle* particle = particles;
	if (particle->totalTimeAlive < particle->timeAlive) {
		switch (affectorType) {
			case 0: {
				ColourFaderAffector(particle, colourData, particleSimulationData);
			}
			break;
			case 1: {
				ScaleAffector(particle, scaleData, particleSimulationData);
			}
			break;
			case 2: {
				RotationAffector(particle, rdata, particleSimulationData);
			}
			break;
			case 3: {
				LinearForceAffector(particle, forceData, particleSimulationData);
			}
			break;
			case 4: {
				RandomDirectionAffector(particle, rdData, particleSimulationData);
			}
			break;
			case 6: {
				SpiralMotionAffector(particle, spData, particleSimulationData);
			}
			break;
		}
		//particle->position += (particle->direction.xyz * particle->velocity * particleSimulationData->dt);
	//	particle->position[0] += (particle->direction[0] * particle->velocity * particleSimulationData->dt);
		//particle->position[1] += (particle->direction[1] * particle->velocity * particleSimulationData->dt);
		//particle->position[2] += (particle->direction[2] * particle->velocity * particleSimulationData->dt);
		//particle->totalTimeAlive += particleSimulationData->dt;
		UpdateP(particle, particleSimulationData->dt);
		updateParticleInstanceData(*particle, particleSimulationData, particleInstanceData);
	}
	else {
		particle->particleEmitter = UINT_MAX;
		barrier(CLK_GLOBAL_MEM_FENCE);
		//printf((__constant char*)"particle->totalTimeAlive >= particle->timeAlive");
		if (particleSimulationData->num != 0)
		{
			--particleSimulationData->num;
			//atomic_dec(&particleSimulationData->num);
		}
	}
}

__kernel void UpdateParticlesOnly(__global Particle* particles, int affectorType,
	__global ColourFaderData* colourData,
	__global ScaleData* scaleData,
	__global RotationData* rdata,
	__global LinearForceData* forceData,
	__global RandomDirectionData* rdData,
	__global SpiralMotionData* spData,
	__global ParticleSimulationData* particleSimulationData,
	__global ParticleInstanceData* particleInstanceData,
	__global uint* numParticles)
{
	updateParticles(particles, affectorType, colourData, scaleData, rdata, forceData, rdData, spData, particleSimulationData, particleInstanceData, numParticles);
}

inline float LengthSq(const Vector3D v)
{
	return v.x * v.x + v.y * v.y + v.z * v.z;
}

inline static Vector3D Perpendicular(const Vector3D v)
{
	const float fSquareZero = 1e-06f * 1e-06f;

	Vector3D perp = cross(v, (Vector3D)(1.0f, 0.0f, 0.0f));

	if (LengthSq(perp) < fSquareZero) {
		perp = cross(v, (Vector3D)(0.0f, 1.0f, 0.0f));
	}
	return perp;
}

static float3 QuaternionMultiply(const Quaternion q, const Vector3D v)
{
	Vector3D uv = cross(q.xyz, v);
	Vector3D uuv = cross(q.xyz, uv);
	uv *= (2.0f * q.w);
	uuv *= 2.0f;
	return v + uv + uuv;
}

//
Quaternion CreateFromAxisAngle(const Vector3D v, float angle)
{
	Quaternion q;
	angle *= 0.5f;
	float w = 0.0f;
	float result = sincos(angle, &w);
	q.w = w;
	q.xyz = v.xyz * result;
	q = normalize(q);
	return q;
}

static Vector3D RandomDeviant(const Vector3D v, float angle, __global int* seed)
{
	Vector3D newUp = Perpendicular(v);
	Quaternion q;
	__private const float TWO_PI = 2.0f * 3.1415f;
	q = CreateFromAxisAngle(v, rand(seed) * TWO_PI);
	newUp = QuaternionMultiply(q, newUp);
	q = CreateFromAxisAngle(newUp, angle);
	return QuaternionMultiply(q, v);
}

void UpdateAngleDirection(__global EmitterData* emitterData, __global int* seed, __global Particle* particle)
{
	if (emitterData->angle) {
		float Angle = Rand(0.0f, emitterData->angle, seed);
#ifdef USE_SIMD_PARTICLE
#ifdef NO_VEC_EMITTER_DATA
		particle->direction.xyz = RandomDeviant((float3)(emitterData->Direction[0], emitterData->Direction[1], emitterData->Direction[2]), 
			emitterData->angle, seed);
#else
		particle->direction.xyz = RandomDeviant(emitterData->Direction.xyz, emitterData->angle, seed);
#endif
#else
		Vector3D v = RandomDeviant(emitterData->Direction.xyz, emitterData->angle, seed);
		particle->direction[0] = v.x;
		particle->direction[1] = v.y;
		particle->direction[2] = v.z;
#endif
	}
	else {
#ifdef USE_SIMD_PARTICLE
#ifdef NO_VEC_EMITTER_DATA
		particle->direction.xyz = (float3)(emitterData->Direction[0], emitterData->Direction[1], emitterData->Direction[2]);
#else
		particle->direction.xyz = emitterData->Direction.xyz;
#endif
#else
		particle->direction[0] = emitterData->Direction.x;
		particle->direction[1] = emitterData->Direction.y;
		particle->direction[2] = emitterData->Direction.z;
#endif
	}
}

float UpdateVelocityDirection(__global EmitterData* emitterData, __global int* seed)
{
	//
	if (emitterData->minVelocity != emitterData->maxVelocity) {
		return Rand(emitterData->minVelocity, emitterData->maxVelocity, seed);
	}
	else if (emitterData->minVelocity != 0.0f) {
		return emitterData->minVelocity;
	}
	else {
		return 0.0f;
	}
}

inline void AtomicAdd(volatile __global float* source, const float operand) {
	union {
		unsigned int intVal;
		float floatVal;
	} newVal;
	union {
		unsigned int intVal;
		float floatVal;
	} prevVal;
	do {
		prevVal.floatVal = *source;
		newVal.floatVal = prevVal.floatVal + operand;
	} while (atomic_cmpxchg((volatile __global unsigned int *)source, prevVal.intVal, newVal.intVal) != prevVal.intVal);
}

bool CheckEmitt(__global EmitterData* emitterData, __global ParticleSimulationData* particleSimulationData)
{
	barrier(CLK_GLOBAL_MEM_FENCE);
	//printf((__constant char *)"dt %f EmitterData curtime %f startTime %f\n", particleSimulationData->dt, emitterData->curTime, emitterData->startTime);
#if 0
	float t = emitterData->curTime - emitterData->startTime;
	if (t <= 0.0f) {
		//atom_xchg(&emitterData->numParticles, 0);
		return false;
	}
	//printf((__constant char *)"CheckEmitt emitterData->numParticles %d emitterData->dEmitterTime %f emitterData->emitterTime %f\n", 
		//emitterData->numParticles, emitterData->dEmitterTime, emitterData->emitterTime);
	if (emitterData->dEmitterTime < emitterData->emitterTime) {
		//atom_xchg(&emitterData->numParticles, 0);
		return false;
	}
	emitterData->dEmitterTime = emitterData->dEmitterTime - emitterData->emitterTime;
	//printf((__constant char *)"CheckEmitt emitterData->dEmitterTime %f\n", emitterData->dEmitterTime);
	if (emitterData->endTime > 0.0f && t > emitterData->endTime) {
		//atom_xchg(&emitterData->numParticles, 0);
		if (emitterData->isRepeat) {
			emitterData->curTime = emitterData->curTime - emitterData->startTime;
		}
		return false;
	}
#endif
	if (particleSimulationData->num > particleSimulationData->maxParticles) {
		//printf((__constant char *)"CheckEmitt max particles\n");
		//atom_xchg(&emitterData->numParticles, 0);
//		if (!emitterData->startTime) {
//			emitterData->curTime = 0.0f;
//		}
		return false;
	}
	//if (emitterData->particlePerDeltaTime + particleSimulationData->num > particleSimulationData->maxParticles) {
	//	emitterData->numParticles = particleSimulationData->maxParticles - particleSimulationData->num;
	//}
	//else {
	//	atom_xchg(&emitterData->numParticles, emitterData->particlePerDeltaTime);
	//}
	//if (emitterData->isRepeat && emitterData->endTime < 0.0f) {
//		emitterData->curTime = emitterData->curTime - emitterData->startTime;
	//}
	return true;
}

void UpdateCommonParameters(__global Particle* particle, __global EmitterData* emitterData, __global int* seed)
{
	//particle->direction = UpdateAngleDirection(emitterData, seed);
	//barrier(CLK_GLOBAL_MEM_FENCE);
	UpdateAngleDirection(emitterData, seed, particle);
	particle->velocity = UpdateVelocityDirection(emitterData, seed);
}

void InitStartParameters(__global Particle* particle, __global EmitterData* emitter, __global int* seed)
{
#ifdef USE_SIMD_PARTICLE
#ifdef NO_VEC_EMITTER_DATA
	particle->position.xyz = (float3)(emitter->Position[0], emitter->Position[1], emitter->Position[2]);
#else
	particle->position.xyz = emitter->Position.xyz;
#endif
#else
	particle->position[0] = emitter->Position.x;
	particle->position[1] = emitter->Position.y;
	particle->position[2] = emitter->Position.z;
#endif
	particle->particleEmitter = emitter->particleEmitter;
	//
	particle->Width = emitter->width;
	particle->Height = emitter->width;
	//
	if (emitter->minTimeAlive > 0.0f && emitter->maxTimeAlive > 0.0f) {
		particle->timeAlive = Rand(emitter->minTimeAlive, emitter->maxTimeAlive, seed);
	}
	else {
		particle->timeAlive = emitter->timeAlive;
	}
	particle->colour.x = Rand((float)emitter->startColour[0], (float)emitter->endColour[0], seed);
	particle->colour.y = Rand((float)emitter->startColour[1], (float)emitter->endColour[1], seed);
	particle->colour.z = Rand((float)emitter->startColour[2], (float)emitter->endColour[2], seed);
	particle->colour.w = 255;
	particle->totalTimeAlive = 0.0f;
	//barrier(CLK_GLOBAL_MEM_FENCE);
	//atomic_inc(&emitter->numAliveParticles);
	//printf((__constant char *)"InitStartParameters\n");
	//DumpParticle(*particle);
}

bool Update(__global Particle* particle, __global EmitterData* emitterData, __global ParticleSimulationData* particleSimulationData)
{
	if (particle->particleEmitter != UINT_MAX) {
		return true;
	}
	//
	InitStartParameters(particle, emitterData, &particleSimulationData->seed);
	barrier(CLK_GLOBAL_MEM_FENCE);
	particleSimulationData->num++;
	//atomic_inc(&particleSimulationData->num);
	//printf((__constant char *)"Update numParticles %d\n", particleSimulationData->num);
	//atomic_inc(&particleSimulationData->num);
	return false;
}

inline void OffsetPos(__global Particle* particle, const Vector3D p)
{
#ifdef USE_SIMD_PARTICLE
	particle->position.xyz += p;
#else
	particle->position[0] += p.x;
	particle->position[1] += p.y;
	particle->position[2] += p.z;
#endif
}

bool BoxEmitterEmittParticles(__global Particle* particle, __global BoxEmitter* boxEmitter, __global ParticleSimulationData* particleSimulationData)
{
	if (!CheckEmitt(&boxEmitter->emitterData, particleSimulationData)) {
		return false;
	}
	if (Update(particle, &boxEmitter->emitterData, particleSimulationData)) {
		return true;
	}
	float val = SymmetricRandom(&particleSimulationData->seed);
	Vector3D xOff = val * boxEmitter->xRange.xyz;
	val = SymmetricRandom(&particleSimulationData->seed);
	Vector3D yOff = val * boxEmitter->yRange.xyz;
	val = SymmetricRandom(&particleSimulationData->seed);
	Vector3D zOff = val * boxEmitter->zRange.xyz;
	Vector3D p = (xOff + yOff + zOff);// + Direction * dt;l
	OffsetPos(particle, p);
	UpdateCommonParameters(particle, &boxEmitter->emitterData, &particleSimulationData->seed);
	return false;
}

bool EllipsoidEmitterEmittParticles(__global Particle* particle, __global EllipsoidEmitter* ellpsoidEmitter, __global ParticleSimulationData* particleSimulationData)
{
	if (!CheckEmitt(&ellpsoidEmitter->emitterData, particleSimulationData)) {
		return false;
	}
	if (Update(particle, &ellpsoidEmitter->emitterData, particleSimulationData)) {
		return true;
	}
	Vector3D p;
	while (true) {
		p.x = SymmetricRandom(&particleSimulationData->seed);
		p.y = SymmetricRandom(&particleSimulationData->seed);
		p.z = SymmetricRandom(&particleSimulationData->seed);

		float d = dot(p, p);
		if (d <= 1.0f) {
			break;
		}
	}
#ifdef USE_SIMD_PARTICLE
	Vector3D pos = particle->position.xyz;
#else
	Vector3D pos = (Vector3D)(particle->position[0], particle->position[1], particle->position[2]);
#endif
	//particle->position += ellpsoidEmitter->dimension * p;
	OffsetPos(particle, ellpsoidEmitter->dimension.xyz * p);
#ifdef USE_SIMD_PARTICLE
	particle->direction.xyz = normalize(particle->position.xyz - pos);
#else
	Vector3D dir = normalize((Vector3D)(particle->position[0], particle->position[1], particle->position[2]) - pos);
	particle->direction[0] = dir.x;
	particle->direction[1] = dir.y;
	particle->direction[2] = dir.z;
#endif
	particle->velocity = UpdateVelocityDirection(&ellpsoidEmitter->emitterData, &particleSimulationData->seed);
	return false;
}

bool RingEmitterEmittParticles(__global Particle* particle, __global RingEmitter* ringEmitter, __global ParticleSimulationData* particleSimulationData)
{
	if (!CheckEmitt(&ringEmitter->emitterData, particleSimulationData)) {
		return false;
	}
	if (Update(particle, &ringEmitter->emitterData, particleSimulationData)) {
		return true;
	}
	float a = Rand(0.0f, TWO_PI, &particleSimulationData->seed);
	float z = SymmetricRandom(&particleSimulationData->seed);
	float cosa = 0.0f;
	float sina = sincos(a, &cosa);
	float inRndRadius = sina * Rand(ringEmitter->InnerRadius, 1.0f, &particleSimulationData->seed);
	float outRndRadius = cosa * Rand(ringEmitter->OuterRadius, 1.0f, &particleSimulationData->seed);
	Vector3D p = (Vector3D)(inRndRadius * ringEmitter->depth, z * ringEmitter->Height, outRndRadius * ringEmitter->Width);
	//particle->position += p;
	OffsetPos(particle, p);
	UpdateCommonParameters(particle, &ringEmitter->emitterData, &particleSimulationData->seed);
	return false;
}

bool PointerEmitterEmittParticles(__global Particle* particle, __global PointerEmitter* pointerEmitterData, __global ParticleSimulationData* particleSimulationData)
{
	//__constant char* format = (__constant char *)"PointerEmitterEmittParticles dt %f EmitterData curtime %f startTime %f\n";
	//printf(format, particleSimulationData->dt, pointerEmitterData->curTime, pointerEmitterData->startTime);
	if (!CheckEmitt(pointerEmitterData, particleSimulationData)) {
		return false;
	}
	if (Update(particle, pointerEmitterData, particleSimulationData)) {
		return true;
	}
	//printf((__constant char *)"PointerEmitterEmittParticles\n");
	//DumpParticle(*particle);
	UpdateCommonParameters(particle, pointerEmitterData, &particleSimulationData->seed);
	return false;
}

__kernel void UpdateParticles(__global Particle* particles,
	//__global BoxEmitter* boxEmitterData,
	//__global EllipsoidEmitter* ellpsoidEmitterData,
	//__global RingEmitter* ringEmitterData,
	__global PointerEmitter* pointerEmitterData,
	//__global ColourFaderData* colourData,
	//__global ScaleData* scaleData,
	//__global RotationData* rData,
	//__global LinearForceData* forceData,
	//__global RandomDirectionData* rdData,
	__global SpiralMotionData* spData,
	__global ParticleSimulationData* particleSimulationData,
	__global ParticleInstanceData* particleInstanceData)
{
#if 0
	printf((__constant char *)"Particle size %d\n", sizeof(Particle));
	printf((__constant char *)"ParticleInstanceData size %d\n", sizeof(ParticleInstanceData));
	printf((__constant char *)"ParticleSimulationData size %d\n", sizeof(ParticleSimulationData));
	printf((__constant char *)"RandomDirectionData size %d\n", sizeof(RandomDirectionData));
	printf((__constant char *)"LinearForceData size %d\n", sizeof(LinearForceData));
	printf((__constant char *)"ScaleData size %d\n", sizeof(ScaleData));
	printf((__constant char *)"RotationData size %d\n", sizeof(RotationData));
	printf((__constant char *)"ColourFaderData size %d\n", sizeof(ColourFaderData));
	printf((__constant char *)"SpiralMotionData size %d\n", sizeof(SpiralMotionData));
	printf((__constant char *)"PointerEmitter size %d\n", sizeof(PointerEmitter));
	printf((__constant char *)"RingEmitter size %d\n", sizeof(RingEmitter));
	printf((__constant char *)"EllipsoidEmitter size %d\n", sizeof(EllipsoidEmitter));
	printf((__constant char *)"BoxEmitter size %d\n", sizeof(BoxEmitter));
#endif
	//printf((__constant char *)"PointerEmitter size %d\n", sizeof(*pointerEmitterData)); // 144
	//__constant char* format = (__constant char *)"UpdateParticles2 dt %f EmitterData curtime %f startTime %f\n";
	//printf(format, particleSimulationData->dt, pointerEmitterData->curTime, pointerEmitterData->startTime);
	// get index into global data array
	int iGID = get_global_id(0);
	__global Particle* particle = particles + iGID;
	if (PointerEmitterEmittParticles(particle, pointerEmitterData, particleSimulationData)) {
		if (particle->totalTimeAlive < particle->timeAlive) {
			//printf((__constant char*)"particle->totalTimeAlive < particle->timeAlive");
			//RandomDirectionAffector(particle, rdData, particleSimulationData);
			//ColourFaderAffector(particle, colourData, particleSimulationData);
			SpiralMotionAffector(particle, spData, particleSimulationData);
			//RotationAffector(particle, rData, particleSimulationData);
			UpdateP(particle, particleSimulationData->dt);
			updateParticleInstanceData(*particle, particleSimulationData, particleInstanceData + iGID);
		}
		else {
			particle->particleEmitter = UINT_MAX;
			barrier(CLK_GLOBAL_MEM_FENCE);
			//printf((__constant char*)"particle->totalTimeAlive >= particle->timeAlive");
			if (particleSimulationData->num != 0) {
				--particleSimulationData->num;
				//atomic_dec(&particleSimulationData->num);
			}
		}
		//updateParticles2(particle, rdData, particleSimulationData, particleInstanceData + iGID);
		//updateParticles3(particle, rotationData, particleSimulationData, particleInstanceData + iGID);
	}
}

This Driver bug or not ?

I fixed some errors on GTX 570, but on GTX 760 still i have empty log after calling clGetProgramBuildInfo!

Thanks!

Particles.zip (5.11 KB)
HelloWorldOpenCL.zip (101 KB)

I found new information, printf function doesn’t works for OpenCL 1.1, also cl_*_printf extension doesn’t support