Hi,
The CL program below crashes clBuildProgram in driver version 8.17.12.6099 on a GTX 280. This used to work before. One thing to note is that if you uncomment the //eyeVec.z = … and/or //normal.z = … lines, you will get a compiler error about an ambiguous call. The code used to compile the CL program looks like this:
[codebox]mProgram = clCreateProgramWithSource(sContextCL, 2, lText, lSize, &sStatusCL);
CHECK_CL(“clCreateProgramWithSource”);
if (sStatusCL == CL_SUCCESS)
{
sStatusCL = clBuildProgram(mProgram, 1, &OclDevice::sInstance->getCurrentDevice(), 0, NULL, NULL);
[/codebox]
Please let me know if you can reproduce the error. Otherwise, I probably have a problem somewhere else in my code.
[codebox]#define BOUNDING_RADIUS_2 4.f
// Camera description that the JuliaGPU kernel reads through a __constant pointer.
// NOTE(review): the host presumably fills a buffer with this exact layout —
// confirm that the host-side struct matches field order and alignment.
typedef struct
{
// Camera basis vectors. JuliaGPU combines them as -zAxis + u*xAxis + v*yAxis
// to build the eye-ray direction and projects hit/normal vectors onto
// xAxis and yAxis.
float4 xAxis;
float4 yAxis;
float4 zAxis;
// Eye position; JuliaGPU uses it as the origin of every eye ray.
float4 ePos;
// Not referenced by the code visible here — presumably the inverse camera
// basis; TODO confirm against the host code.
float4 xAxisInv;
float4 yAxisInv;
float4 zAxisInv;
// Not referenced by the code visible here — presumably far/near distances;
// TODO confirm against the host code.
float dFar;
float dNear;
}
_camera;
// Quaternion product q1 * q2.
// Both operands and the result keep the scalar part in .x and the vector
// part in .yzw.
float4 qmult(float4 q1, float4 q2)
{
    // Move the vector parts into .xyz (w = 0) so cross() and dot() can be
    // applied to them directly.
    const float4 v1 = (float4)(q1.y, q1.z, q1.w, 0.0f);
    const float4 v2 = (float4)(q2.y, q2.z, q2.w, 0.0f);
    const float4 v1xv2 = cross(v1, v2);

    // Vector part: s1*v2 + s2*v1 + v1 x v2.
    const float4 vec = v2 * q1.x + v1 * q2.x + v1xv2;
    // Scalar part: s1*s2 - v1 . v2.
    const float scalar = q1.x * q2.x - dot(v1, v2);

    return (float4)(scalar, vec.x, vec.y, vec.z);
}
// Quaternion square q * q.
// The scalar part lives in .x and the vector part in .yzw, for both the
// argument and the result.
float4 qsqr(float4 q)
{
    // Vector part moved into .xyz (w = 0) so dot() sees only those lanes.
    const float4 v = (float4)(q.y, q.z, q.w, 0.0f);

    // Vector part of the square: 2 * s * v.
    const float4 vec = 2.0f * q.x * v;
    // Scalar part of the square: s*s - v . v.
    const float scalar = q.x * q.x - dot(v, v);

    return (float4)(scalar, vec.x, vec.y, vec.z);
}
// Marches a ray towards the quaternion Julia set defined by c.
//
// rO         ray origin; it is advanced along rD while marching
// rD         ray direction
// c          constant added after every quaternion squaring
// epsilon    marching stops once the computed step drops below this value
// iterations upper bound on the z = z*z + c iterations per marching step
//
// Returns the final ray position in .xyz and the last computed step length
// in .w. Marching also stops once dot(rO, rO) reaches BOUNDING_RADIUS_2.
float4 IntersectJulia(float4 rO, float4 rD, float4 c, float epsilon, uint iterations)
{
    float originSq = 0.0f;    // dot(rO, rO) after the most recent step
    float advance = epsilon;  // starts at epsilon, which satisfies advance >= epsilon

    while (advance >= epsilon && originSq < BOUNDING_RADIUS_2)
    {
        // Restart the iteration from the current ray position.
        float4 z = (float4)(rO.x, rO.y, rO.z, 0.0f);
        float4 dz = (float4)(1.0f, 0.0f, 0.0f, 0.0f);
        float zSq = 0.0f;

        // Iterate z and the accompanying dz term until z leaves the bounding
        // radius or the iteration budget is used up.
        for (uint k = 0; zSq < BOUNDING_RADIUS_2 && k < iterations; k++)
        {
            dz = 2.0f * qmult(z, dz);
            z = qsqr(z) + c;
            zSq = dot(z, z);
        }

        // Step length: 0.5 * |z| * log|z| / |dz|.
        const float zLen = fast_length(z);
        advance = 0.5f * zLen * half_log(zLen) / fast_length(dz);

        rO += rD * advance;
        originSq = dot(rO, rO);
    }

    return (float4)(rO.x, rO.y, rO.z, advance);
}
#define DELTA 1e-4f
// Estimates a unit normal at point p for the Julia set defined by c.
//
// Six copies of p, each offset by -DELTA or +DELTA along one axis, are pushed
// through `iterations` rounds of q = q*q + c. The per-axis difference of the
// resulting lengths forms the gradient, which is normalized and returned
// with w = 0.
float4 EstimateNormal(float4 p, float4 c, int iterations)
{
    const float4 base = (float4)(p.x, p.y, p.z, 0.0f);

    // Axis-aligned offsets used for the central differences.
    const float4 dx = (float4)(DELTA, 0.0f, 0.0f, 0.0f);
    const float4 dy = (float4)(0.0f, DELTA, 0.0f, 0.0f);
    const float4 dz = (float4)(0.0f, 0.0f, DELTA, 0.0f);

    float4 xLo = base - dx;
    float4 xHi = base + dx;
    float4 yLo = base - dy;
    float4 yHi = base + dy;
    float4 zLo = base - dz;
    float4 zHi = base + dz;

    int pass = 0;
    while (pass < iterations)
    {
        xLo = qsqr(xLo) + c;
        xHi = qsqr(xHi) + c;
        yLo = qsqr(yLo) + c;
        yHi = qsqr(yHi) + c;
        zLo = qsqr(zLo) + c;
        zHi = qsqr(zHi) + c;
        ++pass;
    }

    const float gradX = fast_length(xHi) - fast_length(xLo);
    const float gradY = fast_length(yHi) - fast_length(yLo);
    const float gradZ = fast_length(zHi) - fast_length(zLo);

    return normalize((float4)(gradX, gradY, gradZ, 0.0f));
}
// Intersects the ray rO + t*rD with the origin-centred sphere whose squared
// radius is BOUNDING_RADIUS_2.
//
// Solves t*t + b*t + c = 0 with b = 2*dot(rO, rD) and
// c = dot(rO, rO) - BOUNDING_RADIUS_2 (the leading coefficient is taken as 1,
// so rD is presumably expected to be unit length — confirm at the call site).
//
// Returns the smaller of the two roots, or 0.0f when the discriminant is
// not positive.
float IntersectSphere(float4 rO, float4 rD)
{
    const float b = 2.0f * dot(rO, rD);
    const float bSq = b * b;
    const float c = dot(rO, rO) - BOUNDING_RADIUS_2;
    const float disc = (bSq - 4.0f * c);

    if (disc <= 0.0f)
    {
        return 0.0f;
    }

    const float root = half_sqrt(disc);
    const float tPlus = (-b + root) * 0.5f;
    const float tMinus = (-b - root) * 0.5f;

    return fmin(tPlus, tMinus);
}
// Ray-casts one pixel of a quaternion Julia set.
//
// diffuse    output: rgb colour in .xyz and camera-space depth in .w
//            (.w stays HUGE_VALF when nothing is hit)
// normals    output: surface normal projected onto the camera axes
//            (all zero when nothing is hit)
// epsilon    hit threshold forwarded to IntersectJulia
// iterations iteration budget forwarded to IntersectJulia / EstimateNormal
// mu         Julia constant forwarded as `c`
// camera     camera basis and eye position
// imageW/H   image size used to map the work-item id to [-1, 1) coordinates
//
// Both outputs are indexed as y * get_global_size(0) + x.
// NOTE(review): this assumes the buffers hold at least one element per
// work-item of the global range — confirm on the host side.
__kernel void JuliaGPU(
    __global float4* diffuse,
    __global float4* normals,
    const float epsilon,
    const uint iterations,
    const float4 mu,
    __constant _camera *camera,
    const uint imageW,
    const uint imageH) {
    const int width = get_global_size(0);
    const int x = get_global_id(0);
    const int y = get_global_id(1);

    // Private copies of the camera basis: every later expression then works
    // on plain float4 values instead of dereferencing __constant memory.
    const float4 xAxis = camera->xAxis;
    const float4 yAxis = camera->yAxis;
    const float4 zAxis = camera->zAxis;

    float u = (x / (float) imageW)*2.0f-1.0f;
    float v = (y / (float) imageH)*2.0f-1.0f;

    const float4 eyeRayOrig = camera->ePos;
    const float4 eyeRayDir = normalize(-zAxis + u*xAxis + v*yAxis);

    float4 color = (float4)(0.0f, 0.0f, 0.0f, HUGE_VALF);
    float4 normal = (float4)(0.0f, 0.0f, 0.0f, 0.0f);

    // IntersectSphere returns 0.0f when the ray misses the bounding sphere,
    // so misses also pass this test and march from the eye position.
    float distSphere = IntersectSphere(eyeRayOrig, eyeRayDir);
    if (distSphere >= 0.f)
    {
        float4 rayOrig = eyeRayOrig + eyeRayDir * (float4)distSphere;
        float4 hitPoint = IntersectJulia(rayOrig, eyeRayDir, mu, epsilon, iterations);
        if (hitPoint.w < epsilon)
        {
            const float4 hitVec = hitPoint - eyeRayOrig;

            // Depth along the viewing direction (-zAxis). Previously this
            // component was read without ever being assigned. It is written
            // as -dot(z, v), which is the same projection as dot(-z, v)
            // without negating a vector operand.
            const float depth = -dot(zAxis, hitVec);
            color = (float4)(0.4f, 0.4f, 0.4f, depth);

            const float4 normalVec = EstimateNormal(hitPoint, mu, iterations);
            normal.x = dot(xAxis, normalVec);
            normal.y = dot(yAxis, normalVec);
            normal.z = -dot(zAxis, normalVec);
        }
    }

    diffuse[y*width+x] = color;
    normals[y*width+x] = normal;
}[/codebox]