Okay, I have an issue. I’m working on a simple raytracer, and for some reason I can’t write to global memory. At the end of the `raytrace` kernel (the `__global__` function in the code below) we have:
if (hit.z != 1000.0) {
hits[outidx] = 0;
}
else {
hits[outidx] = 1;
}
If I run the code below as is, I get a kernel launch error. If I change the snippet above to just:
hits[outidx] = 0;
it works fine. What am I missing here?
Thanks in advance for the help, and btw, here’s my code:
// Ray/triangle intersection test. The arithmetic follows the
// Moller-Trumbore formulation (assuming sub/cross/dot are the usual
// component-wise subtract, cross product and dot product helpers).
//
// Parameters:
//   a, b, c   - triangle vertices
//   o, d      - ray origin and ray direction
//   minT      - hits with t <= minT are rejected
//   lastHit   - closest hit found so far; lastHit.z holds its ray parameter t
//   mat_index - value stored in .w of a newly accepted hit
//
// Returns:
//   (u, v, t, mat_index) when the ray hits the triangle at a parameter t
//   that is non-negative, greater than minT and strictly smaller than
//   lastHit.z; otherwise lastHit is returned unchanged.
__device__ float4 Intersects(float3 a, float3 b, float3 c, float3 o, float3 d,
                             float minT, float4 lastHit, float mat_index)
{
    // Threshold the original code computed a determinant test with but then
    // discarded (the result was overwritten before use).
    const float kDetEpsilon = 0.00001f;

    float3 e1 = sub(b, a);
    float3 e2 = sub(c, a);
    float3 p = cross(d, e2);
    float det = dot(p, e1);

    // A determinant close to zero means the ray is (nearly) parallel to the
    // triangle plane; dividing by it would give huge or non-finite u/v/t.
    // The test is two-sided so triangles facing away from the ray are still
    // intersected, exactly as before.
    if (det > -kDetEpsilon && det < kDetEpsilon) {
        return lastHit;
    }

    float invdet = 1.0f / det;
    float3 tvec = sub(o, a);
    float u = dot(p, tvec) * invdet;
    float3 q = cross(tvec, e1);
    float v = dot(q, d) * invdet;
    float t = dot(q, e2) * invdet;

    // Inside the triangle (barycentric bounds) and closer than the best hit
    // so far, while still in front of the ray origin and beyond minT.
    const bool accepted = (u >= 0.0f)
                       && (v >= 0.0f)
                       && (u + v <= 1.0f)
                       && (t < lastHit.z)
                       && (t >= 0.0f)
                       && (t > minT);
    if (!accepted) {
        return lastHit;
    }

    float4 ret;
    ret.x = u;
    ret.y = v;
    ret.z = t;
    ret.w = mat_index;
    return ret;
}
// Flat global thread index, built from the x components of the block index,
// block size and thread index only.
__device__ inline int getidx(){
    const unsigned int threadsInEarlierBlocks = blockDim.x * blockIdx.x;
    return threadsInEarlierBlocks + threadIdx.x;
}
// Tests one ray per thread against every triangle and records whether
// anything was hit.
//
// Parameters:
//   hits     - output, one float per thread: 0 if the ray hit a triangle,
//              1 if it hit nothing
//   rays     - two float4 per ray: rays[2*i] is the origin, rays[2*i + 1]
//              the direction (both converted with dto3 before use)
//   tris     - three consecutive float4 per triangle (its vertices)
//   tricount - number of triangles in tris
//
// Launch layout: the thread index comes from getidx(), which uses only the
// x dimension, so a 1D grid of 1D blocks is expected.
//
// NOTE(review): the kernel receives no ray count and therefore cannot guard
// against surplus threads. The launch must cover exactly as many threads as
// there are rays, with hits holding that many floats and rays twice that
// many float4; otherwise the loads/stores below run out of bounds.
extern "C" __global__ void raytrace (float *hits, float4 *rays, float4 *tris, uint tricount)
{
    // Sentinel ray parameter meaning "nothing hit yet". Float literal so the
    // final comparison is not promoted to double precision.
    const float kNoHitT = 1000.0f;

    const uint outidx = getidx();
    const uint idx = outidx * 2u;

    float4 o = rays[idx];
    float4 d = rays[idx + 1u];

    // Initialise every component: the whole float4 is passed to and returned
    // from Intersects, not just .z.
    float4 hit;
    hit.x = 0.0f;
    hit.y = 0.0f;
    hit.z = kNoHitT;
    hit.w = 0.0f;

    // Unsigned index to match tricount and avoid a signed/unsigned compare.
    for (uint tri = 0u; tri < tricount; ++tri) {
        const uint base = tri * 3u;
        float4 a = tris[base];
        float4 b = tris[base + 1u];
        float4 c = tris[base + 2u];
        hit = Intersects(dto3(a), dto3(b), dto3(c), dto3(o), dto3(d), 0.0001f, hit, 1.0f);
    }

    // Intersects only replaces hit with a strictly smaller t, so an unchanged
    // sentinel means no triangle was hit.
    if (hit.z != kNoHitT) {
        hits[outidx] = 0.0f;
    }
    else {
        hits[outidx] = 1.0f;
    }
}