Hello,
Apologies in advance for my English.
I’m having strange problems with a kernel I’m writing for a Photon Mapper. Every time I try to access a device variable in the kernel (either static or dynamic), the program simply freezes the system. Unfortunately, I own only one video card, which is used for both the display and CUDA (a GeForce 9800 GTX), so it is hard to debug a program this way. I’m using Windows 7 Ultimate with CUDA Toolkit 3.0 and the new SDK (both 32-bit). The installed driver is 195.39.
Now, some code:
[codebox]// Photon-tracing kernel.
//
// Each thread derives `index` from its block/thread coordinates, uses it as
// the random seed, draws a start direction by rejection sampling inside the
// unit sphere, and then follows one photon from `light_pos` for DEPTH
// iterations. In every iteration the nearest triangle hit is searched
// linearly over `triangle_texture` (three float4 texels per triangle), after
// which the photon is either recorded as a miss, stored and scattered
// diffusely, reflected specularly, or left untouched.
//
// number_of_triangles  number of triangles stored in `triangle_texture`
// light_pos            start position of every photon
// light_color          power given to every photon (also after a bounce)
// out_data             device buffer receiving the photon records
//
// NOTE(review): records are written to out_data[index + photon_depth], so
// thread `index` touches slots index .. index+DEPTH-1. These ranges overlap
// between neighbouring threads and extend DEPTH-1 slots past the highest
// thread index. If one slot per thread per bounce is intended, the index
// would need to be index*DEPTH + photon_depth -- confirm against the size of
// the host allocation before changing it.
__global__ void photonmap( const int number_of_triangles,
                           const float4 light_pos,
                           const float4 light_color,
                           HostPhoton * out_data)
{
    const unsigned int index = (blockDim.x * blockIdx.x) + threadIdx.x;
    unsigned long seed = index;

    // Start fully initialised: .w was previously left undefined although the
    // vector is handed to Photon below. Directions use w = 0 elsewhere in
    // this kernel, so the same convention is applied here.
    float4 p_dir = make_float4(0.0f, 0.0f, 0.0f, 0.0f);

    // Rejection sampling: retry until the point lies inside the unit sphere.
    do{
        p_dir.x = 2*random(&seed) - 1;
        p_dir.y = 2*random(&seed) - 1;
        p_dir.z = 2*random(&seed) - 1;
    }while((p_dir.x*p_dir.x + p_dir.y*p_dir.y + p_dir.z*p_dir.z) > 1);
    __syncthreads(); // Synchronise the threads

    Photon p(light_pos, p_dir, light_color); // Create the photon
    PhotonHitRecord hit_p;                   // Create the structure that manages the loop

    // Edge vectors of the triangle that produced the current nearest hit.
    // They are kept next to hit_p.hit_index so that the normal is built from
    // the triangle that was actually hit, not from the last one fetched.
    float3 hit_e1 = make_float3(0.0f, 0.0f, 0.0f);
    float3 hit_e2 = make_float3(0.0f, 0.0f, 0.0f);

    for(int photon_depth = 0; photon_depth < DEPTH; photon_depth++)
    {
        // search through the triangles and find the nearest hit point
        for(int i = 0; i < number_of_triangles; i++)
        {
            const float4 v0 = tex1Dfetch(triangle_texture,i*3);
            const float4 e1 = tex1Dfetch(triangle_texture,i*3+1);
            const float4 e2 = tex1Dfetch(triangle_texture,i*3+2);
            const float3 edge1 = make_float3(e1.x,e1.y,e1.z);
            const float3 edge2 = make_float3(e2.x,e2.y,e2.z);

            const float t = PhotonTriangleIntersection(p, make_float3(v0.x,v0.y,v0.z),
                                                       edge1,
                                                       edge2);
            // The lower bound rejects hits at (almost) zero distance.
            if(t < hit_p.t && t > 0.001f)
            {
                hit_p.t = t;
                hit_p.hit_index = i;
                hit_e1 = edge1;
                hit_e2 = edge2;
            }
        }
        __syncthreads();

        // NOTE(review): hit_p.hit_index is only ever assigned in the loop
        // above; whether resetT() also resets it is not visible here --
        // confirm, otherwise a hit from an earlier bounce is carried over.
        if(hit_p.hit_index < 0){
            out_data[index+photon_depth].position = make_float4(0.0f,0.0f,0.0f,0.0f); // This variable freezes the system
            out_data[index+photon_depth].direction = make_float4(0.0f,0.0f,0.0f,0.0f); // This variable freezes the system
            out_data[index+photon_depth].power = make_float4(0.0f,0.0f,0.0f,0.0f); // This variable freezes the system
            // NOTE(review): plain ++ executed by many threads is an
            // unsynchronised read-modify-write; an atomic increment is
            // probably intended -- confirm the counter's type and the
            // target architecture first.
            photon_counter++; // This variable freezes the system (static device variable)
            if(index == 1) photon_counter++;
        }else{
            // Build a normal from the edges of the triangle that was hit
            hit_p.normal = normalize(cross(hit_e1, hit_e2));
            const float4 hitpoint = p.pos + p.dir * hit_p.t;

            const float roulette = random(&seed);
            if( roulette <= DIFF){
                // Diffuse: store the photon, then scatter it.
                out_data[index+photon_depth].position = p.pos; // This variable freezes the system
                out_data[index+photon_depth].direction = p.dir; // This variable freezes the system
                out_data[index+photon_depth].power = p.power; // This variable freezes the system

                const float r1 = random(&seed);
                const float r2 = random(&seed);
                // NOTE(review): `2.0f/EXPO+1.0f` parses as (2/EXPO)+1 when
                // EXPO is a plain value; if 2/(EXPO+1) was meant, add the
                // parentheses (same for the 1.0f/EXPO+1.0f term) -- confirm.
                const float radial = __fsqrt_rn(1-__powf(r2,(2.0f/EXPO+1.0f)));
                const float4 reflection = make_float4(__cosf(2.0f*PI*r1)*radial,
                                                      __sinf(2.0f*PI*r1)*radial,
                                                      __fsqrt_rn(__powf(r2,(1.0f/EXPO+1.0f))),
                                                      0.0f);
                hit_p.resetT();
                p = Photon(hitpoint, reflection, light_color);
            } else if(roulette <= (DIFF + SPEC)){
                // Specular: mirror the incoming direction about the normal.
                hit_p.resetT();
                const float3 reflected = reflect(make_float3(p.dir.x,p.dir.y,p.dir.z),hit_p.normal);
                p = Photon(hitpoint,
                           make_float4(reflected.x,reflected.y,reflected.z,0.0f),
                           light_color);
            }
            // Otherwise: nothing is stored and neither the photon nor the
            // hit record is modified for the next iteration.
        }
    }
}[/codebox]
I located the problem by isolating parts of the code and compiling the rest separately until I found what freezes the system. Although I have come to this conclusion, I’m not “the best programmer” and I’m still a beginner when it comes to programming with CUDA. So, feel free to correct my logic.
I’m really perplexed by this strange behavior of the program.