Hey guys, I am a newbie… Could you give me some ideas about this kernel? I want to optimize it but don't know where to start…
// Texture-sampling accumulation kernel (excerpt: every "…" is an elision from
// the original post and is kept as-is).
//
// Each thread derives a 2D coordinate (y, z) from its block and thread indices,
// flattens it to i = z * p.det.projydim + y, adds up num_steps samples of
// project_tex taken at pos (advancing pos by dx after every sample), and stores
// the accumulated sum scaled by step_size * sample_spacing into d_proj[i].
//
// Parameters visible in the excerpt:
//   p              - projection description; only p.det.projydim is read here,
//                    as the row length of the flattened output index.
//   step_size      - scale factor applied to the accumulated sum.
//   sample_spacing - second scale factor applied to the accumulated sum.
//   pos            - sampling position; read as pos.x / pos.y / pos.z.
//
// NOTE(review): y, z, i, sum, dx and num_steps have no declaration in the
// visible excerpt - presumably they live in the elided parts; confirm.
// NOTE(review): pos is declared `float` but is used with .x/.y/.z members -
// presumably it should be float3 (or is redefined in the elided code); confirm.
// NOTE(review): no bounds guard on y / z is visible - confirm that the launch
// grid covers the output exactly or that the elided code handles the tail.
__global__ void kernel(… projection p, float step_size,
                       float sample_spacing, float pos) {
    // UMAD is defined outside this excerpt; presumably a multiply-add, i.e.
    // blockIdx * blockDim + threadIdx - TODO confirm.
    y = UMAD(blockIdx.x, blockDim.x, threadIdx.x);
    z = UMAD(blockIdx.y, blockDim.y, threadIdx.y);
    i = z * p.det.projydim + y;
    …
    num_steps = …;
    for (int j = 0; j < num_steps; j++) {
        sum += tex3D(project_tex, pos.x, pos.y, pos.z);
        pos += dx;
    }
    // The post rendered this as "sumstep_sizesample_spacing" because the
    // multiplication asterisks were consumed as italic markup.
    d_proj[i] = sum * step_size * sample_spacing;
}
Thanks a lot.