I’m new to CUDA. Until recently I used HLSL, and this code worked well there, but after porting it to CUDA I ran into several strange errors.

Can anyone help me?

The following function is the first one that failed. It is a kind of ray casting, but I want to store the attenuation along each ray as 16 DCT coefficients. I check the depth output by converting it to an image and rendering it. I found that when I remove any three of the four statements in caldctcoeffs(), the image renders correctly; otherwise it is completely dark (all zeros). Even if I write a non-zero value at the end of attenMap_kernel(), the image is still dark.

// This is the function that failed.

// Forward declaration: attenMap_kernel calls caldctcoeffs(), whose definition
// appears later in this listing. Harmless if a declaration is already visible.
inline __device__
void caldctcoeffs(float4* coeff1, float4* coeff2, float4* coeff3, float4* coeff4,
                  float att_intensity, float angle);

// Casts one ray per thread from lightPos through the box [d_box_min, d_box_max],
// accumulates attenuation along the ray, and stores per-pixel results.
//
// Launch layout: 2D grid of 2D blocks. The pixel handled by a thread is
//   (d_map_xoffset + blockIdx.x*blockDim.x + threadIdx.x,
//    d_map_yoffset + blockIdx.y*blockDim.y + threadIdx.y);
// threads whose pixel falls outside mapWidth x mapHeight exit immediately.
//
// Outputs (all indexed by pixel = y*mapWidth + x):
//   depth_map       - scaled/offset ray depth at which marching stopped
//   near_map        - entry distance into the box (clamped to >= 0)
//   steplen_map     - marching step length, (tfar - tnear) / attenSteps
//   dct_coeffs_map1 - 16 consecutive planes of mapWidth*mapHeight floats;
//                     plane k holds coefficient k for every pixel
//
// Reads symbols defined outside this block: d_map_xoffset, d_map_yoffset,
// mapWidth, mapHeight, mapFocal, lightPos, light_invMatrix, d_box_min,
// d_box_max, d_box_size, attenSteps, plattcoeffa, plattcoeffb, tex_occupy,
// tex_data, plus the helpers intersectBox(), mul(), normalize() and the
// float3 arithmetic operators.
//
// NOTE(review): rays that miss the box return without writing anything, so
// those pixels keep whatever the output buffers held before the launch.
//
// NOTE(review): every ray that hits the box writes depth_map unconditionally.
// If the output stays all-zero regardless of what is stored here, the kernel
// most likely never ran (e.g. the launch was rejected because the block size
// needs more registers than are available). Check cudaGetLastError() right
// after the launch and the result of the following synchronizing call.
__global__
void attenMap_kernel(float* depth_map, float* near_map, float* steplen_map, float* dct_coeffs_map1)
{
    uint x = d_map_xoffset + blockIdx.x*blockDim.x + threadIdx.x;
    uint y = d_map_yoffset + blockIdx.y*blockDim.y + threadIdx.y;

    // Grid tail guard: the grid need not divide the map evenly.
    if ((x >= mapWidth) || (y >= mapHeight)) return;

    // Pixel position relative to the map centre, with v pointing up.
    float u = x - mapWidth / 2.0f;
    float v = mapHeight / 2.0f - y;

    // Build the ray from the light through this pixel, then transform its direction.
    Ray eyeRay;
    eyeRay.o = lightPos;
    eyeRay.d = normalize(make_float3(u, v, -mapFocal));
    eyeRay.d = mul(light_invMatrix, eyeRay.d);

    // Intersect with the bounding box; nothing to do if the ray misses it.
    float tnear, tfar;
    int hit = intersectBox(eyeRay, d_box_min, d_box_max, &tnear, &tfar);
    if (!hit) return;
    if (tnear < 0.0f) tnear = 0.0f; // clamp to near plane

    // March from tnear to tfar in attenSteps equal steps.
    float tstep = (tfar - tnear) / attenSteps;
    float3 pos = eyeRay.o + eyeRay.d*tnear;
    float3 step = eyeRay.d*tstep;
    float depth = tnear;

    // 16 accumulated coefficients, four per float4.
    float4 outcoeff1 = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
    float4 outcoeff2 = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
    float4 outcoeff3 = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
    float4 outcoeff4 = make_float4(0.0f, 0.0f, 0.0f, 0.0f);

    float3 coord;
    float tmp_atten = 1.0f; // running attenuation product along the ray
    const float pi = 3.1415926535f;

    for (int i = 0; i < attenSteps; i++)
    {
        // Texture coordinate of the sample: position normalised by the box extent.
        coord = (pos - d_box_min) / d_box_size;

        // Stop marching once the occupancy texture reports a value above 127.
        uint occupyed = (uint)tex3D(tex_occupy, coord.x, coord.y, coord.z);
        if (occupyed > 127)
            break;

        // Angle for sample i of attenSteps: (2i + 1) * pi / (2 * attenSteps).
        float angle = (2.0f*(float)i + 1.0f)*pi / (2.0f*(float)attenSteps);

        // Attenuate by exp(-dat^2 * tstep).
        // NOTE(review): the exponent uses dat squared as written in the original; confirm intent.
        float dat = tex3D(tex_data, coord.x, coord.y, coord.z);
        tmp_atten = tmp_atten * expf(-dat*tstep*dat);

        // Distance falloff: 1 / (a + b*depth)^2, using the depth before this step's advance.
        float dis = plattcoeffa + plattcoeffb * depth;
        float att_intensity = tmp_atten / (dis*dis);

        pos = pos + step;
        depth = depth + tstep;

        caldctcoeffs(&outcoeff1, &outcoeff2, &outcoeff3, &outcoeff4, att_intensity, angle);
    }

    // Linear index of this pixel; shared by every output map.
    const uint pixel = y*mapWidth + x;

    // NOTE(review): the offset 3*1.732 and scale 0.3 are unexplained constants
    // carried over from the original; confirm their meaning.
    depth_map[pixel] = (depth - 3.0f*1.732f) * 0.3f;
    near_map[pixel] = tnear;
    steplen_map[pixel] = tstep;

    // Store the 16 coefficients plane by plane (plane stride = one full map).
    const uint map_size = mapWidth*mapHeight;
    const float coeffs[16] = {
        outcoeff1.x, outcoeff1.y, outcoeff1.z, outcoeff1.w,
        outcoeff2.x, outcoeff2.y, outcoeff2.z, outcoeff2.w,
        outcoeff3.x, outcoeff3.y, outcoeff3.z, outcoeff3.w,
        outcoeff4.x, outcoeff4.y, outcoeff4.z, outcoeff4.w
    };
    // Fully unrolled so coeffs[] is only ever indexed with compile-time constants.
    #pragma unroll
    for (uint k = 0; k < 16; ++k)
    {
        dct_coeffs_map1[pixel + map_size*k] = coeffs[k];
    }
}

// If any three of the four statements in this function are removed, the kernel works.

// Adds one sample's contribution to 16 running coefficients.
//
// Coefficient k (k = 0..15) is incremented by
//   att_intensity * scale_k * cosf(k * angle),
// where scale_0 = u0 and scale_k = u1 for k >= 1. Coefficients 0-3 live in
// *coeff1, 4-7 in *coeff2, 8-11 in *coeff3 and 12-15 in *coeff4 (x, y, z, w order).
//
// Parameters:
//   coeff1..coeff4 - accumulators, updated in place
//   att_intensity  - weight of this sample
//   angle          - base angle for this sample; each coefficient uses k * angle
//
// u0 and u1 are defined outside this block (presumably the DCT normalisation
// factors - confirm). CUDA's built-in float4 has no arithmetic operators, so
// the `+=` and `float * float4` used here must come from a helper header
// included elsewhere (e.g. cutil_math.h / helper_math.h).
inline __device__
void caldctcoeffs(float4* coeff1, float4* coeff2, float4* coeff3, float4* coeff4,
                  float att_intensity, float angle)
{
    // k = 0..3; the k = 0 term uses u0 and cosf(0.0f), which is exactly 1.
    *coeff1 += att_intensity * make_float4(
        u0 * cosf(0.0f),
        u1 * cosf(angle),
        u1 * cosf(2.0f*angle),
        u1 * cosf(3.0f*angle));

    // k = 4..7
    *coeff2 += att_intensity * u1*make_float4(cosf(4.0f*angle),
                                              cosf(5.0f*angle),
                                              cosf(6.0f*angle),
                                              cosf(7.0f*angle));

    // k = 8..11
    *coeff3 += att_intensity * u1*make_float4(cosf(8.0f*angle),
                                              cosf(9.0f*angle),
                                              cosf(10.0f*angle),
                                              cosf(11.0f*angle));

    // k = 12..15
    *coeff4 += att_intensity * u1*make_float4(cosf(12.0f*angle),
                                              cosf(13.0f*angle),
                                              cosf(14.0f*angle),
                                              cosf(15.0f*angle));
}