Let's say I have a kernel as shown below:
/**
 * Test kernel: every thread stores its own x index into a pitched 3-D volume.
 *
 * Index mapping used by this kernel:
 *   x = threadIdx.x  (element within a row)
 *   y = blockIdx.x   (row within a slice)
 *   z = blockIdx.y   (slice)
 *
 * The 4-element vector {x, y, z, 1} is built either with a brace initializer
 * (USE_ARRAY_INITIALIZER defined) or with element-wise assignment; only
 * element 0 is written to the output, so both variants must produce the same
 * volume.
 *
 * @param outvol_pitchptr   Pitched pointer to the output volume (ptr + row pitch in bytes).
 * @param out_vol_extent    Extent of the output volume; height and depth are used
 *                          for the slice pitch and for the bounds guard.
 * @param projection_ptr    Unused by this test kernel (kept for signature compatibility).
 * @param projection_pitch  Unused by this test kernel.
 * @param pmat_data         Unused by this test kernel.
 * @param nu                Unused by this test kernel.
 * @param nv                Unused by this test kernel.
 */
__global__ void array_initializer_test
(
cudaPitchedPtr outvol_pitchptr,
cudaExtent out_vol_extent,
data_t* projection_ptr, MatrixDim_t projection_pitch,
data_t* pmat_data, int nu, int nv
)
{
    const int x = threadIdx.x;
    const int y = blockIdx.x;
    const int z = blockIdx.y;

    char* const  devPtr     = (char*) outvol_pitchptr.ptr;
    const size_t pitch      = outvol_pitchptr.pitch;
    const size_t slicePitch = pitch * out_vol_extent.height;

    // Guard against a launch configuration that does not match the volume.
    // y and z are checked against the extent; x is checked against the row
    // pitch, because a write past the pitch would land in a different row.
    if (static_cast<size_t>(y) >= out_vol_extent.height ||
        static_cast<size_t>(z) >= out_vol_extent.depth  ||
        (static_cast<size_t>(x) + 1) * sizeof(data_t) > pitch)
    {
        return;
    }

    // Pitched addressing: step whole slices, then whole rows, in bytes.
    char*   slice = devPtr + z * slicePitch;
    data_t* row   = (data_t*)(slice + y * pitch);

#ifdef USE_ARRAY_INITIALIZER
    // Explicit casts: converting a non-constant int to data_t inside a brace
    // initializer is a narrowing conversion if data_t is a floating-point type.
    data_t v_data[4] = { static_cast<data_t>(x),
                         static_cast<data_t>(y),
                         static_cast<data_t>(z),
                         static_cast<data_t>(1) };
#else   // was "#else if", which is not a valid preprocessor directive
    data_t v_data[4];
    v_data[0] = static_cast<data_t>(x);
    v_data[1] = static_cast<data_t>(y);
    v_data[2] = static_cast<data_t>(z);
    v_data[3] = static_cast<data_t>(1);
#endif

    row[x] = v_data[0];
}
and I call it with:
// Launch layout: nx threads per block and an (ny x nz) grid of blocks, which
// lines up with the kernel's x = threadIdx.x, y = blockIdx.x, z = blockIdx.y.
// NOTE(review): nx is used directly as the block size, so it must not exceed the
// device's maximum threads per block — confirm for the volume sizes in use.
dim3 threadsPerBlock( nx, 1, 1 );
dim3 numBlocks( ny, nz, 1);
// Run the array-initializer test kernel over the whole output volume.
// NOTE(review): no cudaGetLastError()/synchronize check is visible after this
// launch in this snippet; a failed launch would go unnoticed — verify at the call site.
array_initializer_test<<<numBlocks, threadsPerBlock>>>(outVolumeDevPitchedPtr, outVolumeDevExtent, projection_ptr, projection_pitch, p_mat_device_ptr, nu, nv );
Then I #define USE_ARRAY_INITIALIZER to use the array initialization “data_t v_data[4] = {x, y, z, 1};” and find that one of these is not like the other! The initializer appears to work in some of the executed threads: the volume I get back is striped, in that some of the threads were able to set the value correctly and some were not. Can I get an answer from NVIDIA as to whether or not array initializers should (and do) work with nvcc 3.1? My experience is that they do not.