Hi,
I am having a strange problem with this kernel:
const unsigned int BLOCK_SIZE=64;
__global__ void determineCombinedSteering_kernel (
const unsigned int GLOBAL_ARRAY_SIZE,
const float3* const vehicle_forward,
float3* const result
)
{
// the index to the data in the global data field
const unsigned int index = BLOCK_SIZE * blockIdx.x + threadIdx.x;
const float3 local_vehicle_position = vehicle_forward[index];
const int LOCAL_ARRAY_SIZE = 2;
float3 neighbor_position[LOCAL_ARRAY_SIZE];
neighbor_position[0] = make_float3(0.0f, 0.0f, 0.0f);
int j=0;
for (int i=0; i<GLOBAL_ARRAY_SIZE; ++i) {
if ( j < LOCAL_ARRAY_SIZE ) {
neighbor_position[j] = vehicle_forward[i];
++j;
}
}
result[index] = neighbor_position[0];
}
The execution of this kernel fails with the error message “unspecified launch failure”. I have no idea why this is happening. :( As far as I have understood cuda this code should execute correctly.
If I change the loop to
for (int i=0; i<LOCAL_ARRAY_SIZE; ++i) {
neighbor_position[i] = vehicle_forward[i];
}
the kernel executes without a problem.
Can anyone tell me why the kernel crashes?
-Jens
P.S. GLOBAL_ARRAY_SIZE > LOCAL_ARRAY_SIZE