Hi,
The following code exits with “unspecified launch failure” on Windows XP with both CUDA 2.2 and CUDA 2.3.
[codebox]
#include <cuda.h>
#include <cutil_inline.h>
#include <iostream>
// Number of floats in the output buffer; also the upper bound used by the
// kernel's index guard and by the host-side print loop.
#define NUM_ELEMENTS 8
// 0-112 : no crash
// 113-199 : crash
// 200 : no crash
// 201-231: crash
// Parameter block handed to the kernel by value. The length of junk[] is the
// knob of this repro: the ranges listed in the comment above record which
// lengths were observed to crash.
struct KernelParams
{
// Filler that is not read anywhere in the visible code; it only changes the
// size of the struct and the offset of write_two.
bool junk[133];
// When true, the kernel overwrites each element with 2 after writing 1.
bool write_two;
};
// Second kernel argument, also passed by value: carries the output pointer.
struct KernelData
{
// Output buffer; main() allocates it with cudaMalloc for NUM_ELEMENTS floats.
float* d_buf;
};
/**
 * Writes 1 to each of the first NUM_ELEMENTS entries of kd.d_buf and then,
 * when kp.write_two is set, overwrites the same entry with 2.
 *
 * Each thread handles the element at its flat index
 * (blockDim.x * blockIdx.x + threadIdx.x); threads whose index is at or past
 * NUM_ELEMENTS return without touching memory.
 *
 * @param kp  Parameter struct, passed by value; only write_two is read.
 * @param kd  Holds d_buf, the output buffer, which must have room for at
 *            least NUM_ELEMENTS floats.
 */
__global__ void kernel(KernelParams kp, KernelData kd)
{
    const int loc = blockDim.x * blockIdx.x + threadIdx.x;
    if (loc >= NUM_ELEMENTS)
        return;

    // Both stores are deliberate: the reported launch failure goes away when
    // either one is commented out, so they are kept exactly as written.
    kd.d_buf[loc] = 1;
    if (kp.write_two)
    {
        kd.d_buf[loc] = 2;
    }
}
/**
 * Host driver for the repro: allocates and zeroes a NUM_ELEMENTS-float device
 * buffer, launches `kernel` with one block of 16 threads, copies the buffer
 * back and prints it on one line.
 *
 * @param argc  Unused.
 * @param argv  Unused.
 * @return 0 when the end of the program is reached.
 */
int main(int argc, char** argv)
{
    // Structs to be passed to the kernel. kp is zero-initialised so that
    // junk[] is not copied to the device in an indeterminate state.
    KernelParams kp = {};
    KernelData kd;
    std::cout << "Size of parameters = " << (sizeof(kp) + sizeof(kd)) << " bytes\n" << std::endl;
    kp.write_two = true;

    // Allocate device output array
    int size = NUM_ELEMENTS*sizeof(float);
    cutilSafeCall(cudaMalloc((void**)&kd.d_buf, size));

    // Memset to 0
    cutilSafeCall(cudaMemset(kd.d_buf, 0, size));
    cutilSafeCall(cudaThreadSynchronize());

    // Launch kernel: 16 threads for NUM_ELEMENTS elements; the kernel's own
    // index guard makes the surplus threads return early.
    dim3 threadBlock(16,1);
    dim3 blockGrid(1,1);
    kernel<<<blockGrid, threadBlock>>>(kp, kd);
    // Wait for the kernel to finish; the check on the next line is relied on
    // to report a launch or execution failure.
    cudaThreadSynchronize();
    cutilCheckMsg("kernel execution failed\n");

    // Print results. The host buffer must hold all NUM_ELEMENTS floats,
    // because cudaMemcpy writes `size` bytes into it.
    float* h_buf = new float[NUM_ELEMENTS];
    cutilSafeCall(cudaMemcpy(h_buf, kd.d_buf, size, cudaMemcpyDeviceToHost));
    for (int i = 0; i < NUM_ELEMENTS; i++)
        std::cout << h_buf[i] << " ";
    std::cout << std::endl;

    // Clean up
    delete[] h_buf;
    cutilSafeCall(cudaFree(kd.d_buf));
    return 0;
}
[/codebox]
I don’t see anything wrong with the code (it has been isolated from a larger CUDA program).
There are several ways to make it not crash:
1. In the KernelParams struct, change the array length of .junk to anything in 1-112, or to 200. Any other length causes a crash (I didn’t try them exhaustively, but I took a good number of samples).
2. In the KernelParams struct, change ‘bool write_two’ to an int.
3. In the KernelParams struct, change the ‘bool junk’ array to an int array (reducing its size to fit within the 256-byte limit). However, adding another bool between this int array and ‘bool write_two’ causes a crash again.
4. In the kernel function, comment out either write to kd.d_buf.
Workarounds 1, 2, and 3 suggest an alignment problem; however, commenting out the first write to kd.d_buf in the kernel (without changing the KernelParams struct) also works, and that has nothing to do with alignment.
Is this a bug, or am I missing something?