Good Evening,
So, I’m trying to compile some code and, for some odd reason, using a printf() statement in my kernel causes the compiler to die. I googled my error, and apparently this is indicative of a compiler bug, which I find interesting. I’m not sure whether it is or not.
But I’m getting this output from nvcc :
make
nvcc -O3 -lstdc++ -rdc=true -gencode arch=compute_50,code=sm_50 -c tetra.cu
nvcc error : 'ptxas' died due to signal 11 (Invalid memory reference)
nvcc error : 'ptxas' core dumped
Makefile:18: recipe for target 'tetra.o' failed
make: *** [tetra.o] Error 139
And this is the kernel that is causing it all :
// Scalar type used for the coordinates stored in `point`; change it here to
// switch the precision of those coordinates.
typedef float real;
// NOTE(review): tpb and bpg are not referenced by the code shown here;
// presumably they are the launch configuration for the kernel -- confirm
// against the host-side launch code.
const int tpb = 256; // threads per block
const int bpg = 512; // blocks per grid
// A three-component point. The anonymous union lets the same three `real`
// values be addressed either by name (x, y, z) or by index (p[0..2]).
// Note: the anonymous struct inside the union relies on a compiler extension.
struct point
{
    // Hybrid storage: named members and array view overlap.
    union
    {
        struct
        {
            real x, y, z;
        };
        real p[3];
    };

    // Builds a point from its three coordinates (a -> x, b -> y, c -> z).
    __host__ __device__
    point(real a, real b, real c) : x(a), y(b), z(c)
    {
    }

    // Writes the point as "(x, y, z)" followed by a newline using printf.
    // Callable from both host and device code.
    __host__ __device__
    void print() const
    {
        printf("(%.00f, %.00f, %.00f)\n", x, y, z);
    }
};
// A tetrahedron described by four integer vertex indices.
struct tetrahedron
{
    int v[4]; // list of vertices in point buffer

    // Stores the four vertex indices in order: v[0] = v0 ... v[3] = v3.
    __host__ __device__
    tetrahedron(int v0, int v1, int v2, int v3)
    {
        const int src[4] = { v0, v1, v2, v3 };
        for (int k = 0; k < 4; ++k)
        {
            v[k] = src[k];
        }
    }
};
/*
 * fracture
 *
 * For every index tid in [0, n) whose location_code[tid] is non-zero, reads
 * the tetrahedron tetrahedra[tet_index[tid]] and, for each of the low four
 * bits set in the location code, constructs a new tetrahedron from the
 * matching face (three vertices of the original) plus pt_index[tid] as the
 * fourth vertex.
 *
 * The first tetrahedron constructed for a given tid overwrites the original
 * slot (tetrahedra + tet_index[tid]); each later one is written at
 * tetrahedra + num_tetrahedra + insertion_marked[i] + pos, where i is the
 * face index of the previously constructed tetrahedron and pos counts the
 * tetrahedra already written past the original slot.
 *
 * Parameters:
 *   n                 number of entries to process
 *   location_code     per-entry bit mask; bit i selects face i
 *   tetrahedra        tetrahedron buffer, read and written in place
 *   tet_index         per-entry index of the tetrahedron to fracture
 *   insertion_marked  offsets added to num_tetrahedra for the extra slots
 *   num_tetrahedra    base offset of the extra slots in `tetrahedra`
 *   pt_index          per-entry vertex index used as the new fourth vertex
 *
 * Launch: 1D grid of 1D blocks; the grid-stride loop makes any grid size
 * valid. No shared memory is used.
 *
 * NOTE(review): insertion_marked is indexed with the face index i (0..3)
 * rather than tid -- confirm this is intended.
 */
__global__
void fracture(const int n,
              const unsigned char *location_code,
              tetrahedron *tetrahedra,
              const int *tet_index,
              const int *insertion_marked,
              const int num_tetrahedra,
              const int *pt_index)
{
    const int first = threadIdx.x + blockIdx.x * blockDim.x;
    const unsigned int stride = blockDim.x * gridDim.x;

    for (int tid = first; tid < n; tid += stride)
    {
        const unsigned char loc = location_code[tid];
        if (loc == 0)
        {
            continue; // nothing to fracture for this entry
        }

        // Take a private copy of the tetrahedron: its slot is overwritten
        // below, but the face data is still needed afterwards.
        const tetrahedron t = tetrahedra[tet_index[tid]];
        const int faces[4][3] = {
            { t.v[3], t.v[2], t.v[1] },
            { t.v[0], t.v[2], t.v[3] },
            { t.v[0], t.v[3], t.v[1] },
            { t.v[0], t.v[1], t.v[2] }
        };

        // The first new tetrahedron reuses the original's slot.
        tetrahedron *address = &tetrahedra[tet_index[tid]];
        int pos = 0;

        for (int i = 0; i < 4; ++i)
        {
            if ((loc & (1 << i)) == 0)
            {
                continue; // face i is not selected
            }

            // Placement new: construct directly in the destination slot.
            const tetrahedron *tmp =
                new(address) tetrahedron(faces[i][0],
                                         faces[i][1],
                                         faces[i][2],
                                         pt_index[tid]);

            // Destination for the next tetrahedron (if any) of this entry.
            address = tetrahedra + num_tetrahedra
                    + insertion_marked[i] + pos;
            ++pos;

            // This is the print that causes the compiler to die.
            //printf("%d, %d, %d, %d\n", tmp->v[0], tmp->v[1], tmp->v[2], tmp->v[3]);
        }
    }
}
Edit :
Apparently, this code will work though :
if (loc & (1 << i))
{
    // Construct the new tetrahedron in place at the current destination slot.
    const tetrahedron *tmp =
        new(address) tetrahedron(faces[i][0],
                                 faces[i][1],
                                 faces[i][2],
                                 pt_index[tid]);
    // Print the address of the freshly constructed tetrahedron. The
    // conversion must match the argument type: the value is cast to
    // unsigned long, so "%lu" is used ("%u" expects an unsigned int).
    printf("%lu\n", (unsigned long) tmp);
    // Destination for the next tetrahedron (if any) of this entry.
    address = tetrahedra + num_tetrahedra
            + insertion_marked[i] + pos;
    ++pos;
    //printf("%d, %d, %d, %d\n", tmp->v[0], tmp->v[1], tmp->v[2], tmp->v[3]);
}
Output :
30932992
30933008
30933024