Is this a race condition error?

Hey guys, I have some code that seems to be exhibiting behavior of a race condition…

Here are snippets of my code :

// Here's how we call the kernels in my host function...
    // Events bracketing the two kernel launches.
    cudaEvent_t start, stop;
    cudaEventCreate( &start );
    cudaEventCreate( &stop );

    cudaEventRecord( start );

    // A kernel launch has no return value; a rejected launch (e.g. a bad
    // blocks/tpb configuration) is only visible through cudaGetLastError().
    is_degen<<< blocks, tpb >>>( points, mesh, dist, np, preds );
    cudaError_t is_degen_status = cudaGetLastError();
    if ( is_degen_status != cudaSuccess )
    {
        fprintf( stderr, "is_degen launch failed: %s\n", cudaGetErrorString( is_degen_status ) );
    }

    // Launched into the same (default) stream as is_degen, so it starts only
    // after is_degen has finished.
    compare_min<<< blocks, tpb >>>( mesh, points, dist, np );
    cudaError_t compare_min_status = cudaGetLastError();
    if ( compare_min_status != cudaSuccess )
    {
        fprintf( stderr, "compare_min launch failed: %s\n", cudaGetErrorString( compare_min_status ) );
    }

    // Errors raised while the kernels execute are reported by the next
    // synchronizing call, so its return code must not be dropped.
    cudaError_t sync_status = cudaDeviceSynchronize();
    if ( sync_status != cudaSuccess )
    {
        fprintf( stderr, "kernel execution failed: %s\n", cudaGetErrorString( sync_status ) );
    }

    cudaEventRecord( stop );

    cudaEventSynchronize( stop );

// These are the kernels. Basically, the first one listed below (compare_min) doesn't print every time.

/*
 * One thread per point index i in [0, n).
 *
 * Looks up the tetra referenced by points[4 + i].host, decodes that tetra's
 * min_dist with IFloatFlip, and prints "i : dist[i]" when dist[i] equals the
 * decoded value.
 *
 * tetras : tetra array indexed by point::host
 * points : point array; the point handled by thread i is points[4 + i]
 * dist   : per-point distances, indexed by i
 * n      : number of point indices to process
 *
 * The kernel only reads t->min_dist. Several points can reference the same
 * tetra, so storing the decoded value back into t->min_dist (as an earlier
 * version did) is an unsynchronized read-modify-write: another thread sharing
 * that tetra may then decode an already-decoded value and the comparison
 * below fails intermittently.
 */
__global__
void compare_min( tetra *tetras, point *points, float *dist, int n )
{
    int i = threadIdx.x + blockIdx.x * blockDim.x;

    if (i < n)
    {
        tetra *t = tetras + ( points + 4 + i )->host;

        // Thread-local storage used to view the decoded bits as a float.
        union
        {
            float flt_dist;
            unsigned uint_dist;
        };

        // Decode into the local only; t->min_dist itself is left untouched.
        // NOTE(review): assumes IFloatFlip takes its argument by value or by
        // const reference and does not modify it - confirm.
        uint_dist = IFloatFlip( t->min_dist );

        if ( dist[i] == flt_dist )
        {
            printf( "%d : %f\n", i, dist[i] );
        }
    }
}

/*
 * One thread per point index in [0, n).
 *
 * For the point points[4 + idx] and the tetra referenced by its host field,
 * the kernel expresses the point relative to the tetra's first vertex in
 * terms of the three edge vectors leaving that vertex, stores either the
 * insphere() result or FLT_MAX in dist[idx], and then folds the FloatFlip'd
 * bits of dist[idx] into the tetra's min_dist with atomicMin.
 *
 * points : point array; vertices are addressed through tetra::p, the point
 *          handled by this thread is points[4 + idx]
 * mesh   : tetra array indexed by point::host
 * dist   : per-point output, indexed by idx
 * n      : number of point indices to process
 * preds  : passed by reference (dereferenced) to insphere()
 */
__global__
void is_degen( point *points, tetra *mesh, float *dist, unsigned n, PredicateInfo *preds )
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // Threads past the end of the input have nothing to do.
    if ( !( idx < n ) )
    {
        return;
    }

    point *query = points + 4 + idx;
    tetra *host_tet = mesh + query->host;

    // Coordinates are read as three consecutive floats starting at ->x.
    float *va = &points[ host_tet->p[0] ].x;
    float *vb = &points[ host_tet->p[1] ].x;
    float *vc = &points[ host_tet->p[2] ].x;
    float *vd = &points[ host_tet->p[3] ].x;

    float *q = &query->x;

    // Row-major 3x3 matrix whose columns are the edges vb-va, vc-va, vd-va.
    float M[9] = {
        vb[0] - va[0], vc[0] - va[0], vd[0] - va[0],
        vb[1] - va[1], vc[1] - va[1], vd[1] - va[1],
        vb[2] - va[2], vc[2] - va[2], vd[2] - va[2]
    };

    // NOTE(review): presumably replaces M with its inverse in place - confirm.
    three_by_three_inverse( M );

    const float rhs[3] = { q[0] - va[0], q[1] - va[1], q[2] - va[2] };

    // Matrix-vector product M * rhs, one row per coordinate.
    const float c0 = M[0] * rhs[0] + M[1] * rhs[1] + M[2] * rhs[2];
    const float c1 = M[3] * rhs[0] + M[4] * rhs[1] + M[5] * rhs[2];
    const float c2 = M[6] * rhs[0] + M[7] * rhs[1] + M[8] * rhs[2];

    // All coordinates strictly positive and summing to less than one.
    const bool accepted = c0 > 0 && c1 > 0 && c2 > 0 &&
                          c0 + c1 + c2 < 1;

    if ( accepted )
    {
        dist[idx] = insphere( *preds, va, vb, vc, vd, q );
    }
    else
    {
        dist[idx] = FLT_MAX; // degenerate
    }

    // Reinterpret the float just stored as unsigned bits, encode them with
    // FloatFlip, and keep the smallest encoded value seen for this tetra.
    atomicMin( &host_tet->min_dist, FloatFlip( reinterpret_cast<unsigned &>( dist[idx] ) ) );
}

So what’s going on is this: when I run my code on about a million points, the compare_min kernel only sometimes prints the actual distance(s). Emphasis on sometimes. Where in my code could there be a race condition? I even ran cuda-memcheck --tool racecheck, to no avail!

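Lol I totally found it. There really was a race condition! In compare_min, the line `t->min_dist = IFloatFlip( t->min_dist );` writes the un-flipped value back into the tetra, and every point hosted by the same tetra runs that line on the same `min_dist`. So one thread could un-flip a value that another thread had already un-flipped, and the comparison against dist[i] would then fail.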

I replaced that in-place write with a read into the local union member, so that `t->min_dist` is never modified:

uint_dist = IFloatFlip( t->min_dist );

and now my code seems to be behaving normally. Lol my bad.