This is my first CUDA project. Compiling the following kernel code produces a large number of warnings like:
/tmp/tmpxft_00004159_00000000-7_cvhaar_kernel.cpp3.i(270): Advisory: Cannot tell what pointer points to, assuming global memory space
(and yes, I do use global memory throughout), followed by this error:
nvcc error : ‘ptxas’ died due to signal 11 (Invalid memory reference)
make: *** [cvhaar_kernel.o] Error 255
I tried switching to shorter structure names, but that didn’t help. Does anybody have any insight into possible bugs in my code, or might this be an nvcc bug?
I use CentOS 5 (64-bit) with CUDA 2.0. I didn’t post the complete code because it’s a bit long and it also needs the OpenCV library installed to compile, but don’t hesitate to ask for the code. Thank you!
/* calc_sum(rect, offset): combines four values read through the p0, p1, p2
 * and p3 members of `rect`, each indexed by `offset`, as
 *     p0[offset] - p1[offset] - p2[offset] + p3[offset].
 * `rect` is parenthesised, so an expression such as `*ptr` may be passed.
 * `offset` is expanded four times, so do not pass an expression with side
 * effects.
 * NOTE(review): p0..p3 are presumably corner pointers into an integral
 * image -- confirm against the struct declarations, which are not shown here.
 */
#define calc_sum(rect,offset) \
  ((rect).p0[offset] - (rect).p1[offset] - (rect).p2[offset] + (rect).p3[offset])
/*
 * Evaluates one classifier by walking its node array.
 *
 * Starting at node index 0, each visited node contributes a weighted sum of
 * its feature rectangles (rect[0] and rect[1] always, rect[2] only when its
 * p0 pointer is non-null), all sampled at p_offset. That sum is compared with
 * the node threshold scaled by `factor`; the node's `left` index is followed
 * when the sum is below the scaled threshold, otherwise `right`.
 *
 * The walk ends as soon as the selected index is <= 0. The negated index then
 * selects the entry of classifier->alpha that is returned.
 *
 * classifier  classifier whose `node` and `alpha` arrays are read
 * factor      multiplier applied to every node threshold
 * p_offset    element offset handed to calc_sum for every rectangle
 */
__device__ double
Eval( Cl* classifier, double factor, size_t p_offset )
{
    int next = 0;

    do
    {
        N* cur = &classifier->node[next];
        const double scaled_threshold = cur->threshold * factor;

        /* Two rectangles are always present. */
        double response =
            calc_sum(cur->feature.rect[0], p_offset) * cur->feature.rect[0].weight;
        response +=
            calc_sum(cur->feature.rect[1], p_offset) * cur->feature.rect[1].weight;

        /* The third rectangle is optional; a null p0 marks it as absent. */
        if( cur->feature.rect[2].p0 )
        {
            response +=
                calc_sum(cur->feature.rect[2], p_offset) * cur->feature.rect[2].weight;
        }

        if( response < scaled_threshold )
            next = cur->left;
        else
            next = cur->right;
    }
    while( next > 0 );

    /* next is now <= 0; its negation indexes the output table. */
    return classifier->alpha[-next];
}
/*
 * Runs the cascade reached through _cascade->hid at window origin `pt`.
 *
 * Returns
 *   -1  when pt has a negative coordinate, or when pt plus the
 *       _cascade->real size reaches sum.width-2 / sum.height-2;
 *    0  when a stage sum falls below its threshold and no stage with a
 *       non-null `next` link remains on the way up the `parent` chain;
 *    1  otherwise, including the case where cascade->is is zero, in which
 *       no stage is evaluated at all.
 *
 * `start` is accepted but never read by this function.
 */
__device__ int Run( Ca* _cascade,
                    CvPoint pt, int start )
{
    H* cascade = _cascade->hid;

    /* Reject windows that start before the origin or reach the far border. */
    if( pt.x < 0 || pt.y < 0 ||
        pt.x + _cascade->real.width >= cascade->sum.width-2 ||
        pt.y + _cascade->real.height >= cascade->sum.height-2 )
    {
        return -1;
    }

    /* Element offsets of the window origin in the sum and sqsum arrays. */
    int p_offset  = pt.y * (cascade->sum.step/sizeof(sumtype)) + pt.x;
    int pq_offset = pt.y * (cascade->sqsum.step/sizeof(sqsumtype)) + pt.x;

    /* Threshold scale: sqrt(sq*inv - mean^2) when that value is
       non-negative, otherwise 1. */
    double mean = calc_sum(*cascade,p_offset)*cascade->inv;
    double factor = cascade->pq0[pq_offset] - cascade->pq1[pq_offset] -
                    cascade->pq2[pq_offset] + cascade->pq3[pq_offset];
    factor = factor*cascade->inv - mean*mean;
    factor = ( factor >= 0. ) ? sqrt(factor) : 1.;

    if( !cascade->is )
        return 1;

    S* stage = cascade->stage;
    while( stage )
    {
        /* Accumulate the outputs of every classifier in this stage. */
        double stage_sum = 0;
        for( int k = 0; k < stage->count; k++ )
            stage_sum += Eval( stage->classifier + k, factor, p_offset );

        if( stage_sum >= stage->threshold )
        {
            /* Stage passed: descend to its child. */
            stage = stage->child;
            continue;
        }

        /* Stage failed: climb until some stage has a `next` link to try. */
        while( stage && stage->next == NULL )
            stage = stage->parent;

        if( stage == NULL )
            return 0;

        stage = stage->next;
    }

    return 1;
}