once my card gets to the bolded cutilSafeCall( cudaThreadSynchronize() ); my card hangs. the display goes blank and comes up with a message on windows 7 saying my nvidia driver stopped and was recovered?
any thoughts?
p.s. incase it help my card is inno3D GTS250
[codebox]global void create_mask (float* map, int rows, int cols);
device float getMinDistance (char *edge_map, float *map, int i, int j, int rows, int cols, int step);
global void getDistanceMap(float *map, float dist_map, char edge_map, int rows, int cols, int step);
void distanceTransform_GPU(char *edge_map, float *distanceMap, int rows, int cols, int step){
dim3 dimBlock(16,16);
float *map, *dist_map_GPU;
unsigned int hTimer=0;
char *edge_map_GPU;
double gpuTime;
static int initial_call=1;
if (initial_call){
dim3 dimGrid((int)ceil((float)(2*rows-1)/(float)dimBlock.x),(int)ceil((float)(2*cols-1)/(float)dimBlock.y));
cudaSetDevice( cutGetMaxGflopsDeviceId() ); //intialise fastest available device
//allocate memory
cudaMalloc((void **)&map, (2*rows-1)*(2*cols-1)*sizeof(float)); //allocate memory for map on GPU
cudaMalloc((void **)&dist_map_GPU, rows*cols*sizeof(float)); //allocate memory for distance map on GPU
cudaMalloc((void **)&edge_map_GPU, rows*cols*sizeof(char)); //allocate memory for edge map on GPU
cutilCheckError( cutCreateTimer(&hTimer) );
cutilCheckError( cutResetTimer(hTimer) );
cutilCheckError( cutStartTimer(hTimer) );
cutilSafeCall( cudaThreadSynchronize() );
create_mask<<<dimGrid, dimBlock>>>(map, rows,cols);
cutilSafeCall( cudaThreadSynchronize() );
cutilCheckError( cutStopTimer(hTimer) );
gpuTime = cutGetTimerValue(hTimer);
printf("d_map () time : %f msec\n", gpuTime);
//cudaMemcpy(distanceMap,map, (2*rows-1)*(2*cols-1)*sizeof(float),cudaMemcpyDeviceToHost); //copy distance transform back to CPU
initial_call=0;
}
dim3 dimGrid((int)ceil((float)rows/(float)dimBlock.x),(int)ceil((float)cols/(float)dimBlock.y));
cudaMemcpy(edge_map_GPU,edge_map, rows*step*sizeof(char),cudaMemcpyHostToDevice); //copy edge information to GPU
cutilCheckError( cutResetTimer(hTimer) );
cutilCheckError( cutStartTimer(hTimer) );
cutilSafeCall( cudaThreadSynchronize() );
getDistanceMap<<<dimGrid, dimBlock>>>(map, dist_map_GPU, edge_map_GPU, rows, cols, step);
<b>cutilSafeCall( cudaThreadSynchronize() );</b>
cutilCheckError( cutStopTimer(hTimer) );
gpuTime = cutGetTimerValue(hTimer);
printf("final distance transform time : %f msec\n", gpuTime);
cudaMemcpy(distanceMap,dist_map_GPU, rows*cols*sizeof(float),cudaMemcpyDeviceToHost); //copy distance transform back to CPU
}
global void create_mask (float* map, signed int rows, signed int cols){
signed int i=blockIdx.x * blockDim.x + threadIdx.x;
signed int j=blockIdx.y * blockDim.y + threadIdx.y;
int map_rows=cols*2-1;
if (i<(2*rows-1) && j<(2*cols-1))
*(float *)(map+i*map_rows+j)=sqrtf(powf((float)(i-(rows-1)),2.0f)+powf((float)(j-(cols-1)),2.0f));
//*(float *)(map+i*map_rows+j)=sqrtf(abs((float)(i-(rows-1)))+abs((float)(j-(cols-1))));
}
global void getDistanceMap(float *map, float dist_map, char edge_map, int rows, int cols, int step){
int i=blockIdx.x * blockDim.x + threadIdx.x;
int j=blockIdx.y * blockDim.y + threadIdx.y;
if (i<rows && j<cols)
//*(float *)(dist_map+i*rows+j)=5.0;
*(float *)(dist_map+i*cols+j)=getMinDistance(edge_map, map, i, j, rows, cols, step);
}
device float getMinDistance (char *edge_map, float *map, int i, int j, int rows, int cols, int step){
int k=0, l=0;
int map_cols=cols*2-1;
float minval=LARGE_VAL, distance=LARGE_VAL;
for (k=0; k<rows; k++)
for (l=0; l<rows; l++)
if (*(edge_map+k*step+l)){ /*if there is an edge pixel at (k, l)*/
distance=*(float *)(map+(rows-i+k)*map_cols+(cols-j+l));/*take note of distance from current pixel to this edge pixel*/
minval=fminf(distance,minval);
}
return minval;
}[/codebox]