I know that many people will have faced this problem before, but I could not find a solution in the previous threads.
I have code that runs well in emulation mode but does not work properly on the device. I write intermediate values into a device variable and later copy it to host memory. The problem I am facing is that garbage values get written into this variable whenever I call the following function:
// Device-side symbols used by cuda_LocalEnergy. They are only declared here
// (extern); their definitions are not visible in this excerpt.
// NOTE(review): extern __constant__/__device__ variables shared between
// translation units normally need relocatable device code (nvcc -rdc=true)
// to link to a single definition -- confirm against the build settings.

// Not referenced by the code visible in this excerpt.
extern __constant__ float var_[3];
// Indexed by class label in cuda_LocalEnergy.
extern __constant__ float mean[3];
// Indexed by class label in cuda_LocalEnergy.
extern __constant__ float variance[3];
// Debug scratch array: cuda_LocalEnergy stores a value at index blockIdx.x.
extern __device__ int globalmemory[8];
/*
 * Computes the local energy of one site for a candidate class label and
 * stores it in *sum.
 *
 * The result is the sum of two terms:
 *   - data term:          (value - mean[label])^2 / (2 * variance[label])
 *   - neighbourhood term: accumulated over a window of at most 3x3x3 sites
 *                         around inSite (the window includes inSite itself);
 *                         +1 for every site whose label equals *label,
 *                         -4 for every site whose label differs.
 *
 * Near a border the window is first shifted back inside the array so that it
 * keeps its extent, and only then clamped to [0, array_size[i] - 1].
 *
 * Parameters:
 *   class_label_array  label data, forwarded unchanged to cuda_getDataValue_c
 *   array_size         3 ints: extent of the array along each axis
 *   inSite             3 ints: coordinates of the site being evaluated
 *   value              observed value at the site
 *   label              candidate class; used as an index into mean[] and
 *                      variance[] (3 entries each) and is NOT range-checked
 *   sum                output: receives the computed energy
 *
 * Side effect: for debugging, the number of sites in the window is written to
 * globalmemory[blockIdx.x]; the write is skipped when blockIdx.x does not fit
 * in the 8-entry array.
 */
__device__ void cuda_LocalEnergy(int* class_label_array,int* array_size,int* inSite, int* value, int* label,float* sum){
    const float betaInClass  = -1.0f;
    const float betaOutClass = -4.0f;
    const int   radius       = 1;      // half-width of the 3-wide window

    int pixels[27];                    // labels gathered from the window (3*3*3 at most)
    int min_coord[3];
    int max_coord[3];
    int curr_indx[3];

    const int site_label = *label;

    // Data term. Float literals keep the arithmetic in single precision.
    const float diff = (float)(*value) - mean[site_label];
    *sum = (diff * diff) / (2.0f * variance[site_label]);

    // Window bounds per axis: shift back inside the array, then clamp.
    int num_ele = 1;
    for (int i = 0; i < 3; i++){
        min_coord[i] = inSite[i] - radius;
        max_coord[i] = inSite[i] + radius;
        if (min_coord[i] < 0){
            // Overhangs the low edge: slide the window up by the overhang.
            max_coord[i] = max_coord[i] - min_coord[i];
            min_coord[i] = 0;
            if (max_coord[i] >= array_size[i])
                max_coord[i] = array_size[i] - 1;
        }else if (max_coord[i] >= array_size[i]){
            // Overhangs the high edge: slide the window down by the overhang.
            min_coord[i] = min_coord[i] - (max_coord[i] - array_size[i]) - 1;
            max_coord[i] = array_size[i] - 1;
            if (min_coord[i] < 0)
                min_coord[i] = 0;
        }
        num_ele = num_ele * (max_coord[i] - min_coord[i] + 1);
    }

    // Debug output; guarded so that a grid with more than 8 blocks cannot
    // write past the end of globalmemory[8].
    if (blockIdx.x < 8u)
        globalmemory[blockIdx.x] = num_ele;

    // Gather the label of every site in the window. Each axis spans at most
    // 3 coordinates, so at most 27 entries of pixels[] are filled.
    int indx = 0;
    for (curr_indx[0] = min_coord[0]; curr_indx[0] <= max_coord[0]; curr_indx[0]++)
        for (curr_indx[1] = min_coord[1]; curr_indx[1] <= max_coord[1]; curr_indx[1]++)
            for (curr_indx[2] = min_coord[2]; curr_indx[2] <= max_coord[2]; curr_indx[2]++){
                // NOTE(review): the meaning of 262144 is not visible here -- confirm
                // against cuda_getDataValue_c.
                cuda_getDataValue_c(curr_indx,class_label_array,array_size, 262144,&pixels[indx]);
                indx++;
            }

    // Neighbourhood term. Iterate over the entries actually gathered (indx),
    // which equals num_ele for valid extents but can never run past the
    // filled part of pixels[].
    float energy = 0.0f;
    for (int i = 0; i < indx; i++){
        if (site_label == pixels[i])
            energy -= betaInClass;
        else
            energy += betaOutClass;
    }
    *sum += energy;
}
I am calling this function from a __global__ function. I have 8 blocks with 1 thread each, so the function is executed simultaneously by 8 threads. For debugging, I check the intermediate values written into the globalmemory array. I get values like -1390041501, 1084243120, etc., and I get these same garbage values in the globalmemory array irrespective of what I do after calling this function. When do problems like this occur?
Has anybody faced a similar problem? I have been stuck on this for quite some time. The entire code is rather large, so I have pasted only the problematic function.