Greetings!

I have written this code but it has different output every time I run it. I believe something is wrong with the way I find the max value.

Can anybody please help me, letting me know what is the efficient method for finding the max. and generally what are the things that would improve my code to be faster?

Thanks so much!

```
__device__ float floor_exp(float x) {
return (x < -708.3f) ? 0.0 : exp(x);
}
__device__ const int nCom=50;
__device__ float maxVal=-0.5E10;
__device__ float tmp[60][50];
__device__ void findMax (int n){
for (int i=0;i<n;i++){
for (int j=0;j<nCom;j++)
if (tmp[i][j]> maxVal) maxVal=tmp[i][j];
}
}
__global__ void lowerBound(float* ref_GPU,float* test_GPU, uttSeg* result_GPU, int refSize){
int x= threadIdx.x;
int y=threadIdx.y;
result_GPU[blockIdx.x].start=blockIdx.x;
result_GPU[blockIdx.x].lb=0;
tmp[x][y]=ref_GPU[x*nCom+y] + test_GPU[(x+blockIdx.x)*nCom+y];
__syncthreads();
findMax(refSize);
tmp[x][y]=tmp[x][y]-maxVal;
result_GPU[blockIdx.x].lb+= log(floor_exp(tmp[x][y]))+maxVal;
}
```