A strange result with a CUDA program

I have a CUDA program in which I can't find any problem, but its result is strange, and I don't know the reason or whether it is a bug. The program should print "flag is 2", but it always prints "flag is 0". However, if I add the statement printf("result=%d\n",*result); to the kernel, the value of the flag is correct. This is very strange, and I would like to know why.

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "cuda.h"
#include <stdio.h> 
// Plain aggregate handed to func_5 by pointer.
// Member order matters: the kernel fills it with a positional brace initialiser.
struct S0 {
    int  x;     // left-hand operand of the comparison in func_5
    int  t;     // plain int slot; the kernel points `y` at this member
    int* y;     // destination func_5 writes its computed value through
    int  flag;  // outcome written by func_5 (1 or 2)
};

// Stores (10 != p_1->x) - 22 through p_1->y, then sets p_1->flag to 1 when
// p_1->x is less than the value just stored, and to 2 otherwise.
//
// (10 != p_1->x) evaluates to 0 or 1, so the stored value is -22 or -21.
//
// p_1: object to read and update; p_1->y is dereferenced without a null check.
//
// NOTE(review): the read of p_1->x on the left of '<' is not sequenced
// relative to the store through p_1->y on the right. That is harmless as long
// as p_1->y does not point at p_1->x; if it ever did, the expression would
// have undefined behaviour.
__device__ void func_5(struct S0 * p_1)
{ 
    // The assignment is the right-hand operand of '<': its result is the
    // value now held in *p_1->y.
    if ((p_1->x < ((*p_1->y) = ((10 != p_1->x) - 22) ))){
        p_1->flag = 1;
    }
    else
    {  
       p_1->flag = 2;
    }

 }


 // Kernel: every thread builds its own S0 object, runs func_5 on it, and the
 // thread whose global coordinates are (0,0,0) writes the resulting flag to
 // *result.
 //
 // result: destination for a single int; only thread (0,0,0) writes to it.
 //
 // Expected value: with x == 0, func_5 stores (10 != 0) - 22 == -21 through y
 // (i.e. into t) and, because 0 < -21 is false, sets flag to 2.
 __global__ void entry(int *result ) {

    // Global thread coordinates along x, y and z.
    int i = blockIdx.x*blockDim.x+threadIdx.x;
    int j = blockIdx.y*blockDim.y+threadIdx.y;
    int k = blockIdx.z*blockDim.z+threadIdx.z;
    // c_1625 is an automatic variable, so each thread has its own copy.
    struct S0 c_1625;
    struct S0* p_1 = &c_1625;


    // Initial values, built in a temporary and then copied into c_1625.
    // The y member takes the address of c_1625.t (through p_1), not of
    // c_1626.t, so after the copy c_1625.y points at c_1625's own t member.
    struct S0 c_1626 = {
        0L, // p_1->x
        2L,//p_1->t
        &(p_1->t),//p_1->y
        10,//p_1->flag
    };
    c_1625 = c_1626;
    // Block-wide barrier, reached unconditionally by every thread.
    // NOTE(review): no __shared__ data is visible in this kernel, so these
    // barriers do not appear to order any cross-thread communication.
    __syncthreads();
    func_5(p_1);
    __syncthreads();
    // Only the thread at global coordinates (0,0,0) publishes its flag.
    if(i == 0 && j==0 && k==0){
       *result = p_1->flag;
     //  printf("result=%d\n",*result);// enabling this statement makes the reported flag correct
    }
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool checkCuda(cudaError_t status, const char *what)
{
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Launches `entry` on a 2x2 grid of 2x2x2 blocks, copies the single int result
// back to the host and prints it as "flag is <value>".
//
// Every CUDA runtime call and the kernel launch are checked, so a runtime
// failure cannot be mistaken for a wrong kernel result.
// Returns 0 on success and 1 if any CUDA call failed.
int main()
{
    // Sentinel: if the kernel never writes *d_result, -1 is printed instead
    // of whatever the fresh allocation happened to contain.
    int h_result = -1;
    int *d_result = NULL;
    const size_t result_size = sizeof(int);

    if (!checkCuda(cudaMalloc((void**)&d_result, result_size), "cudaMalloc")) {
        return 1;
    }

    dim3 dimGrid(2,2);
    dim3 dimBlock(2,2,2);

    bool ok = checkCuda(cudaMemcpy(d_result, &h_result, result_size,
                                   cudaMemcpyHostToDevice),
                        "cudaMemcpy (host to device)");
    if (ok) {
        entry<<<dimGrid,dimBlock>>>(d_result);

        // cudaGetLastError reports launch-configuration errors;
        // cudaDeviceSynchronize surfaces faults raised while the kernel ran.
        ok = checkCuda(cudaGetLastError(), "entry launch")
          && checkCuda(cudaDeviceSynchronize(), "entry execution")
          && checkCuda(cudaMemcpy(&h_result, d_result, result_size,
                                  cudaMemcpyDeviceToHost),
                       "cudaMemcpy (device to host)");
    }

    if (ok) {
        printf("flag is %d\n", h_result);
    }

    cudaFree(d_result);
    return ok ? 0 : 1;
}

As mentioned in a comment on your cross-posting:

https://stackoverflow.com/questions/50541540/a-strange-cuda-program-can-someone-tell-me-the-reason
This appears to be a compiler defect. You might want to file a bug at developer.nvidia.com.