I have a CUDA program and I can't find any problem with it, but the result is strange and I don't know the reason, or whether it is a bug. The program should print "flag is 2", but it always prints "flag is 0". However, if I add the statement printf("result=%d\n", *result); to the kernel, the value of the flag is correct. This is very strange, and I would like to know why.
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "cuda.h"
#include <stdio.h>
/* Small record operated on by func_5 and filled in by the entry kernel. */
struct S0 {
    int  x;     /* value compared by func_5 */
    int  t;     /* plain int; the entry kernel points `y` at this member */
    int *y;     /* destination that func_5 stores its computed value through */
    int  flag;  /* set by func_5 to 1 or 2 */
};
// Stores ((10 != p_1->x) - 22) through p_1->y, then sets p_1->flag to 1 when
// the value of p_1->x read on entry is less than the stored value, and to 2
// otherwise.
//
// p_1: record to update; p_1->y must point to a writable int.
//
// The read of x, the store through y and the comparison are written as
// separate statements so their order is fixed. The earlier single-expression
// form left the read of p_1->x on the left of `<` unsequenced with respect to
// the store on the right, which is undefined if p_1->y refers to p_1->x.
__device__ void func_5(struct S0 * p_1)
{
    const int x = p_1->x;

    // (10 != x) is 0 or 1, so the stored value is -22 or -21.
    const int stored = (10 != x) - 22;
    *p_1->y = stored;

    p_1->flag = (x < stored) ? 1 : 2;
}
// Kernel: each thread builds its own S0 whose `y` member points at that same
// object's `t` member, runs func_5 on it, and the one thread whose global
// index is (0,0,0) writes the resulting flag to *result.
__global__ void entry(int *result ) {
    // Global coordinates of this thread along each launch dimension.
    const int gx = blockIdx.x * blockDim.x + threadIdx.x;
    const int gy = blockIdx.y * blockDim.y + threadIdx.y;
    const int gz = blockIdx.z * blockDim.z + threadIdx.z;

    struct S0 rec;
    rec.x    = 0;
    rec.t    = 2;
    rec.y    = &rec.t;   // y refers to this record's own t
    rec.flag = 10;       // placeholder; func_5 overwrites it with 1 or 2

    __syncthreads();
    func_5(&rec);
    __syncthreads();

    // Only the thread at the grid origin publishes its flag.
    const bool atOrigin = (gx == 0) && (gy == 0) && (gz == 0);
    if (!atOrigin) {
        return;
    }

    *result = rec.flag;
    // Author's note: adding
    //     printf("result=%d\n",*result);
    // at this point was reported to make the printed flag correct.
}
// Reports a failed CUDA runtime call on stderr.
// Returns 1 when err is cudaSuccess, 0 otherwise.
static int cuda_ok(cudaError_t err, const char *what)
{
    if (err == cudaSuccess) {
        return 1;
    }
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(err));
    return 0;
}

// Launches `entry` on a 2x2 grid of 2x2x2 blocks, copies the single int result
// back to the host and prints it as "flag is <n>".
// Returns 0 on success and 1 if any CUDA call or the kernel launch fails.
int main()
{
    // Sentinel: if the kernel never writes its result, -1 is printed instead
    // of whatever happened to be in freshly allocated device memory.
    int h_result = -1;
    int *d_result = NULL;
    const size_t result_size = sizeof(int);

    if (!cuda_ok(cudaMalloc((void**)&d_result, result_size), "cudaMalloc")) {
        return 1;
    }

    // Seed the device buffer with the sentinel before launching.
    int ok = cuda_ok(cudaMemcpy(d_result, &h_result, result_size,
                                cudaMemcpyHostToDevice),
                     "cudaMemcpy (host to device)");

    if (ok) {
        dim3 dimGrid(2,2);
        dim3 dimBlock(2,2,2);
        entry<<<dimGrid,dimBlock>>>(d_result);

        // A launch does not return an error itself: configuration problems
        // show up via cudaGetLastError(), execution faults at the next sync.
        ok = cuda_ok(cudaGetLastError(), "kernel launch")
          && cuda_ok(cudaDeviceSynchronize(), "kernel execution")
          && cuda_ok(cudaMemcpy(&h_result, d_result, result_size,
                                cudaMemcpyDeviceToHost),
                     "cudaMemcpy (device to host)");
    }

    if (ok) {
        printf("flag is %d\n", h_result);
    }

    if (!cuda_ok(cudaFree(d_result), "cudaFree")) {
        ok = 0;
    }
    return ok ? 0 : 1;
}