Problem with mapped memory

In my program I use a variable allocated with cudaHostAllocMapped. The kernel writes a number into this variable, but after the kernel has run, the host does not see that number in the variable.

For example:

/* Record shared between host and device. */
typedef struct
{
    float t;    /* time */
    int   pos;  /* position (reaction) */
} data_t;

// Kernel: stores the constant 1.132f into the `t` field of the first
// element of `data`.
//
// No thread or block index is used, so every launched thread writes the
// same value to the same location; the result does not depend on the
// launch configuration.
//
// A __global__ function must be declared with a `void` return type.
__global__ void mykernel (data_t * data)
{
  data[0].t = 1.132f;
}

int main()
{

  ...

  // Pinned host allocation that is also mapped into the device address
  // space. cudaHostAlloc provides the buffer itself, so no prior malloc is
  // needed (a malloc'ed pointer would simply be overwritten and leaked).
  cudaHostAlloc(&data, sizeof(data_t), cudaHostAllocMapped);

  // Device-side pointer that refers to the same host buffer.
  cudaHostGetDevicePointer(&d_data, data, 0);

  for(i=0;i<2;i++)
  {
    mykernel<<<1,5>>>(d_data);

    // NOTE: there is no synchronization between the kernel launch and this
    // read. Kernel launches return to the host immediately, so the value
    // may be printed before the kernel has written it -- this is the
    // behaviour being asked about.
    printf("data[0].t = %f\n",data[0].t);
  }

  ...

}

The result is:

0

1.132

but it should be:

1.132

1.132

Maybe I need to synchronize, but how?

Sorry for my English.

Use cudaThreadSynchronize() in this case.

Do I write the cudaThreadSynchronize() call right after the kernel launch?

Write it before you try to read the mapped memory. It will force the host thread to spin-wait until the kernel has finished. Like this:

#include <stdio.h>

/* Record written by the kernel and read back by the host. */
typedef struct
{
    float t;    /* time */
    int   pos;  /* position (reaction) */
} data_t;

// Kernel: fills in the first element of `data`.
//   data - pointer to at least one data_t that the device can write to
//   i    - value stored into the `pos` field
// The `t` field is always set to the constant 1.132f. No thread index is
// used, so every launched thread performs the same two stores.
__global__ void mykernel (data_t * data, int i)
{
  data->t   = 1.132f;
  data->pos = i;
}

// Reports a failed CUDA runtime call on stderr.
// Returns true when `err` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t err, const char *what)
{
  if (err == cudaSuccess)
    return true;

  fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(err));
  return false;
}

// Allocates one data_t in mapped pinned host memory, launches mykernel on it
// twice and prints the fields from the host after each launch.
//
// The host waits with cudaDeviceSynchronize() (the replacement for the
// deprecated cudaThreadSynchronize()) before every read, because a kernel
// launch returns before the kernel has finished writing.
//
// Returns 0 on success and 1 if any CUDA call failed.
int main()
{
  data_t *data = NULL;
  data_t *d_data = NULL;
  int status = 0;

  if (!cudaOk(cudaHostAlloc(&data, sizeof(data_t), cudaHostAllocMapped),
              "cudaHostAlloc"))
    return 1;

  if (!cudaOk(cudaHostGetDevicePointer(&d_data, data, 0),
              "cudaHostGetDevicePointer"))
    status = 1;

  for (int i = 0; status == 0 && i < 2; i++)
  {
    mykernel<<<1,1>>>(d_data, i);

    // Launch-configuration errors are only visible through
    // cudaGetLastError(); execution errors surface at the synchronization.
    if (!cudaOk(cudaGetLastError(), "mykernel launch") ||
        !cudaOk(cudaDeviceSynchronize(), "cudaDeviceSynchronize"))
    {
      status = 1;
      break;
    }

    // Safe to read the mapped memory now: the kernel has completed.
    printf("%d: data[0].t = %f\n",i,data[0].t);
    printf("%d: data[0].pos = %d\n",i,data[0].pos);
  }

  // Release the pinned allocation before tearing down the device.
  if (!cudaOk(cudaFreeHost(data), "cudaFreeHost"))
    status = 1;

  // cudaDeviceReset() replaces the deprecated cudaThreadExit().
  if (!cudaOk(cudaDeviceReset(), "cudaDeviceReset"))
    status = 1;

  return status;
}
avidday@cuda:~$ nvcc -arch=sm_20 mappe.cu -o mappe

avidday@cuda:~$ ./mappe 

0: data[0].t = 1.132000

0: data[0].pos = 0

1: data[0].t = 1.132000

1: data[0].pos = 1