In my program I use a variable allocated with cudaHostAllocMapped. The kernel writes a number into this variable, but after the kernel has run, the host does not see that number in the variable.
For example:
/* One record shared between host and device. */
typedef struct data_t {
    float t;   /* time */
    int   pos; /* position (reaction) */
} data_t;
/*
 * Stores the constant 1.132f into the `t` field of the first element of
 * `data`.
 *
 * The body does not use threadIdx/blockIdx, so every launched thread performs
 * the same store; any launch configuration yields the same result.
 *
 * data: device-accessible pointer to at least one data_t.
 *
 * A kernel must be declared with a `void` return type; it was missing here.
 */
__global__ void mykernel (data_t * data)
{
    data[0].t = 1.132f;
}
// Question's reproduction case (elided with `...`): allocates one data_t as
// mapped host memory, launches mykernel twice and prints data[0].t on the
// host after each launch.
main()
{
...
// NOTE(review): the pointer returned by malloc is stored in `data` and then
// `&data` is handed to cudaHostAlloc on the next line, so this malloc looks
// unnecessary (its result appears to be overwritten) - confirm and remove.
data = (data_t *) malloc(sizeof(data_t));
// Request host memory with the cudaHostAllocMapped flag.
cudaHostAlloc(&data, sizeof(data_t), cudaHostAllocMapped);
// Obtain the pointer (d_data) that is passed to the kernel for the same allocation.
cudaHostGetDevicePointer(&d_data, data, 0);
for(i=0;i<2;i++)
{
// NOTE(review): the launch is closed with `>>`; kernel launch syntax needs `>>>`.
mykernel<<<1,5>> (d_data);
// There is no synchronization between the launch above and this host read.
// The reported output (0 on the first pass, 1.132 on the second) is
// consistent with the host reading before the kernel has written the value.
printf("data[0].t = %f\n",data[0].t);
}
...
}
The result is:
0
1.132
but it should be:
1.132
1.132
Maybe I need to synchronize, but how?
Sorry for my English.
Use cudaThreadSynchronize() in this case (newer CUDA releases deprecate it in favor of cudaDeviceSynchronize()).
Should I write cudaThreadSynchronize() right after the kernel launch?
Write it before you try to read the mapped memory. It will force the host thread to spin-wait until the kernel has finished. Like this:
#include <stdio.h>
/* Record written by the kernel and read back on the host. */
typedef struct data_t {
    float t;   /* time */
    int   pos; /* position (reaction) */
} data_t;
/*
 * Fills the first element of `data`: `t` receives the constant 1.132f and
 * `pos` receives the value of `i`.
 *
 * The body does not read threadIdx/blockIdx, so every launched thread
 * performs the same two stores.
 *
 * data: device-accessible pointer to at least one data_t.
 * i:    value stored into data[0].pos.
 */
__global__ void mykernel (data_t * data, int i)
{
    data->t   = 1.132f;
    data->pos = i;
}
/*
 * Reports a failed CUDA runtime call on stderr.
 *
 * err:  status returned by the call.
 * what: short label identifying the call in the message.
 *
 * Returns 1 when `err` is cudaSuccess, 0 otherwise.
 */
static int cudaSucceeded(cudaError_t err, const char *what)
{
    if (err == cudaSuccess)
        return 1;
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(err));
    return 0;
}

/*
 * Demonstrates reading mapped (zero-copy) host memory after a kernel wrote it.
 *
 * One data_t is allocated with cudaHostAllocMapped, mykernel is launched twice
 * on it, and after each launch the host waits for the device before printing
 * the two fields.
 *
 * Returns 0 on success and 1 if any CUDA call fails.
 */
int main()
{
    data_t *data = NULL;
    data_t *d_data = NULL;
    int status = 1;

    if (!cudaSucceeded(cudaHostAlloc((void **)&data, sizeof(data_t),
                                     cudaHostAllocMapped),
                       "cudaHostAlloc"))
        return status;

    /* Device-side alias of the same allocation; this is what the kernel gets. */
    if (cudaSucceeded(cudaHostGetDevicePointer((void **)&d_data, data, 0),
                      "cudaHostGetDevicePointer")) {
        status = 0;
        for (int i = 0; i < 2; i++) {
            mykernel<<<1,1>>>(d_data, i);

            /* A launch returns no status itself; bad configurations show up here. */
            if (!cudaSucceeded(cudaGetLastError(), "mykernel launch")) {
                status = 1;
                break;
            }

            /*
             * The launch is asynchronous: wait for the kernel before the host
             * reads the mapped memory. cudaDeviceSynchronize() is the current
             * name for the deprecated cudaThreadSynchronize().
             */
            if (!cudaSucceeded(cudaDeviceSynchronize(), "cudaDeviceSynchronize")) {
                status = 1;
                break;
            }

            printf("%d: data[0].t = %f\n",i,data[0].t);
            printf("%d: data[0].pos = %d\n",i,data[0].pos);
        }
    }

    /* Pinned host memory must be released with cudaFreeHost, not free(). */
    if (!cudaSucceeded(cudaFreeHost(data), "cudaFreeHost"))
        status = 1;

    /* cudaDeviceReset() replaces the deprecated cudaThreadExit(). */
    if (!cudaSucceeded(cudaDeviceReset(), "cudaDeviceReset"))
        status = 1;

    return status;
}
avidday@cuda:~$ nvcc -arch=sm_20 mappe.cu -o mappe
avidday@cuda:~$ ./mappe
0: data[0].t = 1.132000
0: data[0].pos = 0
1: data[0].t = 1.132000
1: data[0].pos = 1