As Nico said was a problem of synchronization and inserting a cudaSynchronizeThread(); after the kernel execution permits to the host to read the correct value.
But now I’ve a new problem:
[codebox]
void my_function(void){
int *h_value = NULL;
int *d_value = NULL;
int size=1024;
dim3 block(1,1);
dim3 grid(1,1);
cudaMalloc((void**)&d_input_data, size); //< This line makes kernel increment fail!
cudaSetDeviceFlags(cudaDeviceMapHost);
cudaHostAlloc((void**) &h_value, sizeof( int ) , cudaHostAllocMapped | cudaHostAllocPortable);
cudaHostGetDevicePointer( &d_value , h_value , 0 );
//init value on host
*h_value=0;
my_kernel<<<grid,block>>>( d_input_data , d_value );
printf("h_value = %d \n",*h_value);
}
[/codebox]
[codebox]global void ky_kernel( char* device_array, int *value )
{
*value=1;
}
[/codebox]
Simply adding a cudaMalloc() and a parameter to kernel the mapped variable cannot be modified (!!!) and h_values remains 0.