Experimenting with __device__variables, the following code should set an int array on the host to hold values 0,…,9, then the kernel should set the device array to 0, which is then copied to the host array, but the host array is unchanged. Why does this not work?
#define N 10
#include <stdio.h>
device int device_array[N];
int size;
global void device_func();
main()
{
int host_array[N];
int i;
for(i=0 ; i<N ; i++) host_array[i] = i;
size = N*sizeof(int);
for(i=0 ; i<N ; i++) printf("\n%i\t%i",i,host_array[i]);
dim3 dimGrid(1,1); //dimGrid.x*dimGrid.y = number of blocks being used
dim3 dimBlock(N,1,1); //dimBlock.x*dimBlock.y*dimBlock.z = number of threads being used
// Launch the device computation
device_func<<<dimGrid, dimBlock>>>();
cudaMemcpy(host_array, device_array, size, cudaMemcpyDeviceToHost);
for(i=0 ; i<N ; i++) printf("\n%i\t%i",i,host_array[i]);
}
//////////////////////////////
global void device_func()
{
int i,id;
id = threadIdx.x;
if(id==0)
{
for(i=0 ; i<N ; i++) device_array[i] = id;
}
}