The following program works on a C870 under Windows XP but not under Linux.

#define N 4

#include <stdio.h>

/* Forward declaration of the kernel defined below main(). */
__global__ void d_arrayinit(unsigned int* x);

/*
 * Reports a CUDA runtime error, if any.
 *
 * err  - status returned by a CUDA runtime call.
 * what - short description of the step, used in the message.
 *
 * Returns 0 when err is cudaSuccess, 1 otherwise (after printing the
 * runtime's error string to stderr).
 */
static int cudaFailed(cudaError_t err, const char* what)
{
    if (err == cudaSuccess) return 0;
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(err));
    return 1;
}

/*
 * Zeroes a host array of N elements, copies it to the device, launches
 * d_arrayinit with one block of N threads, copies the result back and
 * prints each element on its own line.
 *
 * Every CUDA runtime call is checked: without the checks a failing call
 * goes unnoticed and the program just prints the zero-initialised host
 * array.
 *
 * Returns 0 on success, 1 if any CUDA call failed.
 */
int main()
{
    unsigned int x[N];
    unsigned int* d_x = NULL;
    const size_t bytes = N * sizeof(unsigned int);
    unsigned int i;
    int failed;

    for (i = 0; i < N; i++) x[i] = 0;

    if (cudaFailed(cudaMalloc((void**)&d_x, bytes), "cudaMalloc")) return 1;

    failed = cudaFailed(cudaMemcpy(d_x, x, bytes, cudaMemcpyHostToDevice),
                        "host-to-device copy");

    if (!failed) {
        d_arrayinit<<<1, N>>>(d_x);
        /* A kernel launch returns no status; query it explicitly. */
        failed = cudaFailed(cudaGetLastError(), "kernel launch");
    }

    if (!failed) {
        failed = cudaFailed(cudaMemcpy(x, d_x, bytes, cudaMemcpyDeviceToHost),
                            "device-to-host copy");
    }

    /* Release the device buffer on both the success and failure paths. */
    if (cudaFailed(cudaFree(d_x), "cudaFree")) failed = 1;

    if (failed) return 1;

    /* x holds unsigned int, so print with %u rather than %i. */
    for (i = 0; i < N; i++) printf("\n%u", x[i]);
    printf("\n\n");
    return 0;
}

////////////////////////////////

/*
 * Kernel: each thread stores its own flat global index into x.
 *
 * x - device pointer to an array of at least N unsigned ints (N is the
 *     file-level element-count macro).
 *
 * The index is computed from the x dimension of the launch only
 * (blockIdx.x, blockDim.x, threadIdx.x). Threads whose index is not
 * below N do nothing, so a launch with more than N threads cannot write
 * past the end of the array.
 */
__global__ void d_arrayinit(unsigned int* x)
{
    unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;

    if (i < N) {
        x[i] = i;
    }
}

What reasons could there be for this?