[codebox]host code:
texture<int, 1, cudaReadModeElementType> texRef;
#define LEN 256
int *h_ind2, *d_ind2;
int *h_iscommon, *d_iscommon;
int i;
h_ind2 = (int *)malloc(LEN * sizeof(int));
for (i = 0; i < LEN; ++i)
{
h_ind2[i] = 1000;
}
cudaMalloc((void **)&d_ind2, LEN * sizeof(int));
cudaMemcpy(d_ind2, h_ind2, LEN * sizeof(int), cudaMemcpyHostToDevice);
cudaBindTexture(0, texRef, d_ind2);
h_iscommon = (int *)malloc(LEN * sizeof(int));
cudaMalloc((void **)&d_iscommon, LEN * sizeof(int));
…
//after kenerl innovaction
cudaMemcpy(h_iscommon, d_iscommon, LEN * sizeof(int), cudaMemcpyDeviceToHost);
for(i = 0; i < LEN; ++i)
{
printf(“%d %d\n”, i, h_iscommon[i]);
}
cudaUnbindTexture(texRef);
cudaFree(d_ind2):
cudaFree(d_iscommon);
Free(h_ind2);
Free(h_iscommon);
device code:
int i;
for (i = 0; i < LEN; ++i)
{
d_iscommon[i] = tex1Dfetch(texRef, threadIdx.x);
}
[/codebox]
it seems that for i = 0, h_iscommon[0] = h_ind2[0] = 1000 and for i >= 1, h_iscommon[i] = 0. Why is that?