Hi,
I have studied coalesced memory access, but I can't understand it. Can you recommend a book or guide for me? Also, in the following code, which values of `offest` allow coalesced access, and why?
// includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <time.h>
#include <cutil_inline.h>
#include <cuda.h>
#include <cutil.h>
/*
 * Copy kernel used to experiment with shifted global-memory access.
 *
 * Each thread copies exactly one float: element (global thread index + offest)
 * of d_in is written to the same index of d_out. Neighbouring threads therefore
 * touch neighbouring elements; `offest` only shifts where the whole access
 * window starts.
 *
 * Parameters:
 *   d_in   - device pointer to the source array
 *   d_out  - device pointer to the destination array
 *   offest - element offset added to every thread's index
 *
 * Launch layout: 1D grid of 1D blocks (only the .x components are used).
 *
 * Precondition: the kernel receives no element count and performs no bounds
 * check, so BOTH arrays must hold at least
 *     gridDim.x * blockDim.x + offest
 * floats. Elements [0, offest) of d_out are never written.
 */
__global__ void test(float *d_in,float *d_out,int offest)
{
    int tid = blockIdx.x * blockDim.x + threadIdx.x + offest;
    d_out[tid] = d_in[tid];
}
/* Prints a diagnostic and terminates the program when a CUDA runtime call fails. */
static void checkCuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/*
 * Host driver: fills an input array with 8.0f, runs the `test` copy kernel with
 * an element offset of m, copies the result back and prints the first 512
 * output values as "index:value" lines.
 *
 * The kernel is launched with 2 blocks of 256 threads and touches indices
 * [m, 512 + m), so every buffer is sized for 512 + m floats; sizing them for
 * only 512 floats would make the last m threads access memory out of bounds.
 *
 * Returns 0 on success; exits with EXIT_FAILURE on any allocation or CUDA error.
 */
int main()
{
    int i, m = 16;
    const int numBlocks = 2;
    const int threadsPerBlock = 256;
    const int numThreads = numBlocks * threadsPerBlock;   /* 512 threads in total */
    const int numElements = numThreads + m;               /* highest index touched is numThreads + m - 1 */
    const size_t numBytes = sizeof(float) * numElements;

    // Allocate host memory for the signal
    float *h_idata = (float *)malloc(numBytes);
    float *h_odata = (float *)malloc(numBytes);
    if (h_idata == NULL || h_odata == NULL) {
        fprintf(stderr, "host allocation failed\n");
        free(h_idata);
        free(h_odata);
        return EXIT_FAILURE;
    }
    for (i = 0; i < numElements; i++)
        h_idata[i] = 8.0f;

    float *d_idata = NULL;
    float *d_odata = NULL;
    checkCuda(cudaMalloc((void **)&d_idata, numBytes), "cudaMalloc(d_idata)");
    checkCuda(cudaMalloc((void **)&d_odata, numBytes), "cudaMalloc(d_odata)");

    /* The kernel never writes d_odata[0 .. m-1]; zero the buffer so those
       entries print as 0 instead of uninitialized device memory. */
    checkCuda(cudaMemset(d_odata, 0, numBytes), "cudaMemset(d_odata)");
    checkCuda(cudaMemcpy(d_idata, h_idata, numBytes, cudaMemcpyHostToDevice),
              "cudaMemcpy host->device");

    test<<<numBlocks, threadsPerBlock>>>(d_idata, d_odata, m);
    checkCuda(cudaGetLastError(), "kernel launch");          /* invalid launch configuration */
    checkCuda(cudaDeviceSynchronize(), "kernel execution");  /* faults raised while the kernel ran */

    checkCuda(cudaMemcpy(h_odata, d_odata, numBytes, cudaMemcpyDeviceToHost),
              "cudaMemcpy device->host");

    for (i = 0; i < numThreads; i++)
        printf("%d:%f\n", i, h_odata[i]);

    checkCuda(cudaFree(d_idata), "cudaFree(d_idata)");
    checkCuda(cudaFree(d_odata), "cudaFree(d_odata)");
    free(h_idata);
    free(h_odata);
    return 0;
}