Dear All
http://cuda-programming.blogspot.pt/2013/01/what-is-constant-memory-in-cuda.html
I am having problems using constant memory. I followed the complete example at the link above. The only difference is that I pass the constant memory pointer (or an offset from it) as a parameter to the kernels and use it inside them. Since making that change I get wrong results, whereas before the change the results were correct. The compute capability of my devices is 3.0 and 3.5.
// Raw byte buffer placed in constant memory. The host fills it with
// cudaMemcpyToSymbol, and typed regions (float / int / complex1) are addressed
// by byte offsets from its start.
// Sized for one antenna; it must be allocated larger for more antennas.
// NOTE(review): the array is declared as unsigned char, so the alignment of the
// typed regions carved out of it is not guaranteed by this declaration - confirm.
__constant__ __device__ unsigned char input1[65512];
// Host entry point (excerpt): uploads the packed input buffer into the
// constant-memory array input1, derives typed device pointers into it and
// launches ciclo4.
//
// Returns 0 on success and 1 if any CUDA runtime call reports an error.
int main()
{
    // Host staging buffer with the same capacity as input1.
    unsigned char frameori[65512];
    //fill frameori with data

    // Copy the used part of the staging buffer into constant memory. The symbol
    // itself is passed (no cast): the runtime resolves it to the device variable.
    if (cudaMemcpyToSymbol(input1, frameori,
            512+sizeof(float)*(NRSAMPLES*2*NUMBEROFANTENNAELEMENTS+NRSAMPLES*2*NUMBEROFANTENNAELEMENTS+SUBSET+NTAPS*NUSERS*NUMBEROFANTENNAELEMENTS*4)+sizeof(int)*NTAPS*NUSERS*NUMBEROFANTENNAELEMENTS,
            0, cudaMemcpyHostToDevice) != cudaSuccess)
        return 1;
    if (cudaDeviceSynchronize() != cudaSuccess)
        return 1;

    // In host code the name input1 refers to the host-side shadow of the
    // __constant__ variable, so "input1 + offset" is a HOST address and must not
    // be handed to a kernel. Ask the runtime for the device address instead and
    // build every kernel pointer from it.
    // NOTE(review): reads made through a pointer kernel argument are presumably
    // not served by the constant cache the way direct references to input1 are -
    // verify if performance matters.
    unsigned char *input1Dev = 0;
    if (cudaGetSymbolAddress((void **)&input1Dev, input1) != cudaSuccess)
        return 1;

    // Typed views into the buffer, at the same byte offsets as before.
    delaytran=(int *)(input1Dev+512+sizeof(float)*(NRSAMPLES*2*nant+NRSAMPLES*2*nant+SUBSET+ntaps*nusers*nant*2));
    tapreal=(float *)(input1Dev+512+sizeof(int)*ntaps*nusers*nant+sizeof(float)*(NRSAMPLES*2*nant+NRSAMPLES*2*nant+SUBSET+ntaps*nusers*nant*2));
    tapimag=(float *)(input1Dev+512+sizeof(int)*ntaps*nusers*nant+sizeof(float)*(NRSAMPLES*2*nant+NRSAMPLES*2*nant+SUBSET+ntaps*nusers*nant*3));

    // One thread per sample: NRSAMPLES/32 blocks of 32 threads on stream z5.
    ciclo4<<<NRSAMPLES/32,32,0,stream[z5]>>>((complex1 *)(input1Dev+512+sizeof(complex1)*(NRSAMPLES*nant)), timetotal,timeuser,comp1[z5],maxdelay+atraso,g,ntaps,nant,nusers,z5,
        tapreal,tapimag,delaytran);

    // A kernel launch returns no status itself; launch errors are reported here.
    if (cudaGetLastError() != cudaSuccess)
        return 1;

    return 0;
}
// ciclo4: each thread produces one output element comp1[i], where
// i = blockIdx.x * blockDim.x + threadIdx.x (1-D launch).
//
// For every antenna and every tap the thread looks up a delay in delaytran,
// forms the delayed sample (frame1 - timetotal + timeuser) and multiplies it
// by the tap coefficient (tapreal, -tapimag), i.e. with the imaginary part
// negated. The products are summed over all antennas and taps; taps whose
// delayed sample index would be negative are skipped.
//
// Parameters:
//   frame1, timetotal - per-antenna sample arrays, NRSAMPLES elements apart
//   timeuser          - per-(z5, antenna) sample arrays, NRSAMPLES elements apart
//   comp1             - output, one element written per thread
//   maxdelayatraso    - value from which the per-tap delay entry is subtracted
//   g, z5             - indices used in the coefficient / timeuser addressing
//   ntaps, nant, nusers - loop bounds and strides of the coefficient tables
//   tapreal, tapimag, delaytran - coefficient and delay tables, indexed by
//                       tap + g*ntaps + antenna*nusers*ntaps
//
// No bounds check is made on the thread index or on the upper end of the
// delayed sample index; the launch configuration must match the array sizes.
__global__ void ciclo4(complex1 *frame1,complex1 *timetotal,complex1 *timeuser,complex1 *comp1,int maxdelayatraso,int g,
int ntaps,int nant,int nusers,int z5,
float *tapreal,float *tapimag,int *delaytran)
{
    int sample = blockIdx.x * blockDim.x + threadIdx.x;

    complex1 diff, coef, acc;
    complex1 *rx;
    complex1 *usr, *tot;
    int ant, tap, tapIdx, src;

    // Running complex sum over all antennas and taps.
    acc.r=0;
    acc.i=0;

    for (ant = 0; ant < nant; ant++)
    {
        // Per-antenna base pointers.
        rx  = frame1 + NRSAMPLES*ant;
        usr = (timeuser + NRSAMPLES*(z5*nant+ant));
        tot = (timetotal + NRSAMPLES*ant);

        for (tap = 0; tap < ntaps; tap++)
        {
            // Shared index into the delay and coefficient tables.
            tapIdx = tap + g*ntaps + ant*nusers*ntaps;

            // Index of the delayed input sample for this tap.
            src = sample - (maxdelayatraso - delaytran[tapIdx]);
            if (src < 0)
                continue;   // delayed sample lies before the start of the data

            diff.r = rx[src].r - tot[src].r + usr[src].r;
            diff.i = rx[src].i - tot[src].i + usr[src].i;

            coef.r = tapreal[tapIdx];
            coef.i = -tapimag[tapIdx];

            // Complex multiply-accumulate: acc += diff * coef.
            acc.r += diff.r * coef.r - diff.i * coef.i;
            acc.i += diff.r * coef.i + diff.i * coef.r;
        }
    }

    comp1[sample].r = acc.r;
    comp1[sample].i = acc.i;
}
Thanks
Luis Gonçalves