Hi everyone,
I am a newbie to CUDA. I have a program where I need to process 300,000,000 values stored in a single large array using textures, but I cannot process all of them: it works for up to 100 million values but not for 300 million. Can anyone please explain why? I am attaching my code here:
#include<stdio.h>
#include<stdlib.h>
/* Total number of elements in the input and output arrays. */
#define N 300000000
/* Chunk size: number of consecutive elements assigned to one GPU thread. */
#define cs 25000
/* Number of chunks. Parenthesised so the macro stays a single value when it
 * is used inside a larger expression (e.g. `x / nc` must not expand to
 * `x / N / cs`). Integer division: elements past nc*cs are not covered when
 * N is not a multiple of cs. */
#define nc (N / cs)
/* 1D texture reference through which the kernel reads the input array;
 * main() binds it to the device copy of the input. */
texture <unsigned int> tex_arr;
/*
 * Copies the array bound to the texture `tex_arr` into `dev_arrb`.
 *
 * Work split: the data is divided into `nc` chunks of `cs` consecutive
 * elements and each thread copies exactly one chunk; threads whose linear
 * index is >= nc do nothing.
 *
 * Launch layout: blocks and grid may be 1D or 2D; the x/y thread
 * coordinates are flattened row-major into one linear thread index.
 *
 * Preconditions: `dev_arrb` must hold at least nc*cs elements and the
 * texture must be bound to at least that many elements.
 * NOTE(review): tex1Dfetch on linear memory can only address a
 * device-dependent number of texels (2^27 on many GPUs) - confirm that the
 * bound range fits the limit of the target device.
 */
__global__ void my_kernel(unsigned int *dev_arrb)
{
    // Flatten the (possibly 2D) launch into a single linear thread index.
    unsigned int xdir = threadIdx.x + blockIdx.x * blockDim.x;
    unsigned int ydir = threadIdx.y + blockIdx.y * blockDim.y;
    unsigned int tid  = xdir + ydir * blockDim.x * gridDim.x;
    //unsigned int tid=blockIdx.x*blockDim.x+threadIdx.x;

    // Surplus threads of the last block(s) have no chunk to process.
    if (tid >= nc)
        return;

    // Each thread handles only its own chunk [tid*cs, (tid+1)*cs). Starting
    // at 0 instead would make every thread re-copy all preceding chunks.
    const unsigned int first = tid * cs;
    const unsigned int last  = first + cs;
    for (unsigned int i = first; i < last; i++)
    {
        dev_arrb[i] = tex1Dfetch(tex_arr, (int)i);
    }
}
/* Prints a readable message and terminates when a CUDA runtime call fails. */
static void check_cuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/*
 * Fills a host array with 1..N, copies it to the device, lets my_kernel copy
 * it into a second device array through the texture `tex_arr`, copies the
 * result back and prints it (a blank line after every `cs` values).
 *
 * Returns 0 on success; exits with a failure status when an allocation or a
 * CUDA call fails, or when N exceeds the device's limit for 1D textures
 * bound to linear memory.
 */
int main()
{
    const size_t bytes = (size_t)N * sizeof(unsigned int);
    const unsigned int num_chunks = nc;
    unsigned int *arr_a, *arr_b, *dev_arra, *dev_arrb;
    unsigned int i;
    int device = 0;
    cudaDeviceProp prop;

    // A texture bound to linear memory can only address a limited number of
    // texels. Fetches beyond that limit do not return the data, so refuse to
    // run instead of silently producing wrong results.
    check_cuda(cudaGetDevice(&device), "cudaGetDevice");
    check_cuda(cudaGetDeviceProperties(&prop, device), "cudaGetDeviceProperties");
    if ((long long)N > (long long)prop.maxTexture1DLinear)
    {
        fprintf(stderr,
                "N = %lld elements exceeds the device limit of %d texels for a 1D "
                "texture bound to linear memory; process the data in smaller "
                "slices or read it through a plain device pointer.\n",
                (long long)N, prop.maxTexture1DLinear);
        return EXIT_FAILURE;
    }

    arr_a = (unsigned int*)malloc(bytes);
    arr_b = (unsigned int*)malloc(bytes);
    if (arr_a == NULL || arr_b == NULL)
    {
        fprintf(stderr, "Host allocation of %lu bytes failed\n", (unsigned long)bytes);
        free(arr_a);
        free(arr_b);
        return EXIT_FAILURE;
    }
    check_cuda(cudaMalloc((void**)&dev_arra, bytes), "cudaMalloc(dev_arra)");
    check_cuda(cudaMalloc((void**)&dev_arrb, bytes), "cudaMalloc(dev_arrb)");

    for (i = 0; i < N; i++)
    {
        arr_a[i] = i + 1;
    }

    check_cuda(cudaMemcpy(dev_arra, arr_a, bytes, cudaMemcpyHostToDevice),
               "cudaMemcpy(host to device)");
    check_cuda(cudaBindTexture(NULL, tex_arr, dev_arra, bytes), "cudaBindTexture");

    // One thread per chunk: a 1D grid of 16x16 blocks, rounded up so every
    // chunk is covered. The kernel discards the surplus threads itself.
    dim3 threads(16, 16);
    unsigned int grid_x = (num_chunks + 255) / 256;
    if (grid_x == 0)
        grid_x = 1; // a launch with zero blocks is invalid
    dim3 blocks(grid_x, 1);
    my_kernel<<<blocks,threads>>>(dev_arrb);
    check_cuda(cudaGetLastError(), "my_kernel launch");
    check_cuda(cudaDeviceSynchronize(), "my_kernel execution");

    check_cuda(cudaMemcpy(arr_b, dev_arrb, bytes, cudaMemcpyDeviceToHost),
               "cudaMemcpy(device to host)");

    for (i = 0; i < N; i++)
    {
        // %u: the values are unsigned.
        printf("%5u,", arr_b[i]);
        if ((i + 1) % cs == 0)
            printf("\n\n");
    }

    check_cuda(cudaUnbindTexture(tex_arr), "cudaUnbindTexture");
    check_cuda(cudaFree(dev_arra), "cudaFree(dev_arra)");
    check_cuda(cudaFree(dev_arrb), "cudaFree(dev_arrb)");
    free(arr_a);
    free(arr_b);
    return 0;
}