/*
 * GPU_Offload - allocation smoke test for the boundary buffers and the
 * nx*nz scratch field, using the in-kernel (device heap) malloc.
 *
 * Parameters:
 *   nx, nz  - grid extents; they size every allocation made here
 *             (nz floats for the px_* buffers, nx floats for the pz_*
 *             buffers, nx*nz floats for the scratch field).
 *   nt, total_np, num_recv, l_x, l_z, d_amp, x_r, z_r
 *           - not used by this body; kept so the launch signature is stable.
 *
 * Host-side precondition: device-side malloc draws from a fixed-size heap
 * (8 MB by default). The host must raise it BEFORE the first launch, e.g.
 *   cudaDeviceSetLimit(cudaLimitMallocHeapSize, bytes);
 * For nx=14000, nz=3600 the scratch field alone is
 *   14000 * 3600 * sizeof(float) ~= 201.6 MB,
 * so with the default heap the allocation returns NULL.
 *
 * NOTE(review): every thread that runs this kernel performs its own set of
 * allocations, so the heap must cover (threads launched) x (bytes per
 * thread). Confirm the launch configuration; a single thread is assumed to
 * be intended here.
 */
__global__ void GPU_Offload(int nt,int total_np,int num_recv,int l_x,int l_z,float *d_amp,int nx,int nz,int *x_r,int *z_r)
{
    // Values in the reported failing run: nx=14000 and nz=3600.
    printf("\n\n\nin device at start %d ,%d",nx,nz);

    // Do the size arithmetic in size_t so nx*nz cannot overflow int.
    const size_t cells     = (size_t)nx * (size_t)nz;
    const size_t col_bytes = sizeof(float) * (size_t)nz;
    const size_t row_bytes = sizeof(float) * (size_t)nx;

    // Boundary buffers; the members are assumed to be float* (they are
    // assigned from float* casts) - PREV_PR_o is declared outside this block.
    PREV_PR_o prev;
    prev.px_l1 = (float*)malloc(col_bytes);
    prev.px_l2 = (float*)malloc(col_bytes);
    prev.px_r1 = (float*)malloc(col_bytes);
    prev.px_r2 = (float*)malloc(col_bytes);
    prev.pz_t1 = (float*)malloc(row_bytes);
    prev.pz_t2 = (float*)malloc(row_bytes);
    prev.pz_b1 = (float*)malloc(row_bytes);
    prev.pz_b2 = (float*)malloc(row_bytes);
    printf("%s %d \n",__FILE__,__LINE__);

    // Full nx*nz scratch field.
    float *pw = (float*)malloc(sizeof(float) * cells);

    const bool all_ok =
        prev.px_l1 != NULL && prev.px_l2 != NULL &&
        prev.px_r1 != NULL && prev.px_r2 != NULL &&
        prev.pz_t1 != NULL && prev.pz_t2 != NULL &&
        prev.pz_b1 != NULL && prev.pz_b2 != NULL &&
        pw != NULL;

    if (!all_ok)
    {
        // A NULL here means the device heap is exhausted; see the header
        // comment for the cudaLimitMallocHeapSize requirement.
        printf("error in allocation");
    }
    else
    {
        // Touch a few elements to prove the buffer is writable. The extra
        // bound keeps the probe inside the buffer for very small grids.
        for (size_t i = 1; i < 10 && i < cells; ++i)
        {
            pw[i] = 0.0f;
        }
    }

    // Device-heap memory outlives the kernel unless it is freed, so release
    // everything on both paths. In-kernel free() ignores NULL pointers.
    free(pw);
    free(prev.pz_b2);
    free(prev.pz_b1);
    free(prev.pz_t2);
    free(prev.pz_t1);
    free(prev.px_r2);
    free(prev.px_r1);
    free(prev.px_l2);
    free(prev.px_l1);
}