Multi Dimensional Kernel

Hi everyone,

I am new to CUDA programming and I have a question. I want to use a 1D texture with a multi-dimensional kernel; my code is attached below. I initially fill dev_temp with 0 and need to set only some of its elements to 1, but the values are never updated and I don't know what is happening. Can anyone please tell me why?

kernel launch :

// 16x16 = 256 threads per block. The kernel flattens the 2D thread index into
// a single tid and ignores every thread with tid >= v.n_sbimg, so only the
// total number of launched threads matters, not the grid's shape.
dim3 threads(16,16);
// Round the block count UP. The previous v.n_sbimg/16 truncated to 0 whenever
// v.n_sbimg < 16; a grid dimension of 0 is an invalid launch configuration,
// so the kernel never ran and dev_temp was never written.
dim3 blocks((v.n_sbimg + 16*16 - 1) / (16*16), 1);
// Nothing to do (and an invalid 0-block grid) when there are no sub-images.
if (v.n_sbimg > 0)
	neighbour_pxls<<<blocks,threads>>>(dev_temp,v);
// A failed launch is silent: check cudaGetLastError() right after the launch
// and the return value of the next synchronizing call (e.g. the cudaMemcpy
// that reads dev_temp back) to see configuration and execution errors.

length of dev_temp array is 225
length of tex_pxl is 675

/*
 * neighbour_pxls - flag pixels that differ strongly from their 3x3 neighbourhood.
 *
 * Every thread whose flattened index tid is below v.n_sbimg processes one
 * sub-image. For each pixel (x, y) of that sub-image it visits the 3x3 window
 * around the pixel in whole-image coordinates, skips positions outside
 * [0, v.width) x [0, v.height), and accumulates three sums of absolute
 * differences between texels fetched from tex_pxl at offsets +0, +1 and +2
 * (named r/g/b here; presumably the colour channels - confirm against the
 * texture layout). If the averaged sum per neighbour exceeds v.treshold, the
 * pixel's entry in dev_temp is set to 1.
 *
 * dev_temp : output flags; this kernel only ever writes 1, so the caller must
 *            clear the buffer beforehand.
 * v        : geometry and threshold parameters, passed by value.
 *
 * Launch layout: any 2D grid of 2D blocks works, because the thread index is
 * flattened using the total thread count along x; surplus threads exit early.
 *
 * NOTE(review): loc(), loc_temp(), the two-argument round() and the texture
 * reference tex_pxl are defined outside this snippet; their exact semantics
 * are assumed unchanged here.
 */
__global__ void neighbour_pxls(bool *dev_temp,struct var v)
{
	// Flatten the 2D launch: one row of the flattened space is as wide as
	// the whole grid along x (blockDim.x * gridDim.x threads).
	unsigned int xdir = threadIdx.x + blockIdx.x * blockDim.x;
	unsigned int ydir = threadIdx.y + blockIdx.y * blockDim.y;
	unsigned int tid = xdir + ydir * blockDim.x * gridDim.x;

	int x,y,nx,ny,rsum,gsum,bsum,gny,gnx,count,tot_sum;
	int img_id,n_nx=0,n_ny=0,startx,starty;

	// Threads beyond the number of sub-images have no work.
	if(tid>=v.n_sbimg)
		return;

	// Origin of this thread's sub-image in whole-image coordinates. These
	// depend only on tid and v, so they are computed once rather than once
	// per pixel.
	startx=(tid%(v.width/v.sbimg_wdt))*v.sbimg_wdt;
	starty=round(tid/(v.width/v.sbimg_wdt)*v.sbimg_hgt,v.sbimg_hgt);

	for(y=0;y<v.sbimg_hgt;y++)
	{
		for(x=0;x<v.sbimg_wdt;x++)
		{
			gsum=rsum=bsum=count=0;

			for(nx=-1;nx<=1;nx++)
			{
				for(ny=-1;ny<=1;ny++)
				{
					// Neighbour position in whole-image coordinates.
					gnx=x+nx+startx;
					gny=y+ny+starty;
					if((gnx<v.width)&&(gnx>=0)&&(gny<v.height)&&(gny>=0))
					{
						// The neighbour may lie in an adjacent sub-image:
						// find which one and its local coordinates there.
						img_id=round(gnx/v.sbimg_wdt,1)+round(gny/v.sbimg_hgt,1)*(v.width/v.sbimg_wdt);
						n_nx=gnx%v.sbimg_wdt;
						n_ny=gny%v.sbimg_hgt;

						rsum=rsum+abs(tex1Dfetch(tex_pxl,(loc(y,x,v.sbimg_wdt)+ tid*v.sbimg_pxl))-tex1Dfetch(tex_pxl,(loc(n_ny,n_nx,v.sbimg_wdt)+img_id*v.sbimg_pxl)));
						gsum=gsum+abs(tex1Dfetch(tex_pxl,(loc(y,x,v.sbimg_wdt)+1+ tid*v.sbimg_pxl))-tex1Dfetch(tex_pxl,(loc(n_ny,n_nx,v.sbimg_wdt)+1+img_id*v.sbimg_pxl)));
						bsum=bsum+abs(tex1Dfetch(tex_pxl,(loc(y,x,v.sbimg_wdt)+2+ tid*v.sbimg_pxl))-tex1Dfetch(tex_pxl,(loc(n_ny,n_nx,v.sbimg_wdt)+2+img_id*v.sbimg_pxl)));

						count++;
					}
				}
			}

			tot_sum=(rsum+gsum+bsum)/3;

			// count includes the centre pixel itself (nx == ny == 0, which
			// contributes a zero difference), hence the division by
			// count-1. With no in-bounds neighbour (count == 1) there is
			// nothing to compare and the division would be by zero, so the
			// pixel is left unflagged.
			if((count>1)&&((tot_sum/(count-1))>v.treshold))
			{
				dev_temp[(loc_temp(y,x,v.sbimg_wdt)+(tid*(v.sbimg_wdt*v.sbimg_hgt)))]=1;
			}
		}
	}
}

Hi everyone,

I am new to CUDA programming and I have a question. I want to use a 1D texture with a multi-dimensional kernel; my code is attached below. I initially fill dev_temp with 0 and need to set only some of its elements to 1, but the values are never updated and I don't know what is happening. Can anyone please tell me why?

kernel launch :

// 16x16 = 256 threads per block. The kernel flattens the 2D thread index into
// a single tid and ignores every thread with tid >= v.n_sbimg, so only the
// total number of launched threads matters, not the grid's shape.
dim3 threads(16,16);
// Round the block count UP. The previous v.n_sbimg/16 truncated to 0 whenever
// v.n_sbimg < 16; a grid dimension of 0 is an invalid launch configuration,
// so the kernel never ran and dev_temp was never written.
dim3 blocks((v.n_sbimg + 16*16 - 1) / (16*16), 1);
// Nothing to do (and an invalid 0-block grid) when there are no sub-images.
if (v.n_sbimg > 0)
	neighbour_pxls<<<blocks,threads>>>(dev_temp,v);
// A failed launch is silent: check cudaGetLastError() right after the launch
// and the return value of the next synchronizing call (e.g. the cudaMemcpy
// that reads dev_temp back) to see configuration and execution errors.

length of dev_temp array is 225
length of tex_pxl is 675

/*
 * neighbour_pxls - flag pixels that differ strongly from their 3x3 neighbourhood.
 *
 * Every thread whose flattened index tid is below v.n_sbimg processes one
 * sub-image. For each pixel (x, y) of that sub-image it visits the 3x3 window
 * around the pixel in whole-image coordinates, skips positions outside
 * [0, v.width) x [0, v.height), and accumulates three sums of absolute
 * differences between texels fetched from tex_pxl at offsets +0, +1 and +2
 * (named r/g/b here; presumably the colour channels - confirm against the
 * texture layout). If the averaged sum per neighbour exceeds v.treshold, the
 * pixel's entry in dev_temp is set to 1.
 *
 * dev_temp : output flags; this kernel only ever writes 1, so the caller must
 *            clear the buffer beforehand.
 * v        : geometry and threshold parameters, passed by value.
 *
 * Launch layout: any 2D grid of 2D blocks works, because the thread index is
 * flattened using the total thread count along x; surplus threads exit early.
 *
 * NOTE(review): loc(), loc_temp(), the two-argument round() and the texture
 * reference tex_pxl are defined outside this snippet; their exact semantics
 * are assumed unchanged here.
 */
__global__ void neighbour_pxls(bool *dev_temp,struct var v)
{
	// Flatten the 2D launch: one row of the flattened space is as wide as
	// the whole grid along x (blockDim.x * gridDim.x threads).
	unsigned int xdir = threadIdx.x + blockIdx.x * blockDim.x;
	unsigned int ydir = threadIdx.y + blockIdx.y * blockDim.y;
	unsigned int tid = xdir + ydir * blockDim.x * gridDim.x;

	int x,y,nx,ny,rsum,gsum,bsum,gny,gnx,count,tot_sum;
	int img_id,n_nx=0,n_ny=0,startx,starty;

	// Threads beyond the number of sub-images have no work.
	if(tid>=v.n_sbimg)
		return;

	// Origin of this thread's sub-image in whole-image coordinates. These
	// depend only on tid and v, so they are computed once rather than once
	// per pixel.
	startx=(tid%(v.width/v.sbimg_wdt))*v.sbimg_wdt;
	starty=round(tid/(v.width/v.sbimg_wdt)*v.sbimg_hgt,v.sbimg_hgt);

	for(y=0;y<v.sbimg_hgt;y++)
	{
		for(x=0;x<v.sbimg_wdt;x++)
		{
			gsum=rsum=bsum=count=0;

			for(nx=-1;nx<=1;nx++)
			{
				for(ny=-1;ny<=1;ny++)
				{
					// Neighbour position in whole-image coordinates.
					gnx=x+nx+startx;
					gny=y+ny+starty;
					if((gnx<v.width)&&(gnx>=0)&&(gny<v.height)&&(gny>=0))
					{
						// The neighbour may lie in an adjacent sub-image:
						// find which one and its local coordinates there.
						img_id=round(gnx/v.sbimg_wdt,1)+round(gny/v.sbimg_hgt,1)*(v.width/v.sbimg_wdt);
						n_nx=gnx%v.sbimg_wdt;
						n_ny=gny%v.sbimg_hgt;

						rsum=rsum+abs(tex1Dfetch(tex_pxl,(loc(y,x,v.sbimg_wdt)+ tid*v.sbimg_pxl))-tex1Dfetch(tex_pxl,(loc(n_ny,n_nx,v.sbimg_wdt)+img_id*v.sbimg_pxl)));
						gsum=gsum+abs(tex1Dfetch(tex_pxl,(loc(y,x,v.sbimg_wdt)+1+ tid*v.sbimg_pxl))-tex1Dfetch(tex_pxl,(loc(n_ny,n_nx,v.sbimg_wdt)+1+img_id*v.sbimg_pxl)));
						bsum=bsum+abs(tex1Dfetch(tex_pxl,(loc(y,x,v.sbimg_wdt)+2+ tid*v.sbimg_pxl))-tex1Dfetch(tex_pxl,(loc(n_ny,n_nx,v.sbimg_wdt)+2+img_id*v.sbimg_pxl)));

						count++;
					}
				}
			}

			tot_sum=(rsum+gsum+bsum)/3;

			// count includes the centre pixel itself (nx == ny == 0, which
			// contributes a zero difference), hence the division by
			// count-1. With no in-bounds neighbour (count == 1) there is
			// nothing to compare and the division would be by zero, so the
			// pixel is left unflagged.
			if((count>1)&&((tot_sum/(count-1))>v.treshold))
			{
				dev_temp[(loc_temp(y,x,v.sbimg_wdt)+(tid*(v.sbimg_wdt*v.sbimg_hgt)))]=1;
			}
		}
	}
}