Constant memory, help needed!

Hi all,

please find below the code I have been trying to run on GTX280, the loadfrombitmap and writergbbitmap functions are just for image loading and transformation(so need not consider),dilation is done in dilate function called from mail. con_d is initialised in constant memory(con_d is the structuring element for dilation, using a diamond shape in 7x7 matrix for dilation, so storing in constant memory). this code compiles properly but after execution this error is coming

                             :Unhandled exception at 0x100113e3 in const_mem.exe: 0xC0000005: Access violation reading location 0x0e0e0000.

#include<stdio.h>
#include<time.h>
#include<stdlib.h>
#include<windows.h>
#include<winbase.h>
#include<conio.h>

define HEIGHT 200

define WIDTH 200

#include<cuda_runtime.h>
unsigned char *frame_d;

unsigned char *writeback_d;

unsigned char writeback_h[200200];
unsigned char writeback[200][200][3];
char tempframe[200
200];
unsigned char pchar[HEIGHTWIDTH3];
void loadFromBitmap(char* fileName,unsigned char buffer[HEIGHT][WIDTH]){
FILE fp;
fp=fopen(fileName,“rb”);
if(fp == NULL){
printf(“Not able to open file \n”);
return;
}
fseek(fp,0L,SEEK_END);
fseek(fp,54L,SEEK_SET);
fread(pchar,WIDTH
HEIGHT3sizeof(unsigned char),1,fp);
int i,j,k=0;
for(i=0; i<HEIGHT; i++)
{
for(j=0; j<WIDTH; j++)
{
buffer[HEIGHT-i][j]=0.2989*(float)pchar[k+2]+0.5870*(float)pchar[k+1]+0.11
40*(float)pchar[k];
k=k+3;
}
}
fclose(fp);

}
void writeRGBBitmap(char* fileName,unsigned char buffer[HEIGHT][WIDTH][3]){
FILE* fp;
fp=fopen(fileName,“wb”);
char c1=‘B’,c2=‘M’;
fwrite(&c1,sizeof(char),1,fp);
fwrite(&c2,sizeof(char),1,fp);
unsigned int i=HEIGHTWIDTH3+54;
fwrite(&i,sizeof(int),1,fp);
short s=0;
fwrite(&s,sizeof(short),1,fp);
fwrite(&s,sizeof(short),1,fp);
i=54;
fwrite(&i,sizeof(int),1,fp);
i=40;
fwrite(&i,sizeof(int),1,fp);
i=WIDTH;
fwrite(&i,sizeof(int),1,fp);
i=HEIGHT;
fwrite(&i,sizeof(int),1,fp);
s=1;
fwrite(&s,sizeof(short),1,fp);
s=24;
fwrite(&s,sizeof(short),1,fp);
i=0;
fwrite(&i,sizeof(int),1,fp);
i=HEIGHTWIDTH3;
fwrite(&i,sizeof(int),1,fp);
i=0;
fwrite(&i,sizeof(int),1,fp);
fwrite(&i,sizeof(int),1,fp);
fwrite(&i,sizeof(int),1,fp);
fwrite(&i,sizeof(int),1,fp);
int j;
for(i=0; i<HEIGHT; i++){
for(j=0; j<WIDTH; j++){
unsigned char b,g,r;
r=buffer[HEIGHT-1-i][j][0];
g=buffer[HEIGHT-1-i][j][1];
b=buffer[HEIGHT-1-i][j][2];
fwrite(&b,sizeof(char),1,fp);
fwrite(&g,sizeof(char),1,fp);
fwrite(&r,sizeof(char),1,fp);
}
}
fclose(fp);
}
device constant char con_d[49];
global void dilate(unsigned char * frame, unsigned char wb)
{
int t=blockDim.x
blockIdx.x+threadIdx.x;
int base_add;
int temp_number=1;
base_add=t;int sz=200200;int start=-1temp_number;int end=-1*temp_number+7;

int ker_add,image_add;

for(int i1=start;i1<end;i1++)
{
	for(int i2=start;i2<end;i2++)            //dilation function
	{
		 ker_add=7*(i1+3)+i2+temp_number;
		 image_add=base_add+i1*200+i2;
		if(image_add<0 || image_add>=sz)
			continue;
		if(frame[image_add]==254 && con_d[ker_add]==1)
		{
			  
		      wb[base_add]=255;
	      goto done;
		}	
					
	}
}

done:

}
unsigned char tempframe1[HEIGHT][WIDTH];
void main(void)
{

loadFromBitmap(“t.bmp”,tempframe1);
char kernel_h[49];

int kernel_size=7;

for(int i=0;i<kernel_size;i++)             //structuring element
{
	for(int j=0;j<kernel_size;j++)
	{
			kernel_h[i*(kernel_size)+j]=0;
	}
}
kernel_h[3]=1;
kernel_h[9]=1;
kernel_h[15]=1;
kernel_h[21]=1;
kernel_h[29]=1;
kernel_h[37]=1;
kernel_h[45]=1;
kernel_h[39]=1;
kernel_h[33]=1;
kernel_h[27]=1;
kernel_h[19]=1;
kernel_h[11]=1;
for(int i=0;i<200;i++)
{
	for(int j=0;j<200;j++)
	{
		
		tempframe[i*200+j]=tempframe1[i][j];	
		
	}
}

cudaMalloc((void **)&writeback_d,200*200*sizeof(unsigned char));
cudaMalloc((void **)&frame_d,200*200*sizeof(unsigned char));

cudaMemcpyToSymbol(con_d,kernel_h,49*sizeof(char));   //constant memory

cudaMemcpy((void **)frame_d,tempframe,200*200*sizeof(unsigned char),cudaMemcpyHostToDevice);


cudaMemset((void **)writeback_d,0,200*200*sizeof(unsigned char));
dilate<<<625,64>>>(frame_d,writeback_d);    //calling kernel
cudaThreadSynchronize();
cudaMemcpy((void **)writeback_h,writeback_d,200*200*sizeof(unsigned char),cudaMemcpyDeviceToHost);


system("PAUSE");	

}

Please help me figure out the problem.

Thanks in advance.