Crashed for unknown reason Newbie question

I am new to CUDA world and after days of reading cuda material I have figured out a program but can’t get it run
Please help look at these messy codes and bring your advice for me
thanks very much.

/************************************************************


  • CUAC.cu
  • THIS IS AN EXAMPLE USING CUDA

*********/

#include <stdio.h>
#include <stdlib.h>
#include <gl\glut.h>
#include <cuda_runtime.h>
#include <cutil.h>
#include <cutil_gl_error.h>

typedef unsigned int TColor;

int picWidth = 128;
int picHeight = 128;
int DataSize = 3*(picWidth*picHeight);

#define IMAGETEXTURE 666

/************************************************************
/
/
Construct Mapping Tables on CPU ---------------------------------/
/
*************************************************
********/
void constructMaps();
float
MapData;//TABLE STORE MAP RELATIONS :HEIGHT
WIDTH
3
float
DeviceMapData;

TColor* ResultImage;
TColor* DeviceResultImage;

/************************************************************
/
/
OPENGL 函数调用声明 -----------------------------------------------/
/
*************************************************
************/
static void display(void);
static void keyboard(unsigned char c, int x, int y);
int DisplayUnitTexture( GLuint texture, float startX = 0.0, float statY = 0.0 );

/************************************************************
/
/
ç¡¬ç¼–ç çš„å›¾åƒ--------------------------------------------------------/
/
*************************************************
**********/
//
GLubyte picData[3
(128
128)] = {
#include “demon_image.h”//this is a hard coded image
};

TColor* DeviceScrImage;

//typedef struct{
// unsigned char x, y, z;
//} uchar3;

texture<uchar3, 2, cudaReadModeNormalizedFloat> texDeamonImage;
texture<float3,2,cudaReadModeNormalizedFloat> texMapData;

/************************************************************
/
/
Init CUDA Programm Data /
/
************************************************
************/

bool InitCudaData( void ){
MapData = (float*)malloc( sizeof(float) * DataSize );
ResultImage = (TColor*)malloc( sizeof(TColor) * DataSize );
constructMaps();//æž„å»ºæ˜ å°„ä¿¡æ¯

CUDA_SAFE_CALL( cudaMalloc((void**) &DeviceMapData, sizeof(float) * DataSize) );
CUT_CHECK_ERROR("Allocate");
CUDA_SAFE_CALL(cudaMemcpy(DeviceMapData, MapData, sizeof(float) * DataSize , cudaMemcpyHostToDevice));
cudaBindTexture(0, texMapData, DeviceMapData, sizeof(float) * DataSize);

CUDA_SAFE_CALL( cudaMalloc((void**) &DeviceScrImage, sizeof(float) * DataSize) );
CUDA_SAFE_CALL(cudaMemcpy(DeviceScrImage, picData, sizeof(char) * DataSize , cudaMemcpyHostToDevice));
cudaBindTexture(0, texDeamonImage, DeviceScrImage, sizeof(char) *DataSize);


CUDA_SAFE_CALL( cudaMalloc((void**) &DeviceResultImage, sizeof(TColor) * DataSize) );
printf(" CUDA Error :%s\n",cudaGetErrorString(cudaGetLastError()));
return true;

}

bool CleanUpData(void){

cudaFree(DeviceMapData);
cudaFree(DeviceScrImage);
cudaFree(DeviceResultImage);
free(MapData);
free(ResultImage);

return true;

}

/************************************************************
/
/
Init CUDA /
/
************************************************
************/
bool InitCUDA(void)
{
int count = 0;
int i = 0;

cudaGetDeviceCount(&count);
if(count == 0) {
	fprintf(stderr, "There is no device.\n");
	return false;
}

for(i = 0; i < count; i++) {
	cudaDeviceProp prop;
	if(cudaGetDeviceProperties(&prop, i) == cudaSuccess) {
		if(prop.major >= 1) {
			break;
		}
	}
}
if(i == count) {
	fprintf(stderr, "There is no device supporting CUDA 1.x.\n");
	return false;
}
cudaSetDevice(i);
return true;

}

device TColor make_color(float r, float g, float B){
return
( ((int)(b * 255.0f) << 16) |
((int)(g * 255.0f) << 8) |
((int)(r * 255.0f) << 0 ) );
}

global static void CudaMapWorker( TColor* DeviceResultImage, int imageW = 128, int imageH = 128 )
{
const int ix = threadIdx.x;
const int iy = threadIdx.y;
float3 mapinfo = tex2D( texMapData , ix, iy);

float3 fresult = tex2D( texDeamonImage , mapinfo.x , mapinfo.y );
DeviceResultImage[imageW * iy + ix] = make_color(fresult.x, fresult.y, fresult.z);

}

int main(int argc, char** argv)
{
glutInitWindowSize(400, 400);
glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE );
glutInit(&argc, argv);
glutCreateWindow(“Acceleration Worker Procedure”);
glutPositionWindow( 600, 0 );
glutDisplayFunc(display);
glutKeyboardFunc(keyboard);
glClearColor(0.1, 0.3, 0.6, 0.0);

if(!InitCUDA()) 	return 0;
printf("CUDA initialized.\n");

InitCudaData();
dim3 dimBlock( picHeight, picWidth);
dim3 dimGrid(1, 1);
CudaMapWorker<<<dimGrid, dimBlock>>>(DeviceResultImage, 128 , 128);


CUDA_SAFE_CALL(  cudaMemcpy( DeviceResultImage, ResultImage, sizeof(char) *DataSize, cudaMemcpyDeviceToHost) );

glutMainLoop();
CleanUpData();

getchar();

return 0;

}

/************************************************************
/
/下面的函数是OPENGL的显示函数 /
/
**********************************************
************/
static void display(void)
{
glClear(GL_COLOR_BUFFER_BIT );

glBindTexture(GL_TEXTURE_2D,IMAGETEXTURE);
glTexImage2D(GL_TEXTURE_2D, 0,GL_RGB8,  picWidth, picHeight , 0,GL_RGB, GL_UNSIGNED_BYTE, picData);
DisplayUnitTexture( IMAGETEXTURE,-1.0,0.0 );

glutSwapBuffers();

}

//Utilå‡½æ•°ï¼Œæ–¹ä¾¿çš„å°†å›¾åƒæ˜¾ç¤ºåœ¨ä¸€ä¸ªå•ä½æ–¹æ ¼å†…
inline int DisplayUnitTexture( GLuint texture, float startX , float statY ){
glEnable( GL_TEXTURE_2D );
glBindTexture( GL_TEXTURE_2D, texture );
glBegin(GL_QUADS);
glTexCoord2f(0, 0);
glVertex2f( startX, statY );

glTexCoord2f(1, 0);
glVertex2f( startX +1 , statY );

glTexCoord2f(1, 1);
glVertex2f( startX +1 , statY+1 );

glTexCoord2f(0, 1);
glVertex2f( startX  , statY+1 );
glEnd();
glDisable( GL_TEXTURE_2D );
return 0;

}

static void keyboard(unsigned char c, int x, int y)
{
switch © {
case 27: /* Esc key */
exit(0);
break;

case ‘t’:
//showTimerResult();
break;
}
}

/************************************************************
/
/ä¸‹é¢çš„å‡½æ•°ç”¨æ¥æž„å»ºæ˜ å°„è¡¨ /
/
**********************************************
**********/
void swapBlocks( int
fromIndex, int
toIndex, int blockWidth )
{
int temp = 0; //temp variable for swap
int i = 0;
for ( i = 0 ; i< blockWidth ; i++)
{
temp = *fromIndex;
*fromIndex = *toIndex;
*toIndex = temp;
fromIndex++;
toIndex++;
}
}

//åˆ›å»ºä¸€ä¸ªæ··ä¹±çš„æ˜ å°„
void constructMaps()
{
int cellCount = 4;

int RowMap[4]		={ 3,2,1,0 }/*{ 0, 1, 2, 3 }*/ ;
int ColMap[4]		={ 0, 1, 2, 3 };
int* Row = (int*)malloc(sizeof(int)*picHeight);//new int[picHeight];
int* Col = (int*)malloc(sizeof(int)*picWidth);//new int[picWidth];

int i=0, j=0;
float* p = NULL;
int loc = 0;
int RowBlockWidth ;	
int ColBlockWidth;

for (  i=0 ; i<picHeight ; i++) Row[i] = i;
for (  i=0 ; i<picWidth ; i++) Col[i] = i;

RowBlockWidth	= picHeight/cellCount;
ColBlockWidth		= picHeight/cellCount;

for (  i=0; i<2; i++ )//只要做前两个的置换,如果做完4个就回去了 
{
	swapBlocks(	&Row[ i*RowBlockWidth ], 
		&Row[  RowMap[i] *RowBlockWidth],
		RowBlockWidth );

	swapBlocks(	&Col[ i*ColBlockWidth ], 
		&Col[  RowMap[i] *ColBlockWidth],
		ColBlockWidth );
	//打乱行列对应关系
}

for (  j=0; j < picWidth; j++)
{
	for (  i=0; i< picHeight; i++ )
	{
		loc = ( i * picWidth + j ) *3;
		p = &MapData[loc];
		p[0] = /*(GLubyte)*/ (float) ( Row[i] );
		p[1] = /*(GLubyte)*/ (float) ( Col[j] );
		p[2] = 0;
	}
}

}