I am new to CUDA. After several days of reading CUDA material I have written a program, but I can't get it to run.
Please take a look at this messy code and give me your advice.
Thanks very much.
/************************************************************
- CUAC.cu
- THIS IS AN EXAMPLE USING CUDA
*********/
#include <stdio.h>
#include <stdlib.h>
#include <gl\glut.h>
#include <cuda_runtime.h>
#include <cutil.h>
#include <cutil_gl_error.h>
// Packed 32-bit pixel value; used as the element type of the result images.
typedef unsigned int TColor;
// Image dimensions in pixels.
int picWidth = 128;
int picHeight = 128;
// Number of scalar components in a 3-channel image of picWidth x picHeight pixels.
int DataSize = 3*(picWidth*picHeight);
// Texture name passed to glBindTexture for the image drawn by display().
#define IMAGETEXTURE 666
/************************************************************
 * Construct mapping tables on the CPU
 ************************************************************/
// Fills MapData with one (row, column, 0) triple per pixel; defined below.
void constructMaps();
// Host-side mapping table: picHeight * picWidth * 3 floats.
// Declared as a pointer because it is malloc'ed and indexed as an array.
float* MapData = NULL;
// Device-side copy of MapData (allocated with cudaMalloc).
float* DeviceMapData = NULL;
// Host buffer that receives the kernel output.
TColor* ResultImage = NULL;
// Device buffer the kernel writes its output into.
TColor* DeviceResultImage = NULL;
/************************************************************
 * OpenGL function declarations
 ************************************************************/
// GLUT display callback; registered with glutDisplayFunc in main().
static void display(void);
// GLUT keyboard callback; registered with glutKeyboardFunc in main().
static void keyboard(unsigned char c, int x, int y);
// Draws a quad textured with `texture`, spanning (startX, statY) to
// (startX + 1, statY + 1). Always returns 0.
int DisplayUnitTexture( GLuint texture, float startX = 0.0, float statY = 0.0 );
/************************************************************
 * Hard-coded image
 ************************************************************/
// Source image: 128 x 128 pixels, 3 bytes per pixel (uploaded as GL_RGB /
// GL_UNSIGNED_BYTE by display()). The initializer list lives in the header.
GLubyte picData[3*(128*128)] = {
#include "demon_image.h" // this is a hard-coded image
};
// Device copy of picData. Typed TColor* but it holds raw bytes.
TColor* DeviceScrImage;
//typedef struct{
// unsigned char x, y, z;
//} uchar3;

// NOTE(review): CUDA texture references accept only 1-, 2- and 4-component
// texel types. uchar3 / float3 are not among them, so both textures below
// presumably need to become uchar4 / float4 (with the uploaded data padded
// to 4 channels and the kernel reading float4). That change touches the
// upload code and the kernel as well, so it is only flagged here.

// Source image texture; 8-bit channels are returned as floats in [0, 1].
texture<uchar3, 2, cudaReadModeNormalizedFloat> texDeamonImage;
// Coordinate map texture. Normalized-float read mode only applies to 8/16-bit
// integer texels, so float data must be read with cudaReadModeElementType.
texture<float3, 2, cudaReadModeElementType> texMapData;
/************************************************************
 * Init CUDA program data
 ************************************************************/
/*
 * Allocates the host and device buffers, builds the coordinate map on the
 * CPU, uploads the map and the source image to the device and binds both
 * to their texture references.
 *
 * Returns true when no CUDA error is pending after setup; returns false if a
 * host allocation fails or a CUDA error was recorded.
 */
bool InitCudaData( void ){
    const size_t mapBytes    = sizeof(float)   * DataSize;            // 3 floats per pixel
    const size_t imageBytes  = sizeof(GLubyte) * DataSize;            // 3 bytes per pixel (picData)
    const size_t resultBytes = sizeof(TColor)  * picWidth * picHeight; // 1 TColor per pixel

    MapData = (float*)malloc( mapBytes );
    ResultImage = (TColor*)malloc( resultBytes );
    if( MapData == NULL || ResultImage == NULL ){
        fprintf(stderr, "InitCudaData: host allocation failed.\n");
        free(MapData);
        free(ResultImage);
        MapData = NULL;
        ResultImage = NULL;
        return false;
    }

    constructMaps(); // build the mapping information

    // Coordinate map: upload and bind.
    CUDA_SAFE_CALL( cudaMalloc((void**) &DeviceMapData, mapBytes) );
    CUT_CHECK_ERROR("Allocate");
    CUDA_SAFE_CALL( cudaMemcpy(DeviceMapData, MapData, mapBytes, cudaMemcpyHostToDevice) );
    // NOTE(review): cudaBindTexture binds linear memory, which is normally
    // fetched with tex1Dfetch. The kernel uses tex2D, which presumably needs
    // a CUDA array (cudaBindTextureToArray) or cudaBindTexture2D - confirm.
    CUDA_SAFE_CALL( cudaBindTexture(0, texMapData, DeviceMapData, mapBytes) );

    // Source image: upload and bind.
    CUDA_SAFE_CALL( cudaMalloc((void**) &DeviceScrImage, imageBytes) );
    CUDA_SAFE_CALL( cudaMemcpy(DeviceScrImage, picData, imageBytes, cudaMemcpyHostToDevice) );
    CUDA_SAFE_CALL( cudaBindTexture(0, texDeamonImage, DeviceScrImage, imageBytes) );

    // Output buffer written by the kernel.
    CUDA_SAFE_CALL( cudaMalloc((void**) &DeviceResultImage, resultBytes) );

    // Report whatever error is still pending and reflect it in the result.
    const cudaError_t status = cudaGetLastError();
    printf(" CUDA Error :%s\n",cudaGetErrorString(status));
    return status == cudaSuccess;
}
/*
 * Releases everything InitCudaData() set up: unbinds both textures, frees
 * the device buffers, then frees the host buffers.
 *
 * Every pointer is reset to NULL afterwards so that a second call is a
 * harmless no-op instead of a double free.
 *
 * Returns true when every CUDA call succeeded, false otherwise.
 */
bool CleanUpData(void){
    bool ok = true;

    // Unbind first so no texture keeps referring to memory that is about to be freed.
    ok = (cudaUnbindTexture(texMapData) == cudaSuccess) && ok;
    ok = (cudaUnbindTexture(texDeamonImage) == cudaSuccess) && ok;

    ok = (cudaFree(DeviceMapData) == cudaSuccess) && ok;
    ok = (cudaFree(DeviceScrImage) == cudaSuccess) && ok;
    ok = (cudaFree(DeviceResultImage) == cudaSuccess) && ok;
    DeviceMapData = NULL;
    DeviceScrImage = NULL;
    DeviceResultImage = NULL;

    free(MapData);
    free(ResultImage);
    MapData = NULL;
    ResultImage = NULL;

    return ok;
}
/************************************************************
 * Init CUDA
 ************************************************************/
/*
 * Selects the first device whose compute capability major version is >= 1
 * and makes it the current CUDA device.
 *
 * Returns true on success. Returns false (after printing a message to
 * stderr) when the device count cannot be queried, no device exists, no
 * device qualifies, or the device cannot be selected.
 */
bool InitCUDA(void)
{
    int count = 0;
    int i = 0;

    // A failed query is treated the same as "no device".
    if(cudaGetDeviceCount(&count) != cudaSuccess || count == 0) {
        fprintf(stderr, "There is no device.\n");
        return false;
    }

    for(i = 0; i < count; i++) {
        cudaDeviceProp prop;
        if(cudaGetDeviceProperties(&prop, i) == cudaSuccess) {
            if(prop.major >= 1) {
                break;
            }
        }
    }
    if(i == count) {
        fprintf(stderr, "There is no device supporting CUDA 1.x.\n");
        return false;
    }

    if(cudaSetDevice(i) != cudaSuccess) {
        fprintf(stderr, "Failed to select CUDA device %d.\n", i);
        return false;
    }
    return true;
}
/*
 * Packs three colour components into one TColor:
 *   bits  0-7  : r
 *   bits  8-15 : g
 *   bits 16-23 : b
 * Each component is multiplied by 255 and truncated to int before packing.
 * Inputs are presumably in [0, 1] (the kernel passes values read from a
 * normalized-float texture); values outside that range are not clamped.
 */
__device__ TColor make_color(float r, float g, float b){
    return
        ( ((int)(b * 255.0f) << 16) |
          ((int)(g * 255.0f) << 8)  |
          ((int)(r * 255.0f) << 0 ) );
}
/*
 * CudaMapWorker: for every output pixel (ix, iy), reads a source coordinate
 * from texMapData, samples texDeamonImage at that coordinate and stores the
 * packed colour in DeviceResultImage[imageW * iy + ix].
 *
 * Launch layout: 2D grid of 2D blocks, one thread per output pixel. The grid
 * may overshoot the image; threads outside imageW x imageH return at once.
 * DeviceResultImage must hold at least imageW * imageH TColor entries.
 *
 * NOTE(review): tex2D has no overload for 3-component texels; the float3
 * reads below presumably have to become float4 once the texture references
 * are declared with 4-component types.
 */
__global__ static void CudaMapWorker( TColor* DeviceResultImage, int imageW = 128, int imageH = 128 )
{
    // Global pixel coordinates: block offset plus position inside the block.
    const int ix = blockIdx.x * blockDim.x + threadIdx.x;
    const int iy = blockIdx.y * blockDim.y + threadIdx.y;
    if (ix >= imageW || iy >= imageH) {
        return; // padding thread from a rounded-up grid
    }

    float3 mapinfo = tex2D( texMapData, (float)ix, (float)iy );
    float3 fresult = tex2D( texDeamonImage, mapinfo.x, mapinfo.y );
    DeviceResultImage[imageW * iy + ix] = make_color(fresult.x, fresult.y, fresult.z);
}

/*
 * Program entry point: creates the GLUT window, initializes CUDA and the
 * image/map buffers, runs CudaMapWorker once over the whole image, copies
 * the result back into ResultImage and enters the GLUT main loop.
 *
 * Returns EXIT_FAILURE if CUDA setup, the kernel launch or the copy-back
 * fails.
 */
int main(int argc, char** argv)
{
    glutInitWindowSize(400, 400);
    glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE );
    glutInit(&argc, argv);
    glutCreateWindow("Acceleration Worker Procedure");
    glutPositionWindow( 600, 0 );
    glutDisplayFunc(display);
    glutKeyboardFunc(keyboard);
    glClearColor(0.1f, 0.3f, 0.6f, 0.0f);

    if(!InitCUDA()) return EXIT_FAILURE;
    printf("CUDA initialized.\n");
    if(!InitCudaData()) {
        fprintf(stderr, "CUDA data initialization failed.\n");
        return EXIT_FAILURE;
    }

    // One thread per pixel. A single 128x128 block would need 16384 threads,
    // far above the per-block limit, so tile the image with 16x16 blocks and
    // round the grid up to cover sizes that are not a multiple of the tile.
    const int tile = 16;
    dim3 dimBlock( tile, tile );
    dim3 dimGrid( (picWidth + tile - 1) / tile, (picHeight + tile - 1) / tile );
    CudaMapWorker<<<dimGrid, dimBlock>>>(DeviceResultImage, picWidth, picHeight);

    // A launch does not return an error itself; query it explicitly.
    cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess) {
        fprintf(stderr, "CudaMapWorker launch failed: %s\n", cudaGetErrorString(status));
        CleanUpData();
        return EXIT_FAILURE;
    }

    // Device -> host: destination is the host buffer, one TColor per pixel.
    // The blocking copy also surfaces errors raised while the kernel ran.
    status = cudaMemcpy( ResultImage, DeviceResultImage,
                         sizeof(TColor) * picWidth * picHeight,
                         cudaMemcpyDeviceToHost );
    if (status != cudaSuccess) {
        fprintf(stderr, "Copying the result image failed: %s\n", cudaGetErrorString(status));
        CleanUpData();
        return EXIT_FAILURE;
    }
    // NOTE(review): display() uploads picData, so ResultImage is computed
    // here but not yet shown anywhere - confirm what should be displayed.

    glutMainLoop();
    // NOTE(review): classic GLUT never returns from glutMainLoop and the Esc
    // handler calls exit(0), so the lines below are presumably unreachable.
    CleanUpData();
    getchar();
    return 0;
}
/************************************************************
 * The function below is the OpenGL display function
 ************************************************************/
/*
 * GLUT display callback: clears the colour buffer, uploads picData as the
 * IMAGETEXTURE 2D texture, draws it on a unit quad whose first corner is at
 * (-1, 0), and swaps the buffers.
 */
static void display(void)
{
    glClear(GL_COLOR_BUFFER_BIT );
    glBindTexture(GL_TEXTURE_2D,IMAGETEXTURE);

    // picData rows are tightly packed 3-byte pixels; do not assume 4-byte row alignment.
    glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
    // The default minification filter uses mipmaps. Only level 0 is uploaded,
    // so without these the texture is incomplete and is not applied.
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);

    glTexImage2D(GL_TEXTURE_2D, 0,GL_RGB8, picWidth, picHeight , 0,GL_RGB, GL_UNSIGNED_BYTE, picData);
    DisplayUnitTexture( IMAGETEXTURE,-1.0,0.0 );
    glutSwapBuffers();
}
// Utility function: conveniently displays an image inside a unit square
/*
 * Draws one quad textured with `texture`.
 *
 * The quad spans (startX, statY) to (startX + 1, statY + 1) and the texture
 * coordinates run from (0, 0) to (1, 1) over the same corners. 2D texturing
 * is enabled for the draw and disabled again afterwards.
 *
 * Always returns 0.
 */
inline int DisplayUnitTexture( GLuint texture, float startX , float statY ){
    // Corner offsets, in the order the quad's vertices are emitted. Each
    // pair serves both as texture coordinate and as offset from the origin.
    static const float corner[4][2] = {
        { 0.0f, 0.0f },
        { 1.0f, 0.0f },
        { 1.0f, 1.0f },
        { 0.0f, 1.0f }
    };

    glEnable( GL_TEXTURE_2D );
    glBindTexture( GL_TEXTURE_2D, texture );

    glBegin(GL_QUADS);
    for (int k = 0; k < 4; ++k) {
        const float du = corner[k][0];
        const float dv = corner[k][1];
        glTexCoord2f( du, dv );
        glVertex2f( startX + du, statY + dv );
    }
    glEnd();

    glDisable( GL_TEXTURE_2D );
    return 0;
}
/*
 * GLUT keyboard callback.
 *
 * c    - ASCII code of the pressed key
 * x, y - mouse position reported with the key press (unused)
 *
 * Esc (27) terminates the program with exit(0); 't' is reserved for a timer
 * report that is currently commented out; every other key is ignored.
 */
static void keyboard(unsigned char c, int x, int y)
{
    (void)x;
    (void)y;
    switch (c) {
    case 27: /* Esc key */
        exit(0);
        break;
    case 't':
        //showTimerResult();
        break;
    }
}
/************************************************************
 * The functions below build the mapping table
 ************************************************************/
/*
 * Exchanges two runs of ints element by element.
 *
 * fromIndex  - first element of the first run
 * toIndex    - first element of the second run
 * blockWidth - number of elements to exchange; <= 0 does nothing
 *
 * Passing the same pointer for both runs leaves the data unchanged. Partly
 * overlapping runs are not supported. NULL pointers are ignored.
 */
void swapBlocks( int* fromIndex, int* toIndex, int blockWidth )
{
    if (!fromIndex || !toIndex) {
        return;
    }
    for (int i = 0; i < blockWidth; ++i) {
        const int temp = fromIndex[i];
        fromIndex[i] = toIndex[i];
        toIndex[i] = temp;
    }
}
// Create a scrambled mapping
/*
 * Fills MapData with one (row, column, 0) float triple per pixel.
 *
 * Rows and columns start as the identity permutation. Each axis is cut into
 * cellCount equal blocks, and block i is exchanged with block RowMap[i]
 * (rows) or ColMap[i] (columns) for the first two values of i. The entry for
 * pixel (row i, column j) is stored at MapData[(i * picWidth + j) * 3].
 *
 * MapData must already point to at least picHeight * picWidth * 3 floats.
 * Prints a message and returns without writing if it is NULL or the
 * temporary tables cannot be allocated.
 */
void constructMaps()
{
    const int cellCount = 4;
    const int RowMap[4] = { 3, 2, 1, 0 } /*{ 0, 1, 2, 3 }*/ ;
    const int ColMap[4] = { 0, 1, 2, 3 };
    int i = 0, j = 0;

    int* Row = (int*)malloc(sizeof(int) * picHeight);
    int* Col = (int*)malloc(sizeof(int) * picWidth);
    if (Row == NULL || Col == NULL || MapData == NULL) {
        fprintf(stderr, "constructMaps: buffers are not available.\n");
        free(Row);
        free(Col);
        return;
    }

    for (i = 0; i < picHeight; i++) Row[i] = i;
    for (i = 0; i < picWidth; i++) Col[i] = i;

    // Each axis uses its own extent for the block size.
    const int RowBlockWidth = picHeight / cellCount;
    const int ColBlockWidth = picWidth / cellCount;

    // Only the first two exchanges are performed: with a symmetric map such
    // as {3,2,1,0}, doing all four would swap every block back again.
    for (i = 0; i < 2; i++)
    {
        swapBlocks( &Row[ i * RowBlockWidth ],
                    &Row[ RowMap[i] * RowBlockWidth ],
                    RowBlockWidth );
        // Columns follow ColMap. The table was declared for this purpose but
        // RowMap was used for both axes before.
        swapBlocks( &Col[ i * ColBlockWidth ],
                    &Col[ ColMap[i] * ColBlockWidth ],
                    ColBlockWidth );
        // scramble the row/column correspondence
    }

    // Walk the table in storage order (row-major).
    for (i = 0; i < picHeight; i++)
    {
        for (j = 0; j < picWidth; j++)
        {
            float* p = &MapData[ (i * picWidth + j) * 3 ];
            p[0] = (float) ( Row[i] );
            p[1] = (float) ( Col[j] );
            p[2] = 0;
        }
    }

    // The permutation tables are only needed while building the map.
    free(Row);
    free(Col);
}