Hi all,
I'm new to CUDA programming and I want to test this technology on my new 8600M GT because I like parallel programming. I spent a few days reading the documentation, but I don't have much time to dedicate to it, and today I tried a program taken from this forum. I created a .cu file with this code:
#include <stdio.h>
#include <stdlib.h>
#include <cutil.h>
#include <cuda_runtime_api.h>
#include <device_launch_parameters.h>
#define PARALLEL 1
#define BLOCK_SIZE 16
#define GRID_SIZE 20
#define FLAG_SET 8
#define FLAG_UNSET 0
/*
 * work: busy-loop kernel that marks one cell of `table` per thread.
 *
 * table: device buffer treated as a row-major 2D array whose row width is
 *        BLOCK_SIZE * GRID_SIZE bytes. The thread at (tx, ty) in block
 *        (bx, by) writes FLAG_SET to row (BLOCK_SIZE * by + ty),
 *        column (BLOCK_SIZE * bx + tx).
 *
 * The indexing uses the BLOCK_SIZE / GRID_SIZE macros rather than
 * blockDim / gridDim, so the launch configuration has to agree with them.
 * There is no bounds check; the buffer must hold at least
 * (BLOCK_SIZE * GRID_SIZE)^2 bytes.
 *
 * NOTE(review): the "heavy job" is twelve perfectly nested loops of `times`
 * iterations each, and the values computed inside are never used. A compiler
 * is free to drop the arithmetic; if the loops are actually executed, the
 * kernel will not finish in any practical amount of time. The iteration
 * count is left unchanged here because the intended workload is unknown.
 */
__global__ void work(char* table)
{
    // Block index
    const int bx = blockIdx.x;
    const int by = blockIdx.y;
    // Thread index
    const int tx = threadIdx.x;
    const int ty = threadIdx.y;

    // do a heavy job
    // The bound fits in a 32-bit unsigned int; the `u` suffix keeps the
    // literal unsigned so every comparison below is done on unsigned int.
    const unsigned int times = 4294180900u;
    unsigned int i = 0;
    for (unsigned int k = 0; k < times; k++)
      for (unsigned int w = 0; w < times; w++)
        for (unsigned int e = 0; e < times; e++)
          for (unsigned int r = 0; r < times; r++)
            for (unsigned int t = 0; t < times; t++)
              for (unsigned int y = 0; y < times; y++)
                for (unsigned int u = 0; u < times; u++)
                  for (unsigned int a = 0; a < times; a++)
                    for (unsigned int s = 0; s < times; s++)
                      for (unsigned int d = 0; d < times; d++)
                        for (unsigned int z = 0; z < times; z++)
                          for (i = 0; i < times; i++) {
                              // `f` suffixes keep the arithmetic in single
                              // precision instead of promoting to double.
                              float b = (3809.023f * 238.3901f) / 66.215f - 222.321f;
                              float bb = 33.02f;
                              bb += b * b / (tx + 1);
                          }

    // The innermost loop only exits once i has reached `times`.
    if (i == times) {
        int width = BLOCK_SIZE * GRID_SIZE;
        // Linear index of the first cell of this block's tile.
        int offset = width * BLOCK_SIZE * by + BLOCK_SIZE * bx;
        table[offset + width * ty + tx] = FLAG_SET;
    }
}
/*
 * runTest: host-side driver for the `work` kernel.
 *
 * Allocates a (BLOCK_SIZE * GRID_SIZE)^2 byte table on the host, clears it to
 * FLAG_UNSET, copies it to the device, launches `work` on a
 * GRID_SIZE x GRID_SIZE grid of BLOCK_SIZE x BLOCK_SIZE blocks, copies the
 * table back and counts how many cells are not FLAG_SET.
 *
 * argc, argv: accepted for the caller's convenience; not used here.
 *
 * Returns early (after printing a message) if the host allocation fails.
 *
 * NOTE(review): cutCreateTimer / cutStartTimer / cutStopTimer /
 * cutGetTimerValue / cutDeleteTimer have to be resolved at link time;
 * presumably they live in the SDK's cutil library (e.g. -lcutil with -L
 * pointing at the SDK lib directory) - confirm against the SDK install.
 */
void runTest(int argc, char** argv)
{
    CUT_DEVICE_INIT();

    // sizeof yields size_t; cast so the argument matches the %d conversion.
    printf("sizeof(char)=%d\n", (int) sizeof(char));
    printf("sizeof(short)=%d\n", (int) sizeof(short));
    printf("sizeof(int)=%d\n", (int) sizeof(int));
    printf("sizeof(long)=%d\n", (int) sizeof(long));

    // init a table to be filled
    unsigned int size = (BLOCK_SIZE * GRID_SIZE) * (BLOCK_SIZE * GRID_SIZE);
    unsigned int mem_size = sizeof(char) * size;
    // %u because these values are unsigned int.
    printf("table size = %u\n", size);
    printf("mem_size = %u bytes, which is %u kb, or %u mb\n",
           mem_size, mem_size / 1024, mem_size / 1024 / 1024);

    char* h_table = (char*) malloc(mem_size);
    if (h_table == NULL) {
        // Nothing else has been allocated yet, so bail out instead of
        // writing through a NULL pointer below.
        printf("Memory allocation failed!\n");
        return;
    }

    // unset table
    for (unsigned int i = 0; i < size; ++i)
        h_table[i] = FLAG_UNSET;

    // allocate device memory
    char* d_table;
    CUDA_SAFE_CALL(cudaMalloc((void**) &d_table, mem_size));

    // copy host memory to device
    CUDA_SAFE_CALL(cudaMemcpy(d_table, h_table, mem_size,
                              cudaMemcpyHostToDevice));

    // create and start timer
    unsigned int timer = 0;
    CUT_SAFE_CALL(cutCreateTimer(&timer));
    CUT_SAFE_CALL(cutStartTimer(timer));

    // setup execution parameters
    dim3 block(BLOCK_SIZE, BLOCK_SIZE);
    dim3 grid(GRID_SIZE, GRID_SIZE);

    // execute the kernel
    printf("Ready to send job.\n");
    work<<< grid, block >>>(d_table);
    printf("Job sent!\n");

    // check if kernel execution generated an error
    CUT_CHECK_ERROR("Kernel execution failed");

    if (PARALLEL)
        printf("Parallel time!\n");
    else {
        printf("Before Thread Sync.\n");
        cudaThreadSynchronize();
        printf("After Thread Sync.\n");
    }

    // copy result from device to host
    CUDA_SAFE_CALL(cudaMemcpy(h_table, d_table, mem_size,
                              cudaMemcpyDeviceToHost));
    printf("Data copied back!\n");

    // stop and destroy timer
    // The timer was started before the launch and is stopped only after the
    // copy back, so the reported time covers the copy as well.
    CUT_SAFE_CALL(cutStopTimer(timer));
    printf("Processing time: %f (ms) \n", cutGetTimerValue(timer));
    CUT_SAFE_CALL(cutDeleteTimer(timer));

    // check result
    unsigned int fail_count = 0;
    for (unsigned int i = 0; i < size; ++i) {
        if (h_table[i] != FLAG_SET)
            fail_count++;
    }
    if (fail_count == 0)
        printf("=========== Success! All data is set! =============\n");
    else
        printf("*********** Failed! %u unset! **************\n", fail_count);

    // clean up memory
    free(h_table);
    CUDA_SAFE_CALL(cudaFree(d_table));
}
/* Program entry point: forwards the command line to runTest and exits. */
int main(int argc, char** argv)
{
    runTest(argc, argv);
    return 0;
}
and I tried to compile it with:
/usr/local/cuda/bin/nvcc test1cpp.cu -o test1 -I /home/Amirecron/NVIDIA_CUDA_SDK/common/inc -L /usr/local/cuda/include/
Result was:
/tmp/tmpxft_000055cf_00000000-9.o: In function `runTest(int, char**)':
tmpxft_000055cf_00000000-8.i:(.text+0x6e46): undefined reference to `cutCreateTimer'
tmpxft_000055cf_00000000-8.i:(.text+0x6e51): undefined reference to `cutStartTimer'
tmpxft_000055cf_00000000-8.i:(.text+0x6f24): undefined reference to `cutStopTimer'
tmpxft_000055cf_00000000-8.i:(.text+0x6f2f): undefined reference to `cutGetTimerValue'
tmpxft_000055cf_00000000-8.i:(.text+0x6f4a): undefined reference to `cutDeleteTimer'
collect2: ld returned 1 exit status
Can anyone help me? I'm not a professional programmer; I do this as a hobby and I don't know much about C programming.
Thanks for helping,
Rief