Hello :) I have tried CUDA about a month,and
I’m working hard on a pi-computing program.
After debuging a few times,still I can’t fix an error…
1>------ Build started: Project: win32-console-app-cuda-msvc2005, Configuration: Debug Win32 ------
1>Compiling...
1>PI.cu
1>"C:\CUDA\include\common_functions.h", line 55: warning: dllexport/dllimport
1> conflict with "clock" (declared at line 176 of "C:\Program
1> Files\Microsoft Visual Studio 8\VC\INCLUDE\time.h");
1> dllimport/dllexport dropped
1> extern __declspec(__host__) __declspec(__device__) clock_t clock(void);
1> ^
1>"PI.cu", line 39: error: call can not be configured
1> _incircle<<<grid,threads>>>(dev_sum,log_r,i,j);
1> ^
1>"PI.cu", line 102: error: identifier "atomicAdd" is undefined
1> atomicAdd(&dev_sum[bidx_x+gdim_x*i],5);
1> ^
1>2 errors detected in the compilation of "C:\DOCUME~1\bbs\LOCALS~1\Temp/tmpxft_00000c70_00000000-5.ii".
1>Build log was saved at "file://c:\Documents and Settings\circ\My Documents\Visual Studio 2005\Projects\PI\Debug\BuildLog.htm"
1>win32-console-app-cuda-msvc2005 - 2 error(s), 1 warning(s)
========== Build: 0 succeeded, 1 failed, 0 up-to-date, 0 skipped ==========
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "cuda_runtime.h"
#include "cutil.h"
#include "device_functions.h"
#include "sm_11_atomic_functions.h"
#define radium 100000000
void cudaInit(void);
void _incircle(int *dev_sum,float log_r,int i,int j);
int _CONFIG(int *sum,long long r);
int main() {
cudaInit();//Initialize the CUDA device and display device properties
printf("Start Calculating PI~\n");
//Allocate the array for PI on host
size_t data_size=sizeof(int)*250000;
int *sum=(int*)malloc(data_size);
//Allocate the array on device and write data from the host array
int *dev_sum;
CUDA_SAFE_CALL(cudaMalloc((void**)&dev_sum,data_size));
CUDA_SAFE_CALL(cudaMemset((void*)dev_sum,-1,data_size));//Memset the device array to -1
//Indicate the grid size and block size
dim3 grid(62500, 31250, 1);
dim3 threads(32, 16, 1);
float log_r=log((float)radium);
//Run the kernel
for(int i=0;i<50;i++){
for(int j=0;j<200;j++){
incircle<<<grid,threads>>>(dev_sum,log_r,i,j);
}
}
CUDA_SAFE_CALL(cudaMemcpy(sum,dev_sum,data_size,cudaMemcpyDeviceToHost));
int size=_CONFIG(sum,radium);//Configure the value of array
//write the result into "pi_data.txt"
FILE *PI;
fopen_s(&PI,"pi_data.txt","w,ccs=<UNICODE>");
for(int i=249999+size;i<250000;i++) {
fprintf_s(PI,"%d",sum[i]);
}
fclose(PI);
printf("Calculating succeeded --> pi_data.txt\n");
cudaFree(dev_sum);
free(sum);
system("pause");
return 0;
}
It seems the block and grid size should make sense,
and so should the arrays in shared memory.
But I just can’t figure it out…
The other error could result from the nvcc command line
nvcc.exe -ccbin “C:\Program Files\Microsoft Visual Studio 8\VC\bin” -c -DWIN32 -D_DEBUG -D_CONSOLE -Xcompiler “/EHsc /W3 /nologo /Wp64 /Od /Zi /MDd /GR” -I"C:\CUDA\include" -o Debug\win32-console-app-cuda-msvc2005.obj win32-console-app-cuda-msvc2005.vcproj