The kernel call cannot be configured (CUDA 1.1 on VC 2005)

Hello :) I have been trying CUDA for about a month, and

I’m working hard on a pi-computing program.

After debugging a few times, I still can’t fix an error…

1>------ Build started: Project: win32-console-app-cuda-msvc2005, Configuration: Debug Win32 ------



1>"C:\CUDA\include\common_functions.h", line 55: warning: dllexport/dllimport

1>          conflict with "clock" (declared at line 176 of "C:\Program

1>          Files\Microsoft Visual Studio 8\VC\INCLUDE\time.h");

1>          dllimport/dllexport dropped

1>  extern __declspec(__host__) __declspec(__device__) clock_t clock(void);

1>                                                             ^

1>"", line 39: error: call can not be configured

1>    _incircle<<<grid,threads>>>(dev_sum,log_r,i,j);

1>    ^

1>"", line 102: error: identifier "atomicAdd" is undefined

1>    atomicAdd(&dev_sum[bidx_x+gdim_x*i],5);

1>    ^

1>2 errors detected in the compilation of "C:\DOCUME~1\bbs\LOCALS~1\Temp/tmpxft_00000c70_00000000-5.ii".

1>Build log was saved at "file://c:\Documents and Settings\circ\My Documents\Visual Studio 2005\Projects\PI\Debug\BuildLog.htm"

1>win32-console-app-cuda-msvc2005 - 2 error(s), 1 warning(s)

========== Build: 0 succeeded, 1 failed, 0 up-to-date, 0 skipped ==========

#include <stdio.h>

#include <stdlib.h>

#include <math.h>

#include "cuda_runtime.h"

#include "cutil.h"

#include "device_functions.h"

#include "sm_11_atomic_functions.h"

#define radium 100000000

// Called first in main(); per the call-site comment it initializes the CUDA
// device and displays the device properties. Definition not shown in this listing.
void cudaInit(void);

// NOTE(review): declared here as an ordinary host function (no __global__),
// while the build log shows it being launched as _incircle<<<grid,threads>>>(...).
// An execution configuration can only be applied to a __global__ function, which
// is consistent with the "call can not be configured" error - confirm against
// the full source.
void _incircle(int *dev_sum,float log_r,int i,int j);

// Takes the host result array and the radius; main() uses the returned int as
// an offset into the 250000-element array when writing the output.
int _CONFIG(int *sum,long long r);

// Host entry point of the pi-computing program as posted.
// NOTE(review): several statements appear to be missing from this listing
// (loops are opened but never closed, there is no closing brace), so the
// comments below describe only what is visible here.
int main() {

cudaInit();//Initialize the CUDA device and display device properties

printf("Start Calculating PI~\n");

//Allocate the array for PI on host

// 250000 ints; the same byte count is reused for the device-side memset below.
size_t data_size=sizeof(int)*250000;

int *sum=(int*)malloc(data_size);

//Allocate the array on device and write data from the host array

int *dev_sum;

// NOTE(review): no cudaMalloc for dev_sum is visible between its declaration
// and the cudaMemset below - if it is really absent, dev_sum is uninitialized
// at this point; confirm against the full source.
// cudaMemset fills byte-wise, so the value -1 sets every byte to 0xFF.

CUDA_SAFE_CALL(cudaMemset((void*)dev_sum,-1,data_size));//Memset the device array to -1

//Indicate the grid size and block size

// 62500 x 31250 blocks, each of 32 x 16 = 512 threads.
// NOTE(review): verify both against the target device's launch limits.
dim3 grid(62500, 31250, 1);

dim3 threads(32, 16, 1); 

// Logarithm of the radius constant (radium = 100000000).
float log_r=log((float)radium);

//Run the kernel

// 50 x 200 iterations; the loop body is not visible in this listing
// (presumably the _incircle launch quoted in the build log - confirm).
for(int i=0;i<50;i++){

	for(int j=0;j<200;j++){





int size=_CONFIG(sum,radium);//Configure the value of array

//write the result into "pi_data.txt"



// Starts at index 249999+size, so this loop only iterates when size <= 0.
// NOTE(review): presumably _CONFIG returns a non-positive offset - confirm.
for(int i=249999+size;i<250000;i++) {




printf("Calculating succeeded --> pi_data.txt\n");



// NOTE(review): no free(sum) / cudaFree(dev_sum) is visible in this listing.

return 0;


It seems the block and grid size should make sense,

and so should the arrays in shared memory.

But I just can’t figure it out…

The other error could result from the nvcc command line

nvcc.exe -ccbin "C:\Program Files\Microsoft Visual Studio 8\VC\bin" -c -DWIN32 -D_DEBUG -D_CONSOLE -Xcompiler "/EHsc /W3 /nologo /Wp64 /Od /Zi /MDd /GR" -I"C:\CUDA\include" -o Debug\win32-console-app-cuda-msvc2005.obj win32-console-app-cuda-msvc2005.vcproj

Where is the definition of _incircle?
Where is the include file for atomicAdd?

Check it more :)

Thank you~ I found that if I separate the kernel code from the .cpp code

and compile the kernel code with the nvcc command line before building with VC++ 2005, it works fine!

But I am only able to compile it…

My program stops at the kernel function.

I have to complete it in two days~ :'(


#include <stdio.h>

#include "cuda_runtime.h"

#include "sm_11_atomic_functions.h"

// Kernel entry point (declared __global__); takes the device result array,
// log_r, and the two loop indices i and j.
// NOTE(review): the rest of the body is not visible in this listing.
__global__ void _incircle(int *dev_sum,float log_r,int i,int j){


	// Block-shared scratch arrays. Their sizes (32 and 16) match the x and y
	// extents of the 32 x 16 thread block used in the host listing -
	// presumably one slot per threadIdx.x / threadIdx.y; confirm.
	__shared__ float X[32];

	__shared__ float Y[16];









extern "C" void kernel(int *dev_sum,float log_r,dim3 grid,dim3 threads);

void kernel(int *dev_sum,float log_r,dim3 grid,dim3 threads) {

	for(int i=0;i<50;i++) {

  for(int j=0;j<200;j++)