atomic functions

hello

/*
 * cAdd: each thread atomically stores the value 4 into one element of A.
 *
 * Index computation: x and y are the thread index plus 16 * block index
 * (blockIdx << 4) in the respective dimension, and the flat element index
 * is x + 256 * y (y << 8).
 * NOTE(review): this presumably expects 16x16 thread blocks and a row-major
 * 256-column matrix in A -- confirm against the launch site. There is no
 * bounds check, so a larger launch would write outside A.
 *
 * B and C are accepted but never read or written.
 *
 * atomicExch on global memory needs compute capability 1.1 or higher; the
 * file must be compiled for such a target (e.g. -arch sm_11), otherwise the
 * identifier is reported as undefined.
 */
__global__ void cAdd(int *A, int *B, int *C)
{
	int x = threadIdx.x + (blockIdx.x << 4);

	// The row coordinate must be derived from blockIdx.y. Using blockIdx.x
	// here made blocks in different grid rows map onto the same elements.
	int y = threadIdx.y + (blockIdx.y << 4);

	int ind = x + (y << 8);

	// atomicExch returns the previous value; it is not needed here.
	atomicExch(&A[ind], 4);
}

I have Visual Studio 2005 Team Edition and an 8600 GTS installed on my computer.

The 8600 GTS should have compute capability 1.1, but when I try to use atomic functions

(e.g. atomicExch) the compiler reports

1>sample.cu(58): error: identifier “atomicExch” is undefined

Does anyone have an idea what is wrong?

:ike:

:|

You sure you are including the right libraries?
Can you post complete Code? This part looks fine.

#include <stdio.h>

#include <stdlib.h>

#include <cuda_runtime.h>

#include <cutil.h>

/************************************************************************/

/* Init CUDA                                                            */

/************************************************************************/

#if __DEVICE_EMULATION__

/* Emulation build: no device is queried or selected; always reports success. */
bool InitCUDA(void){return true;}

#else

/*
 * Selects the first CUDA device whose reported major compute capability is
 * at least 1 and makes it the current device.
 *
 * Returns true when a device was found and selected. Returns false, after
 * printing a message to stderr, when the device count cannot be queried,
 * when no device is present, when no device qualifies, or when the chosen
 * device cannot be made current.
 */
bool InitCUDA(void)
{
	int count = 0;
	int i = 0;
	cudaError_t err;

	/* A failed query must not be mistaken for "zero devices". */
	err = cudaGetDeviceCount(&count);
	if(err != cudaSuccess) {
		fprintf(stderr, "cudaGetDeviceCount failed: %s\n", cudaGetErrorString(err));
		return false;
	}

	if(count == 0) {
		fprintf(stderr, "There is no device.\n");
		return false;
	}

	/* Stop at the first device that reports compute capability >= 1.x. */
	for(i = 0; i < count; i++) {
		cudaDeviceProp prop;
		if(cudaGetDeviceProperties(&prop, i) == cudaSuccess) {
			if(prop.major >= 1) {
				break;
			}
		}
	}

	/* The loop ran to completion, so no device qualified. */
	if(i == count) {
		fprintf(stderr, "There is no device supporting CUDA.\n");
		return false;
	}

	err = cudaSetDevice(i);
	if(err != cudaSuccess) {
		fprintf(stderr, "cudaSetDevice(%d) failed: %s\n", i, cudaGetErrorString(err));
		return false;
	}

	printf("CUDA initialized.\n");
	return true;
}

#endif

/*
 * cAdd: each thread atomically swaps the value 4 into a single element of A.
 *
 * The column is threadIdx.x + 16 * blockIdx.x, the row is
 * threadIdx.y + 16 * blockIdx.y, and the element written is row * 256 + col.
 * B and C are part of the signature but are not touched.
 */
__global__ void cAdd(int *A, int *B, int *C)
{
	const int col  = threadIdx.x + blockIdx.x * 16;
	const int row  = threadIdx.y + blockIdx.y * 16;
	const int flat = row * 256 + col;

	/* The old element value comes back from atomicExch and is discarded. */
	const int previous = atomicExch(A + flat, 4);
	(void)previous;
}

/*
 * Host driver for the cAdd kernel.
 *
 * Fills three 256x256 int matrices on the host (A = 3, B = 2, C = 0), copies
 * them to the device, launches cAdd on a 16x16 grid of 16x16 blocks, copies
 * all three matrices back, and prints C[i][i] for i = 20..29.
 *
 * Returns 0 on success (and also when InitCUDA() fails, as in the original),
 * 1 when any CUDA allocation, copy, or the kernel launch fails.
 */
int main(int argc, char* argv[])
{
	if(!InitCUDA()) {
		return 0;
	}

	/* Runtime message is Serbian, roughly "Not defined, whatever it is". */
	#if defined CUDA_NO_SM_11_ATOMIC_INTRINSICS
	printf("Nije definisano sta god da je \n");
	#endif

	/* static: three 256 KB matrices are too large to place safely on the stack. */
	static int A[256][256];
	static int B[256][256];
	static int C[256][256];

	/* Null-initialised so the cleanup below is safe after a partial failure. */
	int *dA = 0;
	int *dB = 0;
	int *dC = 0;

	int i, j;
	const size_t mSize = 256 * 256 * sizeof(int);
	cudaError_t err;

	for (i = 0; i < 256; i++)
		for (j = 0; j < 256; j++)
		{
			A[i][j] = 3;
			B[i][j] = 2;
			C[i][j] = 0;
		}

	/* Each step runs only if every previous step succeeded; err keeps the
	   first failure so it can be reported after cleanup. */
	err = cudaMalloc( (void **) &dA, mSize );
	if (err == cudaSuccess)
		err = cudaMalloc( (void **) &dB, mSize );
	if (err == cudaSuccess)
		err = cudaMalloc( (void **) &dC, mSize );

	/* The copy results are now actually stored in err; previously the checks
	   after each cudaMemcpy re-tested the result of the last cudaMalloc. */
	if (err == cudaSuccess)
		err = cudaMemcpy(dA, A, mSize, cudaMemcpyHostToDevice);
	if (err == cudaSuccess)
		err = cudaMemcpy(dB, B, mSize, cudaMemcpyHostToDevice);
	if (err == cudaSuccess)
		err = cudaMemcpy(dC, C, mSize, cudaMemcpyHostToDevice);

	if (err == cudaSuccess)
	{
		dim3 blok(16,16);
		dim3 grid(16,16);

		cAdd<<<grid,blok>>>(dA,dB,dC);

		/* A kernel launch returns nothing; launch failures (for example
		   "invalid device function" from an architecture mismatch) are
		   only visible through cudaGetLastError(). */
		err = cudaGetLastError();
	}

	/* Blocking copies: they wait for the kernel, so execution errors from
	   the kernel are reported here as well. */
	if (err == cudaSuccess)
		err = cudaMemcpy(A, dA, mSize, cudaMemcpyDeviceToHost);
	if (err == cudaSuccess)
		err = cudaMemcpy(B, dB, mSize, cudaMemcpyDeviceToHost);
	if (err == cudaSuccess)
		err = cudaMemcpy(C, dC, mSize, cudaMemcpyDeviceToHost);

	/* Always release device memory; cudaFree on a null pointer does nothing. */
	cudaFree(dA);
	cudaFree(dB);
	cudaFree(dC);

	if (err != cudaSuccess)
	{
		fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
		return 1;
	}

	/* NOTE(review): cAdd only writes to A, so C still holds the zeros set
	   above -- confirm whether A was meant to be printed here. */
	for (i = 20; i < 30; i++)
		printf("%d \n", C[i][i]);

	/* Keep the console window open until a key is pressed. */
	getchar();

	return 0;
}

Are you compiling with “-arch=sm_11” on the nvcc command line?

Yes, I did. But as I said, I'm using VS2005 with the CUDA Project Wizard, and that was the problem. When I go to Project Properties/CUDA/Command Line and add the -arch sm_11

option, the compiler actually ignores it when compiling the code. I had to go to Visual Studio/VC/VCProjectDefaults/cuda.properties to change the command line. Now it works fine.

thanks

I’m having the same problem. Where are you changing the command line: within VS08 or in regedit? I tried looking in both, but I was unable to find the correct line to change.

I was able to add “-arch sm_11” to the custom build step of my .cu file in VS08 (right-click on the .cu file and then select the custom build property). But when I do that, execution ends with an “invalid device function” error even if I do not use any atomic function. Without “-arch sm_11” the program executes correctly, but I cannot use atomics.

Any ideas?

Thanks

PS: I’m running on Windows XP with 9800gx2

I solved my problem and now it compiles correctly. What I had to do was remove the Tesla C870 card and leave just the 9800 GX2 inside the PC. For some reason, even though I was setting the CUDA device to 1 to skip the Tesla card, the compiler was still complaining.

The CUDA wizard doesn’t let you add compile flags? That should really be fixed…

Where do I add “-arch sm_11” when I compile a CUDA mex-file for Matlab ?

Hi, my problem is the same: where do I add “-arch sm_11” when I compile a CUDA mex-file for Matlab?

Please help.

In the file nvopts.sh

Thank you very much. Would you please tell me where the nvopts.sh file is located? I am new to CUDA programming and couldn’t find the file. Please help again.

Hello,
I couldn’t find nvopts.sh in Matlab_Cuda_1.1 for Windows. The atomic functions in my mex files are reported as “undefined” during compilation. Please tell me what I can do now.

I’ve only used Matlab CUDA for Linux.

I am also having trouble compiling my CUDA code after adding atomic functions. I am using VS2010.

I have included the line:

#include <sm_12_atomic_functions.h>

This is the error I receive:

1>CudaBuild:

1>  Compiling CUDA source file main.cu...

1>  

1>  C:\Users\Santos\Documents\Visual Studio 2010\Projects\CUDA_SPH\CUDA_OpenGL>"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3.2\bin\nvcc.exe" -gencode=arch=compute_10,code=\"sm_10,compute_10\" --use-local-env --cl-version 2008 -ccbin "C:\Program Files (x86)\Microsoft Visual Studio 9.0\VC\bin"  -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3.2\include"  -G0  --keep-dir "Debug\" -maxrregcount=32  --machine 32 --compile -arch sm_12  -D_NEXUS_DEBUG -g    -Xcompiler "/EHsc /nologo /Od /Zi  /MDd " -o "Debug\main.obj" "C:\Users\Santos\Documents\Visual Studio 2010\Projects\CUDA_SPH\CUDA_OpenGL\main.cu" 

1>  main.cu

1>  tmpxft_00000e2c_00000000-3_main.compute_12.cudafe1.gpu

1>  tmpxft_00000e2c_00000000-7_main.compute_12.cudafe2.gpu

1>  main.cu

1>C:/Users/Santos/Documents/Visual Studio 2010/Projects/CUDA_SPH/CUDA_OpenGL/main.cu(405): error : identifier "atomicExch" is undefined

1>  

1>C:/Users/Santos/Documents/Visual Studio 2010/Projects/CUDA_SPH/CUDA_OpenGL/main.cu(411): error : identifier "atomicAdd" is undefined

1>  

1>  2 errors detected in the compilation of "C:/Users/Santos/AppData/Local/Temp/tmpxft_00000e2c_00000000-9_main.compute_10.cpp1.ii".

1>C:\Program Files (x86)\MSBuild\Microsoft.Cpp\v4.0\BuildCustomizations\CUDA 3.2.targets(272,3): error MSB3721: The command ""C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3.2\bin\nvcc.exe" -gencode=arch=compute_10,code=\"sm_10,compute_10\" --use-local-env --cl-version 2008 -ccbin "C:\Program Files (x86)\Microsoft Visual Studio 9.0\VC\bin"  -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3.2\include"  -G0  --keep-dir "Debug\" -maxrregcount=32  --machine 32 --compile -arch sm_12  -D_NEXUS_DEBUG -g    -Xcompiler "/EHsc /nologo /Od /Zi  /MDd " -o "Debug\main.obj" "C:\Users\Santos\Documents\Visual Studio 2010\Projects\CUDA_SPH\CUDA_OpenGL\main.cu"" exited with code 2.

1>

1>Build FAILED.

I can see that the flag -arch sm_12 is set, so I am unsure as to why this is failing. If anyone has any insight I would very much appreciate it. Thanks for your time.

Ok, I got it to compile and I just wanted to post the solution for any other VS2010 users.
Navigate to:
Project->Properties->Configuration Properties->CUDA C/C++->Device
Then under “Code Generation”
Change “compute_10,sm_10” to “compute_20,sm_20”.

Thanks for your reply. Is there anyone who has used CUDA atomic functions in Matlab mex files on Windows? I badly need this for my research; please help me.