Maybe I’m making a stupid mistake; the following code fails on my machine.
#include <assert.h>
#include <cuda.h>
#include <stdio.h>
// Checks a CUDA runtime status expression. The macro forwards the evaluated
// status, the source text of the expression, the enclosing function's
// signature and the current line number to cudaAssert.
#define CU_ASSERT(expr) \
  (cudaAssert(expr, #expr, __PRETTY_FUNCTION__, __LINE__))

// Error reporter used by CU_ASSERT. Only the declaration is visible here; the
// definition lives elsewhere.
void cudaAssert(cudaError err,              // status value being checked
                const char *function_name,  // CU_ASSERT passes the stringified expression here
                const char *function,       // CU_ASSERT passes __PRETTY_FUNCTION__ here
                int line);                  // CU_ASSERT passes __LINE__ here
// Minimal kernel that performs an atomic increment on a block-shared counter.
//
// Launch layout: any block shape; every thread of the block adds 1 to the
// counter. Uses one statically allocated __shared__ int (no dynamic shared
// memory). The counter is not written back anywhere, so the kernel has no
// externally visible result.
//
// NOTE(review): atomic operations on shared memory need a device of compute
// capability 1.2 or higher. On an older device (compute capability 1.0/1.1)
// the launch is expected to fail, e.g. with "invalid device function" --
// confirm against the target GPU and the -arch flag passed to nvcc.
__global__ void simple_shared() {
  __shared__ int a;

  // Shared memory is uninitialised on entry: have exactly one thread zero the
  // counter so the atomic add below starts from a defined value.
  if (threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0) {
    a = 0;
  }
  // Barrier is outside the branch so every thread of the block reaches it;
  // it makes the initialisation visible before any thread updates the counter.
  __syncthreads();

  atomicAdd(&a, 1);
}
// Host entry point: launches simple_shared with a single block containing a
// single thread, then checks the launch and the kernel's execution for errors
// through CU_ASSERT. Returns 0 once both checks have been issued.
int main() {
simple_shared<<<1, 1>>>();
// A kernel launch returns no status itself; launch errors are read back here.
CU_ASSERT(cudaGetLastError());
// Block until the kernel has finished so that errors raised during execution
// are reported by this call.
// NOTE(review): newer CUDA toolkits provide cudaDeviceSynchronize() for this
// purpose -- confirm which call the target toolkit version expects.
CU_ASSERT(cudaThreadSynchronize());
return 0;
}
cudaAssert does pretty much what its name says – it just prints the cudaGetErrorString message for the error. The output is
ERROR - [int main():16] - ‘cudaGetLastError()’ failed, error 'invalid device function '. It seems to work fine when I take out the shared variable, or when I only do standard operations (plain reads/writes) on it instead of the atomic add. My setup: an 8600 GTS, nvcc built “Tue_Jun_10_05:42:45_PDT_2008”, nvidia driver 177.13, running SuSE 10.3 (kernel 2.6.22).
Thanks in advance.