CUDA code doesn't run in Release mode

Hello

I’ve tried to run some basic CUDA code (adding two arrays of vectors).

In emulation mode everything is OK, but in Release and Debug mode I get the error “invalid device function”.

The program runs, but cudaGetErrorString() reports this error and the device function doesn’t modify any data.

Could you look at my code?

[codebox]

#include <stdio.h>

#include <cutil.h>

// Component-wise in-place vector addition: a[i] += b[i] for each float3 element.
//
// Launch layout: one thread per element, indexed by threadIdx.x only, so the
// kernel must be launched with a single block. Both arguments are device
// pointers to at least 10 float3 elements (the extent declared in the
// signature); threads with an index of 10 or more do nothing.
__global__ void vecAdd (float3 a[10], float3 b[10]) {
    const int i = threadIdx.x;

    // Guard against launches with more threads than the declared array extent.
    if (i >= 10) {
        return;
    }

    a[i].x += b[i].x;
    a[i].y += b[i].y;
    a[i].z += b[i].z;
}

// Host driver for vecAdd: fills two 10-element float3 arrays on the host,
// copies them to the device, launches vecAdd with one block of 10 threads,
// copies the summed array back and prints every element.
//
// Returns 1 if a host allocation fails or the kernel launch reports an error;
// otherwise exits through CUT_EXIT.
int main(int argc, char** argv) {
    CUT_DEVICE_INIT(argc, argv);

    const int arraySize = 10;
    const size_t sizeInBytes = arraySize * sizeof(float3);

    // Host buffers: two inputs and one buffer for the result copied back.
    float3* a_h = (float3*)malloc(sizeInBytes);
    float3* b_h = (float3*)malloc(sizeInBytes);
    float3* result = (float3*)malloc(sizeInBytes);
    if (a_h == NULL || b_h == NULL || result == NULL) {
        fprintf(stderr, "Host allocation of %lu bytes failed\n",
                (unsigned long)sizeInBytes);
        free(a_h);      // free(NULL) is a no-op, so partial failure is safe
        free(b_h);
        free(result);
        return 1;
    }

    // Device buffers.
    float3* a_d = NULL;
    float3* b_d = NULL;
    CUDA_SAFE_CALL(cudaMalloc((void**)&a_d, sizeInBytes));
    CUDA_SAFE_CALL(cudaMalloc((void**)&b_d, sizeInBytes));

    for (int i = 0; i < arraySize; i++) {
        a_h[i].x = 100.0f + i;
        a_h[i].y = 200.0f + i;
        a_h[i].z = 300.0f + i;

        b_h[i].x = 50.0f + i;
        b_h[i].y = 50.0f + i;
        b_h[i].z = 50.0f + i;
    }

    CUDA_SAFE_CALL(cudaMemcpy(a_d, a_h, sizeInBytes, cudaMemcpyHostToDevice));
    CUDA_SAFE_CALL(cudaMemcpy(b_d, b_h, sizeInBytes, cudaMemcpyHostToDevice));

    // One block, one thread per element.
    vecAdd<<<1, arraySize>>>(a_d, b_d);

    // A kernel launch returns nothing; the launch status has to be queried.
    // The status string is always printed (as before), but the results are
    // only copied back and shown when the launch actually succeeded.
    const cudaError_t launchStatus = cudaGetLastError();
    printf("%s\n", cudaGetErrorString(launchStatus));

    int exitCode = 0;
    if (launchStatus == cudaSuccess) {
        CUDA_SAFE_CALL(cudaMemcpy(result, a_d, sizeInBytes, cudaMemcpyDeviceToHost));

        for (int i = 0; i < arraySize; i++) {
            // Print the real element index instead of a hard-coded "8.".
            printf("Wartość %d. elementu %f, %f, %f \n", i, result[i].x, result[i].y, result[i].z);
        }
    } else {
        exitCode = 1;
    }

    // Release device and host memory before leaving.
    CUDA_SAFE_CALL(cudaFree(a_d));
    CUDA_SAFE_CALL(cudaFree(b_d));
    free(a_h);
    free(b_h);
    free(result);

    if (exitCode != 0) {
        return exitCode;
    }

    CUT_EXIT(0, NULL);
}

[/codebox]

Which device?
What about your compiler commands?

PS: on my computer it runs fine…

I used CUDA VS Wizard 2.0 (I didn’t change anything).

This is the compiler command line:
nvcc.exe -ccbin “C:\Program Files\Microsoft Visual Studio 8\VC\bin” -I"C:\CUDA\include" -I"C:\Program Files\NVIDIA Corporation\NVIDIA CUDA SDK\common\inc" -O2 -D_CONSOLE -arch sm_10 -code sm_10 --host-compilation C++ -c -m 32 -o “Release\CUDAWinApp1.obj” -odir “Release” -ext none -int none CUDAWinApp1.vcproj

nvcc.exe -ccbin “C:\Program Files\Microsoft Visual Studio 8\VC\bin” -I"C:\CUDA\include" -I"C:\Program Files\NVIDIA Corporation\NVIDIA CUDA SDK\common\inc" -O2 -D_CONSOLE -arch sm_10 -code sm_10 --host-compilation C++ -c -m 32 -o “Release\CUDAWinApp1.obj” -odir “Release” -ext none -int real CUDAWinApp1.vcproj

Change `-int none` to `-int real`.