Hello
I’ve tried to run some basic CUDA code (adding two arrays of vectors).
In emulation mode everything works, but in both Release and Debug mode I get the error “invalid device function”.
The program runs, but cudaGetErrorString() reports this error after the kernel launch, and the device function doesn’t modify any data.
Could you take a look at my code?
[codebox]
#include <stdio.h>
#include <stdlib.h>
#include <cutil.h>
/**
 * Element-wise, in-place addition of two float3 arrays: a[i] += b[i].
 *
 * Launch layout: indexing uses threadIdx.x only, so the kernel is meant for a
 * single 1D block with one thread per element. blockIdx is not consulted, so
 * additional blocks would all process the same elements.
 *
 * @param a  In/out array, declared with a bound of 10 elements. Must be
 *           addressable from device code.
 * @param b  Array added to a, declared with a bound of 10 elements. Must be
 *           addressable from device code.
 */
__global__ void vecAdd (float3 a[10], float3 b[10]) {
    // Mirrors the array bound declared in the parameter list.
    const int kElementCount = 10;

    int i = threadIdx.x;

    // Threads beyond the declared bound must not touch memory; this keeps
    // the kernel safe when launched with more than kElementCount threads.
    if (i >= kElementCount) {
        return;
    }

    a[i].x += b[i].x;
    a[i].y += b[i].y;
    a[i].z += b[i].z;
}
/**
 * Host driver for vecAdd.
 *
 * Fills two host arrays of 10 float3 values, copies them to the device,
 * launches vecAdd with one block of 10 threads, copies the summed array back
 * and prints every element.
 *
 * @param argc  Argument count, forwarded to CUT_DEVICE_INIT.
 * @param argv  Argument vector, forwarded to CUT_DEVICE_INIT.
 * @return EXIT_FAILURE when a host allocation or the kernel launch fails;
 *         otherwise control is handed to CUT_EXIT.
 */
int main(int argc, char** argv) {
    CUT_DEVICE_INIT(argc, argv);

    const int arraySize = 10;
    const size_t sizeInBytes = arraySize * sizeof(float3);

    // Host buffers: the two inputs and the copy-back destination.
    float3 *a_h = (float3*)malloc(sizeInBytes);
    float3 *b_h = (float3*)malloc(sizeInBytes);
    float3 *result = (float3*)malloc(sizeInBytes);
    if (a_h == NULL || b_h == NULL || result == NULL) {
        fprintf(stderr, "Host memory allocation failed\n");
        free(a_h);  // free(NULL) is a no-op, so partial failure is safe
        free(b_h);
        free(result);
        return EXIT_FAILURE;
    }

    // Device buffers mirroring a_h and b_h.
    float3 *a_d = NULL;
    float3 *b_d = NULL;
    CUDA_SAFE_CALL(cudaMalloc((void**)&a_d, sizeInBytes));
    CUDA_SAFE_CALL(cudaMalloc((void**)&b_d, sizeInBytes));

    for (int i = 0; i < arraySize; i++) {
        a_h[i].x = 100.0f + i;
        a_h[i].y = 200.0f + i;
        a_h[i].z = 300.0f + i;
        b_h[i].x = 50.0f + i;
        b_h[i].y = 50.0f + i;
        b_h[i].z = 50.0f + i;
    }

    CUDA_SAFE_CALL(cudaMemcpy(a_d, a_h, sizeInBytes, cudaMemcpyHostToDevice));
    CUDA_SAFE_CALL(cudaMemcpy(b_d, b_h, sizeInBytes, cudaMemcpyHostToDevice));

    // One block, one thread per element.
    vecAdd<<<1, arraySize>>>(a_d, b_d);

    // A kernel launch returns nothing; launch failures such as
    // "invalid device function" only show up through cudaGetLastError().
    cudaError_t launchStatus = cudaGetLastError();
    printf("%s\n", cudaGetErrorString(launchStatus));

    int exitCode = EXIT_SUCCESS;
    if (launchStatus == cudaSuccess) {
        // The blocking copy also waits for the kernel to finish.
        CUDA_SAFE_CALL(cudaMemcpy(result, a_d, sizeInBytes, cudaMemcpyDeviceToHost));
        for (int i = 0; i < arraySize; i++) {
            // Label each line with its own index instead of a fixed "8".
            printf("Wartość %d. elementu %f, %f, %f \n", i, result[i].x, result[i].y, result[i].z);
        }
    } else {
        // The kernel never ran, so a_d still holds the unmodified input;
        // printing it as a result would be misleading.
        exitCode = EXIT_FAILURE;
    }

    // Release everything before CUT_EXIT, which presumably terminates the
    // process (cutil macro, definition not visible here -- TODO confirm).
    CUDA_SAFE_CALL(cudaFree(a_d));
    CUDA_SAFE_CALL(cudaFree(b_d));
    free(a_h);
    free(b_h);
    free(result);

    if (exitCode != EXIT_SUCCESS) {
        return exitCode;
    }

    CUT_EXIT(0, NULL);
}
[/codebox]