I would do it like this:
[font="Courier New"][codebox]#include <stdio.h>
#include "cutil_inline.h"
/* Number of elements in a true array; not valid for pointers. */
#define ELEMS(a) (sizeof((a))/sizeof((a)[0]))

/* Host-side reference data that is copied to the device and compared
 * against the kernel output. */
int a[] = {1, 2, 3};

/* Device constant-memory array with the same element count as `a`. */
__constant__ int a_c[ELEMS(a)];
/*
 * Human-readable messages looked up by the numeric value of a cudaError_t.
 *
 * Only the 22 codes listed here are covered; indexing with any other value
 * reads past the end of the table. cudaGetErrorString() covers every code
 * and is the preferred way to obtain a message.
 * NOTE(review): the order is assumed to match the cudaError_t enum of the
 * toolkit in use -- confirm before relying on it.
 *
 * The entries are string literals, so the element type is `const char *`.
 */
const char *cudaErrorString[] =
{
"No errors",
"Missing configuration error",
"Memory allocation error",
"Initialization error",
"Launch failure",
"Prior launch failure",
"Launch timeout error",
"Launch out of resources error",
"Invalid device function",
"Invalid configuration",
"Invalid device",
"Invalid value",
"Invalid pitch value",
"Invalid symbol",
"Map buffer object failed",
"Unmap buffer object failed",
"Invalid host pointer",
"Invalid device pointer",
"Invalid texture",
"Invalid texture binding",
"Invalid channel descriptor",
"Invalid memcpy direction"
};
/*
 * Copies the first n elements of the constant-memory array a_c into b.
 *
 * b : device pointer to at least n ints (output).
 * n : number of elements to copy; must not exceed the element count of a_c.
 *
 * Works with any 1D launch configuration: each thread starts at its global
 * index and advances by the total number of launched threads, so a
 * <<<1, 1>>> launch visits every element exactly as a plain loop would.
 */
__global__ void testkernel( int *b, int n )
{
    const int stride = gridDim.x * blockDim.x;
    for( int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride )
        b[i] = a_c[i];
}
/* Prints "<what>: <CUDA error text>" to stderr when err is not cudaSuccess.
 * Returns 1 if err signals a failure, 0 otherwise. */
static int cudaFailed( cudaError_t err, const char *what )
{
    if( err == cudaSuccess ) return 0;
    fprintf( stderr, "%s: %s\n", what, cudaGetErrorString( err ) );
    return 1;
}

/*
 * Copies the host array `a` into the constant-memory array `a_c`, runs
 * testkernel to copy it into a device buffer, reads that buffer back and
 * compares it element by element with `a`.
 *
 * Prints one "index expected actual" line per element followed by
 * "Passed" or "Failed". Returns 0 when every element matches, 1 on a
 * mismatch or on any CUDA error.
 */
int main()
{
    const int n = (int)ELEMS( a );
    int b[ ELEMS( a )];
    int *b_d = NULL;
    int passed = 0;

    if( cudaFailed( cudaMalloc( (void**)&b_d, sizeof(b) ), "cudaMalloc" ) )
        return 1;

    /* Pass the symbol itself: looking a symbol up by its name string is no
     * longer supported by the CUDA runtime. */
    if( !cudaFailed( cudaMemcpyToSymbol( a_c, a, sizeof(a), 0, cudaMemcpyHostToDevice ),
                     "cudaMemcpyToSymbol" ) )
    {
        testkernel <<< 1, 1 >>> ( b_d, n );

        /* cudaGetLastError() reports launch errors; the blocking cudaMemcpy
         * below waits for the kernel and reports execution errors. */
        if( !cudaFailed( cudaGetLastError(), "Kernel execution failed" ) &&
            !cudaFailed( cudaMemcpy( b, b_d, sizeof(b), cudaMemcpyDeviceToHost ),
                         "cudaMemcpy" ) )
        {
            passed = 1;
            for( int i = 0; i < n; i++ )
            {
                printf( "%d %d %d\n", i, a[i], b[i] );
                if( a[i] != b[i] ) passed = 0;
            }
            puts( passed ? "Passed" : "Failed" );
        }
    }

    /* Release the device buffer on every path that allocated it. */
    cudaFree( b_d );
    return passed ? 0 : 1;
}[/codebox][/font]
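I’ve used constant memory here. If your arrays do not fit in constant memory (it is limited to 64 KB), you can put them in global memory instead, using the normal cudaMalloc() and cudaMemcpy() calls as in (all) the SDK examples. I think constant memory is faster, because it is cached and a value read by all threads of a warp is broadcast to them.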
The array of error strings is a bit unnecessary; I should have used cudaGetErrorString(), as you did.