__device__ int addDevice( int a, int b ) {
return a + b;
}
__global__ void add( int a, int b, int *c ) {
*c = addDevice( a, b );
}
int main( void ) {
int c;
int *dev_c;
HANDLE_ERROR( cudaHostAlloc( (void**)&dev_c, sizeof(int), cudaHostAllocDefault ) );
add<<<1,1>>>( 1, 9, dev_c );
HANDLE_ERROR( cudaMemcpy( &c, dev_c, sizeof(int),
cudaMemcpyDeviceToHost ) );
printf( "1 + 9 = %d\n", c );
HANDLE_ERROR( cudaFreeHost( dev_c ) );
return 0;
}
It seems that the problem comes from the Memcpy function : invalid argument.
cudaHostAlloc() returns a host pointer, even if the [font=“Courier New”]cudaHostAllocMapped[/font] flag were specified (which isn’t in the example above). You still need to call cudaHostGetDevicePointer() to obtain the corresponding device pointer for mapped memory. Only under certain conditions (UVA) will these pointers be the same.