Hi!
Is it correct that the host cannot access memory allocated on the GPU heap by a kernel calling malloc? The CUDA Guide doesn’t mention anything, but given all the other restrictions, it wouldn’t surprise me.
The following code fails at cudaMemcpy on line 27 on a K80:
#include <cstdio>
#include <cassert>
__device__ int *ptr = NULL;
__global__ void kernel()
{
if ((threadIdx.x==0 && threadIdx.y == 0 && threadIdx.z == 0) &&
(blockIdx.x==0 && blockIdx.y == 0 && blockIdx.z == 0) ) {
ptr = (int*)malloc(4);
*ptr = 42;
}
}
int main(int argc, char *argv[])
{
kernel<<<1,1>>>();
cudaDeviceSynchronize();
assert( cudaSuccess == cudaGetLastError() );
int *hptr = NULL;
assert( cudaSuccess == cudaMemcpyFromSymbol(&hptr, ptr, 4, 0) );
printf("ptr = %p\n", hptr);
int val;
assert( cudaSuccess == cudaMemcpy(&val, hptr, 4, cudaMemcpyDeviceToHost) );
printf("*ptr = %d\n", val);
return 0;
}
Thanks,
nargin