How to control where cudaMallocManaged allocates a buffer (device or host)

I’m starting to develop an application in which I would like to use unified memory and have the buffer allocated on the device.

However, I notice that on my desktop system (Dell 7920 with Quadro P2000) the memory is allocated on the host, but on my laptop (Acer Aspire with GeForce GTX 860M) it is allocated on the device.

I am verifying this by calling cudaMemGetInfo() to see how much device memory is available before and after the cudaMallocManaged call.

I have also tried what seems like every combination of IOMMU settings, thinking that might have something to do with it, but none of them changes the behavior.

I am doing this under Ubuntu 18.04 with kernel 4.15 and CUDA 10.0.

Here is the test code:
//compile with:
//   nvcc -arch=sm_52 cudaMallocManagedTest.cu
//test:
//   CUDA_VISIBLE_DEVICES="0" ./a.out

#include <stdio.h>
#include <unistd.h>
#include <cuda.h>
#include "cuda_runtime.h"

// Print free/total device memory as reported by the runtime.
void report_gpu_mem()
{
    size_t free, total;
    cudaMemGetInfo(&free, &total);
    printf("Free = %zu  Total = %zu\n", free, total);
}

int main()
{
    short *a, *a_out;
    size_t size = 4096;
    //int status;              // only needed for the commented-out cuPointerSetAttribute test
    //unsigned int flag = 1;

    //printf("--init--\n");
    report_gpu_mem();
    //cudaMalloc((void**)&a, size);
    //status = cuPointerSetAttribute(&flag, CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, (CUdeviceptr)a); //requires -lcuda to link
    cudaMallocManaged((void**)&a, size, cudaMemAttachGlobal);
    //printf("--malloc managed--\n");
    report_gpu_mem();
    cudaMallocHost((void**)&a_out, size);
    cudaDeviceSynchronize();
    //printf("--malloc host--\n");
    report_gpu_mem();

    cudaMemcpy(a, a_out, size, cudaMemcpyDefault);
    struct cudaPointerAttributes attributes;
    cudaError_t error = cudaPointerGetAttributes(&attributes, a);
    printf("error=%d, %s, %s\n", error, cudaGetErrorName(error), cudaGetErrorString(error));

    printf("Memory type for a (0=unregistered, 1=host, 2=device, 3=managed): %i\n", attributes.type);
    printf("device:  %i\n", attributes.device);

    cudaFree(a);
    report_gpu_mem();
    cudaFreeHost(a_out);   // allocated with cudaMallocHost, so freed with cudaFreeHost
    report_gpu_mem();
    cudaDeviceReset();
    return 0;
}
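
Is something like the sketch below the intended way to force the managed buffer onto the device? This is just my assumption from reading about cudaMemAdvise/cudaMemPrefetchAsync, not code I have verified on either machine, and I believe it requires a GPU that reports concurrentManagedAccess = 1 (which I haven't confirmed for the GTX 860M).

// Minimal sketch (my assumption, unverified): after cudaMallocManaged,
// advise the driver that the buffer's preferred location is device 0 and
// prefetch it there, so cudaMemGetInfo should show the allocation on the GPU.
#include <stdio.h>
#include "cuda_runtime.h"

int main()
{
    int dev = 0;
    int concurrent = 0;
    cudaDeviceGetAttribute(&concurrent, cudaDevAttrConcurrentManagedAccess, dev);
    printf("concurrentManagedAccess = %d\n", concurrent);

    short *a;
    size_t size = 4096;
    cudaMallocManaged((void**)&a, size, cudaMemAttachGlobal);

    if (concurrent) {
        // Hint that the buffer should live on the GPU, then migrate it now.
        cudaMemAdvise(a, size, cudaMemAdviseSetPreferredLocation, dev);
        cudaMemPrefetchAsync(a, size, dev, 0);
        cudaDeviceSynchronize();
    }

    cudaFree(a);
    return 0;
}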