cudaMallocManaged succeeds, but memory access fails, for size greater than hardware memory

I’m using a GTX1080Ti in a Fedora machine running kernel 4.10.11.

Using cudaMallocManaged, I can allocate up to the amount of hardware memory in my machine, and touch it all. cudaMallocManaged will also successfully return from an allocation request for more than the available hardware memory, presumably because the machine has more virtual memory available. However, touching all of that memory causes cuda-memcheck to report an error.

If using all the memory I request is going to cause an error, shouldn’t cudaMallocManaged return an error when I request too much memory in the first place?

Below is an example program reproducing the issue:

#include <iostream>
#include <cstring>
#include <unistd.h>

// Forward declaration of the error-reporting helper defined at the bottom of the file.
static void CheckCudaErrorAux (const char *, unsigned, const char *, cudaError_t);
// Wraps a CUDA runtime call: on failure, reports the statement text, error,
// and file:line location, then exits. `#value` stringizes the wrapped call.
#define CUDA_CHECK_RETURN(value) CheckCudaErrorAux(__FILE__,__LINE__, #value, value)

// My hardware memory size is 128GB
// (My virtual memory size is 168GB)

// Returns the total physical RAM of the machine in bytes, computed as
// page count * page size via POSIX sysconf. Works on Linux 4.10.11.
// (The name keeps its original spelling because callers below use it.)
size_t hardwareMemoryAvaialble(){
  const size_t pageCount = (size_t)sysconf( _SC_PHYS_PAGES );
  const size_t pageBytes = (size_t)sysconf( _SC_PAGESIZE );
  return pageCount * pageBytes;
}

// I'm just going to assume you have at least 10%
// more virtual than hw memory available.
// Total physical RAM in bytes, sampled once at static-initialization time.
const size_t gHwMemory = hardwareMemoryAvaialble();
// 10% of physical RAM — the margin used to under- and over-shoot below.
const size_t gDeltaMemory = gHwMemory/10;

// Repro driver: allocates managed memory below and above the physical-RAM
// size and touches every byte, to show that cudaMallocManaged succeeds in
// both cases but oversubscribed access faults under cuda-memcheck.
int main(void)
{
  std::cout << "Hardware memory is " << gHwMemory << std::endl;

  // Case 1: request 90% of physical RAM. Both the allocation and touching
  // every byte of it are expected to succeed.
  {
    const size_t bytesUnderHw = gHwMemory - gDeltaMemory;
    char* bufferUnderHw = 0;
    CUDA_CHECK_RETURN(cudaMallocManaged((void ** )&bufferUnderHw, bytesUnderHw));
    std::cout << "successfully allocated " << bytesUnderHw << " bytes."
        << std::endl;
    std::memset(bufferUnderHw, 'a', bytesUnderHw);
    CUDA_CHECK_RETURN(cudaFree(bufferUnderHw));
    std::cout << "all ok for allocating less than hw mem" << std::endl;
  }

  // Case 2: request 110% of physical RAM. The allocation call still
  // succeeds, but touching it all makes cuda-memcheck report something
  // similar to:
  //   ========= Fatal UVM CPU fault due to out of memory
  //   =========     during write access to address 0x4588480000
  // Shouldn't cudaMallocManaged fail up front if touching all the memory
  // is going to fail?
  {
    const size_t bytesOverHw = gHwMemory + gDeltaMemory;
    char* bufferOverHw = 0;
    CUDA_CHECK_RETURN(cudaMallocManaged((void ** )&bufferOverHw, bytesOverHw));
    std::cout << "successfully allocated " << bytesOverHw << " bytes."
        << std::endl;
    std::memset(bufferOverHw, 'b', bytesOverHw);
    CUDA_CHECK_RETURN(cudaFree(bufferOverHw));
    std::cout
        << "all ok for allocating more than hw mem "
        << "(don't expect to get here using cuda-memcheck)"
        << std::endl;
  }
  return 0;
}

// Error-reporting helper behind CUDA_CHECK_RETURN. Does nothing on
// cudaSuccess; otherwise prints the failing statement, the error string
// and numeric code, and the file:line location to stderr, then exits
// with status 1.
static void CheckCudaErrorAux (const char *file, unsigned line,
    const char *statement, cudaError_t err)
{
	if (err != cudaSuccess) {
		std::cerr << statement<<" returned " << cudaGetErrorString(err)
		    << "("<<err<< ") at "<<file<<":"<<line << std::endl;
		exit (1);
	}
}

On my machine, the result of running the above code under cuda-memcheck is:

========= CUDA-MEMCHECK
Hardware memory is 135124774912
successfully allocated 121612297421 bytes.
all ok for allocating less than hw mem
successfully allocated 148637252403 bytes.
========= Error: process didn't terminate successfully
========= Fatal UVM CPU fault due to out of memory
=========     during write access to address 0x4589880000
=========
========= ERROR SUMMARY: 1 error

The out of memory issue may not be discoverable by cudaMallocManaged. I think the advice is somewhat similar to many other malloc type recommendations: don’t attempt to allocate all your system memory.

If you like, file an RFE (request for enhancement) at developer.nvidia.com — it is filed the same way as a bug, with "RFE" or "request for enhancement" indicated somewhere in your text.

Thanks Bob. “Doc, it hurts when I do that…”