Description
I monitored the memory footprint on the GPU when using the CUDA green context API.
Observation
Creating a green context increases the memory footprint on the GPU – this is expected.
However, when the green context is destroyed with cuGreenCtxDestroy(), the memory allocated on the GPU is either not freed at all or only partly freed.
I checked the amount of leaking memory across different CUDA versions.
Versions 12.6, 12.8 and 13.0 each allocate ~4 MiB per green context and do not free any of that memory on destruction.
I created a minimal working example to reproduce this:
#include "cuda.h"
#include "cuda_runtime.h"
#include <cassert>
#include <cstdint>
#include <iostream>
#include <vector>
static uint64_t GetMemoryUsage() {
uint64_t free_byte = 0;
uint64_t total_byte = 0;
auto cuda_status = cudaMemGetInfo(&free_byte, &total_byte);
return (total_byte - free_byte) / 1000000;
}
int main(int /*argc*/, const char * /*argv*/[]) {
cudaDeviceProp device_properties{};
CUdevResource full_gpu;
cudaGetDeviceProperties(&device_properties, 0);
cuDeviceGetDevResource(0, &full_gpu, CU_DEV_RESOURCE_TYPE_SM);
// Assume the minimum SM count per green context is 4.
uint32_t minimum_sm_count = 4;
uint32_t split_count =
device_properties.multiProcessorCount / minimum_sm_count;
auto resources = std::vector<CUdevResource>(split_count);
// Split "full_gpu" into "split_count" SM partitions, each of size
// "minimum_sm_count".
cuDevSmResourceSplitByCount(
resources.data(), &split_count, &full_gpu, nullptr,
CU_DEV_SM_RESOURCE_SPLIT_IGNORE_SM_COSCHEDULING, minimum_sm_count);
std::cout << "initial memory footprint=" << GetMemoryUsage() << std::endl;
for (uint32_t i = 0; i < split_count; ++i) {
CUgreenCtx green_context = nullptr;
// Create a resource descriptor.
CUdevResourceDesc resource_descriptor = nullptr;
cuDevResourceGenerateDesc(&resource_descriptor, resources.data() + i, 1);
cuGreenCtxCreate(&green_context, resource_descriptor, 0,
CU_GREEN_CTX_DEFAULT_STREAM);
CUcontext context = nullptr;
assert(cuCtxFromGreenCtx(&context, green_context) == CUDA_SUCCESS);
assert(cuGreenCtxDestroy(green_context) == CUDA_SUCCESS);
// The memory footprint keeps growing although the context is destroyed.
std::cout << "i=" << i << " memory footprint=" << GetMemoryUsage()
<< std::endl;
}
return 0;
}
Is this expected behavior?