Hello everyone!
I am using OptiX 8.0 for a project, and while optimizing memory usage I noticed something that looks like a GPU memory leak — though it may well be a misunderstanding of the API on my part. Hence my question below.
I set up a simple for-loop in C++ which calls optixDeviceContextCreate and optixDeviceContextDestroy repeatedly. I notice that as this runs, my GPU memory usage grows slowly and steadily.
- The VRAM usage keeps growing with more iterations. E.g., with the iteration count set to 2,500, the program goes from ~230 MiB to >1.1 GiB of VRAM.
- This isn’t a critical issue—I understand it’s uncommon to actually create/destroy thousands of contexts in a single application; I just want to confirm my understanding of how the API works.
- Is this expected? If not, what is the correct way to fully clean up the OptiX device context?
Thank you in advance!
Please find additional information, including a repro program, below.
My system:
Ubuntu 20.04.6 LTS
GPU: NVIDIA RTX 4500 Ada Generation
Driver: 570.133.20
CUDA Driver Version: 12.8
CUDA Runtime Version: 12.6
OptiX Header Version: 8.0.0
Snippet to reproduce this leak (requires only CUDA RT API and the OptiX headers). Disclaimer: After I identified the strange memory behavior, I created the repro snippet with help from an AI code assistant.
// Standalone OptiX context memory leak reproducer.
// Tests whether optixDeviceContextCreate/Destroy leaks memory.
#include <cuda_runtime_api.h>
#include <optix.h>
#include <optix_function_table_definition.h>
#include <optix_stubs.h>
#include <chrono>
#include <iomanip>
#include <iostream>
#include <thread>
// Evaluates a CUDA Runtime API call and terminates the process on failure,
// printing the symbolic error name, source location, and error description.
// NOTE: comments must stay outside the macro — a `//` before a trailing `\`
// would splice the continuation lines into the comment.
#define CUDA_CHECK(call) \
do { \
cudaError_t rc = call; \
if (rc != cudaSuccess) { \
std::cerr << "CUDA error " << cudaGetErrorName(rc) << " at " << __FILE__ << ":" << __LINE__ << ": " \
<< cudaGetErrorString(rc) << std::endl; \
exit(1); \
} \
} while (0)
// Evaluates an OptiX API call and terminates the process on failure.
// Improved to print the symbolic error name and description via
// optixGetErrorName/optixGetErrorString (from the OptiX headers already
// included above), mirroring the diagnostics produced by CUDA_CHECK,
// instead of only the raw numeric OptixResult value.
#define OPTIX_CHECK(call) \
do { \
OptixResult res = call; \
if (res != OPTIX_SUCCESS) { \
std::cerr << "OptiX error " << optixGetErrorName(res) << " at " << __FILE__ << ":" << __LINE__ << ": " \
<< optixGetErrorString(res) << std::endl; \
exit(1); \
} \
} while (0)
// OptiX device-context log callback that intentionally discards every
// message; installed to keep the console quiet during the measurement runs.
static void context_log_cb(unsigned int /*level*/, const char * /*tag*/,
                           const char * /*message*/, void * /*cbdata*/) {}
// Returns the amount of device memory currently in use, in MiB, as
// reported by cudaMemGetInfo for the active CUDA device. Aborts via
// CUDA_CHECK if the query fails.
double get_gpu_memory_used_mb() {
    size_t free_bytes = 0;
    size_t total_bytes = 0;
    CUDA_CHECK(cudaMemGetInfo(&free_bytes, &total_bytes));
    const size_t used_bytes = total_bytes - free_bytes;
    const double mib = 1024.0 * 1024.0;
    return static_cast<double>(used_bytes) / mib;
}
// Performs one full OptiX device-context create/configure/destroy cycle.
// Extracted so the warmup and measurement loops share one implementation
// instead of duplicating the same four calls.
static void run_context_cycle() {
    OptixDeviceContext context = nullptr;
    OPTIX_CHECK(optixDeviceContextCreate(nullptr, 0, &context));
    OPTIX_CHECK(optixDeviceContextSetLogCallback(context, context_log_cb, nullptr, 3));
    // Disable the disk cache so cache management cannot influence the
    // memory measurement.
    OPTIX_CHECK(optixDeviceContextSetCacheEnabled(context, 0));
    OPTIX_CHECK(optixDeviceContextDestroy(context));
}

// Entry point: measures GPU memory usage before and after N OptiX
// device-context create/destroy cycles and reports the per-cycle delta.
// Optional argv[1] overrides the iteration count N (default 150).
// Returns 0 on completion, 1 on invalid input (CHECK macros exit(1) on
// API failure).
int main(int argc, char **argv) {
    int num_iterations = 150;
    const int warmup_iterations = 10;
    if (argc > 1) {
        num_iterations = std::atoi(argv[1]);
        // std::atoi returns 0 on non-numeric input; reject <= 0 here to
        // avoid a division by zero in the per-context leak computation.
        if (num_iterations <= 0) {
            std::cerr << "Invalid iteration count: " << argv[1] << std::endl;
            return 1;
        }
    }
    std::cout << "OptiX Context Memory Leak Reproducer" << std::endl;
    std::cout << "=====================================" << std::endl;
    // Initialize CUDA and report the device under test.
    CUDA_CHECK(cudaSetDevice(0));
    cudaDeviceProp props;
    CUDA_CHECK(cudaGetDeviceProperties(&props, 0));
    std::cout << "GPU: " << props.name << std::endl;
    CUDA_CHECK(cudaFree(0)); // Force CUDA primary context creation
    // Initialize OptiX (once per process).
    OPTIX_CHECK(optixInit());
    std::cout << "OptiX initialized" << std::endl;
    // Warmup cycles so one-time lazy allocations don't count as leakage.
    std::cout << "\nWarmup (" << warmup_iterations << " iterations)..." << std::endl;
    for (int i = 0; i < warmup_iterations; i++) {
        run_context_cycle();
    }
    CUDA_CHECK(cudaDeviceSynchronize());
    // Brief pause so any deferred driver-side frees settle before sampling.
    std::this_thread::sleep_for(std::chrono::milliseconds(200));
    // Measure starting memory.
    double start_gpu_mb = get_gpu_memory_used_mb();
    std::cout << "\nStarting GPU memory: " << std::fixed << std::setprecision(2) << start_gpu_mb << " MB" << std::endl;
    // Main test loop.
    std::cout << "Running " << num_iterations << " create/destroy cycles..." << std::endl;
    auto start_time = std::chrono::steady_clock::now();
    for (int i = 0; i < num_iterations; i++) {
        run_context_cycle();
        // Periodic progress sample every 100 cycles.
        if ((i + 1) % 100 == 0) {
            CUDA_CHECK(cudaDeviceSynchronize());
            double current_mb = get_gpu_memory_used_mb();
            std::cout << "  Iteration " << (i + 1) << ": GPU = " << current_mb << " MB" << std::endl;
        }
    }
    auto end_time = std::chrono::steady_clock::now();
    CUDA_CHECK(cudaDeviceSynchronize());
    std::this_thread::sleep_for(std::chrono::milliseconds(200));
    // Measure ending memory and report.
    double end_gpu_mb = get_gpu_memory_used_mb();
    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
    std::cout << "\n=====================================" << std::endl;
    std::cout << "Results:" << std::endl;
    std::cout << "  Iterations: " << num_iterations << std::endl;
    std::cout << "  Duration: " << duration.count() << " ms" << std::endl;
    std::cout << "  Start GPU memory: " << start_gpu_mb << " MB" << std::endl;
    std::cout << "  End GPU memory: " << end_gpu_mb << " MB" << std::endl;
    std::cout << "  Memory change: " << (end_gpu_mb - start_gpu_mb) << " MB" << std::endl;
    // Safe: num_iterations is validated > 0 above.
    double leak_per_instance_kib = (end_gpu_mb - start_gpu_mb) * 1024.0 / num_iterations;
    std::cout << "  Leak per context: " << leak_per_instance_kib << " KiB" << std::endl;
    if (leak_per_instance_kib > 1.0) {
        std::cout << "\n⚠️ LEAK CONFIRMED: ~" << leak_per_instance_kib
                  << " KiB per optixDeviceContextCreate/Destroy cycle" << std::endl;
    } else {
        // Fixed mojibake: original source contained "âś…" (mis-encoded "✅").
        std::cout << "\n✅ No significant leak detected" << std::endl;
    }
    return 0;
}
Sample output on my machine:
GPU: NVIDIA RTX 4500 Ada Generation
OptiX initialized
Warmup (10 iterations)...
Starting GPU memory: 232.69 MB
Running 2500 create/destroy cycles...
Iteration 100: GPU = 270.69 MB
<snip>
Iteration 2500: GPU = 1182.69 MB
=====================================
Results:
Iterations: 2500
Duration: 166714 ms
Start GPU memory: 232.69 MB
End GPU memory: 1182.69 MB
Memory change: 950.00 MB
Leak per context: 389.12 KiB
⚠️ LEAK CONFIRMED: ~389.12 KiB per optixDeviceContextCreate/Destroy cycle