Hello!
How can I get the amount of shared VRAM (used and total) from the command line (the same information I can see in the Windows Task Manager)?
Is it possible somehow using nvidia-smi?
This is how it appears in Task Manager:
Thank you.
In nvidia-smi -q
there seems to be a BAR1 section that should reflect shared VRAM. The total looks correct, but the used value always seems to be off:
BAR1 Memory Usage
Total : 8192 MiB
Used : 8165 MiB
Free : 27 MiB
Actual usage (see the Imgur screenshot).
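By the way, the same BAR1 section can be printed on its own by limiting the query output with nvidia-smi -q -d MEMORY. That only filters what -q shows, though; the used value there is the same.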
I’ve written a small C++ program, but it gets exactly the same results and couldn’t retrieve the actual used shared memory. Results of the program:
Found 1 CUDA devices.
Before allocation:
Device 0 VRAM Usage: 0.15 GB VRAM Free: 5.85 GB (Total: 6.00 GB)
BAR1 Memory Used: 8165 MB
BAR1 Memory Total: 8192 MB
BAR1 Memory Free: 26 MB
After allocation:
Device 0 VRAM Usage: 0.25 GB VRAM Free: 5.75 GB (Total: 6.00 GB)
BAR1 Memory Used: 8165 MB
BAR1 Memory Total: 8192 MB
BAR1 Memory Free: 26 MB
Press any key to continue . . .
Checking Task Manager before pressing a key and finishing the program:
Program code:
#include <chrono>
#include <cstdlib>
#include <cuda_runtime.h>
#include <iomanip>
#include <iostream>
#include <nvml.h>
#include <thread>

void checkCudaError(cudaError_t error, const char *file, int line) {
    if (error != cudaSuccess) {
        std::cerr << file << ":" << line << " CUDA error: " << cudaGetErrorString(error)
                  << std::endl;
        exit(EXIT_FAILURE);
    }
}

#define CHECK_CUDA(call) checkCudaError(call, __FILE__, __LINE__)

void printVRAMUsage(nvmlDevice_t device, int deviceIndex) {
    // Add a small delay to allow for any potential lag in memory reporting
    std::this_thread::sleep_for(std::chrono::seconds(1));

    // Dedicated VRAM (framebuffer) usage as reported by NVML
    nvmlMemory_t memory;
    nvmlReturn_t result = nvmlDeviceGetMemoryInfo(device, &memory);
    if (NVML_SUCCESS != result) {
        std::cerr << "Failed to get memory info: " << nvmlErrorString(result) << std::endl;
        return;
    }
    std::cout << "Device " << deviceIndex << " VRAM Usage: " << std::fixed << std::setprecision(2)
              << static_cast<double>(memory.used) / (1024 * 1024 * 1024) << " GB"
              << " VRAM Free: " << std::fixed << std::setprecision(2)
              << static_cast<double>(memory.free) / (1024 * 1024 * 1024) << " GB"
              << " (Total: " << static_cast<double>(memory.total) / (1024 * 1024 * 1024) << " GB)"
              << std::endl;

    // BAR1 aperture usage, the closest thing NVML exposes to "shared" memory
    nvmlBAR1Memory_t bar1Memory;
    result = nvmlDeviceGetBAR1MemoryInfo(device, &bar1Memory);
    if (NVML_SUCCESS == result) {
        std::cout << "BAR1 Memory Used: " << bar1Memory.bar1Used / (1024 * 1024) << " MB"
                  << std::endl;
        std::cout << "BAR1 Memory Total: " << bar1Memory.bar1Total / (1024 * 1024) << " MB"
                  << std::endl;
        std::cout << "BAR1 Memory Free: " << bar1Memory.bar1Free / (1024 * 1024) << " MB"
                  << std::endl;
    }
}

void run_cudaHostAlloc(size_t gb) {
    size_t size = gb * 1024 * 1024 * 1024;
    void *ptr;
    cudaError_t result = cudaHostAlloc(&ptr, size, cudaHostAllocMapped);
    if (result != cudaSuccess) {
        std::cerr << "Failed to allocate zero-copy memory: " << cudaGetErrorString(result)
                  << std::endl;
        return;
    }
    // Intentionally not freed: the allocation has to stay alive while the
    // program is paused so the usage remains visible in Task Manager.
}

int main() {
    nvmlReturn_t result;
    unsigned int device_count, i;
    nvmlDevice_t device;

    result = nvmlInit();
    if (NVML_SUCCESS != result) {
        std::cerr << "Failed to initialize NVML: " << nvmlErrorString(result) << std::endl;
        return 1;
    }

    result = nvmlDeviceGetCount(&device_count);
    if (NVML_SUCCESS != result) {
        std::cerr << "Failed to get device count: " << nvmlErrorString(result) << std::endl;
        nvmlShutdown();
        return 1;
    }
    std::cout << "Found " << device_count << " CUDA devices." << std::endl;

    for (i = 0; i < device_count; i++) {
        result = nvmlDeviceGetHandleByIndex(i, &device);
        if (NVML_SUCCESS != result) {
            std::cerr << "Failed to get device handle: " << nvmlErrorString(result) << std::endl;
            continue;
        }
        std::cout << "Before allocation:" << std::endl;
        printVRAMUsage(device, i);
        run_cudaHostAlloc(4);
        std::cout << "After allocation:" << std::endl;
        printVRAMUsage(device, i);
        system("pause");
    }

    nvmlShutdown();
    return 0;
}
It can be compiled with:
g++ -o vram_usage vram_usage.cpp -I "\NVIDIA GPU Computing Toolkit\CUDA\v12.6\include" -L "\NVIDIA GPU Computing Toolkit\CUDA\v12.6\lib\x64" -lcuda -lcudart -lnvml && .\vram_usage.exe
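I built it with MinGW; with MSVC something along these lines should work too (untested sketch, same truncated CUDA paths as above):
cl /EHsc vram_usage.cpp /I "\NVIDIA GPU Computing Toolkit\CUDA\v12.6\include" /link /LIBPATH:"\NVIDIA GPU Computing Toolkit\CUDA\v12.6\lib\x64" cudart.lib nvml.lib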
I’ve found that I can retrieve the shared memory usage from performance counters using PowerShell:
Get-Counter -Counter '\GPU Adapter Memory(*)\Shared Usage'
I have no idea what the proper way to decode those LUIDs is, but the first one seems to be the shared GPU memory used by the CPU's integrated graphics. I couldn't figure out what the second one (8192) is, and the third one is the NVIDIA card's shared GPU memory, exactly the same number as in Task Manager. So adding them all up gives the actual shared VRAM usage.
Still no idea why I can't just retrieve it from nvidia-smi.
Timestamp           CounterSamples
---------           --------------
11.08.2024 00:02:25 \\pc\gpu adapter memory(luid_0x00000000_0x00013245_phys_0)\shared usage : 1200861184
                    \\pc\gpu adapter memory(luid_0x00000000_0x00013658_phys_0)\shared usage : 8192
                    \\pc\gpu adapter memory(luid_0x00000000_0x0001368a_phys_0)\shared usage : 262144
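For the sample above, the three instances add up to 1200861184 + 8192 + 262144 = 1201131520 bytes, i.e. roughly 1.12 GiB of shared GPU memory in use.

If anyone wants the same counters without shelling out to PowerShell, the PDH API can read them directly from C++. Here's a minimal sketch, assuming the English counter path \GPU Adapter Memory(*)\Shared Usage and linking against pdh.lib (with MinGW, -lpdh should do); error handling mostly trimmed:

#include <windows.h>
#include <pdh.h>
#include <iostream>
#include <vector>

// Reads every instance of '\GPU Adapter Memory(*)\Shared Usage' via PDH,
// prints each one, and sums them up. Link against pdh.lib.
int main() {
    PDH_HQUERY query = nullptr;
    PDH_HCOUNTER counter = nullptr;
    if (PdhOpenQueryA(nullptr, 0, &query) != ERROR_SUCCESS)
        return 1;
    // Wildcard path: one instance per GPU adapter LUID.
    if (PdhAddEnglishCounterA(query, "\\GPU Adapter Memory(*)\\Shared Usage", 0, &counter) != ERROR_SUCCESS)
        return 1;
    // 'Shared Usage' is an instantaneous counter, so one sample is enough.
    if (PdhCollectQueryData(query) != ERROR_SUCCESS)
        return 1;

    // First call with a null buffer just reports the required size.
    DWORD bufferSize = 0, itemCount = 0;
    PdhGetFormattedCounterArrayA(counter, PDH_FMT_LARGE, &bufferSize, &itemCount, nullptr);
    std::vector<BYTE> buffer(bufferSize);
    auto *items = reinterpret_cast<PDH_FMT_COUNTERVALUE_ITEM_A *>(buffer.data());
    if (PdhGetFormattedCounterArrayA(counter, PDH_FMT_LARGE, &bufferSize, &itemCount, items) != ERROR_SUCCESS)
        return 1;

    long long totalBytes = 0;
    for (DWORD i = 0; i < itemCount; ++i) {
        std::cout << items[i].szName << " : " << items[i].FmtValue.largeValue << " bytes" << std::endl;
        totalBytes += items[i].FmtValue.largeValue;
    }
    std::cout << "Total shared usage: " << totalBytes / (1024.0 * 1024.0 * 1024.0) << " GiB" << std::endl;

    PdhCloseQuery(query);
    return 0;
}

It enumerates one instance per adapter LUID, the same list Get-Counter returns, so the question of which LUID is which still applies.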