On a “standard” CUDA Linux install, NVML example code is provided in /usr/local/cuda/nvml/example.
If I modify that code (CUDA 12.2) as follows:
// This is a simple example on how you can modify GPU's state
result = nvmlDeviceGetComputeMode(device, &compute_mode);
if (NVML_ERROR_NOT_SUPPORTED == result)
printf("\t This is not CUDA capable device\n");
else if (NVML_SUCCESS != result)
{
printf("Failed to get compute mode for device %u: %s\n", i, nvmlErrorString(result));
goto Error;
}
else
{
#if 0
// try to change compute mode
printf("\t Changing device's compute mode from '%s' to '%s'\n",
convertToComputeModeString(compute_mode),
convertToComputeModeString(NVML_COMPUTEMODE_PROHIBITED));
result = nvmlDeviceSetComputeMode(device, NVML_COMPUTEMODE_PROHIBITED);
if (NVML_ERROR_NO_PERMISSION == result)
printf("\t\t Need root privileges to do that: %s\n", nvmlErrorString(result));
else if (NVML_ERROR_NOT_SUPPORTED == result)
printf("\t\t Compute mode prohibited not supported. You might be running on\n"
"\t\t windows in WDDM driver model or on non-CUDA capable GPU\n");
else if (NVML_SUCCESS != result)
{
printf("\t\t Failed to set compute mode for device %u: %s\n", i, nvmlErrorString(result));
goto Error;
}
else
{
printf("\t Restoring device's compute mode back to '%s'\n",
convertToComputeModeString(compute_mode));
result = nvmlDeviceSetComputeMode(device, compute_mode);
if (NVML_SUCCESS != result)
{
printf("\t\t Failed to restore compute mode for device %u: %s\n", i, nvmlErrorString(result));
goto Error;
}
}
#else
// Query the compute processes currently running on this device.
// infoCount is an in/out parameter: on entry it must hold the capacity of
// infos[], and on success it holds the number of entries actually written.
// Deriving it from the array keeps the two in sync; passing a smaller value
// makes NVML fail with NVML_ERROR_INSUFFICIENT_SIZE as soon as more
// processes are running than the count passed in.
nvmlProcessInfo_t infos[8];
unsigned int infoCount = sizeof(infos) / sizeof(infos[0]);
result = nvmlDeviceGetComputeRunningProcesses_v2(device, &infoCount, infos);
if (NVML_SUCCESS != result)
{
    // Report the failure but keep going; this probe is informational only.
    printf("get compute running processes returned: %d, %s\n", (int)result, nvmlErrorString(result));
}
else
{
    printf("infoCount = %u\n", infoCount);
}
#endif
}
}
result = nvmlShutdown();
and then use the supplied makefile to build it, I get an output like this on a machine with a single L4 GPU, when no compute process is running on that GPU:
# ./example
Found 1 device
Listing devices:
0. NVIDIA L4 [00000000:82:00.0]
infoCount = 0
All done.
Press ENTER to continue...
OTOH if I run a trivial compute process that does a cudaSetDevice(0) and then sleep() for a number of seconds, and concurrently run the same example, I get this:
# ./example
Found 1 device
Listing devices:
0. NVIDIA L4 [00000000:82:00.0]
infoCount = 1
All done.
Press ENTER to continue...
So the mechanism seems to work for me.
(For future readers who may find my usage of the _v2 variant of the API call a bit unusual, there was a kerfuffle recently with the development path of NVML. I don’t wish to go into it here. See here for some detail. I happened to be using a 535.86.10 driver)