I am using a 1080Ti GPU on Ubuntu 16.04 with CUDA 10.2 and NVIDIA driver 440.59.
1. I am trying to profile my PyTorch code using scalene. When I run my code as `scalene main.py`, I get the following error:
Error in program being profiled:
Function Not Found
Traceback (most recent call last):
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/site-packages/pynvml/nvml.py", line 782, in _nvmlGetFunctionPointer
_nvmlGetFunctionPointer_cache[name] = getattr(nvmlLib, name)
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/ctypes/__init__.py", line 386, in __getattr__
func = self.__getitem__(name)
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/ctypes/__init__.py", line 391, in __getitem__
func = self._FuncPtr((name_or_ordinal, self))
AttributeError: /usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1: undefined symbol: nvmlDeviceGetComputeRunningProcesses_v2
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/site-packages/scalene/scalene_profiler.py", line 1612, in profile_code
exec(code, the_globals, the_locals)
File "./code/main.py", line 1, in <module>
import numpy as np
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/site-packages/numpy/__init__.py", line 140, in <module>
from . import core
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/site-packages/numpy/core/__init__.py", line 22, in <module>
from . import multiarray
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/site-packages/numpy/core/multiarray.py", line 12, in <module>
from . import overrides
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/site-packages/numpy/core/overrides.py", line 9, in <module>
from numpy.compat._inspect import getargspec
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/site-packages/numpy/compat/__init__.py", line 14, in <module>
from .py3k import *
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/site-packages/scalene/scalene_profiler.py", line 719, in cpu_signal_handler
(gpu_load, gpu_mem_used) = Scalene.__gpu.get_stats()
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/site-packages/scalene/scalene_gpu.py", line 110, in get_stats
mem_used = self.gpu_memory_usage(self.__pid)
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/site-packages/scalene/scalene_gpu.py", line 101, in gpu_memory_usage
for proc in pynvml.nvmlDeviceGetComputeRunningProcesses(handle):
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/site-packages/pynvml/nvml.py", line 2223, in nvmlDeviceGetComputeRunningProcesses
return nvmlDeviceGetComputeRunningProcesses_v2(handle);
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/site-packages/pynvml/nvml.py", line 2191, in nvmlDeviceGetComputeRunningProcesses_v2
fn = _nvmlGetFunctionPointer("nvmlDeviceGetComputeRunningProcesses_v2")
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/site-packages/pynvml/nvml.py", line 785, in _nvmlGetFunctionPointer
raise NVMLError(NVML_ERROR_FUNCTION_NOT_FOUND)
pynvml.nvml.NVMLError_FunctionNotFound: Function Not Found
To check whether or not this issue is coming from the scalene library itself, I ran the following commands in a Python shell:
>>> from pynvml import *
>>> nvmlInit()
>>> nvmlSystemGetDriverVersion()
b'440.59'
>>> handle = nvmlDeviceGetHandleByIndex(0)
>>> nvmlDeviceGetComputeRunningProcesses(handle)
Traceback (most recent call last):
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/site-packages/pynvml/nvml.py", line 782, in _nvmlGetFunctionPointer
_nvmlGetFunctionPointer_cache[name] = getattr(nvmlLib, name)
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/ctypes/__init__.py", line 386, in __getattr__
func = self.__getitem__(name)
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/ctypes/__init__.py", line 391, in __getitem__
func = self._FuncPtr((name_or_ordinal, self))
AttributeError: /usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1: undefined symbol: nvmlDeviceGetComputeRunningProcesses_v2
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/site-packages/pynvml/nvml.py", line 2223, in nvmlDeviceGetComputeRunningProcesses
return nvmlDeviceGetComputeRunningProcesses_v2(handle);
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/site-packages/pynvml/nvml.py", line 2191, in nvmlDeviceGetComputeRunningProcesses_v2
fn = _nvmlGetFunctionPointer("nvmlDeviceGetComputeRunningProcesses_v2")
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/site-packages/pynvml/nvml.py", line 785, in _nvmlGetFunctionPointer
raise NVMLError(NVML_ERROR_FUNCTION_NOT_FOUND)
pynvml.nvml.NVMLError_FunctionNotFound: Function Not Found
>>> nvmlDeviceGetGraphicsRunningProcesses(handle)
Traceback (most recent call last):
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/site-packages/pynvml/nvml.py", line 782, in _nvmlGetFunctionPointer
_nvmlGetFunctionPointer_cache[name] = getattr(nvmlLib, name)
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/ctypes/__init__.py", line 386, in __getattr__
func = self.__getitem__(name)
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/ctypes/__init__.py", line 391, in __getitem__
func = self._FuncPtr((name_or_ordinal, self))
AttributeError: /usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1: undefined symbol: nvmlDeviceGetGraphicsRunningProcesses_v2
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/site-packages/pynvml/nvml.py", line 2260, in nvmlDeviceGetGraphicsRunningProcesses
return nvmlDeviceGetGraphicsRunningProcesses_v2(handle)
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/site-packages/pynvml/nvml.py", line 2228, in nvmlDeviceGetGraphicsRunningProcesses_v2
fn = _nvmlGetFunctionPointer("nvmlDeviceGetGraphicsRunningProcesses_v2")
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/site-packages/pynvml/nvml.py", line 785, in _nvmlGetFunctionPointer
raise NVMLError(NVML_ERROR_FUNCTION_NOT_FOUND)
pynvml.nvml.NVMLError_FunctionNotFound: Function Not Found
>>> list(map(str, nvmlDeviceGetGraphicsRunningProcesses(handle)))
Traceback (most recent call last):
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/site-packages/pynvml/nvml.py", line 782, in _nvmlGetFunctionPointer
_nvmlGetFunctionPointer_cache[name] = getattr(nvmlLib, name)
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/ctypes/__init__.py", line 386, in __getattr__
func = self.__getitem__(name)
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/ctypes/__init__.py", line 391, in __getitem__
func = self._FuncPtr((name_or_ordinal, self))
AttributeError: /usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1: undefined symbol: nvmlDeviceGetGraphicsRunningProcesses_v2
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/site-packages/pynvml/nvml.py", line 2260, in nvmlDeviceGetGraphicsRunningProcesses
return nvmlDeviceGetGraphicsRunningProcesses_v2(handle)
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/site-packages/pynvml/nvml.py", line 2228, in nvmlDeviceGetGraphicsRunningProcesses_v2
fn = _nvmlGetFunctionPointer("nvmlDeviceGetGraphicsRunningProcesses_v2")
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/site-packages/pynvml/nvml.py", line 785, in _nvmlGetFunctionPointer
raise NVMLError(NVML_ERROR_FUNCTION_NOT_FOUND)
pynvml.nvml.NVMLError_FunctionNotFound: Function Not Found
>>> nvmlDeviceGetComputeRunningProcesses_v2(handle)
Traceback (most recent call last):
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/site-packages/pynvml/nvml.py", line 782, in _nvmlGetFunctionPointer
_nvmlGetFunctionPointer_cache[name] = getattr(nvmlLib, name)
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/ctypes/__init__.py", line 386, in __getattr__
func = self.__getitem__(name)
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/ctypes/__init__.py", line 391, in __getitem__
func = self._FuncPtr((name_or_ordinal, self))
AttributeError: /usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1: undefined symbol: nvmlDeviceGetComputeRunningProcesses_v2
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/site-packages/pynvml/nvml.py", line 2191, in nvmlDeviceGetComputeRunningProcesses_v2
fn = _nvmlGetFunctionPointer("nvmlDeviceGetComputeRunningProcesses_v2")
File "/home/kube-admin/miniconda3/envs/temporl/lib/python3.8/site-packages/pynvml/nvml.py", line 785, in _nvmlGetFunctionPointer
raise NVMLError(NVML_ERROR_FUNCTION_NOT_FOUND)
pynvml.nvml.NVMLError_FunctionNotFound: Function Not Found