I am using a virtual server with an A40 GPU (an A40-8Q vGPU profile, according to the nvidia-smi output below), and I run into the following problem:
Traceback (most recent call last):
  File "/data/eperez/Workbench/cuda/pyCuda.py", line 42, in <module>
    main()
  File "/data/eperez/Workbench/cuda/pyCuda.py", line 26, in main
    d_A = cuda.to_device(A)
          ^^^^^^^^^^^^^^^^^
  File "/data/eperez/anaconda3/envs/test/lib/python3.12/site-packages/numba/cuda/cudadrv/devices.py", line 231, in _require_cuda_context
    with _runtime.ensure_context():
  File "/data/eperez/anaconda3/envs/test/lib/python3.12/contextlib.py", line 137, in __enter__
    return next(self.gen)
           ^^^^^^^^^^^^^^
  File "/data/eperez/anaconda3/envs/test/lib/python3.12/site-packages/numba/cuda/cudadrv/devices.py", line 123, in ensure_context
    newctx = self.get_or_create_context(None)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/data/eperez/anaconda3/envs/test/lib/python3.12/site-packages/numba/cuda/cudadrv/devices.py", line 138, in get_or_create_context
    return self._get_or_create_context_uncached(devnum)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/data/eperez/anaconda3/envs/test/lib/python3.12/site-packages/numba/cuda/cudadrv/devices.py", line 155, in _get_or_create_context_uncached
    return self._activate_context_for(0)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/data/eperez/anaconda3/envs/test/lib/python3.12/site-packages/numba/cuda/cudadrv/devices.py", line 177, in _activate_context_for
    newctx = gpu.get_primary_context()
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/data/eperez/anaconda3/envs/test/lib/python3.12/site-packages/numba/cuda/cudadrv/driver.py", line 671, in get_primary_context
    driver.cuDevicePrimaryCtxRetain(byref(hctx), self.id)
  File "/data/eperez/anaconda3/envs/test/lib/python3.12/site-packages/numba/cuda/cudadrv/driver.py", line 327, in safe_cuda_api_call
    self._check_ctypes_error(fname, retcode)
  File "/data/eperez/anaconda3/envs/test/lib/python3.12/site-packages/numba/cuda/cudadrv/driver.py", line 395, in _check_ctypes_error
    raise CudaAPIError(retcode, msg)
numba.cuda.cudadrv.driver.CudaAPIError: [801] Call to cuDevicePrimaryCtxRetain results in CUDA_ERROR_NOT_SUPPORTED
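For reference, the failing driver call can be exercised directly through ctypes, with no Numba involved (a minimal sketch; it assumes libcuda.so.1 is resolvable on the loader path):

import ctypes

# Load the CUDA driver library directly (assumption: libcuda.so.1 resolves on this system)
libcuda = ctypes.CDLL("libcuda.so.1")

# Initialize the driver API; 0 means CUDA_SUCCESS
print("cuInit:", libcuda.cuInit(0))

# Get a handle to device 0
dev = ctypes.c_int()
print("cuDeviceGet:", libcuda.cuDeviceGet(ctypes.byref(dev), 0))

# This is the call that returns 801 (CUDA_ERROR_NOT_SUPPORTED) in the traceback above
ctx = ctypes.c_void_p()
print("cuDevicePrimaryCtxRetain:", libcuda.cuDevicePrimaryCtxRetain(ctypes.byref(ctx), dev))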
The code in pyCuda.py that triggers the traceback is:
 1 from numba import cuda
 2 import numpy as np
 3
 4 # Define the kernel that performs the vector addition on the GPU
 5 @cuda.jit
 6 def vector_sum(A, B, C):
 7     # Get the unique thread index
 8     idx = cuda.grid(1)
 9
10     if idx < A.size:  # Make sure the index does not exceed the bounds
11         C[idx] = A[idx] + B[idx]  # Add the corresponding elements
12
13 # Configure and launch the kernel
14 def main():
15     # Size of the vectors
16     n = 100000000
17     threads_per_block = 256  # Number of threads per block
18     blocks_per_grid = (n + (threads_per_block - 1)) // threads_per_block  # Number of blocks
19
20     # Create the vectors in host memory
21     A = np.random.rand(n).astype(np.float32)
22     B = np.random.rand(n).astype(np.float32)
23     C = np.zeros(n, dtype=np.float32)  # Output vector
24
25     # Copy the vectors to GPU memory
26     d_A = cuda.to_device(A)
27     d_B = cuda.to_device(B)
28     d_C = cuda.to_device(C)
29
30     # Launch the kernel to perform the vector addition
31     vector_sum[blocks_per_grid, threads_per_block](d_A, d_B, d_C)
32
33     # Copy the result back to host memory
34     d_C.copy_to_host(C)
35
36     # Print the first 10 elements of the result as a check
37     print("First 10 results of the vector addition:")
38     print(C[:10])
39
40 # Run the program
41 if __name__ == "__main__":
42     main()
I have a similar problem with the following code:
import pycuda.driver as cuda
import pycuda.autoinit  # This initializes the CUDA environment

def get_gpu_info():
    # Get the number of available GPUs
    device_count = cuda.Device.count()
    print(f"Found {device_count} GPU device(s).\n")

    # Show the information of each available GPU
    for i in range(device_count):
        device = cuda.Device(i)
        print(f"Device {i} information:")
        print(f"  Name: {device.name()}")
        print(f"  Total memory: {device.total_memory() / (1024 ** 2):.2f} MB")

        # Use cuda.mem_get_info() to get the free and total memory
        # (note: this reports the device of the current context, not device i)
        free_mem, total_mem = cuda.mem_get_info()
        print(f"  Free memory: {free_mem / (1024 ** 2):.2f} MB")
        print(f"  Used memory: {(total_mem - free_mem) / (1024 ** 2):.2f} MB")

        # Check whether the GPU supports managed memory
        if device.compute_capability() >= (3, 0):  # compute capability 3.0 or higher is required
            if device.get_attribute(cuda.device_attribute.MANAGED_MEMORY):
                print("  Supports managed memory")
            else:
                print("  Does not support managed memory")
        else:
            print("  Compute capability below 3.0, managed memory not supported")

        # Get the number of multiprocessors (SMs) of the GPU
        multiprocessors = device.get_attributes().get(cuda.device_attribute.MULTIPROCESSOR_COUNT)
        print(f"  Multiprocessors: {multiprocessors}")

        # Get the compute capability
        compute_capability = device.compute_capability()
        print(f"  Compute capability: {compute_capability[0]}.{compute_capability[1]}")
        print()

# Run the function to print the GPU information
if __name__ == "__main__":
    get_gpu_info()
and I get the following error:
python pyCudaInfo.py
Traceback (most recent call last):
  File "/data/eperez/Workbench/cuda/pyCudaInfo.py", line 2, in <module>
    import pycuda.autoinit  # This initializes the CUDA environment
    ^^^^^^^^^^^^^^^^^^^^^^
  File "/data/eperez/anaconda3/envs/test/lib/python3.12/site-packages/pycuda/autoinit.py", line 10, in <module>
    context = make_default_context()
              ^^^^^^^^^^^^^^^^^^^^^^
  File "/data/eperez/anaconda3/envs/test/lib/python3.12/site-packages/pycuda/tools.py", line 226, in make_default_context
    raise RuntimeError(
RuntimeError: make_default_context() wasn't able to create a context on any of the 1 detected devices
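For completeness, a minimal variant that skips pycuda.autoinit and creates the context explicitly (a sketch, not yet verified on this machine), which should isolate the context-creation step:

import pycuda.driver as cuda

# Create the context by hand instead of relying on pycuda.autoinit
cuda.init()
dev = cuda.Device(0)
ctx = dev.make_context()  # context creation happens here, not at import time
try:
    free_mem, total_mem = cuda.mem_get_info()
    print(f"Free: {free_mem / 1024**2:.2f} MB of {total_mem / 1024**2:.2f} MB")
finally:
    ctx.pop()  # always release the context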
The nvidia-smi output is:
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.183.01             Driver Version: 535.183.01   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|=========================================+======================+======================|
|   0  NVIDIA A40-8Q                  Off | 00000000:06:10.0 Off |                    0 |
| N/A   N/A    P8              N/A /  N/A |      0MiB /  8064MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+

+---------------------------------------------------------------------------------------+
| Processes:                                                                             |
|  GPU   GI   CI        PID   Type   Process name                             GPU Memory |
|        ID   ID                                                              Usage      |
|=======================================================================================|
|  No running processes found                                                            |
+---------------------------------------------------------------------------------------+
The CUDA-related packages in my conda environment are:
cuda-cudart        12.1.105      0                             nvidia
cuda-cupti         12.1.105      0                             nvidia
cuda-libraries     12.1.0        0                             nvidia
cuda-nvrtc         12.1.105      0                             nvidia
cuda-nvtx          12.1.105      0                             nvidia
cuda-opencl        12.4.127      0                             nvidia
cuda-runtime       12.1.0        0                             nvidia
cuda-version       11.8          hcce14f8_3
cudatoolkit        11.8.0        h6a678d5_0
pycuda             2024.1        py312hf7b93a0_3               conda-forge
pytorch            2.5.1         py3.12_cuda12.1_cudnn9.1.0_0  pytorch
pytorch-cuda       12.1          ha16c6d3_6                    pytorch
pytorch-mutex      1.0           cuda                          pytorch
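PyTorch 2.5.1 (built for CUDA 12.1) is installed in the same environment, so a quick probe through its runtime path would show whether the failure is limited to the driver-API primary-context calls. A minimal sketch of such a check:

import torch

# Probe CUDA availability through PyTorch's runtime path, to contrast
# with the driver-API failures above
print("torch.cuda.is_available():", torch.cuda.is_available())
if torch.cuda.is_available():
    # Allocate on the GPU and run a trivial operation
    x = torch.ones(4, device="cuda")
    print((x + x).cpu())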
I have the same problem when using OpenACC: it also fails with [801] CUDA_ERROR_NOT_SUPPORTED.