[801] Call to cuDevicePrimaryCtxRetain results in CUDA_ERROR_NOT_SUPPORTED

I am using a virtual server with an A40 GPU, and I have the following problem:

Traceback (most recent call last):
  File "/data/eperez/Workbench/cuda/pyCuda.py", line 42, in <module>
    main()
  File "/data/eperez/Workbench/cuda/pyCuda.py", line 26, in main
    d_A = cuda.to_device(A)
          ^^^^^^^^^^^^^^^^^
  File "/data/eperez/anaconda3/envs/test/lib/python3.12/site-packages/numba/cuda/cudadrv/devices.py", line 231, in _require_cuda_context
    with _runtime.ensure_context():
  File "/data/eperez/anaconda3/envs/test/lib/python3.12/contextlib.py", line 137, in __enter__
    return next(self.gen)
           ^^^^^^^^^^^^^^
  File "/data/eperez/anaconda3/envs/test/lib/python3.12/site-packages/numba/cuda/cudadrv/devices.py", line 123, in ensure_context
    newctx = self.get_or_create_context(None)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/data/eperez/anaconda3/envs/test/lib/python3.12/site-packages/numba/cuda/cudadrv/devices.py", line 138, in get_or_create_context
    return self._get_or_create_context_uncached(devnum)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/data/eperez/anaconda3/envs/test/lib/python3.12/site-packages/numba/cuda/cudadrv/devices.py", line 155, in _get_or_create_context_uncached
    return self._activate_context_for(0)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/data/eperez/anaconda3/envs/test/lib/python3.12/site-packages/numba/cuda/cudadrv/devices.py", line 177, in _activate_context_for
    newctx = gpu.get_primary_context()
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/data/eperez/anaconda3/envs/test/lib/python3.12/site-packages/numba/cuda/cudadrv/driver.py", line 671, in get_primary_context
    driver.cuDevicePrimaryCtxRetain(byref(hctx), self.id)
  File "/data/eperez/anaconda3/envs/test/lib/python3.12/site-packages/numba/cuda/cudadrv/driver.py", line 327, in safe_cuda_api_call
    self._check_ctypes_error(fname, retcode)
  File "/data/eperez/anaconda3/envs/test/lib/python3.12/site-packages/numba/cuda/cudadrv/driver.py", line 395, in _check_ctypes_error
    raise CudaAPIError(retcode, msg)
numba.cuda.cudadrv.driver.CudaAPIError: [801] Call to cuDevicePrimaryCtxRetain results in CUDA_ERROR_NOT_SUPPORTED

and the code used is:

from numba import cuda
import numpy as np

# Kernel that performs element-wise vector addition on the GPU
@cuda.jit
def vector_sum(A, B, C):
    """Compute C[i] = A[i] + B[i], one GPU thread per element."""
    # Unique global thread index across the whole grid
    idx = cuda.grid(1)

    if idx < A.size:  # Guard: the grid may have more threads than elements
        C[idx] = A[idx] + B[idx]

# Configure and launch the kernel
def main():
    # Vector size
    n = 100000000
    threads_per_block = 256  # Threads per block
    # Ceiling division so every element gets a thread
    blocks_per_grid = (n + (threads_per_block - 1)) // threads_per_block

    # Create the input/output vectors in host memory
    A = np.random.rand(n).astype(np.float32)
    B = np.random.rand(n).astype(np.float32)
    C = np.zeros(n, dtype=np.float32)  # Output vector

    # Copy the vectors to GPU memory
    d_A = cuda.to_device(A)
    d_B = cuda.to_device(B)
    d_C = cuda.to_device(C)

    # Launch the kernel to perform the vector addition
    vector_sum[blocks_per_grid, threads_per_block](d_A, d_B, d_C)

    # Copy the result back to host memory
    d_C.copy_to_host(C)

    # Print the first 10 elements of the result as a sanity check
    print("Primeros 10 resultados de la suma de vectores:")
    print(C[:10])

# Run the program
if __name__ == "__main__":
    main()

I have a similar problem with the following code:

import pycuda.driver as cuda
import pycuda.autoinit  # Esto inicializa el entorno CUDA

def get_gpu_info():
    """Print name, memory usage, managed-memory support, multiprocessor
    count and compute capability for every CUDA device PyCUDA can see."""
    # How many GPUs are visible to the driver
    n_devices = cuda.Device.count()
    print(f"Se encontraron {n_devices} dispositivos GPU.\n")

    # Report each device in turn
    for idx in range(n_devices):
        dev = cuda.Device(idx)

        print(f"Información del dispositivo {idx}:")
        print(f"  Nombre: {dev.name()}")
        print(f"  Memoria total: {dev.total_memory() / (1024 ** 2):.2f} MB")

        # mem_get_info() reports free/total memory for the current context
        free_mem, total_mem = cuda.mem_get_info()
        print(f"  Memoria libre: {free_mem / (1024 ** 2):.2f} MB")
        print(f"  Memoria ocupada: {(total_mem - free_mem) / (1024 ** 2):.2f} MB")

        # Managed memory requires compute capability 3.0 or newer
        cc = dev.compute_capability()
        if cc < (3, 0):
            print("   La capacidad de computación es menor a 3.0, no soporta Managed Memory")
        elif dev.get_attribute(cuda.device_attribute.MANAGED_MEMORY):
            print("   Soporta Managed Memory")
        else:
            print("   No soporta Managed Memory")

        # Number of streaming multiprocessors on the device
        sm_count = dev.get_attributes().get(cuda.device_attribute.MULTIPROCESSOR_COUNT)
        print(f"  Núcleos de procesamiento: {sm_count}")

        # Compute capability as major.minor
        print(f"  Capacidad de computación: {cc[0]}.{cc[1]}")

        print()

# Run the function to report GPU information
if __name__ == "__main__":
    get_gpu_info()

and I get the following error:

python pyCudaInfo.py 
Traceback (most recent call last):
  File "/data/eperez/Workbench/cuda/pyCudaInfo.py", line 2, in <module>
    import pycuda.autoinit  # Esto inicializa el entorno CUDA
    ^^^^^^^^^^^^^^^^^^^^^^
  File "/data/eperez/anaconda3/envs/test/lib/python3.12/site-packages/pycuda/autoinit.py", line 10, in <module>
    context = make_default_context()
              ^^^^^^^^^^^^^^^^^^^^^^
  File "/data/eperez/anaconda3/envs/test/lib/python3.12/site-packages/pycuda/tools.py", line 226, in make_default_context
    raise RuntimeError(
RuntimeError: make_default_context() wasn't able to create a context on any of the 1 detected devices

The nvidia-smi info is:

+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.183.01             Driver Version: 535.183.01   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|=========================================+======================+======================|
|   0  NVIDIA A40-8Q                  Off | 00000000:06:10.0 Off |                    0 |
| N/A   N/A    P8              N/A /  N/A |      0MiB /  8064MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
                                                                                         
+---------------------------------------------------------------------------------------+
| Processes:                                                                            |
|  GPU   GI   CI        PID   Type   Process name                            GPU Memory |
|        ID   ID                                                             Usage      |
|=======================================================================================|
|  No running processes found                                                           |
+---------------------------------------------------------------------------------------+

The CUDA-related package versions in my Python environment are:
cuda-cudart 12.1.105 0 nvidia
cuda-cupti 12.1.105 0 nvidia
cuda-libraries 12.1.0 0 nvidia
cuda-nvrtc 12.1.105 0 nvidia
cuda-nvtx 12.1.105 0 nvidia
cuda-opencl 12.4.127 0 nvidia
cuda-runtime 12.1.0 0 nvidia
cuda-version 11.8 hcce14f8_3
cudatoolkit 11.8.0 h6a678d5_0
pycuda 2024.1 py312hf7b93a0_3 conda-forge
pytorch 2.5.1 py3.12_cuda12.1_cudnn9.1.0_0 pytorch
pytorch-cuda 12.1 ha16c6d3_6 pytorch
pytorch-mutex 1.0 cuda pytorch

I have the same problem when using OpenACC; I get this error: [801] CUDA_ERROR_NOT_SUPPORTED