I want to set up communication between an NVIDIA Orin NX and my Linux PC (which has no GPU) using UCX. On the NVIDIA side I am using UCX together with Holoscan, as shown in the following code:
from holoscan.core import Application, Operator, OperatorSpec
import numpy as np
import asyncio
import cupy as cp
import threading
# Prefer UCX-Py (`ucp`); if it is missing, or the installed `ucp` is not the
# real UCX-Py package, fall back to `ucxx` bound to the same `ucp` alias.
try:
    import ucp

    # The check below treats a module without an `init` attribute as the
    # wrong `ucp` and forces the fallback path.
    if not hasattr(ucp, "init"):
        raise ImportError("Wrong ‘ucp’ library found")
except (ImportError, AttributeError):
    import ucxx as ucp
class UCXEchoServerOp(Operator):
    """Holoscan operator that runs a UCX echo server on a background thread.

    Wire protocol per connection: the peer sends one int64 header holding the
    payload size in bytes, followed by the uint8 payload. The server answers
    with the same header followed by the same payload.
    """

    def setup(self, spec: OperatorSpec):
        """Declare the ``port`` parameter (default 13337) used by the listener."""
        # Port to listen on
        spec.param("port", 13337)

    def start(self):
        """Create the event loop and launch the daemon thread hosting the server."""
        self.loop = asyncio.new_event_loop()
        self.stop_event = asyncio.Event()

        async def echo_handler(ep):
            """Serve one connection: read header + payload, echo both back."""
            try:
                # 1. Receive size header
                header = np.empty(1, dtype=np.int64)
                await ep.recv(header)
                size = int(header[0])

                # 2. Receive into CPU buffer (more compatible for handshake)
                cpu_buffer = np.empty(size, dtype=np.uint8)
                await ep.recv(cpu_buffer)
                print(f"Server received {size} bytes on CPU")

                # 3. Move to GPU (inside the server) if you need to process it.
                #    The copy is not used further here; it is a placeholder
                #    for GPU-side processing.
                gpu_buffer = cp.array(cpu_buffer)
                print("Data moved to GPU for processing")

                # 4. Echo back from CPU. The size header goes first so the
                #    reply mirrors the request framing (header, then payload).
                await ep.send(header)
                await ep.send(cpu_buffer)
                print("Echoed data back.")
            except Exception as e:
                print(f"Connection error: {e}")
            finally:
                # Release the endpoint whether or not the exchange succeeded.
                await ep.close()

        async def ucp_progress_task():
            """Drive UCX progress until ``stop_event`` is set."""
            while not self.stop_event.is_set():
                ucp.progress()
                await asyncio.sleep(0)  # Yield to the event loop

        def run_loop():
            """Thread target: initialise UCX, listen, and run the loop until stopped."""
            asyncio.set_event_loop(self.loop)
            try:
                ucp.init()
                # NOTE(review): an "Invalid parameter" UCXError raised from
                # create_from_conn_request happens inside the library before
                # echo_handler is invoked; possibly a UCX version/transport
                # mismatch between the peers - TODO confirm with `ucx_info -v`
                # on both machines.
                self.listener = ucp.create_listener(echo_handler, port=self.port)
                print(f"UCX Server listening on port {self.listener.port}…")

                progress = self.loop.create_task(ucp_progress_task())
                self.loop.run_forever()

                # stop() sets stop_event before stopping the loop, so the
                # progress task exits on its next iteration instead of being
                # destroyed while still pending.
                self.loop.run_until_complete(progress)
            finally:
                listener = getattr(self, "listener", None)
                if listener is not None:
                    listener.close()
                self.loop.close()

        self.server_thread = threading.Thread(target=run_loop, daemon=True)
        self.server_thread.start()

    def compute(self, op_input, op_output, context):
        """No per-tick work; the server runs entirely on the background thread."""
        # The server runs in the background thread.
        # You could pass data from the server to the rest of the Holoscan
        # pipeline here using a thread-safe queue if needed.
        pass

    def stop(self):
        """Ask the server thread to shut down and wait for it to finish."""

        def request_shutdown():
            # Runs on the loop thread: asyncio primitives are not thread-safe,
            # so the event is set here rather than from the caller's thread.
            self.stop_event.set()
            self.loop.stop()

        if self.server_thread.is_alive():
            try:
                self.loop.call_soon_threadsafe(request_shutdown)
            except RuntimeError:
                # The loop was already closed because the server thread
                # failed or finished between the check and the call.
                pass
        self.server_thread.join()
class UCXServerApp(Application):
    """Holoscan application containing a single UCX echo-server operator."""

    def compose(self):
        """Add the echo-server operator, listening on port 13337, to the app."""
        self.add_operator(UCXEchoServerOp(self, name="echo_server", port=13337))
# Script entry point: build and run the server application.
if __name__ == "__main__":
    app = UCXServerApp()
    app.run()
This creates a server that listens on the desired port, but when the client tries to connect to it, the server shows the following error:
nvidia@nvidia-tegra:~/Holoscan/Example_code/UCX$ python3 ucx_nvidia.py
/home/nvidia/.local/lib/python3.10/site-packages/holoscan/core/init.py:111: RuntimeWarning: Current stack size (8.0 MB) is below the recommended minimum (32.0 MB). This may cause segmentation faults or crashes. Consider increasing the stack size with ‘ulimit -s 32768’, or if using Docker, launch the container with ‘–ulimit stack=33554432’.
warnings.warn(
[warning] [application.cpp:548] Current stack size limit (8388608 bytes / 8192 KB) is below the recommended minimum (33554432 bytes / 32768 KB). Consider increasing it with ‘ulimit -s 32768’. For Docker, use ‘–ulimit stack=33554432’
[info] [fragment.cpp:1116] Loading extensions from configs…
[info] [gxf_executor.cpp:433] Creating context
[info] [gxf_executor.cpp:2459] Activating Graph…
[info] [gxf_executor.cpp:2600] Running Graph…
[info] [gxf_executor.cpp:2602] Waiting for completion…
[info] [greedy_scheduler.cpp:191] Scheduling 1 entities
[1772783036.018107] [nvidia-tegra:6290 :0] cuda_ipc_md.c:485 UCX ERROR nvmlDeviceGetGpuFabricInfo(device, &fabric_info) failed: Not Supported
[1772783036.100788] [nvidia-tegra:6290 :0] parser.c:2359 UCX WARN unused environment variable: UCX_NVML_ENABLE (maybe: UCX_VFS_ENABLE, UCX_GVA_ENABLE?)
[1772783036.100788] [nvidia-tegra:6290 :0] parser.c:2359 UCX WARN (set UCX_WARN_UNUSED_ENV_VARS=n to suppress this warning)
UCX Server listening on port 13337…
[1772783039.443717] [nvidia-tegra:6290 :0] address.c:985 UCX ERROR failed to unpack address, invalid bandwidth 0.00
Task exception was never retrieved
future: <Task finished name=‘Task-2’ coro=<_listener_handler_coroutine() done, defined at /home/nvidia/.local/lib/python3.10/site-packages/ucp/core.py:142> exception=UCXError(‘Invalid parameter’)>
Traceback (most recent call last):
File “/home/nvidia/.local/lib/python3.10/site-packages/ucp/core.py”, line 149, in _listener_handler_coroutine
endpoint = ucx_api.UCXEndpoint.create_from_conn_request(
File “ucp/_libs/ucx_endpoint.pyx”, line 315, in ucp._libs.ucx_api.UCXEndpoint.create_from_conn_request
File “ucp/_libs/ucx_endpoint.pyx”, line 231, in ucp._libs.ucx_api.UCXEndpoint.init
File “ucp/_libs/utils.pyx”, line 107, in ucp._libs.ucx_api.assert_ucs_status
ucp._libs.exceptions.UCXError: Invalid parameter
The following is the client code, for reference:
import ucp
import asyncio
import numpy as np
import sys
async def run_client(server_ip):
    """Connect to the echo server, send 1 MiB of random bytes and verify the echo.

    Args:
        server_ip: Address of the machine running the UCX listener; the port
            is fixed at 13337.

    Connection timeouts and other exceptions are reported on stdout rather
    than propagated; the endpoint, if one was created, is always closed.
    """
    # Initialise UCX with default options.
    # NOTE(review): no options are passed, so this call does not itself
    # restrict UCX to TCP; if TCP-only is intended, the transport selection
    # must be configured explicitly - TODO confirm.
    ucp.init()

    # Explicit sentinel so the cleanup below knows whether a connection exists.
    ep = None
    try:
        print(f"Connecting to Holoscan board at {server_ip} on port 13337…")

        # We add a 10-second timeout so it doesn't hang forever
        ep = await asyncio.wait_for(
            ucp.create_endpoint(server_ip, 13337),
            timeout=10.0,
        )
        print(f"CONNECTED successfully to {server_ip}!")

        # Create 1MB of dummy data
        data_to_send = np.random.randint(0, 255, size=1024 * 1024, dtype=np.uint8)
        size = np.array([data_to_send.nbytes], dtype=np.int64)

        print(f"Sending {data_to_send.nbytes / 1024:.2f} KB…")
        await ep.send(size)
        await ep.send(data_to_send)

        # Receive Echo: an int64 size header first, then that many bytes.
        echo_size = np.empty(1, dtype=np.int64)
        await ep.recv(echo_size)
        echo_data = np.empty(int(echo_size[0]), dtype=np.uint8)
        await ep.recv(echo_data)

        if np.array_equal(data_to_send, echo_data):
            print("SUCCESS: Round-trip (CPU → GPU → CPU) verified!")
        else:
            print("DATA CORRUPTION: Echo did not match.")
    except asyncio.TimeoutError:
        print("\nERROR: Connection timed out.")
        print("Check if a firewall on the PC is blocking outgoing UCX traffic.")
    except Exception as e:
        print(f"\nUCX ERROR: {type(e).__name__} - {e}")
    finally:
        if ep is not None:
            await ep.close()
# Script entry point: run the client against the board's address.
if __name__ == "__main__":
    # Your board IP from netstat
    BOARD_IP = "172.29.129.227"
    asyncio.run(run_client(BOARD_IP))
Could anyone please help me with this?