May I know how you confirmed that it is running on the GPU?
import onnx
import onnx.helper as helper
import onnxruntime as ort
import numpy as np
import time
import os
def create_matmul_onnx(file_name):
    # Build a single-node MatMul graph: Y = X @ W with 1024x1024 float inputs.
    X = helper.make_tensor_value_info("X", onnx.TensorProto.FLOAT, [1024, 1024])
    W = helper.make_tensor_value_info("W", onnx.TensorProto.FLOAT, [1024, 1024])
    Y = helper.make_tensor_value_info("Y", onnx.TensorProto.FLOAT, [1024, 1024])
    node = helper.make_node("MatMul", ["X", "W"], ["Y"])
    graph = helper.make_graph([node], "MatMulGraph", [X, W], [Y])
    opset_import = [helper.make_operatorsetid("", 20)]
    model = helper.make_model(graph, producer_name="matmul_test", opset_imports=opset_import)
    model.ir_version = 9
    onnx.save(model, file_name)
    print(f"ONNX model saved as {file_name} with IR version 9 and Opset 20.")

def run_matmul(model_path, provider_name):
    try:
        # Control ONNX Runtime log output (severity: 0 = Verbose, 1 = Info, 2 = Warning, 3 = Error, 4 = Fatal)
        os.environ["ORT_LOG_SEVERITY"] = "0"
        session_options = ort.SessionOptions()
        session_options.log_severity_level = 2
        session_options.log_verbosity_level = 2
        ort_session = ort.InferenceSession(model_path, session_options, providers=[provider_name])
        print("Session Providers:", ort_session.get_providers())
        print(f"🚀 Using {provider_name} for inference.")
        X_data = np.random.rand(1024, 1024).astype(np.float32)
        W_data = np.random.rand(1024, 1024).astype(np.float32)
        print("Running MatMul...")
        start_time = time.time()
        outputs = ort_session.run(None, {"X": X_data, "W": W_data})
        end_time = time.time()
        print(f"Output Shape: {outputs[0].shape}")
        print(f"Execution Time on {provider_name}: {end_time - start_time:.6f} seconds\n")
    except Exception as e:
        print(f"Error during inference on {provider_name}:", e)

if __name__ == "__main__":
    create_matmul_onnx("matmul.onnx")
    providers = ort.get_available_providers()
    print("Available Providers:", providers)
    if "CUDAExecutionProvider" in providers:
        run_matmul("matmul.onnx", "CUDAExecutionProvider")
    else:
        print("❗ CUDAExecutionProvider not available. Check your GPU and CUDA setup.")
    if "CPUExecutionProvider" in providers:
        run_matmul("matmul.onnx", "CPUExecutionProvider")
    else:
        print("❗ CPUExecutionProvider not available. Check your ONNX Runtime installation.")
I tried the code. The first run used CUDA and the second used the CPU. However, I see that the CPU is faster than the GPU.
Could you please check and confirm?
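One note on the timing: the very first CUDA run includes provider initialization, kernel setup, and host-to-device copies, so timing a single sess.run() heavily penalizes the GPU. Below is a minimal sketch of a fairer comparison with warm-up runs and averaging; the warm-up and iteration counts are arbitrary choices for illustration.

    import time
    import numpy as np
    import onnxruntime as ort

    def benchmark(model_path, provider, warmup=5, iters=50):
        sess = ort.InferenceSession(model_path, providers=[provider])
        feeds = {"X": np.random.rand(1024, 1024).astype(np.float32),
                 "W": np.random.rand(1024, 1024).astype(np.float32)}

        for _ in range(warmup):   # warm-up runs are not timed
            sess.run(None, feeds)

        start = time.perf_counter()
        for _ in range(iters):
            sess.run(None, feeds)
        avg = (time.perf_counter() - start) / iters
        print(f"{provider}: {avg * 1000:.3f} ms per run (average over {iters} runs)")

    for p in ("CUDAExecutionProvider", "CPUExecutionProvider"):
        if p in ort.get_available_providers():
            benchmark("matmul.onnx", p)

Note that every run still copies the inputs from host to device, so for a 1024x1024 MatMul the transfer cost can still dominate on a Jetson; keeping the data on the GPU (for example via IOBinding) is the next step if you want to isolate pure compute time.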
Also, are you seeing any warnings similar to the one below?
/usr/lib/aarch64-linux-gnu/libcudnn.so.8: version `libcudnn.so.8' not found (required by /home/jetson/.conda/envs/test/lib/python3.10/site-packages/onnxruntime/capi/libonnxruntime_providers_cuda.so)
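A warning like that typically indicates a mismatch between the cuDNN version the onnxruntime-gpu wheel was built against and the cuDNN installed with JetPack; when the CUDA provider fails to load, execution silently falls back to the CPU. A minimal sketch for checking the installed build and which providers a session actually ends up with (it reuses the matmul.onnx from the script above):

    import onnxruntime as ort

    print("onnxruntime version:", ort.__version__)
    print("build device       :", ort.get_device())             # "GPU" for onnxruntime-gpu builds
    print("available providers:", ort.get_available_providers())

    # If the cuDNN library cannot be loaded, creating a session with
    # CUDAExecutionProvider logs a warning like the one above and falls back to CPU.
    sess = ort.InferenceSession("matmul.onnx",
                                providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
    print("providers actually used:", sess.get_providers())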